gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
7 any later version.
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
14 You should have received a copy of the GNU General Public License
15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
18 #include "config.h"
19 #include "system.h"
20 #include "coretypes.h"
21 #include "tm.h"
22 #include "rtl.h"
23 #include "regs.h"
24 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "diagnostic-core.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "machmode.h"
51 #include "gimple.h"
52 #include "tm-constrs.h"
53 #include "ddg.h"
54 #include "sbitmap.h"
55 #include "timevar.h"
56 #include "df.h"
58 /* Builtin types, data and prototypes. */
60 enum spu_builtin_type_index
62 SPU_BTI_END_OF_PARAMS,
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
110 SPU_BTI_MAX
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126 struct spu_builtin_range
128 int low, high;
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
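/* For example, the SPU_BTI_S10 entry above limits an operand to the
   10-bit signed range -512..511 (-0x200..0x1ff), matching the SPU's
   10-bit signed immediate fields, while SPU_BTI_16 spans
   -0x8000..0xffff so that either a signed or an unsigned 16-bit
   interpretation of the operand is accepted.  */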
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150 /* Prototypes and external defs. */
151 static void spu_option_override (void);
152 static void spu_option_default_params (void);
153 static void spu_init_builtins (void);
154 static tree spu_builtin_decl (unsigned, bool);
155 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156 static bool spu_vector_mode_supported_p (enum machine_mode mode);
157 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
158 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
160 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161 static rtx get_pic_reg (void);
162 static int need_to_save_reg (int regno, int saving);
163 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167 static void emit_nop_for_insn (rtx insn);
168 static bool insn_clobbers_hbr (rtx insn);
169 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
170 int distance, sbitmap blocks);
171 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
173 static rtx get_branch_target (rtx branch);
174 static void spu_machine_dependent_reorg (void);
175 static int spu_sched_issue_rate (void);
176 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178 static int get_pipe (rtx insn);
179 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
180 static void spu_sched_init_global (FILE *, int, int);
181 static void spu_sched_init (FILE *, int, int);
182 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
183 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
185 bool *no_add_attrs);
186 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
188 bool *no_add_attrs);
189 static int spu_naked_function_p (tree func);
190 static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
192 static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
196 static tree spu_build_builtin_va_list (void);
197 static void spu_va_start (tree, rtx);
198 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
200 static int store_with_one_insn_p (rtx mem);
201 static int mem_is_padded_component_ref (rtx x);
202 static int reg_aligned_for_addr (rtx x);
203 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204 static void spu_asm_globalize_label (FILE * file, const char *name);
205 static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
208 static void spu_init_libfuncs (void);
209 static bool spu_return_in_memory (const_tree type, const_tree fntype);
210 static void fix_range (const char *);
211 static void spu_encode_section_info (tree, rtx, int);
212 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
213 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
215 static tree spu_builtin_mul_widen_even (tree);
216 static tree spu_builtin_mul_widen_odd (tree);
217 static tree spu_builtin_mask_for_load (void);
218 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
219 static bool spu_vector_alignment_reachable (const_tree, bool);
220 static tree spu_builtin_vec_perm (tree, tree *);
221 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224 static rtx spu_addr_space_convert (rtx, tree, tree);
225 static int spu_sms_res_mii (struct ddg *g);
226 static void asm_file_start (void);
227 static unsigned int spu_section_type_flags (tree, const char *, int);
228 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229 static void spu_unique_section (tree, int);
230 static rtx spu_expand_load (rtx, rtx, rtx, int);
231 static void spu_trampoline_init (rtx, tree, rtx);
232 static void spu_conditional_register_usage (void);
233 static bool spu_ref_may_alias_errno (ao_ref *);
234 static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
235 HOST_WIDE_INT, tree);
237 /* Which instruction set architecture to use. */
238 int spu_arch;
239 /* Which cpu are we tuning for. */
240 int spu_tune;
242 /* The hardware requires 8 insns between a hint and the branch it
  243    affects.  This variable describes how many rtl instructions the
244 compiler needs to see before inserting a hint, and then the compiler
245 will insert enough nops to make it at least 8 insns. The default is
  246    for the compiler to allow up to 2 nops to be emitted.  The nops are
247 inserted in pairs, so we round down. */
248 int spu_hint_dist = (8*4) - (2*4);
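/* With the default of two nops this works out to 8*4 - 2*4 = 24 bytes:
   a hint must already be at least 24 bytes (6 insns) ahead of its
   branch, and up to 2 nops may then be inserted to stretch the
   distance to the required 8 insns (32 bytes).  */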
250 enum spu_immediate {
251 SPU_NONE,
252 SPU_IL,
253 SPU_ILA,
254 SPU_ILH,
255 SPU_ILHU,
256 SPU_ORI,
257 SPU_ORHI,
258 SPU_ORBI,
259 SPU_IOHL
261 enum immediate_class
263 IC_POOL, /* constant pool */
264 IC_IL1, /* one il* instruction */
265 IC_IL2, /* both ilhu and iohl instructions */
266 IC_IL1s, /* one il* instruction */
267 IC_IL2s, /* both ilhu and iohl instructions */
268 IC_FSMBI, /* the fsmbi instruction */
269 IC_CPAT, /* one of the c*d instructions */
270 IC_FSMBI2 /* fsmbi plus 1 other instruction */
273 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
274 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
275 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
276 static enum immediate_class classify_immediate (rtx op,
277 enum machine_mode mode);
279 static enum machine_mode spu_unwind_word_mode (void);
281 static enum machine_mode
282 spu_libgcc_cmp_return_mode (void);
284 static enum machine_mode
285 spu_libgcc_shift_count_mode (void);
287 /* Pointer mode for __ea references. */
288 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
291 /* Table of machine attributes. */
292 static const struct attribute_spec spu_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
297 false },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
299 false },
300 { NULL, 0, 0, false, false, false, NULL, false }
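/* Illustration of how these entries are used: a declaration such as
     void f (void) __attribute__ ((naked));
   is routed to spu_handle_fndecl_attribute, which requires a
   FUNCTION_DECL, while "spu_vector" is validated against types
   (type_req) by spu_handle_vector_attribute.  */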
303 /* TARGET overrides. */
305 #undef TARGET_ADDR_SPACE_POINTER_MODE
306 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
308 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
309 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
311 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
312 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
313 spu_addr_space_legitimate_address_p
315 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
316 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
318 #undef TARGET_ADDR_SPACE_SUBSET_P
319 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
321 #undef TARGET_ADDR_SPACE_CONVERT
322 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
324 #undef TARGET_INIT_BUILTINS
325 #define TARGET_INIT_BUILTINS spu_init_builtins
326 #undef TARGET_BUILTIN_DECL
327 #define TARGET_BUILTIN_DECL spu_builtin_decl
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
332 #undef TARGET_UNWIND_WORD_MODE
333 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
335 #undef TARGET_LEGITIMIZE_ADDRESS
336 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
338 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
339 and .quad for the debugger. When it is known that the assembler is fixed,
340 these can be removed. */
341 #undef TARGET_ASM_UNALIGNED_SI_OP
342 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
347 /* The .8byte directive doesn't seem to work well for a 32 bit
348 architecture. */
349 #undef TARGET_ASM_UNALIGNED_DI_OP
350 #define TARGET_ASM_UNALIGNED_DI_OP NULL
352 #undef TARGET_RTX_COSTS
353 #define TARGET_RTX_COSTS spu_rtx_costs
355 #undef TARGET_ADDRESS_COST
356 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
358 #undef TARGET_SCHED_ISSUE_RATE
359 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
361 #undef TARGET_SCHED_INIT_GLOBAL
362 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
364 #undef TARGET_SCHED_INIT
365 #define TARGET_SCHED_INIT spu_sched_init
367 #undef TARGET_SCHED_VARIABLE_ISSUE
368 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
370 #undef TARGET_SCHED_REORDER
371 #define TARGET_SCHED_REORDER spu_sched_reorder
373 #undef TARGET_SCHED_REORDER2
374 #define TARGET_SCHED_REORDER2 spu_sched_reorder
376 #undef TARGET_SCHED_ADJUST_COST
377 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER spu_assemble_integer
385 #undef TARGET_SCALAR_MODE_SUPPORTED_P
386 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
389 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
394 #undef TARGET_ASM_GLOBALIZE_LABEL
395 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
397 #undef TARGET_PASS_BY_REFERENCE
398 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
400 #undef TARGET_FUNCTION_ARG
401 #define TARGET_FUNCTION_ARG spu_function_arg
403 #undef TARGET_FUNCTION_ARG_ADVANCE
404 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
406 #undef TARGET_MUST_PASS_IN_STACK
407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
409 #undef TARGET_BUILD_BUILTIN_VA_LIST
410 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
412 #undef TARGET_EXPAND_BUILTIN_VA_START
413 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
415 #undef TARGET_SETUP_INCOMING_VARARGS
416 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
418 #undef TARGET_MACHINE_DEPENDENT_REORG
419 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
421 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
422 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
424 #undef TARGET_INIT_LIBFUNCS
425 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
427 #undef TARGET_RETURN_IN_MEMORY
428 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
430 #undef TARGET_ENCODE_SECTION_INFO
431 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
433 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
434 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
436 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
437 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
439 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
440 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
442 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
443 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
445 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
446 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
448 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
449 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
451 #undef TARGET_LIBGCC_CMP_RETURN_MODE
452 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
454 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
457 #undef TARGET_SCHED_SMS_RES_MII
458 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
460 #undef TARGET_ASM_FILE_START
461 #define TARGET_ASM_FILE_START asm_file_start
463 #undef TARGET_SECTION_TYPE_FLAGS
464 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
466 #undef TARGET_ASM_SELECT_SECTION
467 #define TARGET_ASM_SELECT_SECTION spu_select_section
469 #undef TARGET_ASM_UNIQUE_SECTION
470 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
472 #undef TARGET_LEGITIMATE_ADDRESS_P
473 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
475 #undef TARGET_LEGITIMATE_CONSTANT_P
476 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
478 #undef TARGET_TRAMPOLINE_INIT
479 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
481 #undef TARGET_OPTION_OVERRIDE
482 #define TARGET_OPTION_OVERRIDE spu_option_override
484 #undef TARGET_OPTION_DEFAULT_PARAMS
485 #define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
487 #undef TARGET_CONDITIONAL_REGISTER_USAGE
488 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
490 #undef TARGET_REF_MAY_ALIAS_ERRNO
491 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
498 /* Variable tracking should be run after all optimizations which
499 change order of insns. It also needs a valid CFG. */
500 #undef TARGET_DELAY_VARTRACK
501 #define TARGET_DELAY_VARTRACK true
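/* Each #undef/#define pair above overrides one field of the targetm
   structure that TARGET_INITIALIZER builds below; e.g. defining
   TARGET_RTX_COSTS as spu_rtx_costs makes targetm.rtx_costs point at
   that function.  */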
503 struct gcc_target targetm = TARGET_INITIALIZER;
505 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
506 static void
507 spu_option_default_params (void)
  509   /* Override some of the default param values.  With so many registers,
510 larger values are better for these params. */
511 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
514 /* Implement TARGET_OPTION_OVERRIDE. */
515 static void
516 spu_option_override (void)
  518   /* Small loops will be unrolled at -O3.  For SPU it is more important
519 to keep code small by default. */
520 if (!flag_unroll_loops && !flag_peel_loops)
521 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
522 global_options.x_param_values,
523 global_options_set.x_param_values);
525 flag_omit_frame_pointer = 1;
527 /* Functions must be 8 byte aligned so we correctly handle dual issue */
528 if (align_functions < 8)
529 align_functions = 8;
531 spu_hint_dist = 8*4 - spu_max_nops*4;
532 if (spu_hint_dist < 0)
533 spu_hint_dist = 0;
535 if (spu_fixed_range_string)
536 fix_range (spu_fixed_range_string);
538 /* Determine processor architectural level. */
539 if (spu_arch_string)
541 if (strcmp (&spu_arch_string[0], "cell") == 0)
542 spu_arch = PROCESSOR_CELL;
543 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
544 spu_arch = PROCESSOR_CELLEDP;
545 else
546 error ("bad value (%s) for -march= switch", spu_arch_string);
549 /* Determine processor to tune for. */
550 if (spu_tune_string)
552 if (strcmp (&spu_tune_string[0], "cell") == 0)
553 spu_tune = PROCESSOR_CELL;
554 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
555 spu_tune = PROCESSOR_CELLEDP;
556 else
557 error ("bad value (%s) for -mtune= switch", spu_tune_string);
560 /* Change defaults according to the processor architecture. */
561 if (spu_arch == PROCESSOR_CELLEDP)
563 /* If no command line option has been otherwise specified, change
564 the default to -mno-safe-hints on celledp -- only the original
565 Cell/B.E. processors require this workaround. */
566 if (!(target_flags_explicit & MASK_SAFE_HINTS))
567 target_flags &= ~MASK_SAFE_HINTS;
570 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
573 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
574 struct attribute_spec.handler. */
576 /* True if MODE is valid for the target. By "valid", we mean able to
577 be manipulated in non-trivial ways. In particular, this means all
578 the arithmetic is supported. */
579 static bool
580 spu_scalar_mode_supported_p (enum machine_mode mode)
582 switch (mode)
584 case QImode:
585 case HImode:
586 case SImode:
587 case SFmode:
588 case DImode:
589 case TImode:
590 case DFmode:
591 return true;
593 default:
594 return false;
598 /* Similarly for vector modes. "Supported" here is less strict. At
599 least some operations are supported; need to check optabs or builtins
600 for further details. */
601 static bool
602 spu_vector_mode_supported_p (enum machine_mode mode)
604 switch (mode)
606 case V16QImode:
607 case V8HImode:
608 case V4SImode:
609 case V2DImode:
610 case V4SFmode:
611 case V2DFmode:
612 return true;
614 default:
615 return false;
619 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
620 least significant bytes of the outer mode. This function returns
  621    TRUE for the SUBREGs where this is correct.  */
  622 int
  623 valid_subreg (rtx op)
625 enum machine_mode om = GET_MODE (op);
626 enum machine_mode im = GET_MODE (SUBREG_REG (op));
627 return om != VOIDmode && im != VOIDmode
628 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
629 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
630 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
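/* For example, (subreg:SI (reg:HI ...) 0) is valid because both modes
   fit in 4 bytes, and (subreg:TI (reg:V16QI ...) 0) is valid because
   both are 16 bytes wide, but (subreg:DI (reg:SI ...) 0) is rejected
   since neither condition holds.  */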
  633 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
634 and adjust the start offset. */
635 static rtx
636 adjust_operand (rtx op, HOST_WIDE_INT * start)
638 enum machine_mode mode;
639 int op_size;
640 /* Strip any paradoxical SUBREG. */
641 if (GET_CODE (op) == SUBREG
642 && (GET_MODE_BITSIZE (GET_MODE (op))
643 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
645 if (start)
646 *start -=
647 GET_MODE_BITSIZE (GET_MODE (op)) -
648 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
649 op = SUBREG_REG (op);
651 /* If it is smaller than SI, assure a SUBREG */
652 op_size = GET_MODE_BITSIZE (GET_MODE (op));
653 if (op_size < 32)
655 if (start)
656 *start += 32 - op_size;
657 op_size = 32;
659 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
660 mode = mode_for_size (op_size, MODE_INT, 0);
661 if (mode != GET_MODE (op))
662 op = gen_rtx_SUBREG (mode, op, 0);
663 return op;
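/* For example, stripping a paradoxical (subreg:TI (reg:SI ...))
   subtracts 128 - 32 = 96 from *START, and widening a QImode operand
   adds 32 - 8 = 24, so the returned position is always measured from
   the MSB of a MODE_INT value that is at least SImode wide.  */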
666 void
667 spu_expand_extv (rtx ops[], int unsignedp)
669 rtx dst = ops[0], src = ops[1];
670 HOST_WIDE_INT width = INTVAL (ops[2]);
671 HOST_WIDE_INT start = INTVAL (ops[3]);
672 HOST_WIDE_INT align_mask;
673 rtx s0, s1, mask, r0;
675 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
677 if (MEM_P (src))
679 /* First, determine if we need 1 TImode load or 2. We need only 1
680 if the bits being extracted do not cross the alignment boundary
681 as determined by the MEM and its address. */
683 align_mask = -MEM_ALIGN (src);
684 if ((start & align_mask) == ((start + width - 1) & align_mask))
686 /* Alignment is sufficient for 1 load. */
687 s0 = gen_reg_rtx (TImode);
688 r0 = spu_expand_load (s0, 0, src, start / 8);
689 start &= 7;
690 if (r0)
691 emit_insn (gen_rotqby_ti (s0, s0, r0));
693 else
695 /* Need 2 loads. */
696 s0 = gen_reg_rtx (TImode);
697 s1 = gen_reg_rtx (TImode);
698 r0 = spu_expand_load (s0, s1, src, start / 8);
699 start &= 7;
701 gcc_assert (start + width <= 128);
702 if (r0)
704 rtx r1 = gen_reg_rtx (SImode);
705 mask = gen_reg_rtx (TImode);
706 emit_move_insn (mask, GEN_INT (-1));
707 emit_insn (gen_rotqby_ti (s0, s0, r0));
708 emit_insn (gen_rotqby_ti (s1, s1, r0));
709 if (GET_CODE (r0) == CONST_INT)
710 r1 = GEN_INT (INTVAL (r0) & 15);
711 else
712 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
713 emit_insn (gen_shlqby_ti (mask, mask, r1));
714 emit_insn (gen_selb (s0, s1, s0, mask));
719 else if (GET_CODE (src) == SUBREG)
721 rtx r = SUBREG_REG (src);
722 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
723 s0 = gen_reg_rtx (TImode);
724 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
725 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
726 else
727 emit_move_insn (s0, src);
729 else
731 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
732 s0 = gen_reg_rtx (TImode);
733 emit_move_insn (s0, src);
736 /* Now s0 is TImode and contains the bits to extract at start. */
738 if (start)
739 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
741 if (128 - width)
742 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
744 emit_move_insn (dst, s0);
747 void
748 spu_expand_insv (rtx ops[])
750 HOST_WIDE_INT width = INTVAL (ops[1]);
751 HOST_WIDE_INT start = INTVAL (ops[2]);
752 HOST_WIDE_INT maskbits;
753 enum machine_mode dst_mode;
754 rtx dst = ops[0], src = ops[3];
755 int dst_size;
756 rtx mask;
757 rtx shift_reg;
758 int shift;
761 if (GET_CODE (ops[0]) == MEM)
762 dst = gen_reg_rtx (TImode);
763 else
764 dst = adjust_operand (dst, &start);
765 dst_mode = GET_MODE (dst);
766 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
768 if (CONSTANT_P (src))
770 enum machine_mode m =
771 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
772 src = force_reg (m, convert_to_mode (m, src, 0));
774 src = adjust_operand (src, 0);
776 mask = gen_reg_rtx (dst_mode);
777 shift_reg = gen_reg_rtx (dst_mode);
778 shift = dst_size - start - width;
780 /* It's not safe to use subreg here because the compiler assumes
781 that the SUBREG_REG is right justified in the SUBREG. */
782 convert_move (shift_reg, src, 1);
784 if (shift > 0)
786 switch (dst_mode)
788 case SImode:
789 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
790 break;
791 case DImode:
792 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
793 break;
794 case TImode:
795 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
796 break;
797 default:
798 abort ();
801 else if (shift < 0)
802 abort ();
804 switch (dst_size)
806 case 32:
807 maskbits = (-1ll << (32 - width - start));
808 if (start)
809 maskbits += (1ll << (32 - start));
810 emit_move_insn (mask, GEN_INT (maskbits));
811 break;
812 case 64:
813 maskbits = (-1ll << (64 - width - start));
814 if (start)
815 maskbits += (1ll << (64 - start));
816 emit_move_insn (mask, GEN_INT (maskbits));
817 break;
818 case 128:
820 unsigned char arr[16];
821 int i = start / 8;
822 memset (arr, 0, sizeof (arr));
823 arr[i] = 0xff >> (start & 7);
824 for (i++; i <= (start + width - 1) / 8; i++)
825 arr[i] = 0xff;
826 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
827 emit_move_insn (mask, array_to_constant (TImode, arr));
829 break;
830 default:
831 abort ();
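/* Worked example for the SImode case above: inserting a WIDTH of 8 at
   START 4 gives maskbits = (-1 << 20) + (1 << 28) = 0x0ff00000, i.e.
   ones over the 8 bits that sit 4 bits below the MSB of the
   destination.  */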
833 if (GET_CODE (ops[0]) == MEM)
835 rtx low = gen_reg_rtx (SImode);
836 rtx rotl = gen_reg_rtx (SImode);
837 rtx mask0 = gen_reg_rtx (TImode);
838 rtx addr;
839 rtx addr0;
840 rtx addr1;
841 rtx mem;
843 addr = force_reg (Pmode, XEXP (ops[0], 0));
844 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
845 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
846 emit_insn (gen_negsi2 (rotl, low));
847 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
848 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
849 mem = change_address (ops[0], TImode, addr0);
850 set_mem_alias_set (mem, 0);
851 emit_move_insn (dst, mem);
852 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
853 if (start + width > MEM_ALIGN (ops[0]))
855 rtx shl = gen_reg_rtx (SImode);
856 rtx mask1 = gen_reg_rtx (TImode);
857 rtx dst1 = gen_reg_rtx (TImode);
858 rtx mem1;
859 addr1 = plus_constant (addr, 16);
860 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
861 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
862 emit_insn (gen_shlqby_ti (mask1, mask, shl));
863 mem1 = change_address (ops[0], TImode, addr1);
864 set_mem_alias_set (mem1, 0);
865 emit_move_insn (dst1, mem1);
866 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
867 emit_move_insn (mem1, dst1);
869 emit_move_insn (mem, dst);
871 else
872 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
  876 int
  877 spu_expand_block_move (rtx ops[])
879 HOST_WIDE_INT bytes, align, offset;
880 rtx src, dst, sreg, dreg, target;
881 int i;
882 if (GET_CODE (ops[2]) != CONST_INT
883 || GET_CODE (ops[3]) != CONST_INT
884 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
885 return 0;
887 bytes = INTVAL (ops[2]);
888 align = INTVAL (ops[3]);
890 if (bytes <= 0)
891 return 1;
893 dst = ops[0];
894 src = ops[1];
896 if (align == 16)
898 for (offset = 0; offset + 16 <= bytes; offset += 16)
900 dst = adjust_address (ops[0], V16QImode, offset);
901 src = adjust_address (ops[1], V16QImode, offset);
902 emit_move_insn (dst, src);
904 if (offset < bytes)
906 rtx mask;
907 unsigned char arr[16] = { 0 };
908 for (i = 0; i < bytes - offset; i++)
909 arr[i] = 0xff;
910 dst = adjust_address (ops[0], V16QImode, offset);
911 src = adjust_address (ops[1], V16QImode, offset);
912 mask = gen_reg_rtx (V16QImode);
913 sreg = gen_reg_rtx (V16QImode);
914 dreg = gen_reg_rtx (V16QImode);
915 target = gen_reg_rtx (V16QImode);
916 emit_move_insn (mask, array_to_constant (V16QImode, arr));
917 emit_move_insn (dreg, dst);
918 emit_move_insn (sreg, src);
919 emit_insn (gen_selb (target, dreg, sreg, mask));
920 emit_move_insn (dst, target);
922 return 1;
924 return 0;
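/* For example, a 16-byte-aligned copy of 20 bytes becomes one
   V16QImode move for the first quadword plus a selb that merges only
   the remaining 4 bytes into the destination quadword; non-constant,
   large or insufficiently aligned copies return 0 so the caller falls
   back to the generic block-move expansion.  */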
927 enum spu_comp_code
928 { SPU_EQ, SPU_GT, SPU_GTU };
930 int spu_comp_icode[12][3] = {
931 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
932 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
933 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
934 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
935 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
936 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
937 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
938 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
939 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
940 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
941 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
942 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
945 /* Generate a compare for CODE. Return a brand-new rtx that represents
946 the result of the compare. GCC can figure this out too if we don't
947 provide all variations of compares, but GCC always wants to use
  948    WORD_MODE, so we can generate better code in most cases if we do it
949 ourselves. */
950 void
951 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
953 int reverse_compare = 0;
954 int reverse_test = 0;
955 rtx compare_result, eq_result;
956 rtx comp_rtx, eq_rtx;
957 enum machine_mode comp_mode;
958 enum machine_mode op_mode;
959 enum spu_comp_code scode, eq_code;
960 enum insn_code ior_code;
961 enum rtx_code code = GET_CODE (cmp);
962 rtx op0 = XEXP (cmp, 0);
963 rtx op1 = XEXP (cmp, 1);
964 int index;
965 int eq_test = 0;
967 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
968 and so on, to keep the constant in operand 1. */
969 if (GET_CODE (op1) == CONST_INT)
971 HOST_WIDE_INT val = INTVAL (op1) - 1;
972 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
973 switch (code)
975 case GE:
976 op1 = GEN_INT (val);
977 code = GT;
978 break;
979 case LT:
980 op1 = GEN_INT (val);
981 code = LE;
982 break;
983 case GEU:
984 op1 = GEN_INT (val);
985 code = GTU;
986 break;
987 case LTU:
988 op1 = GEN_INT (val);
989 code = LEU;
990 break;
991 default:
992 break;
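/* For example, (X >= 16) becomes (X > 15) and unsigned (X < 16)
   becomes (X <= 15), provided 15 is representable in the mode of X.  */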
996 comp_mode = SImode;
997 op_mode = GET_MODE (op0);
999 switch (code)
1001 case GE:
1002 scode = SPU_GT;
1003 if (HONOR_NANS (op_mode))
1005 reverse_compare = 0;
1006 reverse_test = 0;
1007 eq_test = 1;
1008 eq_code = SPU_EQ;
1010 else
1012 reverse_compare = 1;
1013 reverse_test = 1;
1015 break;
1016 case LE:
1017 scode = SPU_GT;
1018 if (HONOR_NANS (op_mode))
1020 reverse_compare = 1;
1021 reverse_test = 0;
1022 eq_test = 1;
1023 eq_code = SPU_EQ;
1025 else
1027 reverse_compare = 0;
1028 reverse_test = 1;
1030 break;
1031 case LT:
1032 reverse_compare = 1;
1033 reverse_test = 0;
1034 scode = SPU_GT;
1035 break;
1036 case GEU:
1037 reverse_compare = 1;
1038 reverse_test = 1;
1039 scode = SPU_GTU;
1040 break;
1041 case LEU:
1042 reverse_compare = 0;
1043 reverse_test = 1;
1044 scode = SPU_GTU;
1045 break;
1046 case LTU:
1047 reverse_compare = 1;
1048 reverse_test = 0;
1049 scode = SPU_GTU;
1050 break;
1051 case NE:
1052 reverse_compare = 0;
1053 reverse_test = 1;
1054 scode = SPU_EQ;
1055 break;
1057 case EQ:
1058 scode = SPU_EQ;
1059 break;
1060 case GT:
1061 scode = SPU_GT;
1062 break;
1063 case GTU:
1064 scode = SPU_GTU;
1065 break;
1066 default:
1067 scode = SPU_EQ;
1068 break;
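/* For example, a DFmode (le:DF a b) with NaNs honored is expanded as
   (gt b a) IORed with (eq b a): reverse_compare swaps the operands and
   eq_test adds the equality term.  Without NaNs it is simply (gt a b)
   with the final test inverted via reverse_test.  */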
1071 switch (op_mode)
1073 case QImode:
1074 index = 0;
1075 comp_mode = QImode;
1076 break;
1077 case HImode:
1078 index = 1;
1079 comp_mode = HImode;
1080 break;
1081 case SImode:
1082 index = 2;
1083 break;
1084 case DImode:
1085 index = 3;
1086 break;
1087 case TImode:
1088 index = 4;
1089 break;
1090 case SFmode:
1091 index = 5;
1092 break;
1093 case DFmode:
1094 index = 6;
1095 break;
1096 case V16QImode:
1097 index = 7;
1098 comp_mode = op_mode;
1099 break;
1100 case V8HImode:
1101 index = 8;
1102 comp_mode = op_mode;
1103 break;
1104 case V4SImode:
1105 index = 9;
1106 comp_mode = op_mode;
1107 break;
1108 case V4SFmode:
1109 index = 10;
1110 comp_mode = V4SImode;
1111 break;
1112 case V2DFmode:
1113 index = 11;
1114 comp_mode = V2DImode;
1115 break;
1116 case V2DImode:
1117 default:
1118 abort ();
1121 if (GET_MODE (op1) == DFmode
1122 && (scode != SPU_GT && scode != SPU_EQ))
1123 abort ();
1125 if (is_set == 0 && op1 == const0_rtx
1126 && (GET_MODE (op0) == SImode
1127 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1129 /* Don't need to set a register with the result when we are
1130 comparing against zero and branching. */
1131 reverse_test = !reverse_test;
1132 compare_result = op0;
1134 else
1136 compare_result = gen_reg_rtx (comp_mode);
1138 if (reverse_compare)
1140 rtx t = op1;
1141 op1 = op0;
1142 op0 = t;
1145 if (spu_comp_icode[index][scode] == 0)
1146 abort ();
1148 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1149 (op0, op_mode))
1150 op0 = force_reg (op_mode, op0);
1151 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1152 (op1, op_mode))
1153 op1 = force_reg (op_mode, op1);
1154 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1155 op0, op1);
1156 if (comp_rtx == 0)
1157 abort ();
1158 emit_insn (comp_rtx);
1160 if (eq_test)
1162 eq_result = gen_reg_rtx (comp_mode);
1163 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1164 op0, op1);
1165 if (eq_rtx == 0)
1166 abort ();
1167 emit_insn (eq_rtx);
1168 ior_code = optab_handler (ior_optab, comp_mode);
1169 gcc_assert (ior_code != CODE_FOR_nothing);
1170 emit_insn (GEN_FCN (ior_code)
1171 (compare_result, compare_result, eq_result));
1175 if (is_set == 0)
1177 rtx bcomp;
1178 rtx loc_ref;
1180 /* We don't have branch on QI compare insns, so we convert the
1181 QI compare result to a HI result. */
1182 if (comp_mode == QImode)
1184 rtx old_res = compare_result;
1185 compare_result = gen_reg_rtx (HImode);
1186 comp_mode = HImode;
1187 emit_insn (gen_extendqihi2 (compare_result, old_res));
1190 if (reverse_test)
1191 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1192 else
1193 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1195 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1196 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1197 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1198 loc_ref, pc_rtx)));
1200 else if (is_set == 2)
1202 rtx target = operands[0];
1203 int compare_size = GET_MODE_BITSIZE (comp_mode);
1204 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1205 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1206 rtx select_mask;
1207 rtx op_t = operands[2];
1208 rtx op_f = operands[3];
1210 /* The result of the comparison can be SI, HI or QI mode. Create a
1211 mask based on that result. */
1212 if (target_size > compare_size)
1214 select_mask = gen_reg_rtx (mode);
1215 emit_insn (gen_extend_compare (select_mask, compare_result));
1217 else if (target_size < compare_size)
1218 select_mask =
1219 gen_rtx_SUBREG (mode, compare_result,
1220 (compare_size - target_size) / BITS_PER_UNIT);
1221 else if (comp_mode != mode)
1222 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1223 else
1224 select_mask = compare_result;
1226 if (GET_MODE (target) != GET_MODE (op_t)
1227 || GET_MODE (target) != GET_MODE (op_f))
1228 abort ();
1230 if (reverse_test)
1231 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1232 else
1233 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1235 else
1237 rtx target = operands[0];
1238 if (reverse_test)
1239 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1240 gen_rtx_NOT (comp_mode, compare_result)));
1241 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1242 emit_insn (gen_extendhisi2 (target, compare_result));
1243 else if (GET_MODE (target) == SImode
1244 && GET_MODE (compare_result) == QImode)
1245 emit_insn (gen_extend_compare (target, compare_result));
1246 else
1247 emit_move_insn (target, compare_result);
1251 HOST_WIDE_INT
1252 const_double_to_hwint (rtx x)
1254 HOST_WIDE_INT val;
1255 REAL_VALUE_TYPE rv;
1256 if (GET_MODE (x) == SFmode)
1258 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1259 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1261 else if (GET_MODE (x) == DFmode)
1263 long l[2];
1264 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1265 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1266 val = l[0];
1267 val = (val << 32) | (l[1] & 0xffffffff);
1269 else
1270 abort ();
1271 return val;
 1274 rtx
 1275 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1277 long tv[2];
1278 REAL_VALUE_TYPE rv;
1279 gcc_assert (mode == SFmode || mode == DFmode);
1281 if (mode == SFmode)
1282 tv[0] = (v << 32) >> 32;
1283 else if (mode == DFmode)
1285 tv[1] = (v << 32) >> 32;
1286 tv[0] = v >> 32;
1288 real_from_target (&rv, tv, mode);
1289 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
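/* Round-trip example: const_double_to_hwint on the SFmode constant 1.0
   returns 0x3f800000, and hwint_to_const_double (SFmode, 0x3f800000)
   rebuilds the same CONST_DOUBLE; for DFmode the two 32-bit target
   words are packed into and unpacked from one 64-bit HOST_WIDE_INT.  */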
1292 void
1293 print_operand_address (FILE * file, register rtx addr)
1295 rtx reg;
1296 rtx offset;
1298 if (GET_CODE (addr) == AND
1299 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1300 && INTVAL (XEXP (addr, 1)) == -16)
1301 addr = XEXP (addr, 0);
1303 switch (GET_CODE (addr))
1305 case REG:
1306 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1307 break;
1309 case PLUS:
1310 reg = XEXP (addr, 0);
1311 offset = XEXP (addr, 1);
1312 if (GET_CODE (offset) == REG)
1314 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1315 reg_names[REGNO (offset)]);
1317 else if (GET_CODE (offset) == CONST_INT)
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1320 INTVAL (offset), reg_names[REGNO (reg)]);
1322 else
1323 abort ();
1324 break;
1326 case CONST:
1327 case LABEL_REF:
1328 case SYMBOL_REF:
1329 case CONST_INT:
1330 output_addr_const (file, addr);
1331 break;
1333 default:
1334 debug_rtx (addr);
1335 abort ();
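/* So a plain register address prints as "0(<reg>)", register plus
   constant as "<offset>(<reg>)", register plus register as the two
   register names separated by a comma, and an outer
   (and ... (const_int -16)) alignment mask is stripped before
   printing.  */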
1339 void
1340 print_operand (FILE * file, rtx x, int code)
1342 enum machine_mode mode = GET_MODE (x);
1343 HOST_WIDE_INT val;
1344 unsigned char arr[16];
1345 int xcode = GET_CODE (x);
1346 int i, info;
1347 if (GET_MODE (x) == VOIDmode)
1348 switch (code)
1350 case 'L': /* 128 bits, signed */
1351 case 'm': /* 128 bits, signed */
1352 case 'T': /* 128 bits, signed */
1353 case 't': /* 128 bits, signed */
1354 mode = TImode;
1355 break;
1356 case 'K': /* 64 bits, signed */
1357 case 'k': /* 64 bits, signed */
1358 case 'D': /* 64 bits, signed */
1359 case 'd': /* 64 bits, signed */
1360 mode = DImode;
1361 break;
1362 case 'J': /* 32 bits, signed */
1363 case 'j': /* 32 bits, signed */
1364 case 's': /* 32 bits, signed */
1365 case 'S': /* 32 bits, signed */
1366 mode = SImode;
1367 break;
1369 switch (code)
1372 case 'j': /* 32 bits, signed */
1373 case 'k': /* 64 bits, signed */
1374 case 'm': /* 128 bits, signed */
1375 if (xcode == CONST_INT
1376 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1378 gcc_assert (logical_immediate_p (x, mode));
1379 constant_to_array (mode, x, arr);
1380 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1381 val = trunc_int_for_mode (val, SImode);
1382 switch (which_logical_immediate (val))
1384 case SPU_ORI:
1385 break;
1386 case SPU_ORHI:
1387 fprintf (file, "h");
1388 break;
1389 case SPU_ORBI:
1390 fprintf (file, "b");
1391 break;
1392 default:
1393 gcc_unreachable();
1396 else
1397 gcc_unreachable();
1398 return;
1400 case 'J': /* 32 bits, signed */
1401 case 'K': /* 64 bits, signed */
1402 case 'L': /* 128 bits, signed */
1403 if (xcode == CONST_INT
1404 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1406 gcc_assert (logical_immediate_p (x, mode)
1407 || iohl_immediate_p (x, mode));
1408 constant_to_array (mode, x, arr);
1409 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1410 val = trunc_int_for_mode (val, SImode);
1411 switch (which_logical_immediate (val))
1413 case SPU_ORI:
1414 case SPU_IOHL:
1415 break;
1416 case SPU_ORHI:
1417 val = trunc_int_for_mode (val, HImode);
1418 break;
1419 case SPU_ORBI:
1420 val = trunc_int_for_mode (val, QImode);
1421 break;
1422 default:
1423 gcc_unreachable();
1425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1427 else
1428 gcc_unreachable();
1429 return;
1431 case 't': /* 128 bits, signed */
1432 case 'd': /* 64 bits, signed */
1433 case 's': /* 32 bits, signed */
1434 if (CONSTANT_P (x))
1436 enum immediate_class c = classify_immediate (x, mode);
1437 switch (c)
1439 case IC_IL1:
1440 constant_to_array (mode, x, arr);
1441 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1442 val = trunc_int_for_mode (val, SImode);
1443 switch (which_immediate_load (val))
1445 case SPU_IL:
1446 break;
1447 case SPU_ILA:
1448 fprintf (file, "a");
1449 break;
1450 case SPU_ILH:
1451 fprintf (file, "h");
1452 break;
1453 case SPU_ILHU:
1454 fprintf (file, "hu");
1455 break;
1456 default:
1457 gcc_unreachable ();
1459 break;
1460 case IC_CPAT:
1461 constant_to_array (mode, x, arr);
1462 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1463 if (info == 1)
1464 fprintf (file, "b");
1465 else if (info == 2)
1466 fprintf (file, "h");
1467 else if (info == 4)
1468 fprintf (file, "w");
1469 else if (info == 8)
1470 fprintf (file, "d");
1471 break;
1472 case IC_IL1s:
1473 if (xcode == CONST_VECTOR)
1475 x = CONST_VECTOR_ELT (x, 0);
1476 xcode = GET_CODE (x);
1478 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1479 fprintf (file, "a");
1480 else if (xcode == HIGH)
1481 fprintf (file, "hu");
1482 break;
1483 case IC_FSMBI:
1484 case IC_FSMBI2:
1485 case IC_IL2:
1486 case IC_IL2s:
1487 case IC_POOL:
1488 abort ();
1491 else
1492 gcc_unreachable ();
1493 return;
1495 case 'T': /* 128 bits, signed */
1496 case 'D': /* 64 bits, signed */
1497 case 'S': /* 32 bits, signed */
1498 if (CONSTANT_P (x))
1500 enum immediate_class c = classify_immediate (x, mode);
1501 switch (c)
1503 case IC_IL1:
1504 constant_to_array (mode, x, arr);
1505 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1506 val = trunc_int_for_mode (val, SImode);
1507 switch (which_immediate_load (val))
1509 case SPU_IL:
1510 case SPU_ILA:
1511 break;
1512 case SPU_ILH:
1513 case SPU_ILHU:
1514 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1515 break;
1516 default:
1517 gcc_unreachable ();
1519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1520 break;
1521 case IC_FSMBI:
1522 constant_to_array (mode, x, arr);
1523 val = 0;
1524 for (i = 0; i < 16; i++)
1526 val <<= 1;
1527 val |= arr[i] & 1;
1529 print_operand (file, GEN_INT (val), 0);
1530 break;
1531 case IC_CPAT:
1532 constant_to_array (mode, x, arr);
1533 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1535 break;
1536 case IC_IL1s:
1537 if (xcode == HIGH)
1538 x = XEXP (x, 0);
1539 if (GET_CODE (x) == CONST_VECTOR)
1540 x = CONST_VECTOR_ELT (x, 0);
1541 output_addr_const (file, x);
1542 if (xcode == HIGH)
1543 fprintf (file, "@h");
1544 break;
1545 case IC_IL2:
1546 case IC_IL2s:
1547 case IC_FSMBI2:
1548 case IC_POOL:
1549 abort ();
1552 else
1553 gcc_unreachable ();
1554 return;
1556 case 'C':
1557 if (xcode == CONST_INT)
 1559         /* Only the 4 least significant bits are relevant for generating
1560 control word instructions. */
1561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1562 return;
1564 break;
1566 case 'M': /* print code for c*d */
1567 if (GET_CODE (x) == CONST_INT)
1568 switch (INTVAL (x))
1570 case 1:
1571 fprintf (file, "b");
1572 break;
1573 case 2:
1574 fprintf (file, "h");
1575 break;
1576 case 4:
1577 fprintf (file, "w");
1578 break;
1579 case 8:
1580 fprintf (file, "d");
1581 break;
1582 default:
1583 gcc_unreachable();
1585 else
1586 gcc_unreachable();
1587 return;
1589 case 'N': /* Negate the operand */
1590 if (xcode == CONST_INT)
1591 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1592 else if (xcode == CONST_VECTOR)
1593 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1594 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1595 return;
1597 case 'I': /* enable/disable interrupts */
1598 if (xcode == CONST_INT)
1599 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1600 return;
1602 case 'b': /* branch modifiers */
1603 if (xcode == REG)
1604 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1605 else if (COMPARISON_P (x))
1606 fprintf (file, "%s", xcode == NE ? "n" : "");
1607 return;
1609 case 'i': /* indirect call */
1610 if (xcode == MEM)
1612 if (GET_CODE (XEXP (x, 0)) == REG)
1613 /* Used in indirect function calls. */
1614 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1615 else
1616 output_address (XEXP (x, 0));
1618 return;
1620 case 'p': /* load/store */
1621 if (xcode == MEM)
1623 x = XEXP (x, 0);
1624 xcode = GET_CODE (x);
1626 if (xcode == AND)
1628 x = XEXP (x, 0);
1629 xcode = GET_CODE (x);
1631 if (xcode == REG)
1632 fprintf (file, "d");
1633 else if (xcode == CONST_INT)
1634 fprintf (file, "a");
1635 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1636 fprintf (file, "r");
1637 else if (xcode == PLUS || xcode == LO_SUM)
1639 if (GET_CODE (XEXP (x, 1)) == REG)
1640 fprintf (file, "x");
1641 else
1642 fprintf (file, "d");
1644 return;
1646 case 'e':
1647 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1648 val &= 0x7;
1649 output_addr_const (file, GEN_INT (val));
1650 return;
1652 case 'f':
1653 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654 val &= 0x1f;
1655 output_addr_const (file, GEN_INT (val));
1656 return;
1658 case 'g':
1659 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1660 val &= 0x3f;
1661 output_addr_const (file, GEN_INT (val));
1662 return;
1664 case 'h':
1665 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1666 val = (val >> 3) & 0x1f;
1667 output_addr_const (file, GEN_INT (val));
1668 return;
1670 case 'E':
1671 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1672 val = -val;
1673 val &= 0x7;
1674 output_addr_const (file, GEN_INT (val));
1675 return;
1677 case 'F':
1678 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1679 val = -val;
1680 val &= 0x1f;
1681 output_addr_const (file, GEN_INT (val));
1682 return;
1684 case 'G':
1685 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1686 val = -val;
1687 val &= 0x3f;
1688 output_addr_const (file, GEN_INT (val));
1689 return;
1691 case 'H':
1692 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1693 val = -(val & -8ll);
1694 val = (val >> 3) & 0x1f;
1695 output_addr_const (file, GEN_INT (val));
1696 return;
1698 case 'v':
1699 case 'w':
1700 constant_to_array (mode, x, arr);
1701 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1702 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1703 return;
1705 case 0:
1706 if (xcode == REG)
1707 fprintf (file, "%s", reg_names[REGNO (x)]);
1708 else if (xcode == MEM)
1709 output_address (XEXP (x, 0));
1710 else if (xcode == CONST_VECTOR)
1711 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1712 else
1713 output_addr_const (file, x);
1714 return;
1716 /* unused letters
1717 o qr u yz
1718 AB OPQR UVWXYZ */
1719 default:
1720 output_operand_lossage ("invalid %%xn code");
1722 gcc_unreachable ();
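/* Example for the 'v'/'w' codes above: they decode the leading nine
   bits of the constant as an IEEE single-precision exponent, so for a
   constant whose first element is 2.0f (0x40000000) the biased
   exponent is 128 and '%v' prints 1 while '%w' prints -1.  */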
1725 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1726 caller saved register. For leaf functions it is more efficient to
1727 use a volatile register because we won't need to save and restore the
1728 pic register. This routine is only valid after register allocation
1729 is completed, so we can pick an unused register. */
1730 static rtx
1731 get_pic_reg (void)
1733 rtx pic_reg = pic_offset_table_rtx;
1734 if (!reload_completed && !reload_in_progress)
1735 abort ();
1736 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1737 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1738 return pic_reg;
1741 /* Split constant addresses to handle cases that are too large.
1742 Add in the pic register when in PIC mode.
1743 Split immediates that require more than 1 instruction. */
 1744 int
 1745 spu_split_immediate (rtx * ops)
1747 enum machine_mode mode = GET_MODE (ops[0]);
1748 enum immediate_class c = classify_immediate (ops[1], mode);
1750 switch (c)
1752 case IC_IL2:
1754 unsigned char arrhi[16];
1755 unsigned char arrlo[16];
1756 rtx to, temp, hi, lo;
1757 int i;
1758 enum machine_mode imode = mode;
1759 /* We need to do reals as ints because the constant used in the
1760 IOR might not be a legitimate real constant. */
1761 imode = int_mode_for_mode (mode);
1762 constant_to_array (mode, ops[1], arrhi);
1763 if (imode != mode)
1764 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1765 else
1766 to = ops[0];
1767 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1768 for (i = 0; i < 16; i += 4)
1770 arrlo[i + 2] = arrhi[i + 2];
1771 arrlo[i + 3] = arrhi[i + 3];
1772 arrlo[i + 0] = arrlo[i + 1] = 0;
1773 arrhi[i + 2] = arrhi[i + 3] = 0;
1775 hi = array_to_constant (imode, arrhi);
1776 lo = array_to_constant (imode, arrlo);
1777 emit_move_insn (temp, hi);
1778 emit_insn (gen_rtx_SET
1779 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1780 return 1;
1782 case IC_FSMBI2:
1784 unsigned char arr_fsmbi[16];
1785 unsigned char arr_andbi[16];
1786 rtx to, reg_fsmbi, reg_and;
1787 int i;
1788 enum machine_mode imode = mode;
1789 /* We need to do reals as ints because the constant used in the
1790 * AND might not be a legitimate real constant. */
1791 imode = int_mode_for_mode (mode);
1792 constant_to_array (mode, ops[1], arr_fsmbi);
1793 if (imode != mode)
1794 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1795 else
1796 to = ops[0];
1797 for (i = 0; i < 16; i++)
1798 if (arr_fsmbi[i] != 0)
1800 arr_andbi[0] = arr_fsmbi[i];
1801 arr_fsmbi[i] = 0xff;
1803 for (i = 1; i < 16; i++)
1804 arr_andbi[i] = arr_andbi[0];
1805 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1806 reg_and = array_to_constant (imode, arr_andbi);
1807 emit_move_insn (to, reg_fsmbi);
1808 emit_insn (gen_rtx_SET
1809 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1810 return 1;
1812 case IC_POOL:
1813 if (reload_in_progress || reload_completed)
1815 rtx mem = force_const_mem (mode, ops[1]);
1816 if (TARGET_LARGE_MEM)
1818 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1819 emit_move_insn (addr, XEXP (mem, 0));
1820 mem = replace_equiv_address (mem, addr);
1822 emit_move_insn (ops[0], mem);
1823 return 1;
1825 break;
1826 case IC_IL1s:
1827 case IC_IL2s:
1828 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1830 if (c == IC_IL2s)
1832 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1833 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1835 else if (flag_pic)
1836 emit_insn (gen_pic (ops[0], ops[1]));
1837 if (flag_pic)
1839 rtx pic_reg = get_pic_reg ();
1840 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1841 crtl->uses_pic_offset_table = 1;
1843 return flag_pic || c == IC_IL2s;
1845 break;
1846 case IC_IL1:
1847 case IC_FSMBI:
1848 case IC_CPAT:
1849 break;
1851 return 0;
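/* For example, an SImode constant that no single il/ilh/ilhu/ila
   instruction can load (class IC_IL2) is rewritten above as an ilhu of
   the high halfwords followed by an iohl of the low halfwords, and an
   IC_FSMBI2 constant becomes an fsmbi byte mask ANDed with the
   constant's nonzero byte value replicated across the register.  */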
1854 /* SAVING is TRUE when we are generating the actual load and store
1855 instructions for REGNO. When determining the size of the stack
 1856    needed for saving registers we must allocate enough space for the
1857 worst case, because we don't always have the information early enough
 1858    to avoid allocating it.  But we can at least eliminate the actual loads
1859 and stores during the prologue/epilogue. */
1860 static int
1861 need_to_save_reg (int regno, int saving)
1863 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1864 return 1;
1865 if (flag_pic
1866 && regno == PIC_OFFSET_TABLE_REGNUM
1867 && (!saving || crtl->uses_pic_offset_table)
1868 && (!saving
1869 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1870 return 1;
1871 return 0;
1874 /* This function is only correct starting with local register
1875 allocation */
1877 spu_saved_regs_size (void)
1879 int reg_save_size = 0;
1880 int regno;
1882 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1883 if (need_to_save_reg (regno, 0))
1884 reg_save_size += 0x10;
1885 return reg_save_size;
1888 static rtx
1889 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1891 rtx reg = gen_rtx_REG (V4SImode, regno);
1892 rtx mem =
1893 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1894 return emit_insn (gen_movv4si (mem, reg));
1897 static rtx
1898 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1900 rtx reg = gen_rtx_REG (V4SImode, regno);
1901 rtx mem =
1902 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1903 return emit_insn (gen_movv4si (reg, mem));
1906 /* This happens after reload, so we need to expand it. */
1907 static rtx
1908 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1910 rtx insn;
1911 if (satisfies_constraint_K (GEN_INT (imm)))
1913 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1915 else
1917 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1918 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1919 if (REGNO (src) == REGNO (scratch))
1920 abort ();
1922 return insn;
1925 /* Return nonzero if this function is known to have a null epilogue. */
 1927 int
 1928 direct_return (void)
1930 if (reload_completed)
1932 if (cfun->static_chain_decl == 0
1933 && (spu_saved_regs_size ()
1934 + get_frame_size ()
1935 + crtl->outgoing_args_size
1936 + crtl->args.pretend_args_size == 0)
1937 && current_function_is_leaf)
1938 return 1;
1940 return 0;
 1943 /*
 1944    The stack frame looks like this:
1945 +-------------+
1946 | incoming |
1947 | args |
1948 AP -> +-------------+
1949 | $lr save |
1950 +-------------+
1951 prev SP | back chain |
1952 +-------------+
1953 | var args |
1954 | reg save | crtl->args.pretend_args_size bytes
1955 +-------------+
1956 | ... |
1957 | saved regs | spu_saved_regs_size() bytes
1958 FP -> +-------------+
1959 | ... |
1960 | vars | get_frame_size() bytes
1961 HFP -> +-------------+
1962 | ... |
1963 | outgoing |
1964 | args | crtl->outgoing_args_size bytes
1965 +-------------+
1966 | $lr of next |
1967 | frame |
1968 +-------------+
1969 | back chain |
 1970   SP  ->  +-------------+
 1971 */
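/* For example, a non-leaf function with 32 bytes of locals and two
   call-saved registers (each stored in a 16-byte slot) allocates
   32 + 32 + crtl->outgoing_args_size + STACK_POINTER_OFFSET bytes:
   the link register is stored at 16(sp) before the stack pointer
   moves, the saved registers go below the pretend-args area, and the
   old stack pointer is written as the back chain at the bottom of the
   new frame.  */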
1973 void
1974 spu_expand_prologue (void)
1976 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1977 HOST_WIDE_INT total_size;
1978 HOST_WIDE_INT saved_regs_size;
1979 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1980 rtx scratch_reg_0, scratch_reg_1;
1981 rtx insn, real;
1983 if (flag_pic && optimize == 0)
1984 crtl->uses_pic_offset_table = 1;
1986 if (spu_naked_function_p (current_function_decl))
1987 return;
1989 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1990 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1992 saved_regs_size = spu_saved_regs_size ();
1993 total_size = size + saved_regs_size
1994 + crtl->outgoing_args_size
1995 + crtl->args.pretend_args_size;
1997 if (!current_function_is_leaf
1998 || cfun->calls_alloca || total_size > 0)
1999 total_size += STACK_POINTER_OFFSET;
2001 /* Save this first because code after this might use the link
2002 register as a scratch register. */
2003 if (!current_function_is_leaf)
2005 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2006 RTX_FRAME_RELATED_P (insn) = 1;
2009 if (total_size > 0)
2011 offset = -crtl->args.pretend_args_size;
2012 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2013 if (need_to_save_reg (regno, 1))
2015 offset -= 16;
2016 insn = frame_emit_store (regno, sp_reg, offset);
2017 RTX_FRAME_RELATED_P (insn) = 1;
2021 if (flag_pic && crtl->uses_pic_offset_table)
2023 rtx pic_reg = get_pic_reg ();
2024 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2025 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2028 if (total_size > 0)
2030 if (flag_stack_check)
2032 /* We compare against total_size-1 because
2033 ($sp >= total_size) <=> ($sp > total_size-1) */
2034 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2035 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2036 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2037 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2039 emit_move_insn (scratch_v4si, size_v4si);
2040 size_v4si = scratch_v4si;
2042 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2043 emit_insn (gen_vec_extractv4si
2044 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2045 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2048 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2049 the value of the previous $sp because we save it as the back
2050 chain. */
2051 if (total_size <= 2000)
2053 /* In this case we save the back chain first. */
2054 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2055 insn =
2056 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2058 else
2060 insn = emit_move_insn (scratch_reg_0, sp_reg);
2061 insn =
2062 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2064 RTX_FRAME_RELATED_P (insn) = 1;
2065 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2068 if (total_size > 2000)
2070 /* Save the back chain ptr */
2071 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2074 if (frame_pointer_needed)
2076 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2077 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2078 + crtl->outgoing_args_size;
2079 /* Set the new frame_pointer */
2080 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2081 RTX_FRAME_RELATED_P (insn) = 1;
2082 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2083 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2084 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2088 if (flag_stack_usage_info)
2089 current_function_static_stack_size = total_size;
2092 void
2093 spu_expand_epilogue (bool sibcall_p)
2095 int size = get_frame_size (), offset, regno;
2096 HOST_WIDE_INT saved_regs_size, total_size;
2097 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2098 rtx scratch_reg_0;
2100 if (spu_naked_function_p (current_function_decl))
2101 return;
2103 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2105 saved_regs_size = spu_saved_regs_size ();
2106 total_size = size + saved_regs_size
2107 + crtl->outgoing_args_size
2108 + crtl->args.pretend_args_size;
2110 if (!current_function_is_leaf
2111 || cfun->calls_alloca || total_size > 0)
2112 total_size += STACK_POINTER_OFFSET;
2114 if (total_size > 0)
2116 if (cfun->calls_alloca)
2117 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2118 else
2119 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2122 if (saved_regs_size > 0)
2124 offset = -crtl->args.pretend_args_size;
2125 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2126 if (need_to_save_reg (regno, 1))
2128 offset -= 0x10;
2129 frame_emit_load (regno, sp_reg, offset);
2134 if (!current_function_is_leaf)
2135 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2137 if (!sibcall_p)
2139 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2140 emit_jump_insn (gen__return ());
2145 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2147 if (count != 0)
2148 return 0;
2149 /* This is inefficient because it ends up copying to a save-register
2150 which then gets saved even though $lr has already been saved. But
2151 it does generate better code for leaf functions and we don't need
2152 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2153 used for __builtin_return_address anyway, so maybe we don't care if
2154 it's inefficient. */
2155 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2159 /* Given VAL, generate a constant appropriate for MODE.
2160 If MODE is a vector mode, every element will be VAL.
2161 For TImode, VAL will be zero extended to 128 bits. */
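/* Editorial example of the mapping implemented below (not in the
   original source): spu_const (SImode, 5) is just (const_int 5), while
   spu_const (V4SImode, -1) builds
     (const_vector:V4SI [(const_int -1) (const_int -1)
                         (const_int -1) (const_int -1)]).  */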
2163 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2165 rtx inner;
2166 rtvec v;
2167 int units, i;
2169 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2170 || GET_MODE_CLASS (mode) == MODE_FLOAT
2171 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2172 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2174 if (GET_MODE_CLASS (mode) == MODE_INT)
2175 return immed_double_const (val, 0, mode);
2177 /* val is the bit representation of the float */
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2179 return hwint_to_const_double (mode, val);
2181 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2182 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2183 else
2184 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2186 units = GET_MODE_NUNITS (mode);
2188 v = rtvec_alloc (units);
2190 for (i = 0; i < units; ++i)
2191 RTVEC_ELT (v, i) = inner;
2193 return gen_rtx_CONST_VECTOR (mode, v);
2196 /* Create a MODE vector constant from 4 ints. */
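/* For illustration (editorial note): the four ints are packed
   big-endian, so
     spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)
   fills arr[] with the bytes 0x00, 0x01, ..., 0x0f in order before
   calling array_to_constant.  */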
2198 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2200 unsigned char arr[16];
2201 arr[0] = (a >> 24) & 0xff;
2202 arr[1] = (a >> 16) & 0xff;
2203 arr[2] = (a >> 8) & 0xff;
2204 arr[3] = (a >> 0) & 0xff;
2205 arr[4] = (b >> 24) & 0xff;
2206 arr[5] = (b >> 16) & 0xff;
2207 arr[6] = (b >> 8) & 0xff;
2208 arr[7] = (b >> 0) & 0xff;
2209 arr[8] = (c >> 24) & 0xff;
2210 arr[9] = (c >> 16) & 0xff;
2211 arr[10] = (c >> 8) & 0xff;
2212 arr[11] = (c >> 0) & 0xff;
2213 arr[12] = (d >> 24) & 0xff;
2214 arr[13] = (d >> 16) & 0xff;
2215 arr[14] = (d >> 8) & 0xff;
2216 arr[15] = (d >> 0) & 0xff;
2217 return array_to_constant(mode, arr);
2220 /* branch hint stuff */
2222 /* An array of these is used to propagate hints to predecessor blocks. */
2223 struct spu_bb_info
2225 rtx prop_jump; /* propagated from another block */
2226 int bb_index; /* the original block. */
2228 static struct spu_bb_info *spu_bb_info;
2230 #define STOP_HINT_P(INSN) \
2231 (GET_CODE(INSN) == CALL_INSN \
2232 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2233 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2235 /* 1 when RTX is a hinted branch or its target. We keep track of
2236 what has been hinted so the safe-hint code can test it easily. */
2237 #define HINTED_P(RTX) \
2238 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2240 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2241 #define SCHED_ON_EVEN_P(RTX) \
2242 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2244 /* Emit a nop for INSN such that the two will dual issue. This assumes
2245 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2246 We check for TImode to handle a MULTI1 insn which has dual issued its
2247 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2248 ADDR_VEC insns. */
2249 static void
2250 emit_nop_for_insn (rtx insn)
2252 int p;
2253 rtx new_insn;
2254 p = get_pipe (insn);
2255 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2256 new_insn = emit_insn_after (gen_lnop (), insn);
2257 else if (p == 1 && GET_MODE (insn) == TImode)
2259 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2260 PUT_MODE (new_insn, TImode);
2261 PUT_MODE (insn, VOIDmode);
2263 else
2264 new_insn = emit_insn_after (gen_lnop (), insn);
2265 recog_memoized (new_insn);
2266 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
2269 /* Insert nops in basic blocks to meet dual issue alignment
2270 requirements. Also make sure hbrp and hint instructions are at least
2271 one cycle apart, possibly inserting a nop. */
2272 static void
2273 pad_bb(void)
2275 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2276 int length;
2277 int addr;
2279 /* This sets up INSN_ADDRESSES. */
2280 shorten_branches (get_insns ());
2282 /* Keep track of length added by nops. */
2283 length = 0;
2285 prev_insn = 0;
2286 insn = get_insns ();
2287 if (!active_insn_p (insn))
2288 insn = next_active_insn (insn);
2289 for (; insn; insn = next_insn)
2291 next_insn = next_active_insn (insn);
2292 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2293 || INSN_CODE (insn) == CODE_FOR_hbr)
2295 if (hbr_insn)
2297 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2298 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2299 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 || (a1 - a0 == 4))
2302 prev_insn = emit_insn_before (gen_lnop (), insn);
2303 PUT_MODE (prev_insn, GET_MODE (insn));
2304 PUT_MODE (insn, TImode);
2305 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
2306 length += 4;
2309 hbr_insn = insn;
2311 if (INSN_CODE (insn) == CODE_FOR_blockage)
2313 if (GET_MODE (insn) == TImode)
2314 PUT_MODE (next_insn, TImode);
2315 insn = next_insn;
2316 next_insn = next_active_insn (insn);
2318 addr = INSN_ADDRESSES (INSN_UID (insn));
2319 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2321 if (((addr + length) & 7) != 0)
2323 emit_nop_for_insn (prev_insn);
2324 length += 4;
2327 else if (GET_MODE (insn) == TImode
2328 && ((next_insn && GET_MODE (next_insn) != TImode)
2329 || get_attr_type (insn) == TYPE_MULTI0)
2330 && ((addr + length) & 7) != 0)
2332 /* prev_insn will always be set because the first insn is
2333 always 8-byte aligned. */
2334 emit_nop_for_insn (prev_insn);
2335 length += 4;
2337 prev_insn = insn;
2342 /* Routines for branch hints. */
2344 static void
2345 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2346 int distance, sbitmap blocks)
2348 rtx branch_label = 0;
2349 rtx hint;
2350 rtx insn;
2351 rtx table;
2353 if (before == 0 || branch == 0 || target == 0)
2354 return;
2356 /* While scheduling we require hints to be no further than 600 bytes
2357 from the branch, so we need to enforce that here too. */
2358 if (distance > 600)
2359 return;
2362 /* If BEFORE is a basic block note, emit the hint after the note. */
2362 if (NOTE_INSN_BASIC_BLOCK_P (before))
2363 before = NEXT_INSN (before);
2365 branch_label = gen_label_rtx ();
2366 LABEL_NUSES (branch_label)++;
2367 LABEL_PRESERVE_P (branch_label) = 1;
2368 insn = emit_label_before (branch_label, branch);
2369 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2370 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2372 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2373 recog_memoized (hint);
2374 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
2375 HINTED_P (branch) = 1;
2377 if (GET_CODE (target) == LABEL_REF)
2378 HINTED_P (XEXP (target, 0)) = 1;
2379 else if (tablejump_p (branch, 0, &table))
2381 rtvec vec;
2382 int j;
2383 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2384 vec = XVEC (PATTERN (table), 0);
2385 else
2386 vec = XVEC (PATTERN (table), 1);
2387 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2388 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2391 if (distance >= 588)
2393 /* Make sure the hint isn't scheduled any earlier than this point,
2394 which could make it too far for the branch offset to fit. */
2395 insn = emit_insn_before (gen_blockage (), hint);
2396 recog_memoized (insn);
2397 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2399 else if (distance <= 8 * 4)
2401 /* To guarantee at least 8 insns between the hint and branch we
2402 insert nops. */
2403 int d;
2404 for (d = distance; d < 8 * 4; d += 4)
2406 insn =
2407 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2408 recog_memoized (insn);
2409 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2412 /* Make sure any nops inserted aren't scheduled before the hint. */
2413 insn = emit_insn_after (gen_blockage (), hint);
2414 recog_memoized (insn);
2415 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2417 /* Make sure any nops inserted aren't scheduled after the call. */
2418 if (CALL_P (branch) && distance < 8 * 4)
2420 insn = emit_insn_before (gen_blockage (), branch);
2421 recog_memoized (insn);
2422 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2427 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2428 the rtx for the branch target. */
2429 static rtx
2430 get_branch_target (rtx branch)
2432 if (GET_CODE (branch) == JUMP_INSN)
2434 rtx set, src;
2436 /* Return statements */
2437 if (GET_CODE (PATTERN (branch)) == RETURN)
2438 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2440 /* jump table */
2441 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2442 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2443 return 0;
2445 /* ASM GOTOs. */
2446 if (extract_asm_operands (PATTERN (branch)) != NULL)
2447 return NULL;
2449 set = single_set (branch);
2450 src = SET_SRC (set);
2451 if (GET_CODE (SET_DEST (set)) != PC)
2452 abort ();
2454 if (GET_CODE (src) == IF_THEN_ELSE)
2456 rtx lab = 0;
2457 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2458 if (note)
2460 /* If the more probable case is not a fall through, then
2461 try a branch hint. */
2462 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2463 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2464 && GET_CODE (XEXP (src, 1)) != PC)
2465 lab = XEXP (src, 1);
2466 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2467 && GET_CODE (XEXP (src, 2)) != PC)
2468 lab = XEXP (src, 2);
2470 if (lab)
2472 if (GET_CODE (lab) == RETURN)
2473 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2474 return lab;
2476 return 0;
2479 return src;
2481 else if (GET_CODE (branch) == CALL_INSN)
2483 rtx call;
2484 /* All of our call patterns are in a PARALLEL and the CALL is
2485 the first pattern in the PARALLEL. */
2486 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2487 abort ();
2488 call = XVECEXP (PATTERN (branch), 0, 0);
2489 if (GET_CODE (call) == SET)
2490 call = SET_SRC (call);
2491 if (GET_CODE (call) != CALL)
2492 abort ();
2493 return XEXP (XEXP (call, 0), 0);
2495 return 0;
2498 /* The special $hbr register is used to prevent the insn scheduler from
2499 moving hbr insns across instructions which invalidate them. It
2500 should only be used in a clobber, and this function searches for
2501 insns which clobber it. */
2502 static bool
2503 insn_clobbers_hbr (rtx insn)
2505 if (INSN_P (insn)
2506 && GET_CODE (PATTERN (insn)) == PARALLEL)
2508 rtx parallel = PATTERN (insn);
2509 rtx clobber;
2510 int j;
2511 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2513 clobber = XVECEXP (parallel, 0, j);
2514 if (GET_CODE (clobber) == CLOBBER
2515 && GET_CODE (XEXP (clobber, 0)) == REG
2516 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2517 return 1;
2520 return 0;
2523 /* Search up to 32 insns starting at FIRST:
2524 - at any kind of hinted branch, just return
2525 - at any unconditional branch in the first 15 insns, just return
2526 - at a call or indirect branch, after the first 15 insns, force it to
2527 an even address and return
2528 - at any unconditional branch, after the first 15 insns, force it to
2529 an even address.
2530 At the end of the search, insert an hbrp within 4 insns of FIRST,
2531 and an hbrp within 16 instructions of FIRST.
2533 static void
2534 insert_hbrp_for_ilb_runout (rtx first)
2536 rtx insn, before_4 = 0, before_16 = 0;
2537 int addr = 0, length, first_addr = -1;
2538 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2539 int insert_lnop_after = 0;
2540 for (insn = first; insn; insn = NEXT_INSN (insn))
2541 if (INSN_P (insn))
2543 if (first_addr == -1)
2544 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2545 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2546 length = get_attr_length (insn);
2548 if (before_4 == 0 && addr + length >= 4 * 4)
2549 before_4 = insn;
2550 /* We test for 14 instructions because the first hbrp will add
2551 up to 2 instructions. */
2552 if (before_16 == 0 && addr + length >= 14 * 4)
2553 before_16 = insn;
2555 if (INSN_CODE (insn) == CODE_FOR_hbr)
2557 /* Make sure an hbrp is at least 2 cycles away from a hint.
2558 Insert an lnop after the hbrp when necessary. */
2559 if (before_4 == 0 && addr > 0)
2561 before_4 = insn;
2562 insert_lnop_after |= 1;
2564 else if (before_4 && addr <= 4 * 4)
2565 insert_lnop_after |= 1;
2566 if (before_16 == 0 && addr > 10 * 4)
2568 before_16 = insn;
2569 insert_lnop_after |= 2;
2571 else if (before_16 && addr <= 14 * 4)
2572 insert_lnop_after |= 2;
2575 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2577 if (addr < hbrp_addr0)
2578 hbrp_addr0 = addr;
2579 else if (addr < hbrp_addr1)
2580 hbrp_addr1 = addr;
2583 if (CALL_P (insn) || JUMP_P (insn))
2585 if (HINTED_P (insn))
2586 return;
2588 /* Any branch after the first 15 insns should be on an even
2589 address to avoid a special case branch. There might be
2590 some nops and/or hbrps inserted, so we test after 10
2591 insns. */
2592 if (addr > 10 * 4)
2593 SCHED_ON_EVEN_P (insn) = 1;
2596 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2597 return;
2600 if (addr + length >= 32 * 4)
2602 gcc_assert (before_4 && before_16);
2603 if (hbrp_addr0 > 4 * 4)
2605 insn =
2606 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2607 recog_memoized (insn);
2608 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2609 INSN_ADDRESSES_NEW (insn,
2610 INSN_ADDRESSES (INSN_UID (before_4)));
2611 PUT_MODE (insn, GET_MODE (before_4));
2612 PUT_MODE (before_4, TImode);
2613 if (insert_lnop_after & 1)
2615 insn = emit_insn_before (gen_lnop (), before_4);
2616 recog_memoized (insn);
2617 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2618 INSN_ADDRESSES_NEW (insn,
2619 INSN_ADDRESSES (INSN_UID (before_4)));
2620 PUT_MODE (insn, TImode);
2623 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2624 && hbrp_addr1 > 16 * 4)
2626 insn =
2627 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2628 recog_memoized (insn);
2629 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2630 INSN_ADDRESSES_NEW (insn,
2631 INSN_ADDRESSES (INSN_UID (before_16)));
2632 PUT_MODE (insn, GET_MODE (before_16));
2633 PUT_MODE (before_16, TImode);
2634 if (insert_lnop_after & 2)
2636 insn = emit_insn_before (gen_lnop (), before_16);
2637 recog_memoized (insn);
2638 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2639 INSN_ADDRESSES_NEW (insn,
2640 INSN_ADDRESSES (INSN_UID
2641 (before_16)));
2642 PUT_MODE (insn, TImode);
2645 return;
2648 else if (BARRIER_P (insn))
2649 return;
2653 /* The SPU might hang when it executes 48 inline instructions after a
2654 hinted branch jumps to its hinted target. The beginning of a
2655 function and the return from a call might have been hinted, and
2656 must be handled as well. To prevent a hang we insert 2 hbrps. The
2657 first should be within 6 insns of the branch target. The second
2658 should be within 22 insns of the branch target. When determining
2659 if hbrps are necessary, we look for only 32 inline instructions,
2660 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2661 when inserting new hbrps, we insert them within 4 and 16 insns of
2662 the target. */
2663 static void
2664 insert_hbrp (void)
2666 rtx insn;
2667 if (TARGET_SAFE_HINTS)
2669 shorten_branches (get_insns ());
2670 /* Insert hbrp at beginning of function */
2671 insn = next_active_insn (get_insns ());
2672 if (insn)
2673 insert_hbrp_for_ilb_runout (insn);
2674 /* Insert hbrp after hinted targets. */
2675 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2676 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2677 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2681 static int in_spu_reorg;
2683 static void
2684 spu_var_tracking (void)
2686 if (flag_var_tracking)
2688 df_analyze ();
2689 timevar_push (TV_VAR_TRACKING);
2690 variable_tracking_main ();
2691 timevar_pop (TV_VAR_TRACKING);
2692 df_finish_pass (false);
2696 /* Insert branch hints. There are no branch optimizations after this
2697 pass, so it's safe to set our branch hints now. */
2698 static void
2699 spu_machine_dependent_reorg (void)
2701 sbitmap blocks;
2702 basic_block bb;
2703 rtx branch, insn;
2704 rtx branch_target = 0;
2705 int branch_addr = 0, insn_addr, required_dist = 0;
2706 int i;
2707 unsigned int j;
2709 if (!TARGET_BRANCH_HINTS || optimize == 0)
2711 /* We still do it for unoptimized code because an external
2712 function might have hinted a call or return. */
2713 insert_hbrp ();
2714 pad_bb ();
2715 spu_var_tracking ();
2716 return;
2719 blocks = sbitmap_alloc (last_basic_block);
2720 sbitmap_zero (blocks);
2722 in_spu_reorg = 1;
2723 compute_bb_for_insn ();
2725 compact_blocks ();
2727 spu_bb_info =
2728 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2729 sizeof (struct spu_bb_info));
2731 /* We need exact insn addresses and lengths. */
2732 shorten_branches (get_insns ());
2734 for (i = n_basic_blocks - 1; i >= 0; i--)
2736 bb = BASIC_BLOCK (i);
2737 branch = 0;
2738 if (spu_bb_info[i].prop_jump)
2740 branch = spu_bb_info[i].prop_jump;
2741 branch_target = get_branch_target (branch);
2742 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2743 required_dist = spu_hint_dist;
2745 /* Search from end of a block to beginning. In this loop, find
2746 jumps which need a branch hint and emit the hint only when:
2747 - it's an indirect branch and we're at the insn which sets
2748 the register
2749 - we're at an insn that will invalidate the hint. e.g., a
2750 call, another hint insn, inline asm that clobbers $hbr, and
2751 some inlined operations (divmodsi4). Don't consider jumps
2752 because they are only at the end of a block and are
2753 considered when we are deciding whether to propagate
2754 - we're getting too far away from the branch. The hbr insns
2755 only have a signed 10 bit offset
2756 We go back as far as possible so the branch will be considered
2757 for propagation when we get to the beginning of the block. */
2758 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2760 if (INSN_P (insn))
2762 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2763 if (branch
2764 && ((GET_CODE (branch_target) == REG
2765 && set_of (branch_target, insn) != NULL_RTX)
2766 || insn_clobbers_hbr (insn)
2767 || branch_addr - insn_addr > 600))
2769 rtx next = NEXT_INSN (insn);
2770 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2771 if (insn != BB_END (bb)
2772 && branch_addr - next_addr >= required_dist)
2774 if (dump_file)
2775 fprintf (dump_file,
2776 "hint for %i in block %i before %i\n",
2777 INSN_UID (branch), bb->index,
2778 INSN_UID (next));
2779 spu_emit_branch_hint (next, branch, branch_target,
2780 branch_addr - next_addr, blocks);
2782 branch = 0;
2785 /* JUMP_P will only be true at the end of a block. When
2786 branch is already set it means we've previously decided
2787 to propagate a hint for that branch into this block. */
2788 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2790 branch = 0;
2791 if ((branch_target = get_branch_target (insn)))
2793 branch = insn;
2794 branch_addr = insn_addr;
2795 required_dist = spu_hint_dist;
2799 if (insn == BB_HEAD (bb))
2800 break;
2803 if (branch)
2805 /* If we haven't emitted a hint for this branch yet, it might
2806 be profitable to emit it in one of the predecessor blocks,
2807 especially for loops. */
2808 rtx bbend;
2809 basic_block prev = 0, prop = 0, prev2 = 0;
2810 int loop_exit = 0, simple_loop = 0;
2811 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2813 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2814 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2815 prev = EDGE_PRED (bb, j)->src;
2816 else
2817 prev2 = EDGE_PRED (bb, j)->src;
2819 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2820 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2821 loop_exit = 1;
2822 else if (EDGE_SUCC (bb, j)->dest == bb)
2823 simple_loop = 1;
2825 /* If this branch is a loop exit then propagate to previous
2826 fallthru block. This catches the cases when it is a simple
2827 loop or when there is an initial branch into the loop. */
2828 if (prev && (loop_exit || simple_loop)
2829 && prev->loop_depth <= bb->loop_depth)
2830 prop = prev;
2832 /* If there is only one adjacent predecessor, don't propagate
2833 outside this loop. This loop_depth test isn't perfect, but
2834 I'm not sure the loop_father member is valid at this point. */
2835 else if (prev && single_pred_p (bb)
2836 && prev->loop_depth == bb->loop_depth)
2837 prop = prev;
2839 /* If this is the JOIN block of a simple IF-THEN then
2840 propagate the hint to the HEADER block. */
2841 else if (prev && prev2
2842 && EDGE_COUNT (bb->preds) == 2
2843 && EDGE_COUNT (prev->preds) == 1
2844 && EDGE_PRED (prev, 0)->src == prev2
2845 && prev2->loop_depth == bb->loop_depth
2846 && GET_CODE (branch_target) != REG)
2847 prop = prev;
2849 /* Don't propagate when:
2850 - this is a simple loop and the hint would be too far
2851 - this is not a simple loop and there are 16 insns in
2852 this block already
2853 - the predecessor block ends in a branch that will be
2854 hinted
2855 - the predecessor block ends in an insn that invalidates
2856 the hint */
2857 if (prop
2858 && prop->index >= 0
2859 && (bbend = BB_END (prop))
2860 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2861 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2862 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2864 if (dump_file)
2865 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2866 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2867 bb->index, prop->index, bb->loop_depth,
2868 INSN_UID (branch), loop_exit, simple_loop,
2869 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2871 spu_bb_info[prop->index].prop_jump = branch;
2872 spu_bb_info[prop->index].bb_index = i;
2874 else if (branch_addr - next_addr >= required_dist)
2876 if (dump_file)
2877 fprintf (dump_file, "hint for %i in block %i before %i\n",
2878 INSN_UID (branch), bb->index,
2879 INSN_UID (NEXT_INSN (insn)));
2880 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2881 branch_addr - next_addr, blocks);
2883 branch = 0;
2886 free (spu_bb_info);
2888 if (!sbitmap_empty_p (blocks))
2889 find_many_sub_basic_blocks (blocks);
2891 /* We have to schedule to make sure alignment is ok. */
2892 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2894 /* The hints need to be scheduled, so call it again. */
2895 schedule_insns ();
2896 df_finish_pass (true);
2898 insert_hbrp ();
2900 pad_bb ();
2902 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2903 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2905 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2906 between its branch label and the branch. We don't move the
2907 label because GCC expects it at the beginning of the block. */
2908 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2909 rtx label_ref = XVECEXP (unspec, 0, 0);
2910 rtx label = XEXP (label_ref, 0);
2911 rtx branch;
2912 int offset = 0;
2913 for (branch = NEXT_INSN (label);
2914 !JUMP_P (branch) && !CALL_P (branch);
2915 branch = NEXT_INSN (branch))
2916 if (NONJUMP_INSN_P (branch))
2917 offset += get_attr_length (branch);
2918 if (offset > 0)
2919 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2922 spu_var_tracking ();
2924 free_bb_for_insn ();
2926 in_spu_reorg = 0;
2930 /* Insn scheduling routines, primarily for dual issue. */
2931 static int
2932 spu_sched_issue_rate (void)
2934 return 2;
2937 static int
2938 uses_ls_unit(rtx insn)
2940 rtx set = single_set (insn);
2941 if (set != 0
2942 && (GET_CODE (SET_DEST (set)) == MEM
2943 || GET_CODE (SET_SRC (set)) == MEM))
2944 return 1;
2945 return 0;
2948 static int
2949 get_pipe (rtx insn)
2951 enum attr_type t;
2952 /* Handle inline asm */
2953 if (INSN_CODE (insn) == -1)
2954 return -1;
2955 t = get_attr_type (insn);
2956 switch (t)
2958 case TYPE_CONVERT:
2959 return -2;
2960 case TYPE_MULTI0:
2961 return -1;
2963 case TYPE_FX2:
2964 case TYPE_FX3:
2965 case TYPE_SPR:
2966 case TYPE_NOP:
2967 case TYPE_FXB:
2968 case TYPE_FPD:
2969 case TYPE_FP6:
2970 case TYPE_FP7:
2971 return 0;
2973 case TYPE_LNOP:
2974 case TYPE_SHUF:
2975 case TYPE_LOAD:
2976 case TYPE_STORE:
2977 case TYPE_BR:
2978 case TYPE_MULTI1:
2979 case TYPE_HBR:
2980 case TYPE_IPREFETCH:
2981 return 1;
2982 default:
2983 abort ();
2988 /* haifa-sched.c has a static variable that keeps track of the current
2989 cycle. It is passed to spu_sched_reorder, and we record it here for
2990 use by spu_sched_variable_issue. It won't be accurate if the
2991 scheduler updates its clock_var between the two calls. */
2992 static int clock_var;
2994 /* This is used to keep track of insn alignment. Set to 0 at the
2995 beginning of each block and increased by the "length" attr of each
2996 insn scheduled. */
2997 static int spu_sched_length;
2999 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3000 ready list appropriately in spu_sched_reorder(). */
3001 static int pipe0_clock;
3002 static int pipe1_clock;
3004 static int prev_clock_var;
3006 static int prev_priority;
3008 /* The SPU needs to load the next ilb sometime during the execution of
3009 the previous ilb. There is a potential conflict if every cycle has a
3010 load or store. To avoid the conflict we make sure the load/store
3011 unit is free for at least one cycle during the execution of insns in
3012 the previous ilb. */
3013 static int spu_ls_first;
3014 static int prev_ls_clock;
3016 static void
3017 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3018 int max_ready ATTRIBUTE_UNUSED)
3020 spu_sched_length = 0;
3023 static void
3024 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3025 int max_ready ATTRIBUTE_UNUSED)
3027 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3029 /* When any block might be at least 8-byte aligned, assume they
3030 will all be at least 8-byte aligned to make sure dual issue
3031 works out correctly. */
3032 spu_sched_length = 0;
3034 spu_ls_first = INT_MAX;
3035 clock_var = -1;
3036 prev_ls_clock = -1;
3037 pipe0_clock = -1;
3038 pipe1_clock = -1;
3039 prev_clock_var = -1;
3040 prev_priority = -1;
3043 static int
3044 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3045 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3047 int len;
3048 int p;
3049 if (GET_CODE (PATTERN (insn)) == USE
3050 || GET_CODE (PATTERN (insn)) == CLOBBER
3051 || (len = get_attr_length (insn)) == 0)
3052 return more;
3054 spu_sched_length += len;
3056 /* Reset on inline asm */
3057 if (INSN_CODE (insn) == -1)
3059 spu_ls_first = INT_MAX;
3060 pipe0_clock = -1;
3061 pipe1_clock = -1;
3062 return 0;
3064 p = get_pipe (insn);
3065 if (p == 0)
3066 pipe0_clock = clock_var;
3067 else
3068 pipe1_clock = clock_var;
3070 if (in_spu_reorg)
3072 if (clock_var - prev_ls_clock > 1
3073 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3074 spu_ls_first = INT_MAX;
3075 if (uses_ls_unit (insn))
3077 if (spu_ls_first == INT_MAX)
3078 spu_ls_first = spu_sched_length;
3079 prev_ls_clock = clock_var;
3082 /* The scheduler hasn't inserted the nop, but we will later on.
3083 Include those nops in spu_sched_length. */
3084 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3085 spu_sched_length += 4;
3086 prev_clock_var = clock_var;
3088 /* more is -1 when called from spu_sched_reorder for new insns
3089 that don't have INSN_PRIORITY */
3090 if (more >= 0)
3091 prev_priority = INSN_PRIORITY (insn);
3094 /* Always try issuing more insns. spu_sched_reorder will decide
3095 when the cycle should be advanced. */
3096 return 1;
3099 /* This function is called for both TARGET_SCHED_REORDER and
3100 TARGET_SCHED_REORDER2. */
3101 static int
3102 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3103 rtx *ready, int *nreadyp, int clock)
3105 int i, nready = *nreadyp;
3106 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3107 rtx insn;
3109 clock_var = clock;
3111 if (nready <= 0 || pipe1_clock >= clock)
3112 return 0;
3114 /* Find any rtl insns that don't generate assembly insns and schedule
3115 them first. */
3116 for (i = nready - 1; i >= 0; i--)
3118 insn = ready[i];
3119 if (INSN_CODE (insn) == -1
3120 || INSN_CODE (insn) == CODE_FOR_blockage
3121 || (INSN_P (insn) && get_attr_length (insn) == 0))
3123 ready[i] = ready[nready - 1];
3124 ready[nready - 1] = insn;
3125 return 1;
3129 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3130 for (i = 0; i < nready; i++)
3131 if (INSN_CODE (ready[i]) != -1)
3133 insn = ready[i];
3134 switch (get_attr_type (insn))
3136 default:
3137 case TYPE_MULTI0:
3138 case TYPE_CONVERT:
3139 case TYPE_FX2:
3140 case TYPE_FX3:
3141 case TYPE_SPR:
3142 case TYPE_NOP:
3143 case TYPE_FXB:
3144 case TYPE_FPD:
3145 case TYPE_FP6:
3146 case TYPE_FP7:
3147 pipe_0 = i;
3148 break;
3149 case TYPE_LOAD:
3150 case TYPE_STORE:
3151 pipe_ls = i;
3152 case TYPE_LNOP:
3153 case TYPE_SHUF:
3154 case TYPE_BR:
3155 case TYPE_MULTI1:
3156 case TYPE_HBR:
3157 pipe_1 = i;
3158 break;
3159 case TYPE_IPREFETCH:
3160 pipe_hbrp = i;
3161 break;
3165 /* In the first scheduling phase, schedule loads and stores together
3166 to increase the chance they will get merged during postreload CSE. */
3167 if (!reload_completed && pipe_ls >= 0)
3169 insn = ready[pipe_ls];
3170 ready[pipe_ls] = ready[nready - 1];
3171 ready[nready - 1] = insn;
3172 return 1;
3175 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3176 if (pipe_hbrp >= 0)
3177 pipe_1 = pipe_hbrp;
3179 /* When we have loads/stores in every cycle of the last 15 insns and
3180 we are about to schedule another load/store, emit an hbrp insn
3181 instead. */
3182 if (in_spu_reorg
3183 && spu_sched_length - spu_ls_first >= 4 * 15
3184 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3186 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3187 recog_memoized (insn);
3188 if (pipe0_clock < clock)
3189 PUT_MODE (insn, TImode);
3190 spu_sched_variable_issue (file, verbose, insn, -1);
3191 return 0;
3194 /* In general, we want to emit nops to increase dual issue, but dual
3195 issue isn't faster when one of the insns could be scheduled later
3196 without affecting the critical path. We look at INSN_PRIORITY to
3197 make a good guess, but it isn't perfect so -mdual-nops=n can be
3198 used to adjust it. */
3199 if (in_spu_reorg && spu_dual_nops < 10)
3201 /* When we are at an even address and we are not issuing nops to
3202 improve scheduling then we need to advance the cycle. */
3203 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3204 && (spu_dual_nops == 0
3205 || (pipe_1 != -1
3206 && prev_priority >
3207 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3208 return 0;
3210 /* When at an odd address, schedule the highest priority insn
3211 without considering pipeline. */
3212 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3213 && (spu_dual_nops == 0
3214 || (prev_priority >
3215 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3216 return 1;
3220 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3221 pipe0 insn in the ready list, schedule it. */
3222 if (pipe0_clock < clock && pipe_0 >= 0)
3223 schedule_i = pipe_0;
3225 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3226 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3227 else
3228 schedule_i = pipe_1;
3230 if (schedule_i > -1)
3232 insn = ready[schedule_i];
3233 ready[schedule_i] = ready[nready - 1];
3234 ready[nready - 1] = insn;
3235 return 1;
3237 return 0;
3240 /* INSN is dependent on DEP_INSN. */
3241 static int
3242 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3244 rtx set;
3246 /* The blockage pattern is used to prevent instructions from being
3247 moved across it and has no cost. */
3248 if (INSN_CODE (insn) == CODE_FOR_blockage
3249 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3250 return 0;
3252 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3253 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3254 return 0;
3256 /* Make sure hbrps are spread out. */
3257 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3258 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3259 return 8;
3261 /* Make sure hints and hbrps are 2 cycles apart. */
3262 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3263 || INSN_CODE (insn) == CODE_FOR_hbr)
3264 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3265 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3266 return 2;
3268 /* An hbrp has no real dependency on other insns. */
3269 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3270 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3271 return 0;
3273 /* Assuming that it is unlikely an argument register will be used in
3274 the first cycle of the called function, we reduce the cost for
3275 slightly better scheduling of dep_insn. When not hinted, the
3276 mispredicted branch would hide the cost as well. */
3277 if (CALL_P (insn))
3279 rtx target = get_branch_target (insn);
3280 if (GET_CODE (target) != REG || !set_of (target, insn))
3281 return cost - 2;
3282 return cost;
3285 /* And when returning from a function, let's assume the return values
3286 are completed sooner too. */
3287 if (CALL_P (dep_insn))
3288 return cost - 2;
3290 /* Make sure an instruction that loads from the back chain is scheduled
3291 away from the return instruction so a hint is more likely to get
3292 issued. */
3293 if (INSN_CODE (insn) == CODE_FOR__return
3294 && (set = single_set (dep_insn))
3295 && GET_CODE (SET_DEST (set)) == REG
3296 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3297 return 20;
3299 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3300 scheduler makes every insn in a block anti-dependent on the final
3301 jump_insn. We adjust here so higher cost insns will get scheduled
3302 earlier. */
3303 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3304 return insn_cost (dep_insn) - 3;
3306 return cost;
3309 /* Create a CONST_DOUBLE from a string. */
3311 spu_float_const (const char *string, enum machine_mode mode)
3313 REAL_VALUE_TYPE value;
3314 value = REAL_VALUE_ATOF (string, mode);
3315 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3319 spu_constant_address_p (rtx x)
3321 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3322 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3323 || GET_CODE (x) == HIGH);
3326 static enum spu_immediate
3327 which_immediate_load (HOST_WIDE_INT val)
3329 gcc_assert (val == trunc_int_for_mode (val, SImode));
3331 if (val >= -0x8000 && val <= 0x7fff)
3332 return SPU_IL;
3333 if (val >= 0 && val <= 0x3ffff)
3334 return SPU_ILA;
3335 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3336 return SPU_ILH;
3337 if ((val & 0xffff) == 0)
3338 return SPU_ILHU;
3340 return SPU_NONE;
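/* Editorial examples for the classification above (derived from the
   checks in which_immediate_load, not part of the original source):
     0x00001234  -> SPU_IL    (fits the signed 16-bit il field)
     0x00012345  -> SPU_ILA   (fits the 18-bit ila range)
     0x12341234  -> SPU_ILH   (both halfwords identical)
     0x12340000  -> SPU_ILHU  (low halfword is zero)  */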
3343 /* Return true when OP can be loaded by one of the il instructions, or
3344 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3346 immediate_load_p (rtx op, enum machine_mode mode)
3348 if (CONSTANT_P (op))
3350 enum immediate_class c = classify_immediate (op, mode);
3351 return c == IC_IL1 || c == IC_IL1s
3352 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3354 return 0;
3357 /* Return true if the first SIZE bytes of ARR form a constant that can be
3358 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3359 represent the size and offset of the instruction to use. */
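/* Editorial example (not in the original source): the 16-byte array
     { 0x10, 0x11, 0x12, 0x13,  0x00, 0x01, 0x02, 0x03,
       0x18, 0x19, 0x1a, 0x1b,  0x1c, 0x1d, 0x1e, 0x1f }
   is recognized by cpat_info as a 4-byte (cwd-style) pattern, giving
   *prun == 4 and *pstart == 4; every byte outside the run equals i+16.  */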
3360 static int
3361 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3363 int cpat, run, i, start;
3364 cpat = 1;
3365 run = 0;
3366 start = -1;
3367 for (i = 0; i < size && cpat; i++)
3368 if (arr[i] != i+16)
3370 if (!run)
3372 start = i;
3373 if (arr[i] == 3)
3374 run = 1;
3375 else if (arr[i] == 2 && arr[i+1] == 3)
3376 run = 2;
3377 else if (arr[i] == 0)
3379 while (arr[i+run] == run && i+run < 16)
3380 run++;
3381 if (run != 4 && run != 8)
3382 cpat = 0;
3384 else
3385 cpat = 0;
3386 if ((i & (run-1)) != 0)
3387 cpat = 0;
3388 i += run;
3390 else
3391 cpat = 0;
3393 if (cpat && (run || size < 16))
3395 if (run == 0)
3396 run = 1;
3397 if (prun)
3398 *prun = run;
3399 if (pstart)
3400 *pstart = start == -1 ? 16-run : start;
3401 return 1;
3403 return 0;
3406 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3407 it into a register. MODE is only valid when OP is a CONST_INT. */
3408 static enum immediate_class
3409 classify_immediate (rtx op, enum machine_mode mode)
3411 HOST_WIDE_INT val;
3412 unsigned char arr[16];
3413 int i, j, repeated, fsmbi, repeat;
3415 gcc_assert (CONSTANT_P (op));
3417 if (GET_MODE (op) != VOIDmode)
3418 mode = GET_MODE (op);
3420 /* A V4SI const_vector with all identical symbols is ok. */
3421 if (!flag_pic
3422 && mode == V4SImode
3423 && GET_CODE (op) == CONST_VECTOR
3424 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3425 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3426 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3427 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3428 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3429 op = CONST_VECTOR_ELT (op, 0);
3431 switch (GET_CODE (op))
3433 case SYMBOL_REF:
3434 case LABEL_REF:
3435 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3437 case CONST:
3438 /* We can never know if the resulting address fits in 18 bits and can be
3439 loaded with ila. For now, assume the address will not overflow if
3440 the displacement is "small" (fits 'K' constraint). */
3441 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3443 rtx sym = XEXP (XEXP (op, 0), 0);
3444 rtx cst = XEXP (XEXP (op, 0), 1);
3446 if (GET_CODE (sym) == SYMBOL_REF
3447 && GET_CODE (cst) == CONST_INT
3448 && satisfies_constraint_K (cst))
3449 return IC_IL1s;
3451 return IC_IL2s;
3453 case HIGH:
3454 return IC_IL1s;
3456 case CONST_VECTOR:
3457 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3458 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3459 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3460 return IC_POOL;
3461 /* Fall through. */
3463 case CONST_INT:
3464 case CONST_DOUBLE:
3465 constant_to_array (mode, op, arr);
3467 /* Check that each 4-byte slot is identical. */
3468 repeated = 1;
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 repeated = 0;
3474 if (repeated)
3476 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3477 val = trunc_int_for_mode (val, SImode);
3479 if (which_immediate_load (val) != SPU_NONE)
3480 return IC_IL1;
3483 /* Any mode of 2 bytes or smaller can be loaded with an il
3484 instruction. */
3485 gcc_assert (GET_MODE_SIZE (mode) > 2);
3487 fsmbi = 1;
3488 repeat = 0;
3489 for (i = 0; i < 16 && fsmbi; i++)
3490 if (arr[i] != 0 && repeat == 0)
3491 repeat = arr[i];
3492 else if (arr[i] != 0 && arr[i] != repeat)
3493 fsmbi = 0;
3494 if (fsmbi)
3495 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3497 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3498 return IC_CPAT;
3500 if (repeated)
3501 return IC_IL2;
3503 return IC_POOL;
3504 default:
3505 break;
3507 gcc_unreachable ();
3510 static enum spu_immediate
3511 which_logical_immediate (HOST_WIDE_INT val)
3513 gcc_assert (val == trunc_int_for_mode (val, SImode));
3515 if (val >= -0x200 && val <= 0x1ff)
3516 return SPU_ORI;
3517 if (val >= 0 && val <= 0xffff)
3518 return SPU_IOHL;
3519 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3521 val = trunc_int_for_mode (val, HImode);
3522 if (val >= -0x200 && val <= 0x1ff)
3523 return SPU_ORHI;
3524 if ((val & 0xff) == ((val >> 8) & 0xff))
3526 val = trunc_int_for_mode (val, QImode);
3527 if (val >= -0x200 && val <= 0x1ff)
3528 return SPU_ORBI;
3531 return SPU_NONE;
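/* Editorial examples for which_logical_immediate (derived from the
   ranges tested above, not part of the original source):
     0x000001ff  -> SPU_ORI   (fits the signed 10-bit ori field)
     0x0000abcd  -> SPU_IOHL  (only reachable with iohl)
     0x00050005  -> SPU_ORHI  (identical halfwords, small value)
     0x07070707  -> SPU_ORBI  (identical bytes, small value)  */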
3534 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3535 CONST_DOUBLEs. */
3536 static int
3537 const_vector_immediate_p (rtx x)
3539 int i;
3540 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3541 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3542 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3543 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3544 return 0;
3545 return 1;
3549 logical_immediate_p (rtx op, enum machine_mode mode)
3551 HOST_WIDE_INT val;
3552 unsigned char arr[16];
3553 int i, j;
3555 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3556 || GET_CODE (op) == CONST_VECTOR);
3558 if (GET_CODE (op) == CONST_VECTOR
3559 && !const_vector_immediate_p (op))
3560 return 0;
3562 if (GET_MODE (op) != VOIDmode)
3563 mode = GET_MODE (op);
3565 constant_to_array (mode, op, arr);
3567 /* Check that bytes are repeated. */
3568 for (i = 4; i < 16; i += 4)
3569 for (j = 0; j < 4; j++)
3570 if (arr[j] != arr[i + j])
3571 return 0;
3573 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3574 val = trunc_int_for_mode (val, SImode);
3576 i = which_logical_immediate (val);
3577 return i != SPU_NONE && i != SPU_IOHL;
3581 iohl_immediate_p (rtx op, enum machine_mode mode)
3583 HOST_WIDE_INT val;
3584 unsigned char arr[16];
3585 int i, j;
3587 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3588 || GET_CODE (op) == CONST_VECTOR);
3590 if (GET_CODE (op) == CONST_VECTOR
3591 && !const_vector_immediate_p (op))
3592 return 0;
3594 if (GET_MODE (op) != VOIDmode)
3595 mode = GET_MODE (op);
3597 constant_to_array (mode, op, arr);
3599 /* Check that bytes are repeated. */
3600 for (i = 4; i < 16; i += 4)
3601 for (j = 0; j < 4; j++)
3602 if (arr[j] != arr[i + j])
3603 return 0;
3605 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3606 val = trunc_int_for_mode (val, SImode);
3608 return val >= 0 && val <= 0xffff;
3612 arith_immediate_p (rtx op, enum machine_mode mode,
3613 HOST_WIDE_INT low, HOST_WIDE_INT high)
3615 HOST_WIDE_INT val;
3616 unsigned char arr[16];
3617 int bytes, i, j;
3619 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3620 || GET_CODE (op) == CONST_VECTOR);
3622 if (GET_CODE (op) == CONST_VECTOR
3623 && !const_vector_immediate_p (op))
3624 return 0;
3626 if (GET_MODE (op) != VOIDmode)
3627 mode = GET_MODE (op);
3629 constant_to_array (mode, op, arr);
3631 if (VECTOR_MODE_P (mode))
3632 mode = GET_MODE_INNER (mode);
3634 bytes = GET_MODE_SIZE (mode);
3635 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3637 /* Check that bytes are repeated. */
3638 for (i = bytes; i < 16; i += bytes)
3639 for (j = 0; j < bytes; j++)
3640 if (arr[j] != arr[i + j])
3641 return 0;
3643 val = arr[0];
3644 for (j = 1; j < bytes; j++)
3645 val = (val << 8) | arr[j];
3647 val = trunc_int_for_mode (val, mode);
3649 return val >= low && val <= high;
3652 /* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
3653 OP is 2^scale with scale >= LOW && scale <= HIGH. When OP is a vector,
3654 all entries must be the same. */
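/* Editorial example (assuming SFmode, the only case handled below):
   2.0f has the bit pattern 0x40000000, so its exponent field gives
   scale == (0x40000000 >> 23) - 127 == 1, and exp2_immediate_p is true
   whenever LOW <= 1 <= HIGH; 1.0f (0x3f800000) gives scale == 0.  */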
3655 bool
3656 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3658 enum machine_mode int_mode;
3659 HOST_WIDE_INT val;
3660 unsigned char arr[16];
3661 int bytes, i, j;
3663 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3664 || GET_CODE (op) == CONST_VECTOR);
3666 if (GET_CODE (op) == CONST_VECTOR
3667 && !const_vector_immediate_p (op))
3668 return 0;
3670 if (GET_MODE (op) != VOIDmode)
3671 mode = GET_MODE (op);
3673 constant_to_array (mode, op, arr);
3675 if (VECTOR_MODE_P (mode))
3676 mode = GET_MODE_INNER (mode);
3678 bytes = GET_MODE_SIZE (mode);
3679 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3681 /* Check that bytes are repeated. */
3682 for (i = bytes; i < 16; i += bytes)
3683 for (j = 0; j < bytes; j++)
3684 if (arr[j] != arr[i + j])
3685 return 0;
3687 val = arr[0];
3688 for (j = 1; j < bytes; j++)
3689 val = (val << 8) | arr[j];
3691 val = trunc_int_for_mode (val, int_mode);
3693 /* Currently, we only handle SFmode */
3694 gcc_assert (mode == SFmode);
3695 if (mode == SFmode)
3697 int exp = (val >> 23) - 127;
3698 return val > 0 && (val & 0x007fffff) == 0
3699 && exp >= low && exp <= high;
3701 return FALSE;
3704 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3706 static int
3707 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3709 rtx x = *px;
3710 tree decl;
3712 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3714 rtx plus = XEXP (x, 0);
3715 rtx op0 = XEXP (plus, 0);
3716 rtx op1 = XEXP (plus, 1);
3717 if (GET_CODE (op1) == CONST_INT)
3718 x = op0;
3721 return (GET_CODE (x) == SYMBOL_REF
3722 && (decl = SYMBOL_REF_DECL (x)) != 0
3723 && TREE_CODE (decl) == VAR_DECL
3724 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3727 /* We accept:
3728 - any 32-bit constant (SImode, SFmode)
3729 - any constant that can be generated with fsmbi (any mode)
3730 - a 64-bit constant where the high and low bits are identical
3731 (DImode, DFmode)
3732 - a 128-bit constant where the four 32-bit words match. */
3733 bool
3734 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3736 if (GET_CODE (x) == HIGH)
3737 x = XEXP (x, 0);
3739 /* Reject any __ea qualified reference. These can't appear in
3740 instructions but must be forced to the constant pool. */
3741 if (for_each_rtx (&x, ea_symbol_ref, 0))
3742 return 0;
3744 /* V4SI with all identical symbols is valid. */
3745 if (!flag_pic
3746 && mode == V4SImode
3747 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3748 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3749 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3750 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3751 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3752 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3754 if (GET_CODE (x) == CONST_VECTOR
3755 && !const_vector_immediate_p (x))
3756 return 0;
3757 return 1;
3760 /* Valid addresses are:
3761 - symbol_ref, label_ref, const
3762 - reg
3763 - reg + const_int, where const_int is 16 byte aligned
3764 - reg + reg, alignment doesn't matter
3765 The alignment matters in the reg+const case because lqd and stqd
3766 ignore the 4 least significant bits of the const. We only care about
3767 16 byte modes because the expand phase will change all smaller MEM
3768 references to TImode. */
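/* Editorial examples (derived from the checks below, not in the
   original source): for a 16-byte (TImode) access,
   (plus (reg) (const_int 32)) is accepted while
   (plus (reg) (const_int 4)) is rejected because lqd/stqd would drop
   the low 4 bits; (plus (reg) (reg)) is accepted regardless of
   alignment.  */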
3769 static bool
3770 spu_legitimate_address_p (enum machine_mode mode,
3771 rtx x, bool reg_ok_strict)
3773 int aligned = GET_MODE_SIZE (mode) >= 16;
3774 if (aligned
3775 && GET_CODE (x) == AND
3776 && GET_CODE (XEXP (x, 1)) == CONST_INT
3777 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3778 x = XEXP (x, 0);
3779 switch (GET_CODE (x))
3781 case LABEL_REF:
3782 return !TARGET_LARGE_MEM;
3784 case SYMBOL_REF:
3785 case CONST:
3786 /* Keep __ea references until reload so that spu_expand_mov can see them
3787 in MEMs. */
3788 if (ea_symbol_ref (&x, 0))
3789 return !reload_in_progress && !reload_completed;
3790 return !TARGET_LARGE_MEM;
3792 case CONST_INT:
3793 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3795 case SUBREG:
3796 x = XEXP (x, 0);
3797 if (REG_P (x))
3798 return 0;
3800 case REG:
3801 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3803 case PLUS:
3804 case LO_SUM:
3806 rtx op0 = XEXP (x, 0);
3807 rtx op1 = XEXP (x, 1);
3808 if (GET_CODE (op0) == SUBREG)
3809 op0 = XEXP (op0, 0);
3810 if (GET_CODE (op1) == SUBREG)
3811 op1 = XEXP (op1, 0);
3812 if (GET_CODE (op0) == REG
3813 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3814 && GET_CODE (op1) == CONST_INT
3815 && INTVAL (op1) >= -0x2000
3816 && INTVAL (op1) <= 0x1fff
3817 && (!aligned || (INTVAL (op1) & 15) == 0))
3818 return TRUE;
3819 if (GET_CODE (op0) == REG
3820 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3821 && GET_CODE (op1) == REG
3822 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3823 return TRUE;
3825 break;
3827 default:
3828 break;
3830 return FALSE;
3833 /* Like spu_legitimate_address_p, except with named addresses. */
3834 static bool
3835 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3836 bool reg_ok_strict, addr_space_t as)
3838 if (as == ADDR_SPACE_EA)
3839 return (REG_P (x) && (GET_MODE (x) == EAmode));
3841 else if (as != ADDR_SPACE_GENERIC)
3842 gcc_unreachable ();
3844 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3847 /* When the address is reg + const_int, force the const_int into a
3848 register. */
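/* Editorial example: legitimizing (plus (reg R) (const_int 12)) loads
   12 into a fresh register T and returns (plus (reg R) (reg T));
   aligned SYMBOL_REFs are likewise forced into registers and marked as
   pointers with 128-bit alignment.  */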
3850 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3851 enum machine_mode mode ATTRIBUTE_UNUSED)
3853 rtx op0, op1;
3854 /* Make sure both operands are registers. */
3855 if (GET_CODE (x) == PLUS)
3857 op0 = XEXP (x, 0);
3858 op1 = XEXP (x, 1);
3859 if (ALIGNED_SYMBOL_REF_P (op0))
3861 op0 = force_reg (Pmode, op0);
3862 mark_reg_pointer (op0, 128);
3864 else if (GET_CODE (op0) != REG)
3865 op0 = force_reg (Pmode, op0);
3866 if (ALIGNED_SYMBOL_REF_P (op1))
3868 op1 = force_reg (Pmode, op1);
3869 mark_reg_pointer (op1, 128);
3871 else if (GET_CODE (op1) != REG)
3872 op1 = force_reg (Pmode, op1);
3873 x = gen_rtx_PLUS (Pmode, op0, op1);
3875 return x;
3878 /* Like spu_legitimize_address, except with named address support. */
3879 static rtx
3880 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3881 addr_space_t as)
3883 if (as != ADDR_SPACE_GENERIC)
3884 return x;
3886 return spu_legitimize_address (x, oldx, mode);
3889 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3890 struct attribute_spec.handler. */
3891 static tree
3892 spu_handle_fndecl_attribute (tree * node,
3893 tree name,
3894 tree args ATTRIBUTE_UNUSED,
3895 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3897 if (TREE_CODE (*node) != FUNCTION_DECL)
3899 warning (0, "%qE attribute only applies to functions",
3900 name);
3901 *no_add_attrs = true;
3904 return NULL_TREE;
3907 /* Handle the "vector" attribute. */
3908 static tree
3909 spu_handle_vector_attribute (tree * node, tree name,
3910 tree args ATTRIBUTE_UNUSED,
3911 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3913 tree type = *node, result = NULL_TREE;
3914 enum machine_mode mode;
3915 int unsigned_p;
3917 while (POINTER_TYPE_P (type)
3918 || TREE_CODE (type) == FUNCTION_TYPE
3919 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3920 type = TREE_TYPE (type);
3922 mode = TYPE_MODE (type);
3924 unsigned_p = TYPE_UNSIGNED (type);
3925 switch (mode)
3927 case DImode:
3928 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3929 break;
3930 case SImode:
3931 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3932 break;
3933 case HImode:
3934 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3935 break;
3936 case QImode:
3937 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3938 break;
3939 case SFmode:
3940 result = V4SF_type_node;
3941 break;
3942 case DFmode:
3943 result = V2DF_type_node;
3944 break;
3945 default:
3946 break;
3949 /* Propagate qualifiers attached to the element type
3950 onto the vector type. */
3951 if (result && result != type && TYPE_QUALS (type))
3952 result = build_qualified_type (result, TYPE_QUALS (type));
3954 *no_add_attrs = true; /* No need to hang on to the attribute. */
3956 if (!result)
3957 warning (0, "%qE attribute ignored", name);
3958 else
3959 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3961 return NULL_TREE;
3964 /* Return nonzero if FUNC is a naked function. */
3965 static int
3966 spu_naked_function_p (tree func)
3968 tree a;
3970 if (TREE_CODE (func) != FUNCTION_DECL)
3971 abort ();
3973 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3974 return a != NULL_TREE;
3978 spu_initial_elimination_offset (int from, int to)
3980 int saved_regs_size = spu_saved_regs_size ();
3981 int sp_offset = 0;
3982 if (!current_function_is_leaf || crtl->outgoing_args_size
3983 || get_frame_size () || saved_regs_size)
3984 sp_offset = STACK_POINTER_OFFSET;
3985 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3986 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3987 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3988 return get_frame_size ();
3989 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3990 return sp_offset + crtl->outgoing_args_size
3991 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3992 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3993 return get_frame_size () + saved_regs_size + sp_offset;
3994 else
3995 gcc_unreachable ();
3999 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
4001 enum machine_mode mode = TYPE_MODE (type);
4002 int byte_size = ((mode == BLKmode)
4003 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4005 /* Make sure small structs are left justified in a register. */
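/* Editorial example (assuming UNITS_PER_WORD is 16 bytes on SPU): an
   8-byte struct is returned as a PARALLEL containing a single DImode
   piece at offset 0, i.e. left justified in the first return register
   rather than padded on the left.  */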
4006 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4007 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4009 enum machine_mode smode;
4010 rtvec v;
4011 int i;
4012 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4013 int n = byte_size / UNITS_PER_WORD;
4014 v = rtvec_alloc (nregs);
4015 for (i = 0; i < n; i++)
4017 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4018 gen_rtx_REG (TImode,
4019 FIRST_RETURN_REGNUM
4020 + i),
4021 GEN_INT (UNITS_PER_WORD * i));
4022 byte_size -= UNITS_PER_WORD;
4025 if (n < nregs)
4027 if (byte_size < 4)
4028 byte_size = 4;
4029 smode =
4030 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4031 RTVEC_ELT (v, n) =
4032 gen_rtx_EXPR_LIST (VOIDmode,
4033 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4034 GEN_INT (UNITS_PER_WORD * n));
4036 return gen_rtx_PARALLEL (mode, v);
4038 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
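/* Illustrative sketch (not from the original source): assuming
   UNITS_PER_WORD is 16 bytes on SPU, returning a 20-byte struct builds
   a PARALLEL of two pieces: a TImode piece in FIRST_RETURN_REGNUM at
   byte offset 0 covering the first 16 bytes, and an SImode piece in
   FIRST_RETURN_REGNUM + 1 at offset 16 covering the remaining 4 bytes,
   keeping the struct left justified across the registers.  */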
4041 static rtx
4042 spu_function_arg (CUMULATIVE_ARGS *cum,
4043 enum machine_mode mode,
4044 const_tree type, bool named ATTRIBUTE_UNUSED)
4046 int byte_size;
4048 if (*cum >= MAX_REGISTER_ARGS)
4049 return 0;
4051 byte_size = ((mode == BLKmode)
4052 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4054 /* The ABI does not allow parameters to be passed partially in
4055 reg and partially in stack. */
4056 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4057 return 0;
4059 /* Make sure small structs are left justified in a register. */
4060 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4061 && byte_size < UNITS_PER_WORD && byte_size > 0)
4063 enum machine_mode smode;
4064 rtx gr_reg;
4065 if (byte_size < 4)
4066 byte_size = 4;
4067 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4068 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4069 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
4070 const0_rtx);
4071 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4073 else
4074 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
4077 static void
4078 spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4079 const_tree type, bool named ATTRIBUTE_UNUSED)
4081 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4082 ? 1
4083 : mode == BLKmode
4084 ? ((int_size_in_bytes (type) + 15) / 16)
4085 : mode == VOIDmode
4086 ? 1
4087 : HARD_REGNO_NREGS (cum, mode));
4090 /* Variable sized types are passed by reference. */
4091 static bool
4092 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4093 enum machine_mode mode ATTRIBUTE_UNUSED,
4094 const_tree type, bool named ATTRIBUTE_UNUSED)
4096 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4100 /* Var args. */
4102 /* Create and return the va_list datatype.
4104 On SPU, va_list is an array type equivalent to
4106 typedef struct __va_list_tag
4108 void *__args __attribute__((__aligned(16)));
4109 void *__skip __attribute__((__aligned(16)));
4111 } va_list[1];
4113 where __args points to the arg that will be returned by the next
4114 va_arg(), and __skip points to the previous stack frame such that
4115 when __args == __skip we should advance __args by 32 bytes. */
4116 static tree
4117 spu_build_builtin_va_list (void)
4119 tree f_args, f_skip, record, type_decl;
4120 bool owp;
4122 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4124 type_decl =
4125 build_decl (BUILTINS_LOCATION,
4126 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4128 f_args = build_decl (BUILTINS_LOCATION,
4129 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4130 f_skip = build_decl (BUILTINS_LOCATION,
4131 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4133 DECL_FIELD_CONTEXT (f_args) = record;
4134 DECL_ALIGN (f_args) = 128;
4135 DECL_USER_ALIGN (f_args) = 1;
4137 DECL_FIELD_CONTEXT (f_skip) = record;
4138 DECL_ALIGN (f_skip) = 128;
4139 DECL_USER_ALIGN (f_skip) = 1;
4141 TYPE_STUB_DECL (record) = type_decl;
4142 TYPE_NAME (record) = type_decl;
4143 TYPE_FIELDS (record) = f_args;
4144 DECL_CHAIN (f_args) = f_skip;
4146 /* We know this is being padded and we want it that way. It is an
4147 internal type, so hide the warnings from the user. */
4148 owp = warn_padded;
4149 warn_padded = false;
4151 layout_type (record);
4153 warn_padded = owp;
4155 /* The correct type is an array type of one element. */
4156 return build_array_type (record, build_index_type (size_zero_node));
4159 /* Implement va_start by filling the va_list structure VALIST.
4160 NEXTARG points to the first anonymous stack argument.
4162 The following global variables are used to initialize
4163 the va_list structure:
4165 crtl->args.info;
4166 the CUMULATIVE_ARGS for this function
4168 crtl->args.arg_offset_rtx:
4169 holds the offset of the first anonymous stack argument
4170 (relative to the virtual arg pointer). */
4172 static void
4173 spu_va_start (tree valist, rtx nextarg)
4175 tree f_args, f_skip;
4176 tree args, skip, t;
4178 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4179 f_skip = DECL_CHAIN (f_args);
4181 valist = build_simple_mem_ref (valist);
4182 args =
4183 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4184 skip =
4185 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4187 /* Find the __args area. */
4188 t = make_tree (TREE_TYPE (args), nextarg);
4189 if (crtl->args.pretend_args_size > 0)
4190 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4191 size_int (-STACK_POINTER_OFFSET));
4192 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4193 TREE_SIDE_EFFECTS (t) = 1;
4194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4196 /* Find the __skip area. */
4197 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4198 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4199 size_int (crtl->args.pretend_args_size
4200 - STACK_POINTER_OFFSET));
4201 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4202 TREE_SIDE_EFFECTS (t) = 1;
4203 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4206 /* Gimplify va_arg by updating the va_list structure
4207 VALIST as required to retrieve an argument of type
4208 TYPE, and returning that argument.
4210 ret = va_arg(VALIST, TYPE);
4212 generates code equivalent to:
4214 paddedsize = (sizeof(TYPE) + 15) & -16;
4215 if (VALIST.__args + paddedsize > VALIST.__skip
4216 && VALIST.__args <= VALIST.__skip)
4217 addr = VALIST.__skip + 32;
4218 else
4219 addr = VALIST.__args;
4220 VALIST.__args = addr + paddedsize;
4221 ret = *(TYPE *)addr;
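For example (a sketch, not part of the original comment): for a
20-byte TYPE, paddedsize = (20 + 15) & -16 = 32.  If __args + 32
would run past __skip while __args is still at or below __skip, the
argument lives beyond the skipped region, so addr becomes
__skip + 32; otherwise addr is simply __args.  Either way __args
then advances by the padded size.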
4223 static tree
4224 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4225 gimple_seq * post_p ATTRIBUTE_UNUSED)
4227 tree f_args, f_skip;
4228 tree args, skip;
4229 HOST_WIDE_INT size, rsize;
4230 tree paddedsize, addr, tmp;
4231 bool pass_by_reference_p;
4233 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4234 f_skip = DECL_CHAIN (f_args);
4236 valist = build_simple_mem_ref (valist);
4237 args =
4238 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4239 skip =
4240 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4242 addr = create_tmp_var (ptr_type_node, "va_arg");
4244 /* if an object is dynamically sized, a pointer to it is passed
4245 instead of the object itself. */
4246 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4247 false);
4248 if (pass_by_reference_p)
4249 type = build_pointer_type (type);
4250 size = int_size_in_bytes (type);
4251 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4253 /* build conditional expression to calculate addr. The expression
4254 will be gimplified later. */
4255 paddedsize = size_int (rsize);
4256 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4257 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4258 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4259 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4260 unshare_expr (skip)));
4262 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4263 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4264 size_int (32)), unshare_expr (args));
4266 gimplify_assign (addr, tmp, pre_p);
4268 /* update VALIST.__args */
4269 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4270 gimplify_assign (unshare_expr (args), tmp, pre_p);
4272 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4273 addr);
4275 if (pass_by_reference_p)
4276 addr = build_va_arg_indirect_ref (addr);
4278 return build_va_arg_indirect_ref (addr);
4281 /* Save parameter registers starting with the register that corresponds
4282 to the first unnamed parameters. If the first unnamed parameter is
4283 in the stack then save no registers. Set pretend_args_size to the
4284 amount of space needed to save the registers. */
4285 void
4286 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4287 tree type, int *pretend_size, int no_rtl)
4289 if (!no_rtl)
4291 rtx tmp;
4292 int regno;
4293 int offset;
4294 int ncum = *cum;
4296 /* cum currently points to the last named argument; we want to
4297 start at the next argument. */
4298 spu_function_arg_advance (&ncum, mode, type, true);
4300 offset = -STACK_POINTER_OFFSET;
4301 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4303 tmp = gen_frame_mem (V4SImode,
4304 plus_constant (virtual_incoming_args_rtx,
4305 offset));
4306 emit_move_insn (tmp,
4307 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4308 offset += 16;
4310 *pretend_size = offset + STACK_POINTER_OFFSET;
4314 static void
4315 spu_conditional_register_usage (void)
4317 if (flag_pic)
4319 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4320 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4324 /* This is called any time we inspect the alignment of a register for
4325 addresses. */
4326 static int
4327 reg_aligned_for_addr (rtx x)
4329 int regno =
4330 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4331 return REGNO_POINTER_ALIGN (regno) >= 128;
4334 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4335 into its SYMBOL_REF_FLAGS. */
4336 static void
4337 spu_encode_section_info (tree decl, rtx rtl, int first)
4339 default_encode_section_info (decl, rtl, first);
4341 /* If a variable has a forced alignment to < 16 bytes, mark it with
4342 SYMBOL_FLAG_ALIGN1. */
4343 if (TREE_CODE (decl) == VAR_DECL
4344 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4345 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4348 /* Return TRUE if we are certain the mem refers to a complete object
4349 which is both 16-byte aligned and padded to a 16-byte boundary. This
4350 would make it safe to store with a single instruction.
4351 We guarantee the alignment and padding for static objects by aligning
4352 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4353 FIXME: We currently cannot guarantee this for objects on the stack
4354 because assign_parm_setup_stack calls assign_stack_local with the
4355 alignment of the parameter mode and in that case the alignment never
4356 gets adjusted by LOCAL_ALIGNMENT. */
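/* Illustrative example (not from the original source): a file-scope
   "int x;" is forced to 16-byte alignment and padded out to 16 bytes
   by DATA_ALIGNMENT, so a single quadword store cannot clobber a
   neighboring object.  A 4-byte parameter living in a stack slot gets
   no such guarantee, which is why the FIXME above excludes stack
   objects.  */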
4357 static int
4358 store_with_one_insn_p (rtx mem)
4360 enum machine_mode mode = GET_MODE (mem);
4361 rtx addr = XEXP (mem, 0);
4362 if (mode == BLKmode)
4363 return 0;
4364 if (GET_MODE_SIZE (mode) >= 16)
4365 return 1;
4366 /* Only static objects. */
4367 if (GET_CODE (addr) == SYMBOL_REF)
4369 /* We use the associated declaration to make sure the access is
4370 referring to the whole object.
4371 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4372 if it is necessary. Will there be cases where one exists, and
4373 the other does not? Will there be cases where both exist, but
4374 have different types? */
4375 tree decl = MEM_EXPR (mem);
4376 if (decl
4377 && TREE_CODE (decl) == VAR_DECL
4378 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4379 return 1;
4380 decl = SYMBOL_REF_DECL (addr);
4381 if (decl
4382 && TREE_CODE (decl) == VAR_DECL
4383 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4384 return 1;
4386 return 0;
4389 /* Return 1 when the address is not valid for a simple load and store as
4390 required by the '_mov*' patterns. We could make this less strict
4391 for loads, but we prefer MEMs to look the same so they are more
4392 likely to be merged. */
4393 static int
4394 address_needs_split (rtx mem)
4396 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4397 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4398 || !(store_with_one_insn_p (mem)
4399 || mem_is_padded_component_ref (mem))))
4400 return 1;
4402 return 0;
4405 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4406 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4407 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4409 /* MEM is known to be an __ea qualified memory access. Emit a call to
4410 fetch the PPU memory into local store, and return its address in local
4411 store. */
4413 static void
4414 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4416 if (is_store)
4418 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4419 if (!cache_fetch_dirty)
4420 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4421 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4422 2, ea_addr, EAmode, ndirty, SImode);
4424 else
4426 if (!cache_fetch)
4427 cache_fetch = init_one_libfunc ("__cache_fetch");
4428 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4429 1, ea_addr, EAmode);
4433 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4434 dirty bit marking, inline.
4436 The cache control data structure is an array of
4438 struct __cache_tag_array
4440 unsigned int tag_lo[4];
4441 unsigned int tag_hi[4];
4442 void *data_pointer[4];
4443 int reserved[4];
4444 vector unsigned short dirty_bits[4];
4445 } */
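/* Rough picture of the layout (inferred from the code below, not part
   of the original comment): each set descriptor is 128 bytes (4 tags,
   4 data pointers, 4 reserved words, and 4 x 128 dirty bits), the same
   as the 128-byte cache line size.  tag_index = ea & (tag_array_size
   - 128) therefore doubles as the byte offset of the set within
   __cache_tag_array, block_off = ea & 127 is the offset within the
   line, and tag = ea & -128 is the value compared against the four
   stored tags.  */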
4447 static void
4448 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4450 rtx ea_addr_si;
4451 HOST_WIDE_INT v;
4452 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4453 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4454 rtx index_mask = gen_reg_rtx (SImode);
4455 rtx tag_arr = gen_reg_rtx (Pmode);
4456 rtx splat_mask = gen_reg_rtx (TImode);
4457 rtx splat = gen_reg_rtx (V4SImode);
4458 rtx splat_hi = NULL_RTX;
4459 rtx tag_index = gen_reg_rtx (Pmode);
4460 rtx block_off = gen_reg_rtx (SImode);
4461 rtx tag_addr = gen_reg_rtx (Pmode);
4462 rtx tag = gen_reg_rtx (V4SImode);
4463 rtx cache_tag = gen_reg_rtx (V4SImode);
4464 rtx cache_tag_hi = NULL_RTX;
4465 rtx cache_ptrs = gen_reg_rtx (TImode);
4466 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4467 rtx tag_equal = gen_reg_rtx (V4SImode);
4468 rtx tag_equal_hi = NULL_RTX;
4469 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4470 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4471 rtx eq_index = gen_reg_rtx (SImode);
4472 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4474 if (spu_ea_model != 32)
4476 splat_hi = gen_reg_rtx (V4SImode);
4477 cache_tag_hi = gen_reg_rtx (V4SImode);
4478 tag_equal_hi = gen_reg_rtx (V4SImode);
4481 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4482 emit_move_insn (tag_arr, tag_arr_sym);
4483 v = 0x0001020300010203LL;
4484 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4485 ea_addr_si = ea_addr;
4486 if (spu_ea_model != 32)
4487 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4489 /* tag_index = ea_addr & (tag_array_size - 128) */
4490 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4492 /* splat ea_addr to all 4 slots. */
4493 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4494 /* Similarly for high 32 bits of ea_addr. */
4495 if (spu_ea_model != 32)
4496 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4498 /* block_off = ea_addr & 127 */
4499 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4501 /* tag_addr = tag_arr + tag_index */
4502 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4504 /* Read cache tags. */
4505 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4506 if (spu_ea_model != 32)
4507 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4508 plus_constant (tag_addr, 16)));
4510 /* tag = ea_addr & -128 */
4511 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4513 /* Read all four cache data pointers. */
4514 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4515 plus_constant (tag_addr, 32)));
4517 /* Compare tags. */
4518 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4519 if (spu_ea_model != 32)
4521 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4522 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4525 /* At most one of the tags compares equal, so tag_equal has one
4526 32-bit slot set to all 1's, with the other slots all zero.
4527 gbb picks off the low bit of each byte in the 128-bit register,
4528 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4529 we have a hit. */
4530 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4531 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4533 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4534 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4536 /* This allows us to rotate the corresponding cache data pointer into
4537 slot 0 (rotating by eq_index mod 16 bytes). */
4538 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4539 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
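/* Example trace (a sketch, not from the original source): on a hit in
   way 2, tag_equal has word 2 set to all 1's, gbb yields 0x00f0, clz
   gives eq_index = 24, and rotating by 24 (mod 16 = 8 bytes) moves
   data_pointer[2], which sits at byte offset 8 of cache_ptrs, into the
   preferred slot extracted by the spu_convert above.  */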
4541 /* Add block offset to form final data address. */
4542 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4544 /* Check that we did hit. */
4545 hit_label = gen_label_rtx ();
4546 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4547 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4548 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4550 hit_ref, pc_rtx)));
4551 /* Say that this branch is very likely to happen. */
4552 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4553 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4555 ea_load_store (mem, is_store, ea_addr, data_addr);
4556 cont_label = gen_label_rtx ();
4557 emit_jump_insn (gen_jump (cont_label));
4558 emit_barrier ();
4560 emit_label (hit_label);
4562 if (is_store)
4564 HOST_WIDE_INT v_hi;
4565 rtx dirty_bits = gen_reg_rtx (TImode);
4566 rtx dirty_off = gen_reg_rtx (SImode);
4567 rtx dirty_128 = gen_reg_rtx (TImode);
4568 rtx neg_block_off = gen_reg_rtx (SImode);
4570 /* Set up mask with one dirty bit per byte of the mem we are
4571 writing, starting from top bit. */
4572 v_hi = v = -1;
4573 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4574 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4576 v_hi = v;
4577 v = 0;
4579 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
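/* Worked example (illustrative only): for a 4-byte (SImode) store,
   128 - 4 = 124, so v is shifted left by 124 & 63 = 60 and then moved
   into the high half because 124 >= 64, leaving a TImode mask with
   only bits 127..124 set -- one dirty bit per byte written, counted
   from the top.  The rotates below then shift this mask right by
   block_off bit positions so the bits line up with the bytes actually
   touched within the 128-byte line.  */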
4581 /* Form index into cache dirty_bits. eq_index is one of
4582 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4583 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4584 offset to each of the four dirty_bits elements. */
4585 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4587 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4589 /* Rotate bit mask to proper bit. */
4590 emit_insn (gen_negsi2 (neg_block_off, block_off));
4591 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4592 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4594 /* Or in the new dirty bits. */
4595 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4597 /* Store. */
4598 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4601 emit_label (cont_label);
4604 static rtx
4605 expand_ea_mem (rtx mem, bool is_store)
4607 rtx ea_addr;
4608 rtx data_addr = gen_reg_rtx (Pmode);
4609 rtx new_mem;
4611 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4612 if (optimize_size || optimize == 0)
4613 ea_load_store (mem, is_store, ea_addr, data_addr);
4614 else
4615 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4617 if (ea_alias_set == -1)
4618 ea_alias_set = new_alias_set ();
4620 /* We generate a new MEM RTX to refer to the copy of the data
4621 in the cache. We do not copy memory attributes (except the
4622 alignment) from the original MEM, as they may no longer apply
4623 to the cache copy. */
4624 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4625 set_mem_alias_set (new_mem, ea_alias_set);
4626 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4628 return new_mem;
4632 spu_expand_mov (rtx * ops, enum machine_mode mode)
4634 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4636 /* Perform the move in the destination SUBREG's inner mode. */
4637 ops[0] = SUBREG_REG (ops[0]);
4638 mode = GET_MODE (ops[0]);
4639 ops[1] = gen_lowpart_common (mode, ops[1]);
4640 gcc_assert (ops[1]);
4643 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4645 rtx from = SUBREG_REG (ops[1]);
4646 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4648 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4649 && GET_MODE_CLASS (imode) == MODE_INT
4650 && subreg_lowpart_p (ops[1]));
4652 if (GET_MODE_SIZE (imode) < 4)
4653 imode = SImode;
4654 if (imode != GET_MODE (from))
4655 from = gen_rtx_SUBREG (imode, from, 0);
4657 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4659 enum insn_code icode = convert_optab_handler (trunc_optab,
4660 mode, imode);
4661 emit_insn (GEN_FCN (icode) (ops[0], from));
4663 else
4664 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4665 return 1;
4668 /* At least one of the operands needs to be a register. */
4669 if ((reload_in_progress | reload_completed) == 0
4670 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4672 rtx temp = force_reg (mode, ops[1]);
4673 emit_move_insn (ops[0], temp);
4674 return 1;
4676 if (reload_in_progress || reload_completed)
4678 if (CONSTANT_P (ops[1]))
4679 return spu_split_immediate (ops);
4680 return 0;
4683 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4684 extend them. */
4685 if (GET_CODE (ops[1]) == CONST_INT)
4687 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4688 if (val != INTVAL (ops[1]))
4690 emit_move_insn (ops[0], GEN_INT (val));
4691 return 1;
4694 if (MEM_P (ops[0]))
4696 if (MEM_ADDR_SPACE (ops[0]))
4697 ops[0] = expand_ea_mem (ops[0], true);
4698 return spu_split_store (ops);
4700 if (MEM_P (ops[1]))
4702 if (MEM_ADDR_SPACE (ops[1]))
4703 ops[1] = expand_ea_mem (ops[1], false);
4704 return spu_split_load (ops);
4707 return 0;
4710 static void
4711 spu_convert_move (rtx dst, rtx src)
4713 enum machine_mode mode = GET_MODE (dst);
4714 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4715 rtx reg;
4716 gcc_assert (GET_MODE (src) == TImode);
4717 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4718 emit_insn (gen_rtx_SET (VOIDmode, reg,
4719 gen_rtx_TRUNCATE (int_mode,
4720 gen_rtx_LSHIFTRT (TImode, src,
4721 GEN_INT (int_mode == DImode ? 64 : 96)))));
4722 if (int_mode != mode)
4724 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4725 emit_move_insn (dst, reg);
4729 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4730 the address from SRC and SRC+16. Return a REG or CONST_INT that
4731 specifies how many bytes to rotate the loaded registers, plus any
4732 extra from EXTRA_ROTQBY. The address and rotate amounts are
4733 normalized to improve merging of loads and rotate computations. */
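/* Illustrative example (a sketch, not from the original source): for
   an SImode load from "aligned reg + 6", the constant is split into
   its 16-byte-aligned part (0 here) and the residue 6, the quadword is
   fetched with lqd from the aligned address, and a rotqbyi by 6 plus
   EXTRA_ROTQBY rotates the wanted word into the preferred slot -- the
   "aligned reg + unaligned const => lqd, rotqbyi" case listed in the
   function body below.  */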
4734 static rtx
4735 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4737 rtx addr = XEXP (src, 0);
4738 rtx p0, p1, rot, addr0, addr1;
4739 int rot_amt;
4741 rot = 0;
4742 rot_amt = 0;
4744 if (MEM_ALIGN (src) >= 128)
4745 /* Address is already aligned; simply perform a TImode load. */ ;
4746 else if (GET_CODE (addr) == PLUS)
4748 /* 8 cases:
4749 aligned reg + aligned reg => lqx
4750 aligned reg + unaligned reg => lqx, rotqby
4751 aligned reg + aligned const => lqd
4752 aligned reg + unaligned const => lqd, rotqbyi
4753 unaligned reg + aligned reg => lqx, rotqby
4754 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4755 unaligned reg + aligned const => lqd, rotqby
4756 unaligned reg + unaligned const -> not allowed by legitimate address
4758 p0 = XEXP (addr, 0);
4759 p1 = XEXP (addr, 1);
4760 if (!reg_aligned_for_addr (p0))
4762 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4764 rot = gen_reg_rtx (SImode);
4765 emit_insn (gen_addsi3 (rot, p0, p1));
4767 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4769 if (INTVAL (p1) > 0
4770 && REG_POINTER (p0)
4771 && INTVAL (p1) * BITS_PER_UNIT
4772 < REGNO_POINTER_ALIGN (REGNO (p0)))
4774 rot = gen_reg_rtx (SImode);
4775 emit_insn (gen_addsi3 (rot, p0, p1));
4776 addr = p0;
4778 else
4780 rtx x = gen_reg_rtx (SImode);
4781 emit_move_insn (x, p1);
4782 if (!spu_arith_operand (p1, SImode))
4783 p1 = x;
4784 rot = gen_reg_rtx (SImode);
4785 emit_insn (gen_addsi3 (rot, p0, p1));
4786 addr = gen_rtx_PLUS (Pmode, p0, x);
4789 else
4790 rot = p0;
4792 else
4794 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4796 rot_amt = INTVAL (p1) & 15;
4797 if (INTVAL (p1) & -16)
4799 p1 = GEN_INT (INTVAL (p1) & -16);
4800 addr = gen_rtx_PLUS (SImode, p0, p1);
4802 else
4803 addr = p0;
4805 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4806 rot = p1;
4809 else if (REG_P (addr))
4811 if (!reg_aligned_for_addr (addr))
4812 rot = addr;
4814 else if (GET_CODE (addr) == CONST)
4816 if (GET_CODE (XEXP (addr, 0)) == PLUS
4817 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4818 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4820 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4821 if (rot_amt & -16)
4822 addr = gen_rtx_CONST (Pmode,
4823 gen_rtx_PLUS (Pmode,
4824 XEXP (XEXP (addr, 0), 0),
4825 GEN_INT (rot_amt & -16)));
4826 else
4827 addr = XEXP (XEXP (addr, 0), 0);
4829 else
4831 rot = gen_reg_rtx (Pmode);
4832 emit_move_insn (rot, addr);
4835 else if (GET_CODE (addr) == CONST_INT)
4837 rot_amt = INTVAL (addr);
4838 addr = GEN_INT (rot_amt & -16);
4840 else if (!ALIGNED_SYMBOL_REF_P (addr))
4842 rot = gen_reg_rtx (Pmode);
4843 emit_move_insn (rot, addr);
4846 rot_amt += extra_rotby;
4848 rot_amt &= 15;
4850 if (rot && rot_amt)
4852 rtx x = gen_reg_rtx (SImode);
4853 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4854 rot = x;
4855 rot_amt = 0;
4857 if (!rot && rot_amt)
4858 rot = GEN_INT (rot_amt);
4860 addr0 = copy_rtx (addr);
4861 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4862 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4864 if (dst1)
4866 addr1 = plus_constant (copy_rtx (addr), 16);
4867 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4868 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4871 return rot;
4875 spu_split_load (rtx * ops)
4877 enum machine_mode mode = GET_MODE (ops[0]);
4878 rtx addr, load, rot;
4879 int rot_amt;
4881 if (GET_MODE_SIZE (mode) >= 16)
4882 return 0;
4884 addr = XEXP (ops[1], 0);
4885 gcc_assert (GET_CODE (addr) != AND);
4887 if (!address_needs_split (ops[1]))
4889 ops[1] = change_address (ops[1], TImode, addr);
4890 load = gen_reg_rtx (TImode);
4891 emit_insn (gen__movti (load, ops[1]));
4892 spu_convert_move (ops[0], load);
4893 return 1;
4896 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4898 load = gen_reg_rtx (TImode);
4899 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4901 if (rot)
4902 emit_insn (gen_rotqby_ti (load, load, rot));
4904 spu_convert_move (ops[0], load);
4905 return 1;
4909 spu_split_store (rtx * ops)
4911 enum machine_mode mode = GET_MODE (ops[0]);
4912 rtx reg;
4913 rtx addr, p0, p1, p1_lo, smem;
4914 int aform;
4915 int scalar;
4917 if (GET_MODE_SIZE (mode) >= 16)
4918 return 0;
4920 addr = XEXP (ops[0], 0);
4921 gcc_assert (GET_CODE (addr) != AND);
4923 if (!address_needs_split (ops[0]))
4925 reg = gen_reg_rtx (TImode);
4926 emit_insn (gen_spu_convert (reg, ops[1]));
4927 ops[0] = change_address (ops[0], TImode, addr);
4928 emit_move_insn (ops[0], reg);
4929 return 1;
4932 if (GET_CODE (addr) == PLUS)
4934 /* 8 cases:
4935 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4936 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4937 aligned reg + aligned const => lqd, c?d, shuf, stqx
4938 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4939 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4940 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4941 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4942 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
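(illustrative note, not part of the original list: storing an SImode
value to an unaligned address thus expands to a quadword load of the
enclosing 16 bytes, a cwd-style insertion mask built by gen_cpat, a
shufb that merges the new word into the old quadword, and a quadword
store of the result -- the lq*, c?*, shuf, stq* sequence above.)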
4944 aform = 0;
4945 p0 = XEXP (addr, 0);
4946 p1 = p1_lo = XEXP (addr, 1);
4947 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4949 p1_lo = GEN_INT (INTVAL (p1) & 15);
4950 if (reg_aligned_for_addr (p0))
4952 p1 = GEN_INT (INTVAL (p1) & -16);
4953 if (p1 == const0_rtx)
4954 addr = p0;
4955 else
4956 addr = gen_rtx_PLUS (SImode, p0, p1);
4958 else
4960 rtx x = gen_reg_rtx (SImode);
4961 emit_move_insn (x, p1);
4962 addr = gen_rtx_PLUS (SImode, p0, x);
4966 else if (REG_P (addr))
4968 aform = 0;
4969 p0 = addr;
4970 p1 = p1_lo = const0_rtx;
4972 else
4974 aform = 1;
4975 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4976 p1 = 0; /* aform doesn't use p1 */
4977 p1_lo = addr;
4978 if (ALIGNED_SYMBOL_REF_P (addr))
4979 p1_lo = const0_rtx;
4980 else if (GET_CODE (addr) == CONST
4981 && GET_CODE (XEXP (addr, 0)) == PLUS
4982 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4983 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4985 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4986 if ((v & -16) != 0)
4987 addr = gen_rtx_CONST (Pmode,
4988 gen_rtx_PLUS (Pmode,
4989 XEXP (XEXP (addr, 0), 0),
4990 GEN_INT (v & -16)));
4991 else
4992 addr = XEXP (XEXP (addr, 0), 0);
4993 p1_lo = GEN_INT (v & 15);
4995 else if (GET_CODE (addr) == CONST_INT)
4997 p1_lo = GEN_INT (INTVAL (addr) & 15);
4998 addr = GEN_INT (INTVAL (addr) & -16);
5000 else
5002 p1_lo = gen_reg_rtx (SImode);
5003 emit_move_insn (p1_lo, addr);
5007 gcc_assert (aform == 0 || aform == 1);
5008 reg = gen_reg_rtx (TImode);
5010 scalar = store_with_one_insn_p (ops[0]);
5011 if (!scalar)
5013 /* We could copy the flags from the ops[0] MEM to lmem here; we
5014 don't because we want this load to be optimized away if
5015 possible, and copying the flags will prevent that in certain
5016 cases, e.g. consider the volatile flag. */
5018 rtx pat = gen_reg_rtx (TImode);
5019 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5020 set_mem_alias_set (lmem, 0);
5021 emit_insn (gen_movti (reg, lmem));
5023 if (!p0 || reg_aligned_for_addr (p0))
5024 p0 = stack_pointer_rtx;
5025 if (!p1_lo)
5026 p1_lo = const0_rtx;
5028 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5029 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5031 else
5033 if (GET_CODE (ops[1]) == REG)
5034 emit_insn (gen_spu_convert (reg, ops[1]));
5035 else if (GET_CODE (ops[1]) == SUBREG)
5036 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5037 else
5038 abort ();
5041 if (GET_MODE_SIZE (mode) < 4 && scalar)
5042 emit_insn (gen_ashlti3
5043 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5045 smem = change_address (ops[0], TImode, copy_rtx (addr));
5046 /* We can't use the previous alias set because the memory has changed
5047 size and can potentially overlap objects of other types. */
5048 set_mem_alias_set (smem, 0);
5050 emit_insn (gen_movti (smem, reg));
5051 return 1;
5054 /* Return TRUE if X is MEM which is a struct member reference
5055 and the member can safely be loaded and stored with a single
5056 instruction because it is padded. */
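/* Illustrative example (not from the original source): in
     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };
   a reference to s.a qualifies: the field is 16-byte aligned and the
   following field is also 16-byte aligned, so the 12 bytes after "a"
   are padding and a full quadword store cannot damage "b".  */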
5057 static int
5058 mem_is_padded_component_ref (rtx x)
5060 tree t = MEM_EXPR (x);
5061 tree r;
5062 if (!t || TREE_CODE (t) != COMPONENT_REF)
5063 return 0;
5064 t = TREE_OPERAND (t, 1);
5065 if (!t || TREE_CODE (t) != FIELD_DECL
5066 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5067 return 0;
5068 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5069 r = DECL_FIELD_CONTEXT (t);
5070 if (!r || TREE_CODE (r) != RECORD_TYPE)
5071 return 0;
5072 /* Make sure they are the same mode */
5073 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5074 return 0;
5075 /* If there are no following fields then the field alignment assures
5076 the structure is padded to the alignment which means this field is
5077 padded too. */
5078 if (TREE_CHAIN (t) == 0)
5079 return 1;
5080 /* If the following field is also aligned then this field will be
5081 padded. */
5082 t = TREE_CHAIN (t);
5083 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5084 return 1;
5085 return 0;
5088 /* Parse the -mfixed-range= option string. */
5089 static void
5090 fix_range (const char *const_str)
5092 int i, first, last;
5093 char *str, *dash, *comma;
5095 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5096 REG2 are either register names or register numbers. The effect
5097 of this option is to mark the registers in the range from REG1 to
5098 REG2 as ``fixed'' so they won't be used by the compiler. */
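/* For example (assuming the usual $N register names), a hypothetical
   -mfixed-range=$80-$85,$90-$91 marks $80..$85 and $90..$91 as fixed
   and call-used, so the register allocator never uses them.  */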
5100 i = strlen (const_str);
5101 str = (char *) alloca (i + 1);
5102 memcpy (str, const_str, i + 1);
5104 while (1)
5106 dash = strchr (str, '-');
5107 if (!dash)
5109 warning (0, "value of -mfixed-range must have form REG1-REG2");
5110 return;
5112 *dash = '\0';
5113 comma = strchr (dash + 1, ',');
5114 if (comma)
5115 *comma = '\0';
5117 first = decode_reg_name (str);
5118 if (first < 0)
5120 warning (0, "unknown register name: %s", str);
5121 return;
5124 last = decode_reg_name (dash + 1);
5125 if (last < 0)
5127 warning (0, "unknown register name: %s", dash + 1);
5128 return;
5131 *dash = '-';
5133 if (first > last)
5135 warning (0, "%s-%s is an empty range", str, dash + 1);
5136 return;
5139 for (i = first; i <= last; ++i)
5140 fixed_regs[i] = call_used_regs[i] = 1;
5142 if (!comma)
5143 break;
5145 *comma = ',';
5146 str = comma + 1;
5150 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5151 can be generated using the fsmbi instruction. */
5153 fsmbi_const_p (rtx x)
5155 if (CONSTANT_P (x))
5157 /* We can always choose TImode for CONST_INT because the high bits
5158 of an SImode value will always be all 1s, i.e., valid for fsmbi. */
5159 enum immediate_class c = classify_immediate (x, TImode);
5160 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5162 return 0;
5165 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5166 can be generated using the cbd, chd, cwd or cdd instruction. */
5168 cpat_const_p (rtx x, enum machine_mode mode)
5170 if (CONSTANT_P (x))
5172 enum immediate_class c = classify_immediate (x, mode);
5173 return c == IC_CPAT;
5175 return 0;
5179 gen_cpat_const (rtx * ops)
5181 unsigned char dst[16];
5182 int i, offset, shift, isize;
5183 if (GET_CODE (ops[3]) != CONST_INT
5184 || GET_CODE (ops[2]) != CONST_INT
5185 || (GET_CODE (ops[1]) != CONST_INT
5186 && GET_CODE (ops[1]) != REG))
5187 return 0;
5188 if (GET_CODE (ops[1]) == REG
5189 && (!REG_POINTER (ops[1])
5190 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5191 return 0;
5193 for (i = 0; i < 16; i++)
5194 dst[i] = i + 16;
5195 isize = INTVAL (ops[3]);
5196 if (isize == 1)
5197 shift = 3;
5198 else if (isize == 2)
5199 shift = 2;
5200 else
5201 shift = 0;
5202 offset = (INTVAL (ops[2]) +
5203 (GET_CODE (ops[1]) ==
5204 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5205 for (i = 0; i < isize; i++)
5206 dst[offset + i] = i + shift;
5207 return array_to_constant (TImode, dst);
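/* Worked example (illustrative only): for a 4-byte insertion at
   constant offset 4 (ops[2] = 4, ops[3] = 4), shift is 0 and the
   pattern comes out as { 16,17,18,19, 0,1,2,3, 24,25,...,31 }: shufb
   then copies the old quadword (bytes 16..31) everywhere except bytes
   4..7, which take the new value from the preferred slot of the first
   operand.  */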
5210 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5211 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5212 than 16 bytes, the value is repeated across the rest of the array. */
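/* Worked example (illustrative only): an HImode (const_int 0x1234)
   fills arr[0] = 0x12 and arr[1] = 0x34, and the splat loop then
   repeats that two-byte pattern across the rest of the array, giving
   12 34 12 34 ... for all 16 bytes.  */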
5213 void
5214 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5216 HOST_WIDE_INT val;
5217 int i, j, first;
5219 memset (arr, 0, 16);
5220 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5221 if (GET_CODE (x) == CONST_INT
5222 || (GET_CODE (x) == CONST_DOUBLE
5223 && (mode == SFmode || mode == DFmode)))
5225 gcc_assert (mode != VOIDmode && mode != BLKmode);
5227 if (GET_CODE (x) == CONST_DOUBLE)
5228 val = const_double_to_hwint (x);
5229 else
5230 val = INTVAL (x);
5231 first = GET_MODE_SIZE (mode) - 1;
5232 for (i = first; i >= 0; i--)
5234 arr[i] = val & 0xff;
5235 val >>= 8;
5237 /* Splat the constant across the whole array. */
5238 for (j = 0, i = first + 1; i < 16; i++)
5240 arr[i] = arr[j];
5241 j = (j == first) ? 0 : j + 1;
5244 else if (GET_CODE (x) == CONST_DOUBLE)
5246 val = CONST_DOUBLE_LOW (x);
5247 for (i = 15; i >= 8; i--)
5249 arr[i] = val & 0xff;
5250 val >>= 8;
5252 val = CONST_DOUBLE_HIGH (x);
5253 for (i = 7; i >= 0; i--)
5255 arr[i] = val & 0xff;
5256 val >>= 8;
5259 else if (GET_CODE (x) == CONST_VECTOR)
5261 int units;
5262 rtx elt;
5263 mode = GET_MODE_INNER (mode);
5264 units = CONST_VECTOR_NUNITS (x);
5265 for (i = 0; i < units; i++)
5267 elt = CONST_VECTOR_ELT (x, i);
5268 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5270 if (GET_CODE (elt) == CONST_DOUBLE)
5271 val = const_double_to_hwint (elt);
5272 else
5273 val = INTVAL (elt);
5274 first = GET_MODE_SIZE (mode) - 1;
5275 if (first + i * GET_MODE_SIZE (mode) > 16)
5276 abort ();
5277 for (j = first; j >= 0; j--)
5279 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5280 val >>= 8;
5285 else
5286 gcc_unreachable();
5289 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5290 smaller than 16 bytes, use the bytes that would represent that value
5291 in a register, e.g., for QImode return the value of arr[3]. */
5293 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5295 enum machine_mode inner_mode;
5296 rtvec v;
5297 int units, size, i, j, k;
5298 HOST_WIDE_INT val;
5300 if (GET_MODE_CLASS (mode) == MODE_INT
5301 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5303 j = GET_MODE_SIZE (mode);
5304 i = j < 4 ? 4 - j : 0;
5305 for (val = 0; i < j; i++)
5306 val = (val << 8) | arr[i];
5307 val = trunc_int_for_mode (val, mode);
5308 return GEN_INT (val);
5311 if (mode == TImode)
5313 HOST_WIDE_INT high;
5314 for (i = high = 0; i < 8; i++)
5315 high = (high << 8) | arr[i];
5316 for (i = 8, val = 0; i < 16; i++)
5317 val = (val << 8) | arr[i];
5318 return immed_double_const (val, high, TImode);
5320 if (mode == SFmode)
5322 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5323 val = trunc_int_for_mode (val, SImode);
5324 return hwint_to_const_double (SFmode, val);
5326 if (mode == DFmode)
5328 for (i = 0, val = 0; i < 8; i++)
5329 val = (val << 8) | arr[i];
5330 return hwint_to_const_double (DFmode, val);
5333 if (!VECTOR_MODE_P (mode))
5334 abort ();
5336 units = GET_MODE_NUNITS (mode);
5337 size = GET_MODE_UNIT_SIZE (mode);
5338 inner_mode = GET_MODE_INNER (mode);
5339 v = rtvec_alloc (units);
5341 for (k = i = 0; i < units; ++i)
5343 val = 0;
5344 for (j = 0; j < size; j++, k++)
5345 val = (val << 8) | arr[k];
5347 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5348 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5349 else
5350 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5352 if (k > 16)
5353 abort ();
5355 return gen_rtx_CONST_VECTOR (mode, v);
5358 static void
5359 reloc_diagnostic (rtx x)
5361 tree decl = 0;
5362 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5363 return;
5365 if (GET_CODE (x) == SYMBOL_REF)
5366 decl = SYMBOL_REF_DECL (x);
5367 else if (GET_CODE (x) == CONST
5368 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5369 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5371 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5372 if (decl && !DECL_P (decl))
5373 decl = 0;
5375 /* The decl could be a string constant. */
5376 if (decl && DECL_P (decl))
5378 location_t loc;
5379 /* We use last_assemble_variable_decl to get line information. It's
5380 not always going to be right and might not even be close, but will
5381 be right for the more common cases. */
5382 if (!last_assemble_variable_decl || in_section == ctors_section)
5383 loc = DECL_SOURCE_LOCATION (decl);
5384 else
5385 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5387 if (TARGET_WARN_RELOC)
5388 warning_at (loc, 0,
5389 "creating run-time relocation for %qD", decl);
5390 else
5391 error_at (loc,
5392 "creating run-time relocation for %qD", decl);
5394 else
5396 if (TARGET_WARN_RELOC)
5397 warning_at (input_location, 0, "creating run-time relocation");
5398 else
5399 error_at (input_location, "creating run-time relocation");
5403 /* Hook into assemble_integer so we can generate an error for run-time
5404 relocations. The SPU ABI disallows them. */
5405 static bool
5406 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5408 /* By default run-time relocations aren't supported, but we allow them
5409 in case users support them in their own run-time loader, and we provide
5410 a warning for those users that don't. */
5411 if ((GET_CODE (x) == SYMBOL_REF)
5412 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5413 reloc_diagnostic (x);
5415 return default_assemble_integer (x, size, aligned_p);
5418 static void
5419 spu_asm_globalize_label (FILE * file, const char *name)
5421 fputs ("\t.global\t", file);
5422 assemble_name (file, name);
5423 fputs ("\n", file);
5426 static bool
5427 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5428 bool speed ATTRIBUTE_UNUSED)
5430 enum machine_mode mode = GET_MODE (x);
5431 int cost = COSTS_N_INSNS (2);
5433 /* Folding to a CONST_VECTOR will use extra space but there might
5434 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5435 only if it allows us to fold away multiple insns. Changing the cost
5436 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5437 because this cost will only be compared against a single insn.
5438 if (code == CONST_VECTOR)
5439 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5442 /* Use defaults for float operations. Not accurate but good enough. */
5443 if (mode == DFmode)
5445 *total = COSTS_N_INSNS (13);
5446 return true;
5448 if (mode == SFmode)
5450 *total = COSTS_N_INSNS (6);
5451 return true;
5453 switch (code)
5455 case CONST_INT:
5456 if (satisfies_constraint_K (x))
5457 *total = 0;
5458 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5459 *total = COSTS_N_INSNS (1);
5460 else
5461 *total = COSTS_N_INSNS (3);
5462 return true;
5464 case CONST:
5465 *total = COSTS_N_INSNS (3);
5466 return true;
5468 case LABEL_REF:
5469 case SYMBOL_REF:
5470 *total = COSTS_N_INSNS (0);
5471 return true;
5473 case CONST_DOUBLE:
5474 *total = COSTS_N_INSNS (5);
5475 return true;
5477 case FLOAT_EXTEND:
5478 case FLOAT_TRUNCATE:
5479 case FLOAT:
5480 case UNSIGNED_FLOAT:
5481 case FIX:
5482 case UNSIGNED_FIX:
5483 *total = COSTS_N_INSNS (7);
5484 return true;
5486 case PLUS:
5487 if (mode == TImode)
5489 *total = COSTS_N_INSNS (9);
5490 return true;
5492 break;
5494 case MULT:
5495 cost =
5496 GET_CODE (XEXP (x, 0)) ==
5497 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5498 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5500 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5502 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5503 cost = COSTS_N_INSNS (14);
5504 if ((val & 0xffff) == 0)
5505 cost = COSTS_N_INSNS (9);
5506 else if (val > 0 && val < 0x10000)
5507 cost = COSTS_N_INSNS (11);
5510 *total = cost;
5511 return true;
5512 case DIV:
5513 case UDIV:
5514 case MOD:
5515 case UMOD:
5516 *total = COSTS_N_INSNS (20);
5517 return true;
5518 case ROTATE:
5519 case ROTATERT:
5520 case ASHIFT:
5521 case ASHIFTRT:
5522 case LSHIFTRT:
5523 *total = COSTS_N_INSNS (4);
5524 return true;
5525 case UNSPEC:
5526 if (XINT (x, 1) == UNSPEC_CONVERT)
5527 *total = COSTS_N_INSNS (0);
5528 else
5529 *total = COSTS_N_INSNS (4);
5530 return true;
5532 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5533 if (GET_MODE_CLASS (mode) == MODE_INT
5534 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5535 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5536 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5537 *total = cost;
5538 return true;
5541 static enum machine_mode
5542 spu_unwind_word_mode (void)
5544 return SImode;
5547 /* Decide whether we can make a sibling call to a function. DECL is the
5548 declaration of the function being targeted by the call and EXP is the
5549 CALL_EXPR representing the call. */
5550 static bool
5551 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5553 return decl && !TARGET_LARGE_MEM;
5556 /* We need to correctly update the back chain pointer and the Available
5557 Stack Size (which is in the second slot of the sp register). */
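/* Sketch of the mechanism below (an inference, not from the original
   comment): the shufb pattern 00010203 repeated splats the requested
   size from the preferred slot of OP1 into all four words, so a single
   subv4si decrements both the stack pointer in slot 0 and the
   Available Stack Size in slot 1 at once; the saved back chain is then
   rewritten at the new stack bottom.  */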
5558 void
5559 spu_allocate_stack (rtx op0, rtx op1)
5561 HOST_WIDE_INT v;
5562 rtx chain = gen_reg_rtx (V4SImode);
5563 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5564 rtx sp = gen_reg_rtx (V4SImode);
5565 rtx splatted = gen_reg_rtx (V4SImode);
5566 rtx pat = gen_reg_rtx (TImode);
5568 /* copy the back chain so we can save it back again. */
5569 emit_move_insn (chain, stack_bot);
5571 op1 = force_reg (SImode, op1);
5573 v = 0x1020300010203ll;
5574 emit_move_insn (pat, immed_double_const (v, v, TImode));
5575 emit_insn (gen_shufb (splatted, op1, op1, pat));
5577 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5578 emit_insn (gen_subv4si3 (sp, sp, splatted));
5580 if (flag_stack_check)
5582 rtx avail = gen_reg_rtx(SImode);
5583 rtx result = gen_reg_rtx(SImode);
5584 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5585 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5586 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5589 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5591 emit_move_insn (stack_bot, chain);
5593 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5596 void
5597 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5599 static unsigned char arr[16] =
5600 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5601 rtx temp = gen_reg_rtx (SImode);
5602 rtx temp2 = gen_reg_rtx (SImode);
5603 rtx temp3 = gen_reg_rtx (V4SImode);
5604 rtx temp4 = gen_reg_rtx (V4SImode);
5605 rtx pat = gen_reg_rtx (TImode);
5606 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5608 /* Restore the backchain from the first word, sp from the second. */
5609 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5610 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5612 emit_move_insn (pat, array_to_constant (TImode, arr));
5614 /* Compute Available Stack Size for sp */
5615 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5616 emit_insn (gen_shufb (temp3, temp, temp, pat));
5618 /* Compute Available Stack Size for back chain */
5619 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5620 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5621 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5623 emit_insn (gen_addv4si3 (sp, sp, temp3));
5624 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5627 static void
5628 spu_init_libfuncs (void)
5630 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5631 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5632 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5633 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5634 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5635 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5636 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5637 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5638 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5639 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5640 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5642 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5643 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5645 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5646 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5647 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5648 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5649 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5650 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5651 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5652 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5653 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5654 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5655 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5656 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5658 set_optab_libfunc (smul_optab, TImode, "__multi3");
5659 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5660 set_optab_libfunc (smod_optab, TImode, "__modti3");
5661 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5662 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5663 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5666 /* Make a subreg, stripping any existing subreg. We could possibly just
5667 call simplify_subreg, but in this case we know what we want. */
5669 spu_gen_subreg (enum machine_mode mode, rtx x)
5671 if (GET_CODE (x) == SUBREG)
5672 x = SUBREG_REG (x);
5673 if (GET_MODE (x) == mode)
5674 return x;
5675 return gen_rtx_SUBREG (mode, x, 0);
5678 static bool
5679 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5681 return (TYPE_MODE (type) == BLKmode
5682 && ((type) == 0
5683 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5684 || int_size_in_bytes (type) >
5685 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5688 /* Create the built-in types and functions */
5690 enum spu_function_code
5692 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5693 #include "spu-builtins.def"
5694 #undef DEF_BUILTIN
5695 NUM_SPU_BUILTINS
5698 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5700 struct spu_builtin_description spu_builtins[] = {
5701 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5702 {fcode, icode, name, type, params},
5703 #include "spu-builtins.def"
5704 #undef DEF_BUILTIN
5707 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5709 /* Returns the spu builtin decl for CODE. */
5711 static tree
5712 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5714 if (code >= NUM_SPU_BUILTINS)
5715 return error_mark_node;
5717 return spu_builtin_decls[code];
5721 static void
5722 spu_init_builtins (void)
5724 struct spu_builtin_description *d;
5725 unsigned int i;
5727 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5728 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5729 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5730 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5731 V4SF_type_node = build_vector_type (float_type_node, 4);
5732 V2DF_type_node = build_vector_type (double_type_node, 2);
5734 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5735 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5736 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5737 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5739 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5741 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5742 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5744 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5748 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5750 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5751 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5754 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5755 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5756 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5757 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5758 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5759 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5760 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5761 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5763 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5764 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5766 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5768 spu_builtin_types[SPU_BTI_PTR] =
5769 build_pointer_type (build_qualified_type
5770 (void_type_node,
5771 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5773 /* For each builtin we build a new prototype. The tree code will make
5774 sure nodes are shared. */
5775 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5777 tree p;
5778 char name[64]; /* build_function will make a copy. */
5779 int parm;
5781 if (d->name == 0)
5782 continue;
5784 /* Find last parm. */
5785 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5788 p = void_list_node;
5789 while (parm > 1)
5790 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5792 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5794 sprintf (name, "__builtin_%s", d->name);
5795 spu_builtin_decls[i] =
5796 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5797 if (d->fcode == SPU_MASK_FOR_LOAD)
5798 TREE_READONLY (spu_builtin_decls[i]) = 1;
5800 /* These builtins don't throw. */
5801 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5805 void
5806 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5808 static unsigned char arr[16] =
5809 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5811 rtx temp = gen_reg_rtx (Pmode);
5812 rtx temp2 = gen_reg_rtx (V4SImode);
5813 rtx temp3 = gen_reg_rtx (V4SImode);
5814 rtx pat = gen_reg_rtx (TImode);
5815 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5817 emit_move_insn (pat, array_to_constant (TImode, arr));
5819 /* Restore the sp. */
5820 emit_move_insn (temp, op1);
5821 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5823 /* Compute available stack size for sp. */
5824 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5825 emit_insn (gen_shufb (temp3, temp, temp, pat));
5827 emit_insn (gen_addv4si3 (sp, sp, temp3));
5828 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5832 spu_safe_dma (HOST_WIDE_INT channel)
5834 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5837 void
5838 spu_builtin_splats (rtx ops[])
5840 enum machine_mode mode = GET_MODE (ops[0]);
5841 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5843 unsigned char arr[16];
5844 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5845 emit_move_insn (ops[0], array_to_constant (mode, arr));
5847 else
5849 rtx reg = gen_reg_rtx (TImode);
5850 rtx shuf;
5851 if (GET_CODE (ops[1]) != REG
5852 && GET_CODE (ops[1]) != SUBREG)
5853 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5854 switch (mode)
5856 case V2DImode:
5857 case V2DFmode:
5858 shuf =
5859 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5860 TImode);
5861 break;
5862 case V4SImode:
5863 case V4SFmode:
5864 shuf =
5865 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5866 TImode);
5867 break;
5868 case V8HImode:
5869 shuf =
5870 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5871 TImode);
5872 break;
5873 case V16QImode:
5874 shuf =
5875 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5876 TImode);
5877 break;
5878 default:
5879 abort ();
5881 emit_move_insn (reg, shuf);
5882 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
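/* Note on the shuffle constants above (an inference, not from the
   original source): each pattern repeats the byte indices of the
   element sitting in the preferred slot of ops[1] -- e.g. 00010203 for
   a 4-byte element, 0203 for a halfword, or 03 for a single byte -- so
   the shufb broadcasts that one element into every slot of the
   result.  */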
5886 void
5887 spu_builtin_extract (rtx ops[])
5889 enum machine_mode mode;
5890 rtx rot, from, tmp;
5892 mode = GET_MODE (ops[1]);
5894 if (GET_CODE (ops[2]) == CONST_INT)
5896 switch (mode)
5898 case V16QImode:
5899 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5900 break;
5901 case V8HImode:
5902 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5903 break;
5904 case V4SFmode:
5905 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5906 break;
5907 case V4SImode:
5908 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5909 break;
5910 case V2DImode:
5911 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5912 break;
5913 case V2DFmode:
5914 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5915 break;
5916 default:
5917 abort ();
5919 return;
5922 from = spu_gen_subreg (TImode, ops[1]);
5923 rot = gen_reg_rtx (TImode);
5924 tmp = gen_reg_rtx (SImode);
5926 switch (mode)
5928 case V16QImode:
5929 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5930 break;
5931 case V8HImode:
5932 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5933 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5934 break;
5935 case V4SFmode:
5936 case V4SImode:
5937 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5938 break;
5939 case V2DImode:
5940 case V2DFmode:
5941 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5942 break;
5943 default:
5944 abort ();
5946 emit_insn (gen_rotqby_ti (rot, from, tmp));
5948 emit_insn (gen_spu_convert (ops[0], rot));
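/* Illustrative note for the non-constant index path above: rotqby rotates
   the whole quadword left by TMP bytes, so TMP is the element's byte
   offset minus the offset of the scalar preferred slot (byte 3 for QI,
   bytes 2-3 for HI, byte 0 for SI/SF and DI/DF).  For example, extracting
   element 5 of a V8HImode vector computes tmp = 2 * 5 - 2 = 8, which
   rotates bytes 10-11 into bytes 2-3, where gen_spu_convert then reads
   the halfword result. */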
5951 void
5952 spu_builtin_insert (rtx ops[])
5954 enum machine_mode mode = GET_MODE (ops[0]);
5955 enum machine_mode imode = GET_MODE_INNER (mode);
5956 rtx mask = gen_reg_rtx (TImode);
5957 rtx offset;
5959 if (GET_CODE (ops[3]) == CONST_INT)
5960 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5961 else
5963 offset = gen_reg_rtx (SImode);
5964 emit_insn (gen_mulsi3
5965 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5967 emit_insn (gen_cpat
5968 (mask, stack_pointer_rtx, offset,
5969 GEN_INT (GET_MODE_SIZE (imode))));
5970 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5973 void
5974 spu_builtin_promote (rtx ops[])
5976 enum machine_mode mode, imode;
5977 rtx rot, from, offset;
5978 HOST_WIDE_INT pos;
5980 mode = GET_MODE (ops[0]);
5981 imode = GET_MODE_INNER (mode);
5983 from = gen_reg_rtx (TImode);
5984 rot = spu_gen_subreg (TImode, ops[0]);
5986 emit_insn (gen_spu_convert (from, ops[1]));
5988 if (GET_CODE (ops[2]) == CONST_INT)
5990 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5991 if (GET_MODE_SIZE (imode) < 4)
5992 pos += 4 - GET_MODE_SIZE (imode);
5993 offset = GEN_INT (pos & 15);
5995 else
5997 offset = gen_reg_rtx (SImode);
5998 switch (mode)
6000 case V16QImode:
6001 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6002 break;
6003 case V8HImode:
6004 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6005 emit_insn (gen_addsi3 (offset, offset, offset));
6006 break;
6007 case V4SFmode:
6008 case V4SImode:
6009 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6010 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6011 break;
6012 case V2DImode:
6013 case V2DFmode:
6014 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6015 break;
6016 default:
6017 abort ();
6020 emit_insn (gen_rotqby_ti (rot, from, offset));
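/* Illustrative note: spu_builtin_promote rotates in the opposite direction
   from the extract above, so the count is the negative of the element's
   byte offset (adjusted for the preferred slot), taken modulo 16.  For a
   constant index, promoting into element 6 of a V16QImode vector gives
   pos = -1 * 6 + (4 - 1) = -3 and offset = -3 & 15 = 13; a 13-byte left
   rotate moves the preferred byte slot (byte 3) to byte 6. */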
6023 static void
6024 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
6026 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
6027 rtx shuf = gen_reg_rtx (V4SImode);
6028 rtx insn = gen_reg_rtx (V4SImode);
6029 rtx shufc;
6030 rtx insnc;
6031 rtx mem;
6033 fnaddr = force_reg (SImode, fnaddr);
6034 cxt = force_reg (SImode, cxt);
6036 if (TARGET_LARGE_MEM)
6038 rtx rotl = gen_reg_rtx (V4SImode);
6039 rtx mask = gen_reg_rtx (V4SImode);
6040 rtx bi = gen_reg_rtx (SImode);
6041 static unsigned char const shufa[16] = {
6042 2, 3, 0, 1, 18, 19, 16, 17,
6043 0, 1, 2, 3, 16, 17, 18, 19
6045 static unsigned char const insna[16] = {
6046 0x41, 0, 0, 79,
6047 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6048 0x60, 0x80, 0, 79,
6049 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6052 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6053 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6055 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6056 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6057 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6058 emit_insn (gen_selb (insn, insnc, rotl, mask));
6060 mem = adjust_address (m_tramp, V4SImode, 0);
6061 emit_move_insn (mem, insn);
6063 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6064 mem = adjust_address (m_tramp, Pmode, 16);
6065 emit_move_insn (mem, bi);
6067 else
6069 rtx scxt = gen_reg_rtx (SImode);
6070 rtx sfnaddr = gen_reg_rtx (SImode);
6071 static unsigned char const insna[16] = {
6072 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6073 0x30, 0, 0, 0,
6074 0, 0, 0, 0,
6075 0, 0, 0, 0
6078 shufc = gen_reg_rtx (TImode);
6079 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6081 /* By or'ing all of cxt with the ila opcode we assume cxt
6082 fits in 18 bits and its last 4 bits are zero. This will be true if
6083 the stack pointer is initialized to 0x3fff0 at program start;
6084 otherwise the ila instruction will be garbage. */
6086 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6087 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6088 emit_insn (gen_cpat
6089 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6090 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6091 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6093 mem = adjust_address (m_tramp, V4SImode, 0);
6094 emit_move_insn (mem, insn);
6096 emit_insn (gen_sync ());
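/* Illustrative note, assuming the usual SPU RI18/RI16 instruction layouts
   in which the immediate field sits just above the 7-bit RT field: in the
   non-TARGET_LARGE_MEM path above, scxt = cxt << 7 places the static
   chain value in the 18-bit immediate of the ila template (the insna word
   starting 0x42), and sfnaddr = fnaddr << 5 places the word address
   fnaddr >> 2 in the 16-bit immediate of the branch template (the insna
   word starting 0x30).  The cpat/shufb pair puts the shifted fnaddr in
   the second word and the shifted cxt elsewhere, and the final ior merges
   them with the opcode templates in insnc. */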
6099 void
6100 spu_expand_sign_extend (rtx ops[])
6102 unsigned char arr[16];
6103 rtx pat = gen_reg_rtx (TImode);
6104 rtx sign, c;
6105 int i, last;
6106 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6107 if (GET_MODE (ops[1]) == QImode)
6109 sign = gen_reg_rtx (HImode);
6110 emit_insn (gen_extendqihi2 (sign, ops[1]));
6111 for (i = 0; i < 16; i++)
6112 arr[i] = 0x12;
6113 arr[last] = 0x13;
6115 else
6117 for (i = 0; i < 16; i++)
6118 arr[i] = 0x10;
6119 switch (GET_MODE (ops[1]))
6121 case HImode:
6122 sign = gen_reg_rtx (SImode);
6123 emit_insn (gen_extendhisi2 (sign, ops[1]));
6124 arr[last] = 0x03;
6125 arr[last - 1] = 0x02;
6126 break;
6127 case SImode:
6128 sign = gen_reg_rtx (SImode);
6129 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6130 for (i = 0; i < 4; i++)
6131 arr[last - i] = 3 - i;
6132 break;
6133 case DImode:
6134 sign = gen_reg_rtx (SImode);
6135 c = gen_reg_rtx (SImode);
6136 emit_insn (gen_spu_convert (c, ops[1]));
6137 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6138 for (i = 0; i < 8; i++)
6139 arr[last - i] = 7 - i;
6140 break;
6141 default:
6142 abort ();
6145 emit_move_insn (pat, array_to_constant (TImode, arr));
6146 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
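/* Illustrative note, assuming shufb control bytes 0x00-0x0f select from
   ops[1] and 0x10-0x1f select from the sign register: for an SImode to
   DImode extension, last == 7 and the pattern becomes
     { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, 0x10, ... }
   so bytes 0-3 of the result copy the sign register's top byte (all zeros
   or all ones after the arithmetic shift by 31) and bytes 4-7 copy the
   original 32-bit value. */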
6149 /* Expand vector initialization. If there are any constant parts,
6150 load the constant parts first. Then load any non-constant parts. */
6151 void
6152 spu_expand_vector_init (rtx target, rtx vals)
6154 enum machine_mode mode = GET_MODE (target);
6155 int n_elts = GET_MODE_NUNITS (mode);
6156 int n_var = 0;
6157 bool all_same = true;
6158 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6159 int i;
6161 first = XVECEXP (vals, 0, 0);
6162 for (i = 0; i < n_elts; ++i)
6164 x = XVECEXP (vals, 0, i);
6165 if (!(CONST_INT_P (x)
6166 || GET_CODE (x) == CONST_DOUBLE
6167 || GET_CODE (x) == CONST_FIXED))
6168 ++n_var;
6169 else
6171 if (first_constant == NULL_RTX)
6172 first_constant = x;
6174 if (i > 0 && !rtx_equal_p (x, first))
6175 all_same = false;
6178 /* if all elements are the same, use splats to repeat elements */
6179 if (all_same)
6181 if (!CONSTANT_P (first)
6182 && !register_operand (first, GET_MODE (x)))
6183 first = force_reg (GET_MODE (first), first);
6184 emit_insn (gen_spu_splats (target, first));
6185 return;
6188 /* load constant parts */
6189 if (n_var != n_elts)
6191 if (n_var == 0)
6193 emit_move_insn (target,
6194 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6196 else
6198 rtx constant_parts_rtx = copy_rtx (vals);
6200 gcc_assert (first_constant != NULL_RTX);
6201 /* Fill empty slots with the first constant; this increases
6202 our chance of using splats in the recursive call below. */
6203 for (i = 0; i < n_elts; ++i)
6205 x = XVECEXP (constant_parts_rtx, 0, i);
6206 if (!(CONST_INT_P (x)
6207 || GET_CODE (x) == CONST_DOUBLE
6208 || GET_CODE (x) == CONST_FIXED))
6209 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6212 spu_expand_vector_init (target, constant_parts_rtx);
6216 /* load variable parts */
6217 if (n_var != 0)
6219 rtx insert_operands[4];
6221 insert_operands[0] = target;
6222 insert_operands[2] = target;
6223 for (i = 0; i < n_elts; ++i)
6225 x = XVECEXP (vals, 0, i);
6226 if (!(CONST_INT_P (x)
6227 || GET_CODE (x) == CONST_DOUBLE
6228 || GET_CODE (x) == CONST_FIXED))
6230 if (!register_operand (x, GET_MODE (x)))
6231 x = force_reg (GET_MODE (x), x);
6232 insert_operands[1] = x;
6233 insert_operands[3] = GEN_INT (i);
6234 spu_builtin_insert (insert_operands);
6240 /* Return the insn index of the vector compare instruction for the given
6241 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6243 static int
6244 get_vec_cmp_insn (enum rtx_code code,
6245 enum machine_mode dest_mode,
6246 enum machine_mode op_mode)
6249 switch (code)
6251 case EQ:
6252 if (dest_mode == V16QImode && op_mode == V16QImode)
6253 return CODE_FOR_ceq_v16qi;
6254 if (dest_mode == V8HImode && op_mode == V8HImode)
6255 return CODE_FOR_ceq_v8hi;
6256 if (dest_mode == V4SImode && op_mode == V4SImode)
6257 return CODE_FOR_ceq_v4si;
6258 if (dest_mode == V4SImode && op_mode == V4SFmode)
6259 return CODE_FOR_ceq_v4sf;
6260 if (dest_mode == V2DImode && op_mode == V2DFmode)
6261 return CODE_FOR_ceq_v2df;
6262 break;
6263 case GT:
6264 if (dest_mode == V16QImode && op_mode == V16QImode)
6265 return CODE_FOR_cgt_v16qi;
6266 if (dest_mode == V8HImode && op_mode == V8HImode)
6267 return CODE_FOR_cgt_v8hi;
6268 if (dest_mode == V4SImode && op_mode == V4SImode)
6269 return CODE_FOR_cgt_v4si;
6270 if (dest_mode == V4SImode && op_mode == V4SFmode)
6271 return CODE_FOR_cgt_v4sf;
6272 if (dest_mode == V2DImode && op_mode == V2DFmode)
6273 return CODE_FOR_cgt_v2df;
6274 break;
6275 case GTU:
6276 if (dest_mode == V16QImode && op_mode == V16QImode)
6277 return CODE_FOR_clgt_v16qi;
6278 if (dest_mode == V8HImode && op_mode == V8HImode)
6279 return CODE_FOR_clgt_v8hi;
6280 if (dest_mode == V4SImode && op_mode == V4SImode)
6281 return CODE_FOR_clgt_v4si;
6282 break;
6283 default:
6284 break;
6286 return -1;
6289 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6290 DMODE is the expected destination mode. This is a recursive function. */
6292 static rtx
6293 spu_emit_vector_compare (enum rtx_code rcode,
6294 rtx op0, rtx op1,
6295 enum machine_mode dmode)
6297 int vec_cmp_insn;
6298 rtx mask;
6299 enum machine_mode dest_mode;
6300 enum machine_mode op_mode = GET_MODE (op1);
6302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6304 /* Single-precision floating point vector compare instructions use a
6305 V4SImode destination; double-precision instructions use a V2DImode
6306 destination. Move the destination to the appropriate mode later. */
6307 if (dmode == V4SFmode)
6308 dest_mode = V4SImode;
6309 else if (dmode == V2DFmode)
6310 dest_mode = V2DImode;
6311 else
6312 dest_mode = dmode;
6314 mask = gen_reg_rtx (dest_mode);
6315 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6317 if (vec_cmp_insn == -1)
6319 bool swap_operands = false;
6320 bool try_again = false;
6321 switch (rcode)
6323 case LT:
6324 rcode = GT;
6325 swap_operands = true;
6326 try_again = true;
6327 break;
6328 case LTU:
6329 rcode = GTU;
6330 swap_operands = true;
6331 try_again = true;
6332 break;
6333 case NE:
6334 /* Treat A != B as ~(A==B). */
6336 enum insn_code nor_code;
6337 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6338 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6339 gcc_assert (nor_code != CODE_FOR_nothing);
6340 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6341 if (dmode != dest_mode)
6343 rtx temp = gen_reg_rtx (dest_mode);
6344 convert_move (temp, mask, 0);
6345 return temp;
6347 return mask;
6349 break;
6350 case GE:
6351 case GEU:
6352 case LE:
6353 case LEU:
6354 /* Try GT/GTU/LT/LTU OR EQ */
6356 rtx c_rtx, eq_rtx;
6357 enum insn_code ior_code;
6358 enum rtx_code new_code;
6360 switch (rcode)
6362 case GE: new_code = GT; break;
6363 case GEU: new_code = GTU; break;
6364 case LE: new_code = LT; break;
6365 case LEU: new_code = LTU; break;
6366 default:
6367 gcc_unreachable ();
6370 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6371 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6373 ior_code = optab_handler (ior_optab, dest_mode);
6374 gcc_assert (ior_code != CODE_FOR_nothing);
6375 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6376 if (dmode != dest_mode)
6378 rtx temp = gen_reg_rtx (dest_mode);
6379 convert_move (temp, mask, 0);
6380 return temp;
6382 return mask;
6384 break;
6385 default:
6386 gcc_unreachable ();
6389 /* You only get two chances. */
6390 if (try_again)
6391 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6393 gcc_assert (vec_cmp_insn != -1);
6395 if (swap_operands)
6397 rtx tmp;
6398 tmp = op0;
6399 op0 = op1;
6400 op1 = tmp;
6404 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6405 if (dmode != dest_mode)
6407 rtx temp = gen_reg_rtx (dest_mode);
6408 convert_move (temp, mask, 0);
6409 return temp;
6411 return mask;
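/* Worked example: a V4SFmode LE comparison has no direct pattern, so the
   code above synthesizes it.  LE selects new_code = LT, and the recursive
   LT call also has no pattern, so it retries as GT with the operands
   swapped (cgt_v4sf on op1, op0).  An EQ compare (ceq_v4sf) is emitted as
   well, the two V4SImode masks are combined with ior, and because the
   requested dmode was V4SFmode rather than V4SImode the result is copied
   into a fresh register via convert_move before being returned. */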
6415 /* Emit vector conditional expression.
6416 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6417 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6420 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6421 rtx cond, rtx cc_op0, rtx cc_op1)
6423 enum machine_mode dest_mode = GET_MODE (dest);
6424 enum rtx_code rcode = GET_CODE (cond);
6425 rtx mask;
6427 /* Get the vector mask for the given relational operations. */
6428 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6430 emit_insn (gen_selb (dest, op2, op1, mask));
6432 return 1;
6435 static rtx
6436 spu_force_reg (enum machine_mode mode, rtx op)
6438 rtx x, r;
6439 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6441 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6442 || GET_MODE (op) == BLKmode)
6443 return force_reg (mode, convert_to_mode (mode, op, 0));
6444 abort ();
6447 r = force_reg (GET_MODE (op), op);
6448 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6450 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6451 if (x)
6452 return x;
6455 x = gen_reg_rtx (mode);
6456 emit_insn (gen_spu_convert (x, r));
6457 return x;
6460 static void
6461 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6463 HOST_WIDE_INT v = 0;
6464 int lsbits;
6465 /* Check the range of immediate operands. */
6466 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6468 int range = p - SPU_BTI_7;
6470 if (!CONSTANT_P (op))
6471 error ("%s expects an integer literal in the range [%d, %d]",
6472 d->name,
6473 spu_builtin_range[range].low, spu_builtin_range[range].high);
6475 if (GET_CODE (op) == CONST
6476 && (GET_CODE (XEXP (op, 0)) == PLUS
6477 || GET_CODE (XEXP (op, 0)) == MINUS))
6479 v = INTVAL (XEXP (XEXP (op, 0), 1));
6480 op = XEXP (XEXP (op, 0), 0);
6482 else if (GET_CODE (op) == CONST_INT)
6483 v = INTVAL (op);
6484 else if (GET_CODE (op) == CONST_VECTOR
6485 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6486 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6488 /* The default for v is 0, which is valid in every range. */
6489 if (v < spu_builtin_range[range].low
6490 || v > spu_builtin_range[range].high)
6491 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6492 d->name,
6493 spu_builtin_range[range].low, spu_builtin_range[range].high,
6496 switch (p)
6498 case SPU_BTI_S10_4:
6499 lsbits = 4;
6500 break;
6501 case SPU_BTI_U16_2:
6502 /* This is only used in lqa and stqa. Even though the insns
6503 encode 16 bits of the address (all but the 2 least
6504 significant), only 14 bits are used because the address is
6505 masked to be 16-byte aligned. */
6506 lsbits = 4;
6507 break;
6508 case SPU_BTI_S16_2:
6509 /* This is used for lqr and stqr. */
6510 lsbits = 2;
6511 break;
6512 default:
6513 lsbits = 0;
6516 if (GET_CODE (op) == LABEL_REF
6517 || (GET_CODE (op) == SYMBOL_REF
6518 && SYMBOL_REF_FUNCTION_P (op))
6519 || (v & ((1 << lsbits) - 1)) != 0)
6520 warning (0, "%d least significant bits of %s are ignored", lsbits,
6521 d->name);
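/* Illustrative note: the lsbits warning above fires when a literal has
   nonzero bits below the operand's implied alignment.  For example,
   passing the constant 0x1234 where an SPU_BTI_U16_2 address is expected
   (lqa/stqa) gives lsbits = 4 and 0x1234 & 0xf == 4, so the "4 least
   significant bits ... are ignored" warning is issued. */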
6526 static int
6527 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6528 rtx target, rtx ops[])
6530 enum insn_code icode = (enum insn_code) d->icode;
6531 int i = 0, a;
6533 /* Expand the arguments into rtl. */
6535 if (d->parm[0] != SPU_BTI_VOID)
6536 ops[i++] = target;
6538 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6540 tree arg = CALL_EXPR_ARG (exp, a);
6541 if (arg == 0)
6542 abort ();
6543 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6546 gcc_assert (i == insn_data[icode].n_generator_args);
6547 return i;
6550 static rtx
6551 spu_expand_builtin_1 (struct spu_builtin_description *d,
6552 tree exp, rtx target)
6554 rtx pat;
6555 rtx ops[8];
6556 enum insn_code icode = (enum insn_code) d->icode;
6557 enum machine_mode mode, tmode;
6558 int i, p;
6559 int n_operands;
6560 tree return_type;
6562 /* Set up ops[] with values from arglist. */
6563 n_operands = expand_builtin_args (d, exp, target, ops);
6565 /* Handle the target operand which must be operand 0. */
6566 i = 0;
6567 if (d->parm[0] != SPU_BTI_VOID)
6570 /* We prefer the mode specified for the match_operand; otherwise
6571 use the mode from the builtin function prototype. */
6572 tmode = insn_data[d->icode].operand[0].mode;
6573 if (tmode == VOIDmode)
6574 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6576 /* Try to use target, because not using it can lead to extra copies,
6577 and when all of the registers are in use those extra copies lead
6578 to extra spills. */
6579 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6580 ops[0] = target;
6581 else
6582 target = ops[0] = gen_reg_rtx (tmode);
6584 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6585 abort ();
6587 i++;
6590 if (d->fcode == SPU_MASK_FOR_LOAD)
6592 enum machine_mode mode = insn_data[icode].operand[1].mode;
6593 tree arg;
6594 rtx addr, op, pat;
6596 /* get addr */
6597 arg = CALL_EXPR_ARG (exp, 0);
6598 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6599 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6600 addr = memory_address (mode, op);
6602 /* negate addr */
6603 op = gen_reg_rtx (GET_MODE (addr));
6604 emit_insn (gen_rtx_SET (VOIDmode, op,
6605 gen_rtx_NEG (GET_MODE (addr), addr)));
6606 op = gen_rtx_MEM (mode, op);
6608 pat = GEN_FCN (icode) (target, op);
6609 if (!pat)
6610 return 0;
6611 emit_insn (pat);
6612 return target;
6615 /* Ignore align_hint, but still expand its args in case they have
6616 side effects. */
6617 if (icode == CODE_FOR_spu_align_hint)
6618 return 0;
6620 /* Handle the rest of the operands. */
6621 for (p = 1; i < n_operands; i++, p++)
6623 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6624 mode = insn_data[d->icode].operand[i].mode;
6625 else
6626 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6628 /* mode can be VOIDmode here for labels */
6630 /* For specific intrinsics with an immediate operand, e.g.,
6631 si_ai(), we sometimes need to convert the scalar argument to a
6632 vector argument by splatting the scalar. */
6633 if (VECTOR_MODE_P (mode)
6634 && (GET_CODE (ops[i]) == CONST_INT
6635 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6636 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6638 if (GET_CODE (ops[i]) == CONST_INT)
6639 ops[i] = spu_const (mode, INTVAL (ops[i]));
6640 else
6642 rtx reg = gen_reg_rtx (mode);
6643 enum machine_mode imode = GET_MODE_INNER (mode);
6644 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6645 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6646 if (imode != GET_MODE (ops[i]))
6647 ops[i] = convert_to_mode (imode, ops[i],
6648 TYPE_UNSIGNED (spu_builtin_types
6649 [d->parm[i]]));
6650 emit_insn (gen_spu_splats (reg, ops[i]));
6651 ops[i] = reg;
6655 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6657 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6658 ops[i] = spu_force_reg (mode, ops[i]);
6661 switch (n_operands)
6663 case 0:
6664 pat = GEN_FCN (icode) (0);
6665 break;
6666 case 1:
6667 pat = GEN_FCN (icode) (ops[0]);
6668 break;
6669 case 2:
6670 pat = GEN_FCN (icode) (ops[0], ops[1]);
6671 break;
6672 case 3:
6673 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6674 break;
6675 case 4:
6676 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6677 break;
6678 case 5:
6679 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6680 break;
6681 case 6:
6682 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6683 break;
6684 default:
6685 abort ();
6688 if (!pat)
6689 abort ();
6691 if (d->type == B_CALL || d->type == B_BISLED)
6692 emit_call_insn (pat);
6693 else if (d->type == B_JUMP)
6695 emit_jump_insn (pat);
6696 emit_barrier ();
6698 else
6699 emit_insn (pat);
6701 return_type = spu_builtin_types[d->parm[0]];
6702 if (d->parm[0] != SPU_BTI_VOID
6703 && GET_MODE (target) != TYPE_MODE (return_type))
6705 /* target is the return value. It should always have the mode of
6706 the builtin function prototype. */
6707 target = spu_force_reg (TYPE_MODE (return_type), target);
6710 return target;
6714 spu_expand_builtin (tree exp,
6715 rtx target,
6716 rtx subtarget ATTRIBUTE_UNUSED,
6717 enum machine_mode mode ATTRIBUTE_UNUSED,
6718 int ignore ATTRIBUTE_UNUSED)
6720 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6721 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6722 struct spu_builtin_description *d;
6724 if (fcode < NUM_SPU_BUILTINS)
6726 d = &spu_builtins[fcode];
6728 return spu_expand_builtin_1 (d, exp, target);
6730 abort ();
6733 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6734 static tree
6735 spu_builtin_mul_widen_even (tree type)
6737 switch (TYPE_MODE (type))
6739 case V8HImode:
6740 if (TYPE_UNSIGNED (type))
6741 return spu_builtin_decls[SPU_MULE_0];
6742 else
6743 return spu_builtin_decls[SPU_MULE_1];
6744 break;
6745 default:
6746 return NULL_TREE;
6750 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6751 static tree
6752 spu_builtin_mul_widen_odd (tree type)
6754 switch (TYPE_MODE (type))
6756 case V8HImode:
6757 if (TYPE_UNSIGNED (type))
6758 return spu_builtin_decls[SPU_MULO_1];
6759 else
6760 return spu_builtin_decls[SPU_MULO_0];
6761 break;
6762 default:
6763 return NULL_TREE;
6767 /* Implement targetm.vectorize.builtin_mask_for_load. */
6768 static tree
6769 spu_builtin_mask_for_load (void)
6771 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6774 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6775 static int
6776 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6777 tree vectype ATTRIBUTE_UNUSED,
6778 int misalign ATTRIBUTE_UNUSED)
6780 switch (type_of_cost)
6782 case scalar_stmt:
6783 case vector_stmt:
6784 case vector_load:
6785 case vector_store:
6786 case vec_to_scalar:
6787 case scalar_to_vec:
6788 case cond_branch_not_taken:
6789 case vec_perm:
6790 return 1;
6792 case scalar_store:
6793 return 10;
6795 case scalar_load:
6796 /* Load + rotate. */
6797 return 2;
6799 case unaligned_load:
6800 return 2;
6802 case cond_branch_taken:
6803 return 6;
6805 default:
6806 gcc_unreachable ();
6810 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6811 after applying N iterations. This routine does not determine
6812 how many iterations are required to reach the desired alignment. */
6814 static bool
6815 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6817 if (is_packed)
6818 return false;
6820 /* All other types are naturally aligned. */
6821 return true;
6824 /* Implement targetm.vectorize.builtin_vec_perm. */
6825 tree
6826 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6828 *mask_element_type = unsigned_char_type_node;
6830 switch (TYPE_MODE (type))
6832 case V16QImode:
6833 if (TYPE_UNSIGNED (type))
6834 return spu_builtin_decls[SPU_SHUFFLE_0];
6835 else
6836 return spu_builtin_decls[SPU_SHUFFLE_1];
6838 case V8HImode:
6839 if (TYPE_UNSIGNED (type))
6840 return spu_builtin_decls[SPU_SHUFFLE_2];
6841 else
6842 return spu_builtin_decls[SPU_SHUFFLE_3];
6844 case V4SImode:
6845 if (TYPE_UNSIGNED (type))
6846 return spu_builtin_decls[SPU_SHUFFLE_4];
6847 else
6848 return spu_builtin_decls[SPU_SHUFFLE_5];
6850 case V2DImode:
6851 if (TYPE_UNSIGNED (type))
6852 return spu_builtin_decls[SPU_SHUFFLE_6];
6853 else
6854 return spu_builtin_decls[SPU_SHUFFLE_7];
6856 case V4SFmode:
6857 return spu_builtin_decls[SPU_SHUFFLE_8];
6859 case V2DFmode:
6860 return spu_builtin_decls[SPU_SHUFFLE_9];
6862 default:
6863 return NULL_TREE;
6867 /* Return the appropriate mode for a named address pointer. */
6868 static enum machine_mode
6869 spu_addr_space_pointer_mode (addr_space_t addrspace)
6871 switch (addrspace)
6873 case ADDR_SPACE_GENERIC:
6874 return ptr_mode;
6875 case ADDR_SPACE_EA:
6876 return EAmode;
6877 default:
6878 gcc_unreachable ();
6882 /* Return the appropriate mode for a named address address. */
6883 static enum machine_mode
6884 spu_addr_space_address_mode (addr_space_t addrspace)
6886 switch (addrspace)
6888 case ADDR_SPACE_GENERIC:
6889 return Pmode;
6890 case ADDR_SPACE_EA:
6891 return EAmode;
6892 default:
6893 gcc_unreachable ();
6897 /* Determine if one named address space is a subset of another. */
6899 static bool
6900 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6902 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6903 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6905 if (subset == superset)
6906 return true;
6908 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6909 being subsets but instead as disjoint address spaces. */
6910 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6911 return false;
6913 else
6914 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6917 /* Convert from one address space to another. */
6918 static rtx
6919 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6921 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6922 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6924 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6925 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6927 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6929 rtx result, ls;
6931 ls = gen_const_mem (DImode,
6932 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6933 set_mem_align (ls, 128);
6935 result = gen_reg_rtx (Pmode);
6936 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6937 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6938 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6939 ls, const0_rtx, Pmode, 1);
6941 emit_insn (gen_subsi3 (result, op, ls));
6943 return result;
6946 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6948 rtx result, ls;
6950 ls = gen_const_mem (DImode,
6951 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6952 set_mem_align (ls, 128);
6954 result = gen_reg_rtx (EAmode);
6955 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6956 op = force_reg (Pmode, op);
6957 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6958 ls, const0_rtx, EAmode, 1);
6959 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6961 if (EAmode == SImode)
6962 emit_insn (gen_addsi3 (result, op, ls));
6963 else
6964 emit_insn (gen_adddi3 (result, op, ls));
6966 return result;
6969 else
6970 gcc_unreachable ();
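/* Illustrative note: in both directions above, the conditional move zeroes
   the __ea_local_store bias when the source pointer is itself zero, so a
   NULL pointer converts to NULL.  Assuming __ea_local_store holds the
   local store's base in the effective address space, generic -> __ea
   computes op ? op + __ea_local_store : 0, and __ea -> generic computes
   op ? op - __ea_local_store : 0. */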
6974 /* Count the total number of instructions in each pipe and return the
6975 maximum, which is used as the Minimum Iteration Interval (MII)
6976 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6977 -2 marks instructions that can go in either pipe0 or pipe1. */
6978 static int
6979 spu_sms_res_mii (struct ddg *g)
6981 int i;
6982 unsigned t[4] = {0, 0, 0, 0};
6984 for (i = 0; i < g->num_nodes; i++)
6986 rtx insn = g->nodes[i].insn;
6987 int p = get_pipe (insn) + 2;
6989 gcc_assert (p >= 0);
6990 gcc_assert (p < 4);
6992 t[p]++;
6993 if (dump_file && INSN_P (insn))
6994 fprintf (dump_file, "i%d %s %d %d\n",
6995 INSN_UID (insn),
6996 insn_data[INSN_CODE(insn)].name,
6997 p, t[p]);
6999 if (dump_file)
7000 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7002 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
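/* Worked example: with t[2] = 4 pipe0-only insns, t[3] = 2 pipe1-only
   insns and t[0] = 3 insns that can go in either pipe, the expression
   above gives MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5,
   i.e. the MII is the larger of the rounded-up two-pipe issue bound and
   the more heavily loaded single pipe. */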
7006 void
7007 spu_init_expanders (void)
7009 if (cfun)
7011 rtx r0, r1;
7012 /* The hard frame pointer register is only 128-bit aligned when
7013 frame_pointer_needed is true. We don't know that until we're
7014 expanding the prologue. */
7015 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7017 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7018 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7019 to be treated as aligned, so generate them here. */
7020 r0 = gen_reg_rtx (SImode);
7021 r1 = gen_reg_rtx (SImode);
7022 mark_reg_pointer (r0, 128);
7023 mark_reg_pointer (r1, 128);
7024 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7025 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7029 static enum machine_mode
7030 spu_libgcc_cmp_return_mode (void)
7033 /* For SPU, word mode is TImode, so it is better to use SImode
7034 for compare returns. */
7035 return SImode;
7038 static enum machine_mode
7039 spu_libgcc_shift_count_mode (void)
7041 /* For SPU, word mode is TImode, so it is better to use SImode
7042 for shift counts. */
7043 return SImode;
7046 /* An early place to adjust some flags after GCC has finished
7047 processing them. */
7048 static void
7049 asm_file_start (void)
7051 default_file_start ();
7054 /* Implement targetm.section_type_flags. */
7055 static unsigned int
7056 spu_section_type_flags (tree decl, const char *name, int reloc)
7058 /* .toe needs to have type @nobits. */
7059 if (strcmp (name, ".toe") == 0)
7060 return SECTION_BSS;
7061 /* Don't load _ea into the current address space. */
7062 if (strcmp (name, "._ea") == 0)
7063 return SECTION_WRITE | SECTION_DEBUG;
7064 return default_section_type_flags (decl, name, reloc);
7067 /* Implement targetm.select_section. */
7068 static section *
7069 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7071 /* Variables and constants defined in the __ea address space
7072 go into a special section named "._ea". */
7073 if (TREE_TYPE (decl) != error_mark_node
7074 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7076 /* We might get called with string constants, but get_named_section
7077 doesn't like them as they are not DECLs. Also, we need to set
7078 flags in that case. */
7079 if (!DECL_P (decl))
7080 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7082 return get_named_section (decl, "._ea", reloc);
7085 return default_elf_select_section (decl, reloc, align);
7088 /* Implement targetm.unique_section. */
7089 static void
7090 spu_unique_section (tree decl, int reloc)
7092 /* We don't support unique section names in the __ea address
7093 space for now. */
7094 if (TREE_TYPE (decl) != error_mark_node
7095 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7096 return;
7098 default_unique_section (decl, reloc);
7101 /* Generate a constant or register which contains 2^SCALE. We assume
7102 the result is valid for MODE. Currently, MODE must be V4SFmode and
7103 SCALE must be SImode. */
7105 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7107 gcc_assert (mode == V4SFmode);
7108 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7109 if (GET_CODE (scale) != CONST_INT)
7111 /* unsigned int exp = (127 + scale) << 23;
7112 __vector float m = (__vector float) spu_splats (exp); */
7113 rtx reg = force_reg (SImode, scale);
7114 rtx exp = gen_reg_rtx (SImode);
7115 rtx mul = gen_reg_rtx (mode);
7116 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7117 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7118 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7119 return mul;
7121 else
7123 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7124 unsigned char arr[16];
7125 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7126 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7127 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7128 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7129 return array_to_constant (mode, arr);
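/* Worked example for the constant path above: scale = 3 gives
   exp = 127 + 3 = 130 = 0x82, so arr[0] = 0x82 >> 1 = 0x41 and
   arr[1] = (0x82 << 7) & 0xff = 0x00, producing the big-endian word
   0x41000000 in every slot, which is the IEEE single-precision encoding
   of 8.0f = 2^3. */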
7133 /* After reload, just change the convert into a move instruction
7134 or a dead instruction. */
7135 void
7136 spu_split_convert (rtx ops[])
7138 if (REGNO (ops[0]) == REGNO (ops[1]))
7139 emit_note (NOTE_INSN_DELETED);
7140 else
7142 /* Use TImode always as this might help hard reg copyprop. */
7143 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7144 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7145 emit_insn (gen_move_insn (op0, op1));
7149 void
7150 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7152 fprintf (file, "# profile\n");
7153 fprintf (file, "brsl $75, _mcount\n");
7156 /* Implement targetm.ref_may_alias_errno. */
7157 static bool
7158 spu_ref_may_alias_errno (ao_ref *ref)
7160 tree base = ao_ref_base (ref);
7162 /* With SPU newlib, errno is defined as something like
7163 _impure_data._errno
7164 The default implementation of this target macro does not
7165 recognize such expressions, so handle them specially here. */
7167 if (TREE_CODE (base) == VAR_DECL
7168 && !TREE_STATIC (base)
7169 && DECL_EXTERNAL (base)
7170 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7171 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7172 "_impure_data") == 0
7173 /* _errno is the first member of _impure_data. */
7174 && ref->offset == 0)
7175 return true;
7177 return default_ref_may_alias_errno (ref);
7180 /* Output thunk to FILE that implements a C++ virtual function call (with
7181 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7182 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7183 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7184 relative to the resulting this pointer. */
7186 static void
7187 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7188 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7189 tree function)
7191 rtx op[8];
7193 /* Make sure unwind info is emitted for the thunk if needed. */
7194 final_start_function (emit_barrier (), file, 1);
7196 /* Operand 0 is the target function. */
7197 op[0] = XEXP (DECL_RTL (function), 0);
7199 /* Operand 1 is the 'this' pointer. */
7200 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7201 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7202 else
7203 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7205 /* Operands 2/3 are the low/high halfwords of delta. */
7206 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7207 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7209 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7210 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7211 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7213 /* Operands 6/7 are temporary registers. */
7214 op[6] = gen_rtx_REG (Pmode, 79);
7215 op[7] = gen_rtx_REG (Pmode, 78);
7217 /* Add DELTA to this pointer. */
7218 if (delta)
7220 if (delta >= -0x200 && delta < 0x200)
7221 output_asm_insn ("ai\t%1,%1,%2", op);
7222 else if (delta >= -0x8000 && delta < 0x8000)
7224 output_asm_insn ("il\t%6,%2", op);
7225 output_asm_insn ("a\t%1,%1,%6", op);
7227 else
7229 output_asm_insn ("ilhu\t%6,%3", op);
7230 output_asm_insn ("iohl\t%6,%2", op);
7231 output_asm_insn ("a\t%1,%1,%6", op);
7235 /* Perform vcall adjustment. */
7236 if (vcall_offset)
7238 output_asm_insn ("lqd\t%7,0(%1)", op);
7239 output_asm_insn ("rotqby\t%7,%7,%1", op);
7241 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7242 output_asm_insn ("ai\t%7,%7,%4", op);
7243 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7245 output_asm_insn ("il\t%6,%4", op);
7246 output_asm_insn ("a\t%7,%7,%6", op);
7248 else
7250 output_asm_insn ("ilhu\t%6,%5", op);
7251 output_asm_insn ("iohl\t%6,%4", op);
7252 output_asm_insn ("a\t%7,%7,%6", op);
7255 output_asm_insn ("lqd\t%6,0(%7)", op);
7256 output_asm_insn ("rotqby\t%6,%6,%7", op);
7257 output_asm_insn ("a\t%1,%1,%6", op);
7260 /* Jump to target. */
7261 output_asm_insn ("br\t%0", op);
7263 final_end_function ();
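/* Illustrative note: when DELTA does not fit the 16-bit "il" immediate,
   the thunk above rebuilds it with an ilhu/iohl pair: ilhu loads op[3]
   (the high halfword) into the upper 16 bits of the scratch register and
   iohl ors op[2] (the low halfword) into the lower 16 bits, after which a
   plain "a" adds the reconstructed 32-bit delta to the 'this' pointer.
   For example, delta = 0x12345678 is split into op[3] = 0x1234 and
   op[2] = 0x5678. */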
7266 #include "gt-spu.h"