* config/spu/spu.c (spu_init_libfuncs): Install __clrsbdi2.
gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
7 any later version.
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
14 You should have received a copy of the GNU General Public License
15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
18 #include "config.h"
19 #include "system.h"
20 #include "coretypes.h"
21 #include "tm.h"
22 #include "rtl.h"
23 #include "regs.h"
24 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "diagnostic-core.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "machmode.h"
51 #include "gimple.h"
52 #include "tm-constrs.h"
53 #include "ddg.h"
54 #include "sbitmap.h"
55 #include "timevar.h"
56 #include "df.h"
58 /* Builtin types, data and prototypes. */
60 enum spu_builtin_type_index
62 SPU_BTI_END_OF_PARAMS,
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
110 SPU_BTI_MAX
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126 struct spu_builtin_range
128 int low, high;
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
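/* Editorial aside, not part of the original file: each row above gives the
   inclusive [low, high] range accepted for the corresponding immediate
   operand kind (SPU_BTI_7 through SPU_BTI_U18).  A range check over this
   table would presumably look like the hypothetical helper sketched below;
   the helper name and the indexing offset are assumptions for illustration,
   the real validation lives later in this file.  */
#if 0
static int
spu_immediate_in_range_p (int bti, HOST_WIDE_INT val)
{
  struct spu_builtin_range *r = &spu_builtin_range[bti - SPU_BTI_7];
  return val >= r->low && val <= r->high;
}
#endif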
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150 /* Prototypes and external defs. */
151 static void spu_option_override (void);
152 static void spu_init_builtins (void);
153 static tree spu_builtin_decl (unsigned, bool);
154 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
155 static bool spu_vector_mode_supported_p (enum machine_mode mode);
156 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
157 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158 bool, addr_space_t);
159 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160 static rtx get_pic_reg (void);
161 static int need_to_save_reg (int regno, int saving);
162 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165 rtx scratch);
166 static void emit_nop_for_insn (rtx insn);
167 static bool insn_clobbers_hbr (rtx insn);
168 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
169 int distance, sbitmap blocks);
170 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171 enum machine_mode dmode);
172 static rtx get_branch_target (rtx branch);
173 static void spu_machine_dependent_reorg (void);
174 static int spu_sched_issue_rate (void);
175 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176 int can_issue_more);
177 static int get_pipe (rtx insn);
178 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
179 static void spu_sched_init_global (FILE *, int, int);
180 static void spu_sched_init (FILE *, int, int);
181 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
182 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183 int flags,
184 bool *no_add_attrs);
185 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186 int flags,
187 bool *no_add_attrs);
188 static int spu_naked_function_p (tree func);
189 static bool spu_pass_by_reference (cumulative_args_t cum,
190 enum machine_mode mode,
191 const_tree type, bool named);
192 static rtx spu_function_arg (cumulative_args_t cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static void spu_function_arg_advance (cumulative_args_t cum,
195 enum machine_mode mode,
196 const_tree type, bool named);
197 static tree spu_build_builtin_va_list (void);
198 static void spu_va_start (tree, rtx);
199 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
200 gimple_seq * pre_p, gimple_seq * post_p);
201 static int store_with_one_insn_p (rtx mem);
202 static int mem_is_padded_component_ref (rtx x);
203 static int reg_aligned_for_addr (rtx x);
204 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
205 static void spu_asm_globalize_label (FILE * file, const char *name);
206 static bool spu_rtx_costs (rtx x, int code, int outer_code,
207 int *total, bool speed);
208 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
209 static void spu_init_libfuncs (void);
210 static bool spu_return_in_memory (const_tree type, const_tree fntype);
211 static void fix_range (const char *);
212 static void spu_encode_section_info (tree, rtx, int);
213 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
214 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
215 addr_space_t);
216 static tree spu_builtin_mul_widen_even (tree);
217 static tree spu_builtin_mul_widen_odd (tree);
218 static tree spu_builtin_mask_for_load (void);
219 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
220 static bool spu_vector_alignment_reachable (const_tree, bool);
221 static tree spu_builtin_vec_perm (tree, tree *);
222 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
223 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
224 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
225 static rtx spu_addr_space_convert (rtx, tree, tree);
226 static int spu_sms_res_mii (struct ddg *g);
227 static unsigned int spu_section_type_flags (tree, const char *, int);
228 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229 static void spu_unique_section (tree, int);
230 static rtx spu_expand_load (rtx, rtx, rtx, int);
231 static void spu_trampoline_init (rtx, tree, rtx);
232 static void spu_conditional_register_usage (void);
233 static bool spu_ref_may_alias_errno (ao_ref *);
234 static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
235 HOST_WIDE_INT, tree);
237 /* Which instruction set architecture to use. */
238 int spu_arch;
239 /* Which cpu are we tuning for. */
240 int spu_tune;
242 /* The hardware requires 8 insns between a hint and the branch it
243 affects. This variable describes how many rtl instructions the
244 compiler needs to see before inserting a hint, and then the compiler
245 will insert enough nops to make it at least 8 insns. The default is
246 for the compiler to allow up to 2 nops to be emitted. The nops are
247 inserted in pairs, so we round down. */
248 int spu_hint_dist = (8*4) - (2*4);
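/* Editorial aside: worked numbers for the default above.  Eight insns at
   4 bytes each is 32 bytes; allowing 2 nops (8 bytes) leaves
   8*4 - 2*4 = 24 bytes of real insns that must separate a hint from its
   branch.  With a different spu_max_nops value (set by the corresponding
   -m option), spu_option_override below recomputes this as
   8*4 - spu_max_nops*4, clamped at 0.  */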
250 enum spu_immediate {
251 SPU_NONE,
252 SPU_IL,
253 SPU_ILA,
254 SPU_ILH,
255 SPU_ILHU,
256 SPU_ORI,
257 SPU_ORHI,
258 SPU_ORBI,
259 SPU_IOHL
261 enum immediate_class
263 IC_POOL, /* constant pool */
264 IC_IL1, /* one il* instruction */
265 IC_IL2, /* both ilhu and iohl instructions */
266 IC_IL1s, /* one il* instruction */
267 IC_IL2s, /* both ilhu and iohl instructions */
268 IC_FSMBI, /* the fsmbi instruction */
269 IC_CPAT, /* one of the c*d instructions */
270 IC_FSMBI2 /* fsmbi plus 1 other instruction */
273 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
274 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
275 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
276 static enum immediate_class classify_immediate (rtx op,
277 enum machine_mode mode);
279 static enum machine_mode spu_unwind_word_mode (void);
281 static enum machine_mode
282 spu_libgcc_cmp_return_mode (void);
284 static enum machine_mode
285 spu_libgcc_shift_count_mode (void);
287 /* Pointer mode for __ea references. */
288 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
291 /* Table of machine attributes. */
292 static const struct attribute_spec spu_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
297 false },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
299 false },
300 { NULL, 0, 0, false, false, false, NULL, false }
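/* Editorial aside, not part of the original file: a hedged sketch of how
   these attributes might appear in user code.  "naked" requires a function
   declaration and makes spu_naked_function_p true, so prologue/epilogue
   expansion is skipped; "spu_vector" is a type attribute handled by
   spu_handle_vector_attribute.  The exact spellings below are illustrative
   assumptions, not taken from SPU documentation.  */
#if 0
void interrupt_stub (void) __attribute__ ((naked));
typedef int qword_ints __attribute__ ((spu_vector));
#endif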
303 /* TARGET overrides. */
305 #undef TARGET_ADDR_SPACE_POINTER_MODE
306 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
308 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
309 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
311 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
312 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
313 spu_addr_space_legitimate_address_p
315 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
316 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
318 #undef TARGET_ADDR_SPACE_SUBSET_P
319 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
321 #undef TARGET_ADDR_SPACE_CONVERT
322 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
324 #undef TARGET_INIT_BUILTINS
325 #define TARGET_INIT_BUILTINS spu_init_builtins
326 #undef TARGET_BUILTIN_DECL
327 #define TARGET_BUILTIN_DECL spu_builtin_decl
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
332 #undef TARGET_UNWIND_WORD_MODE
333 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
335 #undef TARGET_LEGITIMIZE_ADDRESS
336 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
338 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
339 and .quad for the debugger. When it is known that the assembler is fixed,
340 these can be removed. */
341 #undef TARGET_ASM_UNALIGNED_SI_OP
342 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
347 /* The .8byte directive doesn't seem to work well for a 32 bit
348 architecture. */
349 #undef TARGET_ASM_UNALIGNED_DI_OP
350 #define TARGET_ASM_UNALIGNED_DI_OP NULL
352 #undef TARGET_RTX_COSTS
353 #define TARGET_RTX_COSTS spu_rtx_costs
355 #undef TARGET_ADDRESS_COST
356 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
358 #undef TARGET_SCHED_ISSUE_RATE
359 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
361 #undef TARGET_SCHED_INIT_GLOBAL
362 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
364 #undef TARGET_SCHED_INIT
365 #define TARGET_SCHED_INIT spu_sched_init
367 #undef TARGET_SCHED_VARIABLE_ISSUE
368 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
370 #undef TARGET_SCHED_REORDER
371 #define TARGET_SCHED_REORDER spu_sched_reorder
373 #undef TARGET_SCHED_REORDER2
374 #define TARGET_SCHED_REORDER2 spu_sched_reorder
376 #undef TARGET_SCHED_ADJUST_COST
377 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER spu_assemble_integer
385 #undef TARGET_SCALAR_MODE_SUPPORTED_P
386 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
389 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
394 #undef TARGET_ASM_GLOBALIZE_LABEL
395 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
397 #undef TARGET_PASS_BY_REFERENCE
398 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
400 #undef TARGET_FUNCTION_ARG
401 #define TARGET_FUNCTION_ARG spu_function_arg
403 #undef TARGET_FUNCTION_ARG_ADVANCE
404 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
406 #undef TARGET_MUST_PASS_IN_STACK
407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
409 #undef TARGET_BUILD_BUILTIN_VA_LIST
410 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
412 #undef TARGET_EXPAND_BUILTIN_VA_START
413 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
415 static void spu_setup_incoming_varargs (cumulative_args_t cum,
416 enum machine_mode mode,
417 tree type, int *pretend_size,
418 int no_rtl);
419 #undef TARGET_SETUP_INCOMING_VARARGS
420 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
425 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
426 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
428 #undef TARGET_INIT_LIBFUNCS
429 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
431 #undef TARGET_RETURN_IN_MEMORY
432 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
434 #undef TARGET_ENCODE_SECTION_INFO
435 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
437 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
438 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
440 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
441 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
443 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
444 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
446 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
447 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
449 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
450 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
452 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
453 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
455 #undef TARGET_LIBGCC_CMP_RETURN_MODE
456 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
458 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
459 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
461 #undef TARGET_SCHED_SMS_RES_MII
462 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
464 #undef TARGET_SECTION_TYPE_FLAGS
465 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
467 #undef TARGET_ASM_SELECT_SECTION
468 #define TARGET_ASM_SELECT_SECTION spu_select_section
470 #undef TARGET_ASM_UNIQUE_SECTION
471 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
473 #undef TARGET_LEGITIMATE_ADDRESS_P
474 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
476 #undef TARGET_LEGITIMATE_CONSTANT_P
477 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
479 #undef TARGET_TRAMPOLINE_INIT
480 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
482 #undef TARGET_OPTION_OVERRIDE
483 #define TARGET_OPTION_OVERRIDE spu_option_override
485 #undef TARGET_CONDITIONAL_REGISTER_USAGE
486 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
488 #undef TARGET_REF_MAY_ALIAS_ERRNO
489 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
491 #undef TARGET_ASM_OUTPUT_MI_THUNK
492 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
493 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
494 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
496 /* Variable tracking should be run after all optimizations which
497 change order of insns. It also needs a valid CFG. */
498 #undef TARGET_DELAY_VARTRACK
499 #define TARGET_DELAY_VARTRACK true
501 struct gcc_target targetm = TARGET_INITIALIZER;
503 /* Implement TARGET_OPTION_OVERRIDE. */
504 static void
505 spu_option_override (void)
507 /* Small loops will be completely peeled at -O3. For SPU it is more
508 important to keep code small by default. */
509 if (!flag_unroll_loops && !flag_peel_loops)
510 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
511 global_options.x_param_values,
512 global_options_set.x_param_values);
514 flag_omit_frame_pointer = 1;
516 /* Functions must be 8 byte aligned so we correctly handle dual issue */
517 if (align_functions < 8)
518 align_functions = 8;
520 spu_hint_dist = 8*4 - spu_max_nops*4;
521 if (spu_hint_dist < 0)
522 spu_hint_dist = 0;
524 if (spu_fixed_range_string)
525 fix_range (spu_fixed_range_string);
527 /* Determine processor architectural level. */
528 if (spu_arch_string)
530 if (strcmp (&spu_arch_string[0], "cell") == 0)
531 spu_arch = PROCESSOR_CELL;
532 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
533 spu_arch = PROCESSOR_CELLEDP;
534 else
535 error ("bad value (%s) for -march= switch", spu_arch_string);
538 /* Determine processor to tune for. */
539 if (spu_tune_string)
541 if (strcmp (&spu_tune_string[0], "cell") == 0)
542 spu_tune = PROCESSOR_CELL;
543 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
544 spu_tune = PROCESSOR_CELLEDP;
545 else
546 error ("bad value (%s) for -mtune= switch", spu_tune_string);
549 /* Change defaults according to the processor architecture. */
550 if (spu_arch == PROCESSOR_CELLEDP)
552 /* If no command line option has been otherwise specified, change
553 the default to -mno-safe-hints on celledp -- only the original
554 Cell/B.E. processors require this workaround. */
555 if (!(target_flags_explicit & MASK_SAFE_HINTS))
556 target_flags &= ~MASK_SAFE_HINTS;
559 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
562 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
563 struct attribute_spec.handler. */
565 /* True if MODE is valid for the target. By "valid", we mean able to
566 be manipulated in non-trivial ways. In particular, this means all
567 the arithmetic is supported. */
568 static bool
569 spu_scalar_mode_supported_p (enum machine_mode mode)
571 switch (mode)
573 case QImode:
574 case HImode:
575 case SImode:
576 case SFmode:
577 case DImode:
578 case TImode:
579 case DFmode:
580 return true;
582 default:
583 return false;
587 /* Similarly for vector modes. "Supported" here is less strict. At
588 least some operations are supported; need to check optabs or builtins
589 for further details. */
590 static bool
591 spu_vector_mode_supported_p (enum machine_mode mode)
593 switch (mode)
595 case V16QImode:
596 case V8HImode:
597 case V4SImode:
598 case V2DImode:
599 case V4SFmode:
600 case V2DFmode:
601 return true;
603 default:
604 return false;
608 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
609 least significant bytes of the outer mode. This function returns
610 TRUE for the SUBREGs where this is correct. */
612 valid_subreg (rtx op)
614 enum machine_mode om = GET_MODE (op);
615 enum machine_mode im = GET_MODE (SUBREG_REG (op));
616 return om != VOIDmode && im != VOIDmode
617 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
618 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
619 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
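/* Editorial aside: concrete cases for the predicate above.
   (subreg:SI (reg:QI ...)) is accepted because both modes are at most
   4 bytes; (subreg:TI (reg:V4SI ...)) is accepted because both are at
   least 16 bytes; (subreg:DI (reg:SI ...)) is rejected because 8 and 4
   bytes are unequal and fall into neither bucket.  */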
622 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
623 and adjust the start offset. */
624 static rtx
625 adjust_operand (rtx op, HOST_WIDE_INT * start)
627 enum machine_mode mode;
628 int op_size;
629 /* Strip any paradoxical SUBREG. */
630 if (GET_CODE (op) == SUBREG
631 && (GET_MODE_BITSIZE (GET_MODE (op))
632 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
634 if (start)
635 *start -=
636 GET_MODE_BITSIZE (GET_MODE (op)) -
637 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
638 op = SUBREG_REG (op);
640 /* If it is smaller than SI, assure a SUBREG */
641 op_size = GET_MODE_BITSIZE (GET_MODE (op));
642 if (op_size < 32)
644 if (start)
645 *start += 32 - op_size;
646 op_size = 32;
648 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
649 mode = mode_for_size (op_size, MODE_INT, 0);
650 if (mode != GET_MODE (op))
651 op = gen_rtx_SUBREG (mode, op, 0);
652 return op;
655 void
656 spu_expand_extv (rtx ops[], int unsignedp)
658 rtx dst = ops[0], src = ops[1];
659 HOST_WIDE_INT width = INTVAL (ops[2]);
660 HOST_WIDE_INT start = INTVAL (ops[3]);
661 HOST_WIDE_INT align_mask;
662 rtx s0, s1, mask, r0;
664 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
666 if (MEM_P (src))
668 /* First, determine if we need 1 TImode load or 2. We need only 1
669 if the bits being extracted do not cross the alignment boundary
670 as determined by the MEM and its address. */
672 align_mask = -MEM_ALIGN (src);
673 if ((start & align_mask) == ((start + width - 1) & align_mask))
675 /* Alignment is sufficient for 1 load. */
676 s0 = gen_reg_rtx (TImode);
677 r0 = spu_expand_load (s0, 0, src, start / 8);
678 start &= 7;
679 if (r0)
680 emit_insn (gen_rotqby_ti (s0, s0, r0));
682 else
684 /* Need 2 loads. */
685 s0 = gen_reg_rtx (TImode);
686 s1 = gen_reg_rtx (TImode);
687 r0 = spu_expand_load (s0, s1, src, start / 8);
688 start &= 7;
690 gcc_assert (start + width <= 128);
691 if (r0)
693 rtx r1 = gen_reg_rtx (SImode);
694 mask = gen_reg_rtx (TImode);
695 emit_move_insn (mask, GEN_INT (-1));
696 emit_insn (gen_rotqby_ti (s0, s0, r0));
697 emit_insn (gen_rotqby_ti (s1, s1, r0));
698 if (GET_CODE (r0) == CONST_INT)
699 r1 = GEN_INT (INTVAL (r0) & 15);
700 else
701 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
702 emit_insn (gen_shlqby_ti (mask, mask, r1));
703 emit_insn (gen_selb (s0, s1, s0, mask));
708 else if (GET_CODE (src) == SUBREG)
710 rtx r = SUBREG_REG (src);
711 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
712 s0 = gen_reg_rtx (TImode);
713 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
714 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
715 else
716 emit_move_insn (s0, src);
718 else
720 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
721 s0 = gen_reg_rtx (TImode);
722 emit_move_insn (s0, src);
725 /* Now s0 is TImode and contains the bits to extract at start. */
727 if (start)
728 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
730 if (128 - width)
731 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
733 emit_move_insn (dst, s0);
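/* Editorial aside: after the loads above, S0 holds the quadword with the
   requested field at bit offset START.  The rotate puts the field at the
   most significant end, and the shift by 128 - WIDTH right-justifies it,
   using an arithmetic or logical shift according to UNSIGNEDP, so DST
   receives the field zero- or sign-extended to 128 bits.  */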
736 void
737 spu_expand_insv (rtx ops[])
739 HOST_WIDE_INT width = INTVAL (ops[1]);
740 HOST_WIDE_INT start = INTVAL (ops[2]);
741 HOST_WIDE_INT maskbits;
742 enum machine_mode dst_mode;
743 rtx dst = ops[0], src = ops[3];
744 int dst_size;
745 rtx mask;
746 rtx shift_reg;
747 int shift;
750 if (GET_CODE (ops[0]) == MEM)
751 dst = gen_reg_rtx (TImode);
752 else
753 dst = adjust_operand (dst, &start);
754 dst_mode = GET_MODE (dst);
755 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
757 if (CONSTANT_P (src))
759 enum machine_mode m =
760 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
761 src = force_reg (m, convert_to_mode (m, src, 0));
763 src = adjust_operand (src, 0);
765 mask = gen_reg_rtx (dst_mode);
766 shift_reg = gen_reg_rtx (dst_mode);
767 shift = dst_size - start - width;
769 /* It's not safe to use subreg here because the compiler assumes
770 that the SUBREG_REG is right justified in the SUBREG. */
771 convert_move (shift_reg, src, 1);
773 if (shift > 0)
775 switch (dst_mode)
777 case SImode:
778 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
779 break;
780 case DImode:
781 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
782 break;
783 case TImode:
784 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
785 break;
786 default:
787 abort ();
790 else if (shift < 0)
791 abort ();
793 switch (dst_size)
795 case 32:
796 maskbits = (-1ll << (32 - width - start));
797 if (start)
798 maskbits += (1ll << (32 - start));
799 emit_move_insn (mask, GEN_INT (maskbits));
800 break;
801 case 64:
802 maskbits = (-1ll << (64 - width - start));
803 if (start)
804 maskbits += (1ll << (64 - start));
805 emit_move_insn (mask, GEN_INT (maskbits));
806 break;
807 case 128:
809 unsigned char arr[16];
810 int i = start / 8;
811 memset (arr, 0, sizeof (arr));
812 arr[i] = 0xff >> (start & 7);
813 for (i++; i <= (start + width - 1) / 8; i++)
814 arr[i] = 0xff;
815 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
816 emit_move_insn (mask, array_to_constant (TImode, arr));
818 break;
819 default:
820 abort ();
822 if (GET_CODE (ops[0]) == MEM)
824 rtx low = gen_reg_rtx (SImode);
825 rtx rotl = gen_reg_rtx (SImode);
826 rtx mask0 = gen_reg_rtx (TImode);
827 rtx addr;
828 rtx addr0;
829 rtx addr1;
830 rtx mem;
832 addr = force_reg (Pmode, XEXP (ops[0], 0));
833 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
834 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
835 emit_insn (gen_negsi2 (rotl, low));
836 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
837 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
838 mem = change_address (ops[0], TImode, addr0);
839 set_mem_alias_set (mem, 0);
840 emit_move_insn (dst, mem);
841 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
842 if (start + width > MEM_ALIGN (ops[0]))
844 rtx shl = gen_reg_rtx (SImode);
845 rtx mask1 = gen_reg_rtx (TImode);
846 rtx dst1 = gen_reg_rtx (TImode);
847 rtx mem1;
848 addr1 = plus_constant (addr, 16);
849 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
850 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
851 emit_insn (gen_shlqby_ti (mask1, mask, shl));
852 mem1 = change_address (ops[0], TImode, addr1);
853 set_mem_alias_set (mem1, 0);
854 emit_move_insn (dst1, mem1);
855 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
856 emit_move_insn (mem1, dst1);
858 emit_move_insn (mem, dst);
860 else
861 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
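/* Editorial aside, not part of the original file: plain-C model of the
   32-bit mask construction in the switch above, with a worked example.
   For START = 8, WIDTH = 8: (-1 << 16) is ...ffff0000, adding (1 << 24)
   carries out of the upper bits and leaves 0x00ff0000 -- ones exactly over
   the bits being inserted.  The 64-bit case is the same with 64 in place
   of 32.  */
#if 0
static unsigned int
insv_mask32 (int start, int width)
{
  long long maskbits = -1ll << (32 - width - start);
  if (start)
    maskbits += 1ll << (32 - start);
  return (unsigned int) maskbits;    /* truncate to the 32-bit mask */
}
#endif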
866 spu_expand_block_move (rtx ops[])
868 HOST_WIDE_INT bytes, align, offset;
869 rtx src, dst, sreg, dreg, target;
870 int i;
871 if (GET_CODE (ops[2]) != CONST_INT
872 || GET_CODE (ops[3]) != CONST_INT
873 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
874 return 0;
876 bytes = INTVAL (ops[2]);
877 align = INTVAL (ops[3]);
879 if (bytes <= 0)
880 return 1;
882 dst = ops[0];
883 src = ops[1];
885 if (align == 16)
887 for (offset = 0; offset + 16 <= bytes; offset += 16)
889 dst = adjust_address (ops[0], V16QImode, offset);
890 src = adjust_address (ops[1], V16QImode, offset);
891 emit_move_insn (dst, src);
893 if (offset < bytes)
895 rtx mask;
896 unsigned char arr[16] = { 0 };
897 for (i = 0; i < bytes - offset; i++)
898 arr[i] = 0xff;
899 dst = adjust_address (ops[0], V16QImode, offset);
900 src = adjust_address (ops[1], V16QImode, offset);
901 mask = gen_reg_rtx (V16QImode);
902 sreg = gen_reg_rtx (V16QImode);
903 dreg = gen_reg_rtx (V16QImode);
904 target = gen_reg_rtx (V16QImode);
905 emit_move_insn (mask, array_to_constant (V16QImode, arr));
906 emit_move_insn (dreg, dst);
907 emit_move_insn (sreg, src);
908 emit_insn (gen_selb (target, dreg, sreg, mask));
909 emit_move_insn (dst, target);
911 return 1;
913 return 0;
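/* Editorial aside: a worked example for the aligned path above.  For
   bytes = 20 and align = 16, one V16QImode move copies the first quadword;
   the tail then builds the mask 0xff,0xff,0xff,0xff,0,...,0 so that selb
   merges the remaining 4 source bytes into the destination quadword while
   preserving its other 12 bytes.  */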
916 enum spu_comp_code
917 { SPU_EQ, SPU_GT, SPU_GTU };
919 int spu_comp_icode[12][3] = {
920 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
921 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
922 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
923 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
924 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
925 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
926 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
927 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
928 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
929 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
930 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
931 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
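/* Editorial aside: rows of the table above correspond to the operand modes
   QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF and V2DF, in the same
   order as the index assignments in spu_emit_branch_or_set below; columns
   are SPU_EQ, SPU_GT and SPU_GTU.  The zero entries record that the float
   modes have no unsigned-greater-than pattern.  */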
934 /* Generate a compare for CODE. Return a brand-new rtx that represents
935 the result of the compare. GCC can figure this out too if we don't
936 provide all variations of compares, but since GCC always wants to use
937 WORD_MODE, we can generate better code in most cases by doing it
938 ourselves. */
939 void
940 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
942 int reverse_compare = 0;
943 int reverse_test = 0;
944 rtx compare_result, eq_result;
945 rtx comp_rtx, eq_rtx;
946 enum machine_mode comp_mode;
947 enum machine_mode op_mode;
948 enum spu_comp_code scode, eq_code;
949 enum insn_code ior_code;
950 enum rtx_code code = GET_CODE (cmp);
951 rtx op0 = XEXP (cmp, 0);
952 rtx op1 = XEXP (cmp, 1);
953 int index;
954 int eq_test = 0;
956 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
957 and so on, to keep the constant in operand 1. */
958 if (GET_CODE (op1) == CONST_INT)
960 HOST_WIDE_INT val = INTVAL (op1) - 1;
961 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
962 switch (code)
964 case GE:
965 op1 = GEN_INT (val);
966 code = GT;
967 break;
968 case LT:
969 op1 = GEN_INT (val);
970 code = LE;
971 break;
972 case GEU:
973 op1 = GEN_INT (val);
974 code = GTU;
975 break;
976 case LTU:
977 op1 = GEN_INT (val);
978 code = LEU;
979 break;
980 default:
981 break;
985 comp_mode = SImode;
986 op_mode = GET_MODE (op0);
988 switch (code)
990 case GE:
991 scode = SPU_GT;
992 if (HONOR_NANS (op_mode))
994 reverse_compare = 0;
995 reverse_test = 0;
996 eq_test = 1;
997 eq_code = SPU_EQ;
999 else
1001 reverse_compare = 1;
1002 reverse_test = 1;
1004 break;
1005 case LE:
1006 scode = SPU_GT;
1007 if (HONOR_NANS (op_mode))
1009 reverse_compare = 1;
1010 reverse_test = 0;
1011 eq_test = 1;
1012 eq_code = SPU_EQ;
1014 else
1016 reverse_compare = 0;
1017 reverse_test = 1;
1019 break;
1020 case LT:
1021 reverse_compare = 1;
1022 reverse_test = 0;
1023 scode = SPU_GT;
1024 break;
1025 case GEU:
1026 reverse_compare = 1;
1027 reverse_test = 1;
1028 scode = SPU_GTU;
1029 break;
1030 case LEU:
1031 reverse_compare = 0;
1032 reverse_test = 1;
1033 scode = SPU_GTU;
1034 break;
1035 case LTU:
1036 reverse_compare = 1;
1037 reverse_test = 0;
1038 scode = SPU_GTU;
1039 break;
1040 case NE:
1041 reverse_compare = 0;
1042 reverse_test = 1;
1043 scode = SPU_EQ;
1044 break;
1046 case EQ:
1047 scode = SPU_EQ;
1048 break;
1049 case GT:
1050 scode = SPU_GT;
1051 break;
1052 case GTU:
1053 scode = SPU_GTU;
1054 break;
1055 default:
1056 scode = SPU_EQ;
1057 break;
1060 switch (op_mode)
1062 case QImode:
1063 index = 0;
1064 comp_mode = QImode;
1065 break;
1066 case HImode:
1067 index = 1;
1068 comp_mode = HImode;
1069 break;
1070 case SImode:
1071 index = 2;
1072 break;
1073 case DImode:
1074 index = 3;
1075 break;
1076 case TImode:
1077 index = 4;
1078 break;
1079 case SFmode:
1080 index = 5;
1081 break;
1082 case DFmode:
1083 index = 6;
1084 break;
1085 case V16QImode:
1086 index = 7;
1087 comp_mode = op_mode;
1088 break;
1089 case V8HImode:
1090 index = 8;
1091 comp_mode = op_mode;
1092 break;
1093 case V4SImode:
1094 index = 9;
1095 comp_mode = op_mode;
1096 break;
1097 case V4SFmode:
1098 index = 10;
1099 comp_mode = V4SImode;
1100 break;
1101 case V2DFmode:
1102 index = 11;
1103 comp_mode = V2DImode;
1104 break;
1105 case V2DImode:
1106 default:
1107 abort ();
1110 if (GET_MODE (op1) == DFmode
1111 && (scode != SPU_GT && scode != SPU_EQ))
1112 abort ();
1114 if (is_set == 0 && op1 == const0_rtx
1115 && (GET_MODE (op0) == SImode
1116 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1118 /* Don't need to set a register with the result when we are
1119 comparing against zero and branching. */
1120 reverse_test = !reverse_test;
1121 compare_result = op0;
1123 else
1125 compare_result = gen_reg_rtx (comp_mode);
1127 if (reverse_compare)
1129 rtx t = op1;
1130 op1 = op0;
1131 op0 = t;
1134 if (spu_comp_icode[index][scode] == 0)
1135 abort ();
1137 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1138 (op0, op_mode))
1139 op0 = force_reg (op_mode, op0);
1140 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1141 (op1, op_mode))
1142 op1 = force_reg (op_mode, op1);
1143 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1144 op0, op1);
1145 if (comp_rtx == 0)
1146 abort ();
1147 emit_insn (comp_rtx);
1149 if (eq_test)
1151 eq_result = gen_reg_rtx (comp_mode);
1152 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1153 op0, op1);
1154 if (eq_rtx == 0)
1155 abort ();
1156 emit_insn (eq_rtx);
1157 ior_code = optab_handler (ior_optab, comp_mode);
1158 gcc_assert (ior_code != CODE_FOR_nothing);
1159 emit_insn (GEN_FCN (ior_code)
1160 (compare_result, compare_result, eq_result));
1164 if (is_set == 0)
1166 rtx bcomp;
1167 rtx loc_ref;
1169 /* We don't have branch on QI compare insns, so we convert the
1170 QI compare result to a HI result. */
1171 if (comp_mode == QImode)
1173 rtx old_res = compare_result;
1174 compare_result = gen_reg_rtx (HImode);
1175 comp_mode = HImode;
1176 emit_insn (gen_extendqihi2 (compare_result, old_res));
1179 if (reverse_test)
1180 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1181 else
1182 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1184 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1185 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1186 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1187 loc_ref, pc_rtx)));
1189 else if (is_set == 2)
1191 rtx target = operands[0];
1192 int compare_size = GET_MODE_BITSIZE (comp_mode);
1193 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1194 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1195 rtx select_mask;
1196 rtx op_t = operands[2];
1197 rtx op_f = operands[3];
1199 /* The result of the comparison can be SI, HI or QI mode. Create a
1200 mask based on that result. */
1201 if (target_size > compare_size)
1203 select_mask = gen_reg_rtx (mode);
1204 emit_insn (gen_extend_compare (select_mask, compare_result));
1206 else if (target_size < compare_size)
1207 select_mask =
1208 gen_rtx_SUBREG (mode, compare_result,
1209 (compare_size - target_size) / BITS_PER_UNIT);
1210 else if (comp_mode != mode)
1211 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1212 else
1213 select_mask = compare_result;
1215 if (GET_MODE (target) != GET_MODE (op_t)
1216 || GET_MODE (target) != GET_MODE (op_f))
1217 abort ();
1219 if (reverse_test)
1220 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1221 else
1222 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1224 else
1226 rtx target = operands[0];
1227 if (reverse_test)
1228 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1229 gen_rtx_NOT (comp_mode, compare_result)));
1230 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1231 emit_insn (gen_extendhisi2 (target, compare_result));
1232 else if (GET_MODE (target) == SImode
1233 && GET_MODE (compare_result) == QImode)
1234 emit_insn (gen_extend_compare (target, compare_result));
1235 else
1236 emit_move_insn (target, compare_result);
1240 HOST_WIDE_INT
1241 const_double_to_hwint (rtx x)
1243 HOST_WIDE_INT val;
1244 REAL_VALUE_TYPE rv;
1245 if (GET_MODE (x) == SFmode)
1247 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1248 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1250 else if (GET_MODE (x) == DFmode)
1252 long l[2];
1253 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1254 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1255 val = l[0];
1256 val = (val << 32) | (l[1] & 0xffffffff);
1258 else
1259 abort ();
1260 return val;
1264 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1266 long tv[2];
1267 REAL_VALUE_TYPE rv;
1268 gcc_assert (mode == SFmode || mode == DFmode);
1270 if (mode == SFmode)
1271 tv[0] = (v << 32) >> 32;
1272 else if (mode == DFmode)
1274 tv[1] = (v << 32) >> 32;
1275 tv[0] = v >> 32;
1277 real_from_target (&rv, tv, mode);
1278 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1281 void
1282 print_operand_address (FILE * file, register rtx addr)
1284 rtx reg;
1285 rtx offset;
1287 if (GET_CODE (addr) == AND
1288 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1289 && INTVAL (XEXP (addr, 1)) == -16)
1290 addr = XEXP (addr, 0);
1292 switch (GET_CODE (addr))
1294 case REG:
1295 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1296 break;
1298 case PLUS:
1299 reg = XEXP (addr, 0);
1300 offset = XEXP (addr, 1);
1301 if (GET_CODE (offset) == REG)
1303 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1304 reg_names[REGNO (offset)]);
1306 else if (GET_CODE (offset) == CONST_INT)
1308 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1309 INTVAL (offset), reg_names[REGNO (reg)]);
1311 else
1312 abort ();
1313 break;
1315 case CONST:
1316 case LABEL_REF:
1317 case SYMBOL_REF:
1318 case CONST_INT:
1319 output_addr_const (file, addr);
1320 break;
1322 default:
1323 debug_rtx (addr);
1324 abort ();
1328 void
1329 print_operand (FILE * file, rtx x, int code)
1331 enum machine_mode mode = GET_MODE (x);
1332 HOST_WIDE_INT val;
1333 unsigned char arr[16];
1334 int xcode = GET_CODE (x);
1335 int i, info;
1336 if (GET_MODE (x) == VOIDmode)
1337 switch (code)
1339 case 'L': /* 128 bits, signed */
1340 case 'm': /* 128 bits, signed */
1341 case 'T': /* 128 bits, signed */
1342 case 't': /* 128 bits, signed */
1343 mode = TImode;
1344 break;
1345 case 'K': /* 64 bits, signed */
1346 case 'k': /* 64 bits, signed */
1347 case 'D': /* 64 bits, signed */
1348 case 'd': /* 64 bits, signed */
1349 mode = DImode;
1350 break;
1351 case 'J': /* 32 bits, signed */
1352 case 'j': /* 32 bits, signed */
1353 case 's': /* 32 bits, signed */
1354 case 'S': /* 32 bits, signed */
1355 mode = SImode;
1356 break;
1358 switch (code)
1361 case 'j': /* 32 bits, signed */
1362 case 'k': /* 64 bits, signed */
1363 case 'm': /* 128 bits, signed */
1364 if (xcode == CONST_INT
1365 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1367 gcc_assert (logical_immediate_p (x, mode));
1368 constant_to_array (mode, x, arr);
1369 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1370 val = trunc_int_for_mode (val, SImode);
1371 switch (which_logical_immediate (val))
1373 case SPU_ORI:
1374 break;
1375 case SPU_ORHI:
1376 fprintf (file, "h");
1377 break;
1378 case SPU_ORBI:
1379 fprintf (file, "b");
1380 break;
1381 default:
1382 gcc_unreachable();
1385 else
1386 gcc_unreachable();
1387 return;
1389 case 'J': /* 32 bits, signed */
1390 case 'K': /* 64 bits, signed */
1391 case 'L': /* 128 bits, signed */
1392 if (xcode == CONST_INT
1393 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1395 gcc_assert (logical_immediate_p (x, mode)
1396 || iohl_immediate_p (x, mode));
1397 constant_to_array (mode, x, arr);
1398 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1399 val = trunc_int_for_mode (val, SImode);
1400 switch (which_logical_immediate (val))
1402 case SPU_ORI:
1403 case SPU_IOHL:
1404 break;
1405 case SPU_ORHI:
1406 val = trunc_int_for_mode (val, HImode);
1407 break;
1408 case SPU_ORBI:
1409 val = trunc_int_for_mode (val, QImode);
1410 break;
1411 default:
1412 gcc_unreachable();
1414 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1416 else
1417 gcc_unreachable();
1418 return;
1420 case 't': /* 128 bits, signed */
1421 case 'd': /* 64 bits, signed */
1422 case 's': /* 32 bits, signed */
1423 if (CONSTANT_P (x))
1425 enum immediate_class c = classify_immediate (x, mode);
1426 switch (c)
1428 case IC_IL1:
1429 constant_to_array (mode, x, arr);
1430 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1431 val = trunc_int_for_mode (val, SImode);
1432 switch (which_immediate_load (val))
1434 case SPU_IL:
1435 break;
1436 case SPU_ILA:
1437 fprintf (file, "a");
1438 break;
1439 case SPU_ILH:
1440 fprintf (file, "h");
1441 break;
1442 case SPU_ILHU:
1443 fprintf (file, "hu");
1444 break;
1445 default:
1446 gcc_unreachable ();
1448 break;
1449 case IC_CPAT:
1450 constant_to_array (mode, x, arr);
1451 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1452 if (info == 1)
1453 fprintf (file, "b");
1454 else if (info == 2)
1455 fprintf (file, "h");
1456 else if (info == 4)
1457 fprintf (file, "w");
1458 else if (info == 8)
1459 fprintf (file, "d");
1460 break;
1461 case IC_IL1s:
1462 if (xcode == CONST_VECTOR)
1464 x = CONST_VECTOR_ELT (x, 0);
1465 xcode = GET_CODE (x);
1467 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1468 fprintf (file, "a");
1469 else if (xcode == HIGH)
1470 fprintf (file, "hu");
1471 break;
1472 case IC_FSMBI:
1473 case IC_FSMBI2:
1474 case IC_IL2:
1475 case IC_IL2s:
1476 case IC_POOL:
1477 abort ();
1480 else
1481 gcc_unreachable ();
1482 return;
1484 case 'T': /* 128 bits, signed */
1485 case 'D': /* 64 bits, signed */
1486 case 'S': /* 32 bits, signed */
1487 if (CONSTANT_P (x))
1489 enum immediate_class c = classify_immediate (x, mode);
1490 switch (c)
1492 case IC_IL1:
1493 constant_to_array (mode, x, arr);
1494 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1495 val = trunc_int_for_mode (val, SImode);
1496 switch (which_immediate_load (val))
1498 case SPU_IL:
1499 case SPU_ILA:
1500 break;
1501 case SPU_ILH:
1502 case SPU_ILHU:
1503 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1504 break;
1505 default:
1506 gcc_unreachable ();
1508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1509 break;
1510 case IC_FSMBI:
1511 constant_to_array (mode, x, arr);
1512 val = 0;
1513 for (i = 0; i < 16; i++)
1515 val <<= 1;
1516 val |= arr[i] & 1;
1518 print_operand (file, GEN_INT (val), 0);
1519 break;
1520 case IC_CPAT:
1521 constant_to_array (mode, x, arr);
1522 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1524 break;
1525 case IC_IL1s:
1526 if (xcode == HIGH)
1527 x = XEXP (x, 0);
1528 if (GET_CODE (x) == CONST_VECTOR)
1529 x = CONST_VECTOR_ELT (x, 0);
1530 output_addr_const (file, x);
1531 if (xcode == HIGH)
1532 fprintf (file, "@h");
1533 break;
1534 case IC_IL2:
1535 case IC_IL2s:
1536 case IC_FSMBI2:
1537 case IC_POOL:
1538 abort ();
1541 else
1542 gcc_unreachable ();
1543 return;
1545 case 'C':
1546 if (xcode == CONST_INT)
1548 /* Only the 4 least significant bits are relevant for generating
1549 control word instructions. */
1550 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1551 return;
1553 break;
1555 case 'M': /* print code for c*d */
1556 if (GET_CODE (x) == CONST_INT)
1557 switch (INTVAL (x))
1559 case 1:
1560 fprintf (file, "b");
1561 break;
1562 case 2:
1563 fprintf (file, "h");
1564 break;
1565 case 4:
1566 fprintf (file, "w");
1567 break;
1568 case 8:
1569 fprintf (file, "d");
1570 break;
1571 default:
1572 gcc_unreachable();
1574 else
1575 gcc_unreachable();
1576 return;
1578 case 'N': /* Negate the operand */
1579 if (xcode == CONST_INT)
1580 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1581 else if (xcode == CONST_VECTOR)
1582 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1583 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1584 return;
1586 case 'I': /* enable/disable interrupts */
1587 if (xcode == CONST_INT)
1588 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1589 return;
1591 case 'b': /* branch modifiers */
1592 if (xcode == REG)
1593 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1594 else if (COMPARISON_P (x))
1595 fprintf (file, "%s", xcode == NE ? "n" : "");
1596 return;
1598 case 'i': /* indirect call */
1599 if (xcode == MEM)
1601 if (GET_CODE (XEXP (x, 0)) == REG)
1602 /* Used in indirect function calls. */
1603 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1604 else
1605 output_address (XEXP (x, 0));
1607 return;
1609 case 'p': /* load/store */
1610 if (xcode == MEM)
1612 x = XEXP (x, 0);
1613 xcode = GET_CODE (x);
1615 if (xcode == AND)
1617 x = XEXP (x, 0);
1618 xcode = GET_CODE (x);
1620 if (xcode == REG)
1621 fprintf (file, "d");
1622 else if (xcode == CONST_INT)
1623 fprintf (file, "a");
1624 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1625 fprintf (file, "r");
1626 else if (xcode == PLUS || xcode == LO_SUM)
1628 if (GET_CODE (XEXP (x, 1)) == REG)
1629 fprintf (file, "x");
1630 else
1631 fprintf (file, "d");
1633 return;
1635 case 'e':
1636 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1637 val &= 0x7;
1638 output_addr_const (file, GEN_INT (val));
1639 return;
1641 case 'f':
1642 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1643 val &= 0x1f;
1644 output_addr_const (file, GEN_INT (val));
1645 return;
1647 case 'g':
1648 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1649 val &= 0x3f;
1650 output_addr_const (file, GEN_INT (val));
1651 return;
1653 case 'h':
1654 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1655 val = (val >> 3) & 0x1f;
1656 output_addr_const (file, GEN_INT (val));
1657 return;
1659 case 'E':
1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661 val = -val;
1662 val &= 0x7;
1663 output_addr_const (file, GEN_INT (val));
1664 return;
1666 case 'F':
1667 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1668 val = -val;
1669 val &= 0x1f;
1670 output_addr_const (file, GEN_INT (val));
1671 return;
1673 case 'G':
1674 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1675 val = -val;
1676 val &= 0x3f;
1677 output_addr_const (file, GEN_INT (val));
1678 return;
1680 case 'H':
1681 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1682 val = -(val & -8ll);
1683 val = (val >> 3) & 0x1f;
1684 output_addr_const (file, GEN_INT (val));
1685 return;
1687 case 'v':
1688 case 'w':
1689 constant_to_array (mode, x, arr);
1690 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1691 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1692 return;
1694 case 0:
1695 if (xcode == REG)
1696 fprintf (file, "%s", reg_names[REGNO (x)]);
1697 else if (xcode == MEM)
1698 output_address (XEXP (x, 0));
1699 else if (xcode == CONST_VECTOR)
1700 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1701 else
1702 output_addr_const (file, x);
1703 return;
1705 /* unused letters
1706 o qr u yz
1707 AB OPQR UVWXYZ */
1708 default:
1709 output_operand_lossage ("invalid %%xn code");
1711 gcc_unreachable ();
1714 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1715 caller saved register. For leaf functions it is more efficient to
1716 use a volatile register because we won't need to save and restore the
1717 pic register. This routine is only valid after register allocation
1718 is completed, so we can pick an unused register. */
1719 static rtx
1720 get_pic_reg (void)
1722 rtx pic_reg = pic_offset_table_rtx;
1723 if (!reload_completed && !reload_in_progress)
1724 abort ();
1725 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1726 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1727 return pic_reg;
1730 /* Split constant addresses to handle cases that are too large.
1731 Add in the pic register when in PIC mode.
1732 Split immediates that require more than 1 instruction. */
1734 spu_split_immediate (rtx * ops)
1736 enum machine_mode mode = GET_MODE (ops[0]);
1737 enum immediate_class c = classify_immediate (ops[1], mode);
1739 switch (c)
1741 case IC_IL2:
1743 unsigned char arrhi[16];
1744 unsigned char arrlo[16];
1745 rtx to, temp, hi, lo;
1746 int i;
1747 enum machine_mode imode = mode;
1748 /* We need to do reals as ints because the constant used in the
1749 IOR might not be a legitimate real constant. */
1750 imode = int_mode_for_mode (mode);
1751 constant_to_array (mode, ops[1], arrhi);
1752 if (imode != mode)
1753 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1754 else
1755 to = ops[0];
1756 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1757 for (i = 0; i < 16; i += 4)
1759 arrlo[i + 2] = arrhi[i + 2];
1760 arrlo[i + 3] = arrhi[i + 3];
1761 arrlo[i + 0] = arrlo[i + 1] = 0;
1762 arrhi[i + 2] = arrhi[i + 3] = 0;
1764 hi = array_to_constant (imode, arrhi);
1765 lo = array_to_constant (imode, arrlo);
1766 emit_move_insn (temp, hi);
1767 emit_insn (gen_rtx_SET
1768 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1769 return 1;
1771 case IC_FSMBI2:
1773 unsigned char arr_fsmbi[16];
1774 unsigned char arr_andbi[16];
1775 rtx to, reg_fsmbi, reg_and;
1776 int i;
1777 enum machine_mode imode = mode;
1778 /* We need to do reals as ints because the constant used in the
1779 * AND might not be a legitimate real constant. */
1780 imode = int_mode_for_mode (mode);
1781 constant_to_array (mode, ops[1], arr_fsmbi);
1782 if (imode != mode)
1783 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1784 else
1785 to = ops[0];
1786 for (i = 0; i < 16; i++)
1787 if (arr_fsmbi[i] != 0)
1789 arr_andbi[0] = arr_fsmbi[i];
1790 arr_fsmbi[i] = 0xff;
1792 for (i = 1; i < 16; i++)
1793 arr_andbi[i] = arr_andbi[0];
1794 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1795 reg_and = array_to_constant (imode, arr_andbi);
1796 emit_move_insn (to, reg_fsmbi);
1797 emit_insn (gen_rtx_SET
1798 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1799 return 1;
1801 case IC_POOL:
1802 if (reload_in_progress || reload_completed)
1804 rtx mem = force_const_mem (mode, ops[1]);
1805 if (TARGET_LARGE_MEM)
1807 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1808 emit_move_insn (addr, XEXP (mem, 0));
1809 mem = replace_equiv_address (mem, addr);
1811 emit_move_insn (ops[0], mem);
1812 return 1;
1814 break;
1815 case IC_IL1s:
1816 case IC_IL2s:
1817 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1819 if (c == IC_IL2s)
1821 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1822 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1824 else if (flag_pic)
1825 emit_insn (gen_pic (ops[0], ops[1]));
1826 if (flag_pic)
1828 rtx pic_reg = get_pic_reg ();
1829 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1830 crtl->uses_pic_offset_table = 1;
1832 return flag_pic || c == IC_IL2s;
1834 break;
1835 case IC_IL1:
1836 case IC_FSMBI:
1837 case IC_CPAT:
1838 break;
1840 return 0;
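/* Editorial aside, not part of the original file: per-word model of the
   IC_IL2 split performed above.  The loop keeps the high halfword of each
   32-bit word in ARRHI and the low halfword in ARRLO, so each word W is
   materialized as an ilhu of (W >> 16) followed by an iohl of (W & 0xffff),
   e.g. W = 0x12345678 -> ilhu 0x1234 then iohl 0x5678.  The helper name
   below is an illustrative assumption.  */
#if 0
static void
split_il2_word (unsigned int w, unsigned short *hi, unsigned short *lo)
{
  *hi = (unsigned short) (w >> 16);     /* operand of ilhu */
  *lo = (unsigned short) (w & 0xffff);  /* operand of iohl */
}
#endif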
1843 /* SAVING is TRUE when we are generating the actual load and store
1844 instructions for REGNO. When determining the size of the stack
1845 needed for saving registers, we must allocate enough space for the
1846 worst case, because we don't always have the information early enough
1847 to not allocate it. But we can at least eliminate the actual loads
1848 and stores during the prologue/epilogue. */
1849 static int
1850 need_to_save_reg (int regno, int saving)
1852 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1853 return 1;
1854 if (flag_pic
1855 && regno == PIC_OFFSET_TABLE_REGNUM
1856 && (!saving || crtl->uses_pic_offset_table)
1857 && (!saving
1858 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1859 return 1;
1860 return 0;
1863 /* This function is only correct starting with local register
1864 allocation */
1866 spu_saved_regs_size (void)
1868 int reg_save_size = 0;
1869 int regno;
1871 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1872 if (need_to_save_reg (regno, 0))
1873 reg_save_size += 0x10;
1874 return reg_save_size;
1877 static rtx
1878 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1880 rtx reg = gen_rtx_REG (V4SImode, regno);
1881 rtx mem =
1882 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1883 return emit_insn (gen_movv4si (mem, reg));
1886 static rtx
1887 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1889 rtx reg = gen_rtx_REG (V4SImode, regno);
1890 rtx mem =
1891 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1892 return emit_insn (gen_movv4si (reg, mem));
1895 /* This happens after reload, so we need to expand it. */
1896 static rtx
1897 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1899 rtx insn;
1900 if (satisfies_constraint_K (GEN_INT (imm)))
1902 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1904 else
1906 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1907 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1908 if (REGNO (src) == REGNO (scratch))
1909 abort ();
1911 return insn;
1914 /* Return nonzero if this function is known to have a null epilogue. */
1917 direct_return (void)
1919 if (reload_completed)
1921 if (cfun->static_chain_decl == 0
1922 && (spu_saved_regs_size ()
1923 + get_frame_size ()
1924 + crtl->outgoing_args_size
1925 + crtl->args.pretend_args_size == 0)
1926 && current_function_is_leaf)
1927 return 1;
1929 return 0;
1933 The stack frame looks like this:
1934 +-------------+
1935 | incoming |
1936 | args |
1937 AP -> +-------------+
1938 | $lr save |
1939 +-------------+
1940 prev SP | back chain |
1941 +-------------+
1942 | var args |
1943 | reg save | crtl->args.pretend_args_size bytes
1944 +-------------+
1945 | ... |
1946 | saved regs | spu_saved_regs_size() bytes
1947 FP -> +-------------+
1948 | ... |
1949 | vars | get_frame_size() bytes
1950 HFP -> +-------------+
1951 | ... |
1952 | outgoing |
1953 | args | crtl->outgoing_args_size bytes
1954 +-------------+
1955 | $lr of next |
1956 | frame |
1957 +-------------+
1958 | back chain |
1959 SP -> +-------------+
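/* Editorial aside, not part of the original file: a plain-C sketch of how
   the pieces in the diagram above add up to the frame size computed in
   spu_expand_prologue below.  All inputs are byte counts; "leaf" mirrors
   current_function_is_leaf, and stack_ptr_offset covers the back chain and
   $lr slots shown at the bottom of the diagram.  */
#if 0
static long long
spu_frame_size_model (long long vars, long long saved_regs,
                      long long outgoing_args, long long pretend_args,
                      int leaf, int calls_alloca, long long stack_ptr_offset)
{
  long long total = vars + saved_regs + outgoing_args + pretend_args;
  if (!leaf || calls_alloca || total > 0)
    total += stack_ptr_offset;
  return total;
}
#endif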
1962 void
1963 spu_expand_prologue (void)
1965 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1966 HOST_WIDE_INT total_size;
1967 HOST_WIDE_INT saved_regs_size;
1968 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1969 rtx scratch_reg_0, scratch_reg_1;
1970 rtx insn, real;
1972 if (flag_pic && optimize == 0)
1973 crtl->uses_pic_offset_table = 1;
1975 if (spu_naked_function_p (current_function_decl))
1976 return;
1978 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1979 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1981 saved_regs_size = spu_saved_regs_size ();
1982 total_size = size + saved_regs_size
1983 + crtl->outgoing_args_size
1984 + crtl->args.pretend_args_size;
1986 if (!current_function_is_leaf
1987 || cfun->calls_alloca || total_size > 0)
1988 total_size += STACK_POINTER_OFFSET;
1990 /* Save this first because code after this might use the link
1991 register as a scratch register. */
1992 if (!current_function_is_leaf)
1994 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1995 RTX_FRAME_RELATED_P (insn) = 1;
1998 if (total_size > 0)
2000 offset = -crtl->args.pretend_args_size;
2001 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2002 if (need_to_save_reg (regno, 1))
2004 offset -= 16;
2005 insn = frame_emit_store (regno, sp_reg, offset);
2006 RTX_FRAME_RELATED_P (insn) = 1;
2010 if (flag_pic && crtl->uses_pic_offset_table)
2012 rtx pic_reg = get_pic_reg ();
2013 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2014 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2017 if (total_size > 0)
2019 if (flag_stack_check)
2021 /* We compare against total_size-1 because
2022 ($sp >= total_size) <=> ($sp > total_size-1) */
2023 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2024 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2025 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2026 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2028 emit_move_insn (scratch_v4si, size_v4si);
2029 size_v4si = scratch_v4si;
2031 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2032 emit_insn (gen_vec_extractv4si
2033 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2034 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2037 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2038 the value of the previous $sp because we save it as the back
2039 chain. */
2040 if (total_size <= 2000)
2042 /* In this case we save the back chain first. */
2043 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2044 insn =
2045 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2047 else
2049 insn = emit_move_insn (scratch_reg_0, sp_reg);
2050 insn =
2051 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2053 RTX_FRAME_RELATED_P (insn) = 1;
2054 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2055 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2057 if (total_size > 2000)
2059 /* Save the back chain ptr */
2060 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2063 if (frame_pointer_needed)
2065 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2066 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2067 + crtl->outgoing_args_size;
2068 /* Set the new frame_pointer */
2069 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2070 RTX_FRAME_RELATED_P (insn) = 1;
2071 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2072 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2073 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2077 if (flag_stack_usage_info)
2078 current_function_static_stack_size = total_size;
2081 void
2082 spu_expand_epilogue (bool sibcall_p)
2084 int size = get_frame_size (), offset, regno;
2085 HOST_WIDE_INT saved_regs_size, total_size;
2086 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2087 rtx scratch_reg_0;
2089 if (spu_naked_function_p (current_function_decl))
2090 return;
2092 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2094 saved_regs_size = spu_saved_regs_size ();
2095 total_size = size + saved_regs_size
2096 + crtl->outgoing_args_size
2097 + crtl->args.pretend_args_size;
2099 if (!current_function_is_leaf
2100 || cfun->calls_alloca || total_size > 0)
2101 total_size += STACK_POINTER_OFFSET;
2103 if (total_size > 0)
2105 if (cfun->calls_alloca)
2106 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2107 else
2108 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2111 if (saved_regs_size > 0)
2113 offset = -crtl->args.pretend_args_size;
2114 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2115 if (need_to_save_reg (regno, 1))
2117 offset -= 0x10;
2118 frame_emit_load (regno, sp_reg, offset);
2123 if (!current_function_is_leaf)
2124 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2126 if (!sibcall_p)
2128 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2129 emit_jump_insn (gen__return ());
2134 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2136 if (count != 0)
2137 return 0;
2138 /* This is inefficient because it ends up copying to a save-register
2139 which then gets saved even though $lr has already been saved. But
2140 it does generate better code for leaf functions and we don't need
2141 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2142 used for __builtin_return_address anyway, so maybe we don't care if
2143 it's inefficient. */
2144 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2148 /* Given VAL, generate a constant appropriate for MODE.
2149 If MODE is a vector mode, every element will be VAL.
2150 For TImode, VAL will be zero extended to 128 bits. */
2152 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2154 rtx inner;
2155 rtvec v;
2156 int units, i;
2158 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2159 || GET_MODE_CLASS (mode) == MODE_FLOAT
2160 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2161 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2163 if (GET_MODE_CLASS (mode) == MODE_INT)
2164 return immed_double_const (val, 0, mode);
2166 /* val is the bit representation of the float */
2167 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2168 return hwint_to_const_double (mode, val);
2170 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2171 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2172 else
2173 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2175 units = GET_MODE_NUNITS (mode);
2177 v = rtvec_alloc (units);
2179 for (i = 0; i < units; ++i)
2180 RTVEC_ELT (v, i) = inner;
2182 return gen_rtx_CONST_VECTOR (mode, v);
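/* Usage sketch (illustrative, not from the original source):

     rtx ones = spu_const (V4SImode, 1);

   yields (const_vector:V4SI [1 1 1 1]), while spu_const (SImode, 1)
   simply returns (const_int 1).  The stack-check code in
   spu_expand_prologue above uses it this way to splat total_size - 1
   across a V4SImode register.  */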
2185 /* Create a MODE vector constant from 4 ints. */
2187 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2189 unsigned char arr[16];
2190 arr[0] = (a >> 24) & 0xff;
2191 arr[1] = (a >> 16) & 0xff;
2192 arr[2] = (a >> 8) & 0xff;
2193 arr[3] = (a >> 0) & 0xff;
2194 arr[4] = (b >> 24) & 0xff;
2195 arr[5] = (b >> 16) & 0xff;
2196 arr[6] = (b >> 8) & 0xff;
2197 arr[7] = (b >> 0) & 0xff;
2198 arr[8] = (c >> 24) & 0xff;
2199 arr[9] = (c >> 16) & 0xff;
2200 arr[10] = (c >> 8) & 0xff;
2201 arr[11] = (c >> 0) & 0xff;
2202 arr[12] = (d >> 24) & 0xff;
2203 arr[13] = (d >> 16) & 0xff;
2204 arr[14] = (d >> 8) & 0xff;
2205 arr[15] = (d >> 0) & 0xff;
2206 return array_to_constant(mode, arr);
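/* Byte-order sketch (illustrative, not from the original source): the
   packing above is big-endian within each word, so

     spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f);

   builds the 16-byte constant 00 01 02 03 04 05 06 07 ... 0e 0f.  */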
2209 /* branch hint stuff */
2211 /* An array of these is used to propagate hints to predecessor blocks. */
2212 struct spu_bb_info
2214 rtx prop_jump; /* propagated from another block */
2215 int bb_index; /* the original block. */
2217 static struct spu_bb_info *spu_bb_info;
2219 #define STOP_HINT_P(INSN) \
2220 (GET_CODE(INSN) == CALL_INSN \
2221 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2222 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2224 /* 1 when RTX is a hinted branch or its target. We keep track of
2225 what has been hinted so the safe-hint code can test it easily. */
2226 #define HINTED_P(RTX) \
2227 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2229 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2230 #define SCHED_ON_EVEN_P(RTX) \
2231 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2233 /* Emit a nop for INSN such that the two will dual issue. This assumes
2234 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2235 We check for TImode to handle a MULTI1 insn which has dual issued its
2236 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2237 ADDR_VEC insns. */
2238 static void
2239 emit_nop_for_insn (rtx insn)
2241 int p;
2242 rtx new_insn;
2243 p = get_pipe (insn);
2244 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2245 new_insn = emit_insn_after (gen_lnop (), insn);
2246 else if (p == 1 && GET_MODE (insn) == TImode)
2248 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2249 PUT_MODE (new_insn, TImode);
2250 PUT_MODE (insn, VOIDmode);
2252 else
2253 new_insn = emit_insn_after (gen_lnop (), insn);
2254 recog_memoized (new_insn);
2255 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
2258 /* Insert nops in basic blocks to meet dual issue alignment
2259 requirements. Also make sure hbrp and hint instructions are at least
2260 one cycle apart, possibly inserting a nop. */
2261 static void
2262 pad_bb(void)
2264 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2265 int length;
2266 int addr;
2268 /* This sets up INSN_ADDRESSES. */
2269 shorten_branches (get_insns ());
2271 /* Keep track of length added by nops. */
2272 length = 0;
2274 prev_insn = 0;
2275 insn = get_insns ();
2276 if (!active_insn_p (insn))
2277 insn = next_active_insn (insn);
2278 for (; insn; insn = next_insn)
2280 next_insn = next_active_insn (insn);
2281 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2282 || INSN_CODE (insn) == CODE_FOR_hbr)
2284 if (hbr_insn)
2286 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2287 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2288 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2289 || (a1 - a0 == 4))
2291 prev_insn = emit_insn_before (gen_lnop (), insn);
2292 PUT_MODE (prev_insn, GET_MODE (insn));
2293 PUT_MODE (insn, TImode);
2294 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
2295 length += 4;
2298 hbr_insn = insn;
2300 if (INSN_CODE (insn) == CODE_FOR_blockage)
2302 if (GET_MODE (insn) == TImode)
2303 PUT_MODE (next_insn, TImode);
2304 insn = next_insn;
2305 next_insn = next_active_insn (insn);
2307 addr = INSN_ADDRESSES (INSN_UID (insn));
2308 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2310 if (((addr + length) & 7) != 0)
2312 emit_nop_for_insn (prev_insn);
2313 length += 4;
2316 else if (GET_MODE (insn) == TImode
2317 && ((next_insn && GET_MODE (next_insn) != TImode)
2318 || get_attr_type (insn) == TYPE_MULTI0)
2319 && ((addr + length) & 7) != 0)
2321 /* prev_insn will always be set because the first insn is
2322 always 8-byte aligned. */
2323 emit_nop_for_insn (prev_insn);
2324 length += 4;
2326 prev_insn = insn;
2331 /* Routines for branch hints. */
2333 static void
2334 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2335 int distance, sbitmap blocks)
2337 rtx branch_label = 0;
2338 rtx hint;
2339 rtx insn;
2340 rtx table;
2342 if (before == 0 || branch == 0 || target == 0)
2343 return;
2345   /* While scheduling we require hints to be no further than 600 bytes away, so
2346 we need to enforce that here too */
2347 if (distance > 600)
2348 return;
2350   /* If BEFORE is a basic block note, emit the hint after the note. */
2351 if (NOTE_INSN_BASIC_BLOCK_P (before))
2352 before = NEXT_INSN (before);
2354 branch_label = gen_label_rtx ();
2355 LABEL_NUSES (branch_label)++;
2356 LABEL_PRESERVE_P (branch_label) = 1;
2357 insn = emit_label_before (branch_label, branch);
2358 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2359 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2361 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2362 recog_memoized (hint);
2363 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
2364 HINTED_P (branch) = 1;
2366 if (GET_CODE (target) == LABEL_REF)
2367 HINTED_P (XEXP (target, 0)) = 1;
2368 else if (tablejump_p (branch, 0, &table))
2370 rtvec vec;
2371 int j;
2372 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2373 vec = XVEC (PATTERN (table), 0);
2374 else
2375 vec = XVEC (PATTERN (table), 1);
2376 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2377 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2380 if (distance >= 588)
2382 /* Make sure the hint isn't scheduled any earlier than this point,
2383          which could make it too far for the branch offset to fit */
2384 insn = emit_insn_before (gen_blockage (), hint);
2385 recog_memoized (insn);
2386 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2388 else if (distance <= 8 * 4)
2390 /* To guarantee at least 8 insns between the hint and branch we
2391 insert nops. */
2392 int d;
2393 for (d = distance; d < 8 * 4; d += 4)
2395 insn =
2396 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2397 recog_memoized (insn);
2398 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2401 /* Make sure any nops inserted aren't scheduled before the hint. */
2402 insn = emit_insn_after (gen_blockage (), hint);
2403 recog_memoized (insn);
2404 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2406 /* Make sure any nops inserted aren't scheduled after the call. */
2407 if (CALL_P (branch) && distance < 8 * 4)
2409 insn = emit_insn_before (gen_blockage (), branch);
2410 recog_memoized (insn);
2411 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2416 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2417 the rtx for the branch target. */
2418 static rtx
2419 get_branch_target (rtx branch)
2421 if (GET_CODE (branch) == JUMP_INSN)
2423 rtx set, src;
2425 /* Return statements */
2426 if (GET_CODE (PATTERN (branch)) == RETURN)
2427 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2429 /* jump table */
2430 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2431 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2432 return 0;
2434 /* ASM GOTOs. */
2435 if (extract_asm_operands (PATTERN (branch)) != NULL)
2436 return NULL;
2438 set = single_set (branch);
2439 src = SET_SRC (set);
2440 if (GET_CODE (SET_DEST (set)) != PC)
2441 abort ();
2443 if (GET_CODE (src) == IF_THEN_ELSE)
2445 rtx lab = 0;
2446 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2447 if (note)
2449 /* If the more probable case is not a fall through, then
2450 try a branch hint. */
2451 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2452 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2453 && GET_CODE (XEXP (src, 1)) != PC)
2454 lab = XEXP (src, 1);
2455 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2456 && GET_CODE (XEXP (src, 2)) != PC)
2457 lab = XEXP (src, 2);
2459 if (lab)
2461 if (GET_CODE (lab) == RETURN)
2462 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2463 return lab;
2465 return 0;
2468 return src;
2470 else if (GET_CODE (branch) == CALL_INSN)
2472 rtx call;
2473 /* All of our call patterns are in a PARALLEL and the CALL is
2474 the first pattern in the PARALLEL. */
2475 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2476 abort ();
2477 call = XVECEXP (PATTERN (branch), 0, 0);
2478 if (GET_CODE (call) == SET)
2479 call = SET_SRC (call);
2480 if (GET_CODE (call) != CALL)
2481 abort ();
2482 return XEXP (XEXP (call, 0), 0);
2484 return 0;
2487 /* The special $hbr register is used to prevent the insn scheduler from
2488 moving hbr insns across instructions which invalidate them. It
2489 should only be used in a clobber, and this function searches for
2490 insns which clobber it. */
2491 static bool
2492 insn_clobbers_hbr (rtx insn)
2494 if (INSN_P (insn)
2495 && GET_CODE (PATTERN (insn)) == PARALLEL)
2497 rtx parallel = PATTERN (insn);
2498 rtx clobber;
2499 int j;
2500 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2502 clobber = XVECEXP (parallel, 0, j);
2503 if (GET_CODE (clobber) == CLOBBER
2504 && GET_CODE (XEXP (clobber, 0)) == REG
2505 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2506 return 1;
2509 return 0;
2512 /* Search up to 32 insns starting at FIRST:
2513 - at any kind of hinted branch, just return
2514 - at any unconditional branch in the first 15 insns, just return
2515 - at a call or indirect branch, after the first 15 insns, force it to
2516 an even address and return
2517 - at any unconditional branch, after the first 15 insns, force it to
2518 an even address.
2519    At the end of the search, insert an hbrp within 4 insns of FIRST,
2520    and an hbrp within 16 instructions of FIRST.
2521  */
2522 static void
2523 insert_hbrp_for_ilb_runout (rtx first)
2525 rtx insn, before_4 = 0, before_16 = 0;
2526 int addr = 0, length, first_addr = -1;
2527 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2528 int insert_lnop_after = 0;
2529 for (insn = first; insn; insn = NEXT_INSN (insn))
2530 if (INSN_P (insn))
2532 if (first_addr == -1)
2533 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2534 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2535 length = get_attr_length (insn);
2537 if (before_4 == 0 && addr + length >= 4 * 4)
2538 before_4 = insn;
2539 /* We test for 14 instructions because the first hbrp will add
2540 up to 2 instructions. */
2541 if (before_16 == 0 && addr + length >= 14 * 4)
2542 before_16 = insn;
2544 if (INSN_CODE (insn) == CODE_FOR_hbr)
2546 /* Make sure an hbrp is at least 2 cycles away from a hint.
2547 Insert an lnop after the hbrp when necessary. */
2548 if (before_4 == 0 && addr > 0)
2550 before_4 = insn;
2551 insert_lnop_after |= 1;
2553 else if (before_4 && addr <= 4 * 4)
2554 insert_lnop_after |= 1;
2555 if (before_16 == 0 && addr > 10 * 4)
2557 before_16 = insn;
2558 insert_lnop_after |= 2;
2560 else if (before_16 && addr <= 14 * 4)
2561 insert_lnop_after |= 2;
2564 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2566 if (addr < hbrp_addr0)
2567 hbrp_addr0 = addr;
2568 else if (addr < hbrp_addr1)
2569 hbrp_addr1 = addr;
2572 if (CALL_P (insn) || JUMP_P (insn))
2574 if (HINTED_P (insn))
2575 return;
2577 /* Any branch after the first 15 insns should be on an even
2578 address to avoid a special case branch. There might be
2579 some nops and/or hbrps inserted, so we test after 10
2580 insns. */
2581 if (addr > 10 * 4)
2582 SCHED_ON_EVEN_P (insn) = 1;
2585 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2586 return;
2589 if (addr + length >= 32 * 4)
2591 gcc_assert (before_4 && before_16);
2592 if (hbrp_addr0 > 4 * 4)
2594 insn =
2595 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2596 recog_memoized (insn);
2597 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2598 INSN_ADDRESSES_NEW (insn,
2599 INSN_ADDRESSES (INSN_UID (before_4)));
2600 PUT_MODE (insn, GET_MODE (before_4));
2601 PUT_MODE (before_4, TImode);
2602 if (insert_lnop_after & 1)
2604 insn = emit_insn_before (gen_lnop (), before_4);
2605 recog_memoized (insn);
2606 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2607 INSN_ADDRESSES_NEW (insn,
2608 INSN_ADDRESSES (INSN_UID (before_4)));
2609 PUT_MODE (insn, TImode);
2612 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2613 && hbrp_addr1 > 16 * 4)
2615 insn =
2616 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2617 recog_memoized (insn);
2618 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2619 INSN_ADDRESSES_NEW (insn,
2620 INSN_ADDRESSES (INSN_UID (before_16)));
2621 PUT_MODE (insn, GET_MODE (before_16));
2622 PUT_MODE (before_16, TImode);
2623 if (insert_lnop_after & 2)
2625 insn = emit_insn_before (gen_lnop (), before_16);
2626 recog_memoized (insn);
2627 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2628 INSN_ADDRESSES_NEW (insn,
2629 INSN_ADDRESSES (INSN_UID
2630 (before_16)));
2631 PUT_MODE (insn, TImode);
2634 return;
2637 else if (BARRIER_P (insn))
2638 return;
2642 /* The SPU might hang when it executes 48 inline instructions after a
2643 hinted branch jumps to its hinted target. The beginning of a
2644 function and the return from a call might have been hinted, and
2645 must be handled as well. To prevent a hang we insert 2 hbrps. The
2646 first should be within 6 insns of the branch target. The second
2647 should be within 22 insns of the branch target. When determining
2648 if hbrps are necessary, we look for only 32 inline instructions,
2649    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2650 when inserting new hbrps, we insert them within 4 and 16 insns of
2651 the target. */
2652 static void
2653 insert_hbrp (void)
2655 rtx insn;
2656 if (TARGET_SAFE_HINTS)
2658 shorten_branches (get_insns ());
2659 /* Insert hbrp at beginning of function */
2660 insn = next_active_insn (get_insns ());
2661 if (insn)
2662 insert_hbrp_for_ilb_runout (insn);
2663 /* Insert hbrp after hinted targets. */
2664 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2665 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2666 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2670 static int in_spu_reorg;
2672 static void
2673 spu_var_tracking (void)
2675 if (flag_var_tracking)
2677 df_analyze ();
2678 timevar_push (TV_VAR_TRACKING);
2679 variable_tracking_main ();
2680 timevar_pop (TV_VAR_TRACKING);
2681 df_finish_pass (false);
2685 /* Insert branch hints. There are no branch optimizations after this
2686 pass, so it's safe to set our branch hints now. */
2687 static void
2688 spu_machine_dependent_reorg (void)
2690 sbitmap blocks;
2691 basic_block bb;
2692 rtx branch, insn;
2693 rtx branch_target = 0;
2694 int branch_addr = 0, insn_addr, required_dist = 0;
2695 int i;
2696 unsigned int j;
2698 if (!TARGET_BRANCH_HINTS || optimize == 0)
2700 /* We still do it for unoptimized code because an external
2701 function might have hinted a call or return. */
2702 compute_bb_for_insn ();
2703 insert_hbrp ();
2704 pad_bb ();
2705 spu_var_tracking ();
2706 free_bb_for_insn ();
2707 return;
2710 blocks = sbitmap_alloc (last_basic_block);
2711 sbitmap_zero (blocks);
2713 in_spu_reorg = 1;
2714 compute_bb_for_insn ();
2716 compact_blocks ();
2718 spu_bb_info =
2719 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2720 sizeof (struct spu_bb_info));
2722 /* We need exact insn addresses and lengths. */
2723 shorten_branches (get_insns ());
2725 for (i = n_basic_blocks - 1; i >= 0; i--)
2727 bb = BASIC_BLOCK (i);
2728 branch = 0;
2729 if (spu_bb_info[i].prop_jump)
2731 branch = spu_bb_info[i].prop_jump;
2732 branch_target = get_branch_target (branch);
2733 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2734 required_dist = spu_hint_dist;
2736 /* Search from end of a block to beginning. In this loop, find
2737          jumps which need a branch hint and emit the hint only when:
2738 - it's an indirect branch and we're at the insn which sets
2739 the register
2740 - we're at an insn that will invalidate the hint. e.g., a
2741 call, another hint insn, inline asm that clobbers $hbr, and
2742 some inlined operations (divmodsi4). Don't consider jumps
2743 because they are only at the end of a block and are
2744 considered when we are deciding whether to propagate
2745 - we're getting too far away from the branch. The hbr insns
2746 only have a signed 10 bit offset
2747 We go back as far as possible so the branch will be considered
2748 for propagation when we get to the beginning of the block. */
2749 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2751 if (INSN_P (insn))
2753 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2754 if (branch
2755 && ((GET_CODE (branch_target) == REG
2756 && set_of (branch_target, insn) != NULL_RTX)
2757 || insn_clobbers_hbr (insn)
2758 || branch_addr - insn_addr > 600))
2760 rtx next = NEXT_INSN (insn);
2761 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2762 if (insn != BB_END (bb)
2763 && branch_addr - next_addr >= required_dist)
2765 if (dump_file)
2766 fprintf (dump_file,
2767 "hint for %i in block %i before %i\n",
2768 INSN_UID (branch), bb->index,
2769 INSN_UID (next));
2770 spu_emit_branch_hint (next, branch, branch_target,
2771 branch_addr - next_addr, blocks);
2773 branch = 0;
2776 /* JUMP_P will only be true at the end of a block. When
2777 branch is already set it means we've previously decided
2778 to propagate a hint for that branch into this block. */
2779 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2781 branch = 0;
2782 if ((branch_target = get_branch_target (insn)))
2784 branch = insn;
2785 branch_addr = insn_addr;
2786 required_dist = spu_hint_dist;
2790 if (insn == BB_HEAD (bb))
2791 break;
2794 if (branch)
2796 /* If we haven't emitted a hint for this branch yet, it might
2797 be profitable to emit it in one of the predecessor blocks,
2798 especially for loops. */
2799 rtx bbend;
2800 basic_block prev = 0, prop = 0, prev2 = 0;
2801 int loop_exit = 0, simple_loop = 0;
2802 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2804 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2805 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2806 prev = EDGE_PRED (bb, j)->src;
2807 else
2808 prev2 = EDGE_PRED (bb, j)->src;
2810 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2811 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2812 loop_exit = 1;
2813 else if (EDGE_SUCC (bb, j)->dest == bb)
2814 simple_loop = 1;
2816 /* If this branch is a loop exit then propagate to previous
2817 fallthru block. This catches the cases when it is a simple
2818 loop or when there is an initial branch into the loop. */
2819 if (prev && (loop_exit || simple_loop)
2820 && prev->loop_depth <= bb->loop_depth)
2821 prop = prev;
2823          /* If there is only one adjacent predecessor, don't propagate
2824 outside this loop. This loop_depth test isn't perfect, but
2825 I'm not sure the loop_father member is valid at this point. */
2826 else if (prev && single_pred_p (bb)
2827 && prev->loop_depth == bb->loop_depth)
2828 prop = prev;
2830 /* If this is the JOIN block of a simple IF-THEN then
2831             propagate the hint to the HEADER block. */
2832 else if (prev && prev2
2833 && EDGE_COUNT (bb->preds) == 2
2834 && EDGE_COUNT (prev->preds) == 1
2835 && EDGE_PRED (prev, 0)->src == prev2
2836 && prev2->loop_depth == bb->loop_depth
2837 && GET_CODE (branch_target) != REG)
2838 prop = prev;
2840 /* Don't propagate when:
2841 - this is a simple loop and the hint would be too far
2842 - this is not a simple loop and there are 16 insns in
2843 this block already
2844 - the predecessor block ends in a branch that will be
2845 hinted
2846 - the predecessor block ends in an insn that invalidates
2847 the hint */
2848 if (prop
2849 && prop->index >= 0
2850 && (bbend = BB_END (prop))
2851 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2852 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2853 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2855 if (dump_file)
2856 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2857 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2858 bb->index, prop->index, bb->loop_depth,
2859 INSN_UID (branch), loop_exit, simple_loop,
2860 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2862 spu_bb_info[prop->index].prop_jump = branch;
2863 spu_bb_info[prop->index].bb_index = i;
2865 else if (branch_addr - next_addr >= required_dist)
2867 if (dump_file)
2868 fprintf (dump_file, "hint for %i in block %i before %i\n",
2869 INSN_UID (branch), bb->index,
2870 INSN_UID (NEXT_INSN (insn)));
2871 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2872 branch_addr - next_addr, blocks);
2874 branch = 0;
2877 free (spu_bb_info);
2879 if (!sbitmap_empty_p (blocks))
2880 find_many_sub_basic_blocks (blocks);
2882 /* We have to schedule to make sure alignment is ok. */
2883 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2885 /* The hints need to be scheduled, so call it again. */
2886 schedule_insns ();
2887 df_finish_pass (true);
2889 insert_hbrp ();
2891 pad_bb ();
2893 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2894 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2896 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2897            between its branch label and the branch.  We don't move the
2898 label because GCC expects it at the beginning of the block. */
2899 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2900 rtx label_ref = XVECEXP (unspec, 0, 0);
2901 rtx label = XEXP (label_ref, 0);
2902 rtx branch;
2903 int offset = 0;
2904 for (branch = NEXT_INSN (label);
2905 !JUMP_P (branch) && !CALL_P (branch);
2906 branch = NEXT_INSN (branch))
2907 if (NONJUMP_INSN_P (branch))
2908 offset += get_attr_length (branch);
2909 if (offset > 0)
2910 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2913 spu_var_tracking ();
2915 free_bb_for_insn ();
2917 in_spu_reorg = 0;
2921 /* Insn scheduling routines, primarily for dual issue. */
2922 static int
2923 spu_sched_issue_rate (void)
2925 return 2;
2928 static int
2929 uses_ls_unit(rtx insn)
2931 rtx set = single_set (insn);
2932 if (set != 0
2933 && (GET_CODE (SET_DEST (set)) == MEM
2934 || GET_CODE (SET_SRC (set)) == MEM))
2935 return 1;
2936 return 0;
2939 static int
2940 get_pipe (rtx insn)
2942 enum attr_type t;
2943 /* Handle inline asm */
2944 if (INSN_CODE (insn) == -1)
2945 return -1;
2946 t = get_attr_type (insn);
2947 switch (t)
2949 case TYPE_CONVERT:
2950 return -2;
2951 case TYPE_MULTI0:
2952 return -1;
2954 case TYPE_FX2:
2955 case TYPE_FX3:
2956 case TYPE_SPR:
2957 case TYPE_NOP:
2958 case TYPE_FXB:
2959 case TYPE_FPD:
2960 case TYPE_FP6:
2961 case TYPE_FP7:
2962 return 0;
2964 case TYPE_LNOP:
2965 case TYPE_SHUF:
2966 case TYPE_LOAD:
2967 case TYPE_STORE:
2968 case TYPE_BR:
2969 case TYPE_MULTI1:
2970 case TYPE_HBR:
2971 case TYPE_IPREFETCH:
2972 return 1;
2973 default:
2974 abort ();
2979 /* haifa-sched.c has a static variable that keeps track of the current
2980 cycle. It is passed to spu_sched_reorder, and we record it here for
2981 use by spu_sched_variable_issue. It won't be accurate if the
2982    scheduler updates its clock_var between the two calls. */
2983 static int clock_var;
2985 /* This is used to keep track of insn alignment. Set to 0 at the
2986 beginning of each block and increased by the "length" attr of each
2987 insn scheduled. */
2988 static int spu_sched_length;
2990 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2991 ready list appropriately in spu_sched_reorder(). */
2992 static int pipe0_clock;
2993 static int pipe1_clock;
2995 static int prev_clock_var;
2997 static int prev_priority;
2999 /* The SPU needs to load the next ilb sometime during the execution of
3000 the previous ilb. There is a potential conflict if every cycle has a
3001 load or store. To avoid the conflict we make sure the load/store
3002 unit is free for at least one cycle during the execution of insns in
3003 the previous ilb. */
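/* Illustrative tie-in (added, not from the original source): when every
   issued insn keeps using the load/store unit, spu_ls_first stays at
   the address of the first one, and once spu_sched_length -
   spu_ls_first reaches 4 * 15 bytes, spu_sched_reorder () below emits
   an iprefetch (hbrp) instead of the next load/store, which is how the
   "free for at least one cycle" guarantee above is enforced.  */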
3004 static int spu_ls_first;
3005 static int prev_ls_clock;
3007 static void
3008 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3009 int max_ready ATTRIBUTE_UNUSED)
3011 spu_sched_length = 0;
3014 static void
3015 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3016 int max_ready ATTRIBUTE_UNUSED)
3018 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3020 /* When any block might be at least 8-byte aligned, assume they
3021 will all be at least 8-byte aligned to make sure dual issue
3022 works out correctly. */
3023 spu_sched_length = 0;
3025 spu_ls_first = INT_MAX;
3026 clock_var = -1;
3027 prev_ls_clock = -1;
3028 pipe0_clock = -1;
3029 pipe1_clock = -1;
3030 prev_clock_var = -1;
3031 prev_priority = -1;
3034 static int
3035 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3036 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3038 int len;
3039 int p;
3040 if (GET_CODE (PATTERN (insn)) == USE
3041 || GET_CODE (PATTERN (insn)) == CLOBBER
3042 || (len = get_attr_length (insn)) == 0)
3043 return more;
3045 spu_sched_length += len;
3047 /* Reset on inline asm */
3048 if (INSN_CODE (insn) == -1)
3050 spu_ls_first = INT_MAX;
3051 pipe0_clock = -1;
3052 pipe1_clock = -1;
3053 return 0;
3055 p = get_pipe (insn);
3056 if (p == 0)
3057 pipe0_clock = clock_var;
3058 else
3059 pipe1_clock = clock_var;
3061 if (in_spu_reorg)
3063 if (clock_var - prev_ls_clock > 1
3064 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3065 spu_ls_first = INT_MAX;
3066 if (uses_ls_unit (insn))
3068 if (spu_ls_first == INT_MAX)
3069 spu_ls_first = spu_sched_length;
3070 prev_ls_clock = clock_var;
3073 /* The scheduler hasn't inserted the nop, but we will later on.
3074 Include those nops in spu_sched_length. */
3075 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3076 spu_sched_length += 4;
3077 prev_clock_var = clock_var;
3079 /* more is -1 when called from spu_sched_reorder for new insns
3080 that don't have INSN_PRIORITY */
3081 if (more >= 0)
3082 prev_priority = INSN_PRIORITY (insn);
3085   /* Always try issuing more insns.  spu_sched_reorder will decide
3086 when the cycle should be advanced. */
3087 return 1;
3090 /* This function is called for both TARGET_SCHED_REORDER and
3091 TARGET_SCHED_REORDER2. */
3092 static int
3093 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3094 rtx *ready, int *nreadyp, int clock)
3096 int i, nready = *nreadyp;
3097 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3098 rtx insn;
3100 clock_var = clock;
3102 if (nready <= 0 || pipe1_clock >= clock)
3103 return 0;
3105 /* Find any rtl insns that don't generate assembly insns and schedule
3106 them first. */
3107 for (i = nready - 1; i >= 0; i--)
3109 insn = ready[i];
3110 if (INSN_CODE (insn) == -1
3111 || INSN_CODE (insn) == CODE_FOR_blockage
3112 || (INSN_P (insn) && get_attr_length (insn) == 0))
3114 ready[i] = ready[nready - 1];
3115 ready[nready - 1] = insn;
3116 return 1;
3120 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3121 for (i = 0; i < nready; i++)
3122 if (INSN_CODE (ready[i]) != -1)
3124 insn = ready[i];
3125 switch (get_attr_type (insn))
3127 default:
3128 case TYPE_MULTI0:
3129 case TYPE_CONVERT:
3130 case TYPE_FX2:
3131 case TYPE_FX3:
3132 case TYPE_SPR:
3133 case TYPE_NOP:
3134 case TYPE_FXB:
3135 case TYPE_FPD:
3136 case TYPE_FP6:
3137 case TYPE_FP7:
3138 pipe_0 = i;
3139 break;
3140 case TYPE_LOAD:
3141 case TYPE_STORE:
3142 pipe_ls = i;
3143 case TYPE_LNOP:
3144 case TYPE_SHUF:
3145 case TYPE_BR:
3146 case TYPE_MULTI1:
3147 case TYPE_HBR:
3148 pipe_1 = i;
3149 break;
3150 case TYPE_IPREFETCH:
3151 pipe_hbrp = i;
3152 break;
3156 /* In the first scheduling phase, schedule loads and stores together
3157 to increase the chance they will get merged during postreload CSE. */
3158 if (!reload_completed && pipe_ls >= 0)
3160 insn = ready[pipe_ls];
3161 ready[pipe_ls] = ready[nready - 1];
3162 ready[nready - 1] = insn;
3163 return 1;
3166 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3167 if (pipe_hbrp >= 0)
3168 pipe_1 = pipe_hbrp;
3170 /* When we have loads/stores in every cycle of the last 15 insns and
3171 we are about to schedule another load/store, emit an hbrp insn
3172 instead. */
3173 if (in_spu_reorg
3174 && spu_sched_length - spu_ls_first >= 4 * 15
3175 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3177 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3178 recog_memoized (insn);
3179 if (pipe0_clock < clock)
3180 PUT_MODE (insn, TImode);
3181 spu_sched_variable_issue (file, verbose, insn, -1);
3182 return 0;
3185 /* In general, we want to emit nops to increase dual issue, but dual
3186 issue isn't faster when one of the insns could be scheduled later
3187      without affecting the critical path.  We look at INSN_PRIORITY to
3188      make a good guess, but it isn't perfect so -mdual-nops=n can be
3189      used to adjust it. */
3190 if (in_spu_reorg && spu_dual_nops < 10)
3192       /* When we are at an even address and we are not issuing nops to
3193 improve scheduling then we need to advance the cycle. */
3194 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3195 && (spu_dual_nops == 0
3196 || (pipe_1 != -1
3197 && prev_priority >
3198 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3199 return 0;
3201 /* When at an odd address, schedule the highest priority insn
3202          without considering the pipeline. */
3203 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3204 && (spu_dual_nops == 0
3205 || (prev_priority >
3206 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3207 return 1;
3211   /* We haven't issued a pipe0 insn yet this cycle; if there is a
3212 pipe0 insn in the ready list, schedule it. */
3213 if (pipe0_clock < clock && pipe_0 >= 0)
3214 schedule_i = pipe_0;
3216 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3217 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3218 else
3219 schedule_i = pipe_1;
3221 if (schedule_i > -1)
3223 insn = ready[schedule_i];
3224 ready[schedule_i] = ready[nready - 1];
3225 ready[nready - 1] = insn;
3226 return 1;
3228 return 0;
3231 /* INSN is dependent on DEP_INSN. */
3232 static int
3233 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3235 rtx set;
3237 /* The blockage pattern is used to prevent instructions from being
3238 moved across it and has no cost. */
3239 if (INSN_CODE (insn) == CODE_FOR_blockage
3240 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3241 return 0;
3243 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3244 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3245 return 0;
3247 /* Make sure hbrps are spread out. */
3248 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3249 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3250 return 8;
3252 /* Make sure hints and hbrps are 2 cycles apart. */
3253 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3254 || INSN_CODE (insn) == CODE_FOR_hbr)
3255 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3256 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3257 return 2;
3259 /* An hbrp has no real dependency on other insns. */
3260 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3261 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3262 return 0;
3264 /* Assuming that it is unlikely an argument register will be used in
3265 the first cycle of the called function, we reduce the cost for
3266 slightly better scheduling of dep_insn. When not hinted, the
3267 mispredicted branch would hide the cost as well. */
3268 if (CALL_P (insn))
3270 rtx target = get_branch_target (insn);
3271 if (GET_CODE (target) != REG || !set_of (target, insn))
3272 return cost - 2;
3273 return cost;
3276 /* And when returning from a function, let's assume the return values
3277 are completed sooner too. */
3278 if (CALL_P (dep_insn))
3279 return cost - 2;
3281   /* Make sure an instruction that loads from the back chain is scheduled
3282 away from the return instruction so a hint is more likely to get
3283 issued. */
3284 if (INSN_CODE (insn) == CODE_FOR__return
3285 && (set = single_set (dep_insn))
3286 && GET_CODE (SET_DEST (set)) == REG
3287 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3288 return 20;
3290 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3291 scheduler makes every insn in a block anti-dependent on the final
3292 jump_insn. We adjust here so higher cost insns will get scheduled
3293 earlier. */
3294 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3295 return insn_cost (dep_insn) - 3;
3297 return cost;
3300 /* Create a CONST_DOUBLE from a string. */
3302 spu_float_const (const char *string, enum machine_mode mode)
3304 REAL_VALUE_TYPE value;
3305 value = REAL_VALUE_ATOF (string, mode);
3306 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3310 spu_constant_address_p (rtx x)
3312 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3313 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3314 || GET_CODE (x) == HIGH);
3317 static enum spu_immediate
3318 which_immediate_load (HOST_WIDE_INT val)
3320 gcc_assert (val == trunc_int_for_mode (val, SImode));
3322 if (val >= -0x8000 && val <= 0x7fff)
3323 return SPU_IL;
3324 if (val >= 0 && val <= 0x3ffff)
3325 return SPU_ILA;
3326 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3327 return SPU_ILH;
3328 if ((val & 0xffff) == 0)
3329 return SPU_ILHU;
3331 return SPU_NONE;
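/* Illustrative classifications (added, not from the original source):

     which_immediate_load (0x00001234) == SPU_IL     (fits 16-bit signed)
     which_immediate_load (0x00023456) == SPU_ILA    (fits 18-bit unsigned)
     which_immediate_load (0x12341234) == SPU_ILH    (halfwords match)
     which_immediate_load (0x12340000) == SPU_ILHU   (low halfword zero)
     which_immediate_load (0x12345678) == SPU_NONE   (needs ilhu + iohl)  */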
3334 /* Return true when OP can be loaded by one of the il instructions, or
3335 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3337 immediate_load_p (rtx op, enum machine_mode mode)
3339 if (CONSTANT_P (op))
3341 enum immediate_class c = classify_immediate (op, mode);
3342 return c == IC_IL1 || c == IC_IL1s
3343 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3345 return 0;
3348 /* Return true if the first SIZE bytes of arr form a constant that can be
3349 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3350 represent the size and offset of the instruction to use. */
3351 static int
3352 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3354 int cpat, run, i, start;
3355 cpat = 1;
3356 run = 0;
3357 start = -1;
3358 for (i = 0; i < size && cpat; i++)
3359 if (arr[i] != i+16)
3361 if (!run)
3363 start = i;
3364 if (arr[i] == 3)
3365 run = 1;
3366 else if (arr[i] == 2 && arr[i+1] == 3)
3367 run = 2;
3368 else if (arr[i] == 0)
3370 while (arr[i+run] == run && i+run < 16)
3371 run++;
3372 if (run != 4 && run != 8)
3373 cpat = 0;
3375 else
3376 cpat = 0;
3377 if ((i & (run-1)) != 0)
3378 cpat = 0;
3379 i += run;
3381 else
3382 cpat = 0;
3384 if (cpat && (run || size < 16))
3386 if (run == 0)
3387 run = 1;
3388 if (prun)
3389 *prun = run;
3390 if (pstart)
3391 *pstart = start == -1 ? 16-run : start;
3392 return 1;
3394 return 0;
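/* Illustrative pattern (added, not from the original source): a
   shuffle-control constant whose bytes are

     10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f   (hex)

   matches the word-insert form (cwd at byte offset 4): arr[i] == i + 16
   everywhere except the run 0..3, so cpat_info () reports *prun == 4
   and *pstart == 4.  */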
3397 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3398 it into a register. MODE is only valid when OP is a CONST_INT. */
3399 static enum immediate_class
3400 classify_immediate (rtx op, enum machine_mode mode)
3402 HOST_WIDE_INT val;
3403 unsigned char arr[16];
3404 int i, j, repeated, fsmbi, repeat;
3406 gcc_assert (CONSTANT_P (op));
3408 if (GET_MODE (op) != VOIDmode)
3409 mode = GET_MODE (op);
3411 /* A V4SI const_vector with all identical symbols is ok. */
3412 if (!flag_pic
3413 && mode == V4SImode
3414 && GET_CODE (op) == CONST_VECTOR
3415 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3416 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3417 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3418 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3419 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3420 op = CONST_VECTOR_ELT (op, 0);
3422 switch (GET_CODE (op))
3424 case SYMBOL_REF:
3425 case LABEL_REF:
3426 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3428 case CONST:
3429 /* We can never know if the resulting address fits in 18 bits and can be
3430 loaded with ila. For now, assume the address will not overflow if
3431 the displacement is "small" (fits 'K' constraint). */
3432 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3434 rtx sym = XEXP (XEXP (op, 0), 0);
3435 rtx cst = XEXP (XEXP (op, 0), 1);
3437 if (GET_CODE (sym) == SYMBOL_REF
3438 && GET_CODE (cst) == CONST_INT
3439 && satisfies_constraint_K (cst))
3440 return IC_IL1s;
3442 return IC_IL2s;
3444 case HIGH:
3445 return IC_IL1s;
3447 case CONST_VECTOR:
3448 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3449 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3450 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3451 return IC_POOL;
3452 /* Fall through. */
3454 case CONST_INT:
3455 case CONST_DOUBLE:
3456 constant_to_array (mode, op, arr);
3458 /* Check that each 4-byte slot is identical. */
3459 repeated = 1;
3460 for (i = 4; i < 16; i += 4)
3461 for (j = 0; j < 4; j++)
3462 if (arr[j] != arr[i + j])
3463 repeated = 0;
3465 if (repeated)
3467 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3468 val = trunc_int_for_mode (val, SImode);
3470 if (which_immediate_load (val) != SPU_NONE)
3471 return IC_IL1;
3474 /* Any mode of 2 bytes or smaller can be loaded with an il
3475 instruction. */
3476 gcc_assert (GET_MODE_SIZE (mode) > 2);
3478 fsmbi = 1;
3479 repeat = 0;
3480 for (i = 0; i < 16 && fsmbi; i++)
3481 if (arr[i] != 0 && repeat == 0)
3482 repeat = arr[i];
3483 else if (arr[i] != 0 && arr[i] != repeat)
3484 fsmbi = 0;
3485 if (fsmbi)
3486 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3488 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3489 return IC_CPAT;
3491 if (repeated)
3492 return IC_IL2;
3494 return IC_POOL;
3495 default:
3496 break;
3498 gcc_unreachable ();
3501 static enum spu_immediate
3502 which_logical_immediate (HOST_WIDE_INT val)
3504 gcc_assert (val == trunc_int_for_mode (val, SImode));
3506 if (val >= -0x200 && val <= 0x1ff)
3507 return SPU_ORI;
3508 if (val >= 0 && val <= 0xffff)
3509 return SPU_IOHL;
3510 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3512 val = trunc_int_for_mode (val, HImode);
3513 if (val >= -0x200 && val <= 0x1ff)
3514 return SPU_ORHI;
3515 if ((val & 0xff) == ((val >> 8) & 0xff))
3517 val = trunc_int_for_mode (val, QImode);
3518 if (val >= -0x200 && val <= 0x1ff)
3519 return SPU_ORBI;
3522 return SPU_NONE;
3525 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3526 CONST_DOUBLEs. */
3527 static int
3528 const_vector_immediate_p (rtx x)
3530 int i;
3531 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3532 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3533 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3534 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3535 return 0;
3536 return 1;
3540 logical_immediate_p (rtx op, enum machine_mode mode)
3542 HOST_WIDE_INT val;
3543 unsigned char arr[16];
3544 int i, j;
3546 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3547 || GET_CODE (op) == CONST_VECTOR);
3549 if (GET_CODE (op) == CONST_VECTOR
3550 && !const_vector_immediate_p (op))
3551 return 0;
3553 if (GET_MODE (op) != VOIDmode)
3554 mode = GET_MODE (op);
3556 constant_to_array (mode, op, arr);
3558 /* Check that bytes are repeated. */
3559 for (i = 4; i < 16; i += 4)
3560 for (j = 0; j < 4; j++)
3561 if (arr[j] != arr[i + j])
3562 return 0;
3564 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3565 val = trunc_int_for_mode (val, SImode);
3567 i = which_logical_immediate (val);
3568 return i != SPU_NONE && i != SPU_IOHL;
3572 iohl_immediate_p (rtx op, enum machine_mode mode)
3574 HOST_WIDE_INT val;
3575 unsigned char arr[16];
3576 int i, j;
3578 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3579 || GET_CODE (op) == CONST_VECTOR);
3581 if (GET_CODE (op) == CONST_VECTOR
3582 && !const_vector_immediate_p (op))
3583 return 0;
3585 if (GET_MODE (op) != VOIDmode)
3586 mode = GET_MODE (op);
3588 constant_to_array (mode, op, arr);
3590 /* Check that bytes are repeated. */
3591 for (i = 4; i < 16; i += 4)
3592 for (j = 0; j < 4; j++)
3593 if (arr[j] != arr[i + j])
3594 return 0;
3596 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3597 val = trunc_int_for_mode (val, SImode);
3599 return val >= 0 && val <= 0xffff;
3603 arith_immediate_p (rtx op, enum machine_mode mode,
3604 HOST_WIDE_INT low, HOST_WIDE_INT high)
3606 HOST_WIDE_INT val;
3607 unsigned char arr[16];
3608 int bytes, i, j;
3610 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3611 || GET_CODE (op) == CONST_VECTOR);
3613 if (GET_CODE (op) == CONST_VECTOR
3614 && !const_vector_immediate_p (op))
3615 return 0;
3617 if (GET_MODE (op) != VOIDmode)
3618 mode = GET_MODE (op);
3620 constant_to_array (mode, op, arr);
3622 if (VECTOR_MODE_P (mode))
3623 mode = GET_MODE_INNER (mode);
3625 bytes = GET_MODE_SIZE (mode);
3626 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3628 /* Check that bytes are repeated. */
3629 for (i = bytes; i < 16; i += bytes)
3630 for (j = 0; j < bytes; j++)
3631 if (arr[j] != arr[i + j])
3632 return 0;
3634 val = arr[0];
3635 for (j = 1; j < bytes; j++)
3636 val = (val << 8) | arr[j];
3638 val = trunc_int_for_mode (val, mode);
3640 return val >= low && val <= high;
3643 /* TRUE when OP is an immediate and an exact power of 2.  Writing OP as
3644    2^scale, we require scale >= LOW && scale <= HIGH.  When OP is a vector,
3645 all entries must be the same. */
3646 bool
3647 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3649 enum machine_mode int_mode;
3650 HOST_WIDE_INT val;
3651 unsigned char arr[16];
3652 int bytes, i, j;
3654 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3655 || GET_CODE (op) == CONST_VECTOR);
3657 if (GET_CODE (op) == CONST_VECTOR
3658 && !const_vector_immediate_p (op))
3659 return 0;
3661 if (GET_MODE (op) != VOIDmode)
3662 mode = GET_MODE (op);
3664 constant_to_array (mode, op, arr);
3666 if (VECTOR_MODE_P (mode))
3667 mode = GET_MODE_INNER (mode);
3669 bytes = GET_MODE_SIZE (mode);
3670 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3672 /* Check that bytes are repeated. */
3673 for (i = bytes; i < 16; i += bytes)
3674 for (j = 0; j < bytes; j++)
3675 if (arr[j] != arr[i + j])
3676 return 0;
3678 val = arr[0];
3679 for (j = 1; j < bytes; j++)
3680 val = (val << 8) | arr[j];
3682 val = trunc_int_for_mode (val, int_mode);
3684 /* Currently, we only handle SFmode */
3685 gcc_assert (mode == SFmode);
3686 if (mode == SFmode)
3688 int exp = (val >> 23) - 127;
3689 return val > 0 && (val & 0x007fffff) == 0
3690 && exp >= low && exp <= high;
3692 return FALSE;
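/* Illustrative check (added, not from the original source): 8.0f is
   2^3 and its SFmode bit pattern is 0x41000000, so with low <= 3 and
   3 <= high the function returns true; 6.0f (0x40c00000) fails because
   its mantissa bits are nonzero.  */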
3695 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3697 static int
3698 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3700 rtx x = *px;
3701 tree decl;
3703 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3705 rtx plus = XEXP (x, 0);
3706 rtx op0 = XEXP (plus, 0);
3707 rtx op1 = XEXP (plus, 1);
3708 if (GET_CODE (op1) == CONST_INT)
3709 x = op0;
3712 return (GET_CODE (x) == SYMBOL_REF
3713 && (decl = SYMBOL_REF_DECL (x)) != 0
3714 && TREE_CODE (decl) == VAR_DECL
3715 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3718 /* We accept:
3719 - any 32-bit constant (SImode, SFmode)
3720 - any constant that can be generated with fsmbi (any mode)
3721 - a 64-bit constant where the high and low bits are identical
3722 (DImode, DFmode)
3723 - a 128-bit constant where the four 32-bit words match. */
3724 bool
3725 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3727 if (GET_CODE (x) == HIGH)
3728 x = XEXP (x, 0);
3730 /* Reject any __ea qualified reference. These can't appear in
3731 instructions but must be forced to the constant pool. */
3732 if (for_each_rtx (&x, ea_symbol_ref, 0))
3733 return 0;
3735 /* V4SI with all identical symbols is valid. */
3736 if (!flag_pic
3737 && mode == V4SImode
3738 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3739 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3740 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3741 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3742 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3743 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3745 if (GET_CODE (x) == CONST_VECTOR
3746 && !const_vector_immediate_p (x))
3747 return 0;
3748 return 1;
3751 /* Valid addresses are:
3752 - symbol_ref, label_ref, const
3753 - reg
3754 - reg + const_int, where const_int is 16 byte aligned
3755 - reg + reg, alignment doesn't matter
3756 The alignment matters in the reg+const case because lqd and stqd
3757 ignore the 4 least significant bits of the const. We only care about
3758 16 byte modes because the expand phase will change all smaller MEM
3759 references to TImode. */
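/* For example (restating the rule above, added for illustration): with
   a TImode access, (plus (reg) (const_int 32)) is accepted while
   (plus (reg) (const_int 4)) is rejected, because lqd/stqd would
   silently drop the low 4 bits of that displacement; a reg + reg
   address is accepted regardless of alignment.  */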
3760 static bool
3761 spu_legitimate_address_p (enum machine_mode mode,
3762 rtx x, bool reg_ok_strict)
3764 int aligned = GET_MODE_SIZE (mode) >= 16;
3765 if (aligned
3766 && GET_CODE (x) == AND
3767 && GET_CODE (XEXP (x, 1)) == CONST_INT
3768 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3769 x = XEXP (x, 0);
3770 switch (GET_CODE (x))
3772 case LABEL_REF:
3773 return !TARGET_LARGE_MEM;
3775 case SYMBOL_REF:
3776 case CONST:
3777 /* Keep __ea references until reload so that spu_expand_mov can see them
3778 in MEMs. */
3779 if (ea_symbol_ref (&x, 0))
3780 return !reload_in_progress && !reload_completed;
3781 return !TARGET_LARGE_MEM;
3783 case CONST_INT:
3784 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3786 case SUBREG:
3787 x = XEXP (x, 0);
3788 if (REG_P (x))
3789 return 0;
3791 case REG:
3792 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3794 case PLUS:
3795 case LO_SUM:
3797 rtx op0 = XEXP (x, 0);
3798 rtx op1 = XEXP (x, 1);
3799 if (GET_CODE (op0) == SUBREG)
3800 op0 = XEXP (op0, 0);
3801 if (GET_CODE (op1) == SUBREG)
3802 op1 = XEXP (op1, 0);
3803 if (GET_CODE (op0) == REG
3804 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3805 && GET_CODE (op1) == CONST_INT
3806 && INTVAL (op1) >= -0x2000
3807 && INTVAL (op1) <= 0x1fff
3808 && (!aligned || (INTVAL (op1) & 15) == 0))
3809 return TRUE;
3810 if (GET_CODE (op0) == REG
3811 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3812 && GET_CODE (op1) == REG
3813 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3814 return TRUE;
3816 break;
3818 default:
3819 break;
3821 return FALSE;
3824 /* Like spu_legitimate_address_p, except with named addresses. */
3825 static bool
3826 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3827 bool reg_ok_strict, addr_space_t as)
3829 if (as == ADDR_SPACE_EA)
3830 return (REG_P (x) && (GET_MODE (x) == EAmode));
3832 else if (as != ADDR_SPACE_GENERIC)
3833 gcc_unreachable ();
3835 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3838 /* When the address is reg + const_int, force the const_int into a
3839 register. */
3841 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3842 enum machine_mode mode ATTRIBUTE_UNUSED)
3844 rtx op0, op1;
3845 /* Make sure both operands are registers. */
3846 if (GET_CODE (x) == PLUS)
3848 op0 = XEXP (x, 0);
3849 op1 = XEXP (x, 1);
3850 if (ALIGNED_SYMBOL_REF_P (op0))
3852 op0 = force_reg (Pmode, op0);
3853 mark_reg_pointer (op0, 128);
3855 else if (GET_CODE (op0) != REG)
3856 op0 = force_reg (Pmode, op0);
3857 if (ALIGNED_SYMBOL_REF_P (op1))
3859 op1 = force_reg (Pmode, op1);
3860 mark_reg_pointer (op1, 128);
3862 else if (GET_CODE (op1) != REG)
3863 op1 = force_reg (Pmode, op1);
3864 x = gen_rtx_PLUS (Pmode, op0, op1);
3866 return x;
3869 /* Like spu_legitimize_address, except with named address support. */
3870 static rtx
3871 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3872 addr_space_t as)
3874 if (as != ADDR_SPACE_GENERIC)
3875 return x;
3877 return spu_legitimize_address (x, oldx, mode);
3880 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3881 struct attribute_spec.handler. */
3882 static tree
3883 spu_handle_fndecl_attribute (tree * node,
3884 tree name,
3885 tree args ATTRIBUTE_UNUSED,
3886 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3888 if (TREE_CODE (*node) != FUNCTION_DECL)
3890 warning (0, "%qE attribute only applies to functions",
3891 name);
3892 *no_add_attrs = true;
3895 return NULL_TREE;
3898 /* Handle the "vector" attribute. */
3899 static tree
3900 spu_handle_vector_attribute (tree * node, tree name,
3901 tree args ATTRIBUTE_UNUSED,
3902 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3904 tree type = *node, result = NULL_TREE;
3905 enum machine_mode mode;
3906 int unsigned_p;
3908 while (POINTER_TYPE_P (type)
3909 || TREE_CODE (type) == FUNCTION_TYPE
3910 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3911 type = TREE_TYPE (type);
3913 mode = TYPE_MODE (type);
3915 unsigned_p = TYPE_UNSIGNED (type);
3916 switch (mode)
3918 case DImode:
3919 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3920 break;
3921 case SImode:
3922 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3923 break;
3924 case HImode:
3925 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3926 break;
3927 case QImode:
3928 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3929 break;
3930 case SFmode:
3931 result = V4SF_type_node;
3932 break;
3933 case DFmode:
3934 result = V2DF_type_node;
3935 break;
3936 default:
3937 break;
3940 /* Propagate qualifiers attached to the element type
3941 onto the vector type. */
3942 if (result && result != type && TYPE_QUALS (type))
3943 result = build_qualified_type (result, TYPE_QUALS (type));
3945 *no_add_attrs = true; /* No need to hang on to the attribute. */
3947 if (!result)
3948 warning (0, "%qE attribute ignored", name);
3949 else
3950 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3952 return NULL_TREE;
3955 /* Return nonzero if FUNC is a naked function. */
3956 static int
3957 spu_naked_function_p (tree func)
3959 tree a;
3961 if (TREE_CODE (func) != FUNCTION_DECL)
3962 abort ();
3964 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3965 return a != NULL_TREE;
3969 spu_initial_elimination_offset (int from, int to)
3971 int saved_regs_size = spu_saved_regs_size ();
3972 int sp_offset = 0;
3973 if (!current_function_is_leaf || crtl->outgoing_args_size
3974 || get_frame_size () || saved_regs_size)
3975 sp_offset = STACK_POINTER_OFFSET;
3976 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3977 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3978 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3979 return get_frame_size ();
3980 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3981 return sp_offset + crtl->outgoing_args_size
3982 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3983 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3984 return get_frame_size () + saved_regs_size + sp_offset;
3985 else
3986 gcc_unreachable ();
3990 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3992 enum machine_mode mode = TYPE_MODE (type);
3993 int byte_size = ((mode == BLKmode)
3994 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3996 /* Make sure small structs are left justified in a register. */
3997 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3998 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4000 enum machine_mode smode;
4001 rtvec v;
4002 int i;
4003 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4004 int n = byte_size / UNITS_PER_WORD;
4005 v = rtvec_alloc (nregs);
4006 for (i = 0; i < n; i++)
4008 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4009 gen_rtx_REG (TImode,
4010 FIRST_RETURN_REGNUM
4011 + i),
4012 GEN_INT (UNITS_PER_WORD * i));
4013 byte_size -= UNITS_PER_WORD;
4016 if (n < nregs)
4018 if (byte_size < 4)
4019 byte_size = 4;
4020 smode =
4021 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4022 RTVEC_ELT (v, n) =
4023 gen_rtx_EXPR_LIST (VOIDmode,
4024 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4025 GEN_INT (UNITS_PER_WORD * n));
4027 return gen_rtx_PARALLEL (mode, v);
4029 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4032 static rtx
4033 spu_function_arg (cumulative_args_t cum_v,
4034 enum machine_mode mode,
4035 const_tree type, bool named ATTRIBUTE_UNUSED)
4037 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4038 int byte_size;
4040 if (*cum >= MAX_REGISTER_ARGS)
4041 return 0;
4043 byte_size = ((mode == BLKmode)
4044 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4046 /* The ABI does not allow parameters to be passed partially in a
4047 register and partially on the stack. */
4048 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4049 return 0;
4051 /* Make sure small structs are left justified in a register. */
4052 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4053 && byte_size < UNITS_PER_WORD && byte_size > 0)
4055 enum machine_mode smode;
4056 rtx gr_reg;
4057 if (byte_size < 4)
4058 byte_size = 4;
4059 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4060 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4061 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
4062 const0_rtx);
4063 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4065 else
4066 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
4069 static void
4070 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4071 const_tree type, bool named ATTRIBUTE_UNUSED)
4073 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4075 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4076 ? 1
4077 : mode == BLKmode
4078 ? ((int_size_in_bytes (type) + 15) / 16)
4079 : mode == VOIDmode
4080 ? 1
4081 : HARD_REGNO_NREGS (cum, mode));
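/* Illustrative example (added, not part of the original source): a 20-byte
   BLKmode aggregate advances *cum by (20 + 15) / 16 = 2 quadword registers,
   while a variable-sized type (passed by reference) or any scalar mode up to
   16 bytes advances it by a single register.  */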
4084 /* Variable sized types are passed by reference. */
4085 static bool
4086 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4087 enum machine_mode mode ATTRIBUTE_UNUSED,
4088 const_tree type, bool named ATTRIBUTE_UNUSED)
4090 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4094 /* Var args. */
4096 /* Create and return the va_list datatype.
4098 On SPU, va_list is an array type equivalent to
4100 typedef struct __va_list_tag
4102 void *__args __attribute__((__aligned(16)));
4103 void *__skip __attribute__((__aligned(16)));
4105 } va_list[1];
4107 where __args points to the arg that will be returned by the next
4108 va_arg(), and __skip points to the previous stack frame such that
4109 when __args == __skip we should advance __args by 32 bytes. */
4110 static tree
4111 spu_build_builtin_va_list (void)
4113 tree f_args, f_skip, record, type_decl;
4114 bool owp;
4116 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4118 type_decl =
4119 build_decl (BUILTINS_LOCATION,
4120 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4122 f_args = build_decl (BUILTINS_LOCATION,
4123 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4124 f_skip = build_decl (BUILTINS_LOCATION,
4125 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4127 DECL_FIELD_CONTEXT (f_args) = record;
4128 DECL_ALIGN (f_args) = 128;
4129 DECL_USER_ALIGN (f_args) = 1;
4131 DECL_FIELD_CONTEXT (f_skip) = record;
4132 DECL_ALIGN (f_skip) = 128;
4133 DECL_USER_ALIGN (f_skip) = 1;
4135 TYPE_STUB_DECL (record) = type_decl;
4136 TYPE_NAME (record) = type_decl;
4137 TYPE_FIELDS (record) = f_args;
4138 DECL_CHAIN (f_args) = f_skip;
4140 /* We know this is being padded and we want it that way. It is an
4141 internal type, so hide the warnings from the user. */
4142 owp = warn_padded;
4143 warn_padded = false;
4145 layout_type (record);
4147 warn_padded = owp;
4149 /* The correct type is an array type of one element. */
4150 return build_array_type (record, build_index_type (size_zero_node));
4153 /* Implement va_start by filling the va_list structure VALIST.
4154 NEXTARG points to the first anonymous stack argument.
4156 The following global variables are used to initialize
4157 the va_list structure:
4159 crtl->args.info;
4160 the CUMULATIVE_ARGS for this function
4162 crtl->args.arg_offset_rtx:
4163 holds the offset of the first anonymous stack argument
4164 (relative to the virtual arg pointer). */
4166 static void
4167 spu_va_start (tree valist, rtx nextarg)
4169 tree f_args, f_skip;
4170 tree args, skip, t;
4172 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4173 f_skip = DECL_CHAIN (f_args);
4175 valist = build_simple_mem_ref (valist);
4176 args =
4177 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4178 skip =
4179 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4181 /* Find the __args area. */
4182 t = make_tree (TREE_TYPE (args), nextarg);
4183 if (crtl->args.pretend_args_size > 0)
4184 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4185 size_int (-STACK_POINTER_OFFSET));
4186 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4187 TREE_SIDE_EFFECTS (t) = 1;
4188 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4190 /* Find the __skip area. */
4191 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4192 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4193 size_int (crtl->args.pretend_args_size
4194 - STACK_POINTER_OFFSET));
4195 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4196 TREE_SIDE_EFFECTS (t) = 1;
4197 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4200 /* Gimplify va_arg by updating the va_list structure
4201 VALIST as required to retrieve an argument of type
4202 TYPE, and returning that argument.
4204 ret = va_arg(VALIST, TYPE);
4206 generates code equivalent to:
4208 paddedsize = (sizeof(TYPE) + 15) & -16;
4209 if (VALIST.__args + paddedsize > VALIST.__skip
4210 && VALIST.__args <= VALIST.__skip)
4211 addr = VALIST.__skip + 32;
4212 else
4213 addr = VALIST.__args;
4214 VALIST.__args = addr + paddedsize;
4215 ret = *(TYPE *)addr;
4216 */
4217 static tree
4218 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4219 gimple_seq * post_p ATTRIBUTE_UNUSED)
4221 tree f_args, f_skip;
4222 tree args, skip;
4223 HOST_WIDE_INT size, rsize;
4224 tree paddedsize, addr, tmp;
4225 bool pass_by_reference_p;
4227 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4228 f_skip = DECL_CHAIN (f_args);
4230 valist = build_simple_mem_ref (valist);
4231 args =
4232 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4233 skip =
4234 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4236 addr = create_tmp_var (ptr_type_node, "va_arg");
4238 /* if an object is dynamically sized, a pointer to it is passed
4239 instead of the object itself. */
4240 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4241 false);
4242 if (pass_by_reference_p)
4243 type = build_pointer_type (type);
4244 size = int_size_in_bytes (type);
4245 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4247 /* build conditional expression to calculate addr. The expression
4248 will be gimplified later. */
4249 paddedsize = size_int (rsize);
4250 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4251 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4252 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4253 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4254 unshare_expr (skip)));
4256 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4257 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4258 size_int (32)), unshare_expr (args));
4260 gimplify_assign (addr, tmp, pre_p);
4262 /* update VALIST.__args */
4263 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4264 gimplify_assign (unshare_expr (args), tmp, pre_p);
4266 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4267 addr);
4269 if (pass_by_reference_p)
4270 addr = build_va_arg_indirect_ref (addr);
4272 return build_va_arg_indirect_ref (addr);
4275 /* Save parameter registers starting with the register that corresponds
4276 to the first unnamed parameters. If the first unnamed parameter is
4277 in the stack then save no registers. Set pretend_args_size to the
4278 amount of space needed to save the registers. */
4279 static void
4280 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4281 tree type, int *pretend_size, int no_rtl)
4283 if (!no_rtl)
4285 rtx tmp;
4286 int regno;
4287 int offset;
4288 int ncum = *get_cumulative_args (cum);
4290 /* cum currently points to the last named argument; we want to
4291 start at the next argument. */
4292 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4294 offset = -STACK_POINTER_OFFSET;
4295 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4297 tmp = gen_frame_mem (V4SImode,
4298 plus_constant (virtual_incoming_args_rtx,
4299 offset));
4300 emit_move_insn (tmp,
4301 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4302 offset += 16;
4304 *pretend_size = offset + STACK_POINTER_OFFSET;
4308 static void
4309 spu_conditional_register_usage (void)
4311 if (flag_pic)
4313 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4314 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4318 /* This is called any time we inspect the alignment of a register for
4319 addresses. */
4320 static int
4321 reg_aligned_for_addr (rtx x)
4323 int regno =
4324 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4325 return REGNO_POINTER_ALIGN (regno) >= 128;
4328 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4329 into its SYMBOL_REF_FLAGS. */
4330 static void
4331 spu_encode_section_info (tree decl, rtx rtl, int first)
4333 default_encode_section_info (decl, rtl, first);
4335 /* If a variable has a forced alignment to < 16 bytes, mark it with
4336 SYMBOL_FLAG_ALIGN1. */
4337 if (TREE_CODE (decl) == VAR_DECL
4338 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4339 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4342 /* Return TRUE if we are certain the mem refers to a complete object
4343 which is both 16-byte aligned and padded to a 16-byte boundary. This
4344 would make it safe to store with a single instruction.
4345 We guarantee the alignment and padding for static objects by aligning
4346 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4347 FIXME: We currently cannot guarantee this for objects on the stack
4348 because assign_parm_setup_stack calls assign_stack_local with the
4349 alignment of the parameter mode and in that case the alignment never
4350 gets adjusted by LOCAL_ALIGNMENT. */
4351 static int
4352 store_with_one_insn_p (rtx mem)
4354 enum machine_mode mode = GET_MODE (mem);
4355 rtx addr = XEXP (mem, 0);
4356 if (mode == BLKmode)
4357 return 0;
4358 if (GET_MODE_SIZE (mode) >= 16)
4359 return 1;
4360 /* Only static objects. */
4361 if (GET_CODE (addr) == SYMBOL_REF)
4363 /* We use the associated declaration to make sure the access is
4364 referring to the whole object.
4365 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4366 if it is necessary. Will there be cases where one exists, and
4367 the other does not? Will there be cases where both exist, but
4368 have different types? */
4369 tree decl = MEM_EXPR (mem);
4370 if (decl
4371 && TREE_CODE (decl) == VAR_DECL
4372 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4373 return 1;
4374 decl = SYMBOL_REF_DECL (addr);
4375 if (decl
4376 && TREE_CODE (decl) == VAR_DECL
4377 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4378 return 1;
4380 return 0;
4383 /* Return 1 when the address is not valid for a simple load and store as
4384 required by the '_mov*' patterns. We could make this less strict
4385 for loads, but we prefer mem's to look the same so they are more
4386 likely to be merged. */
4387 static int
4388 address_needs_split (rtx mem)
4390 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4391 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4392 || !(store_with_one_insn_p (mem)
4393 || mem_is_padded_component_ref (mem))))
4394 return 1;
4396 return 0;
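/* For example (sketch, not from the original source): QImode and HImode
   accesses are always split because their size is below 4 bytes, while a
   naturally aligned SImode store to a padded static object satisfies
   store_with_one_insn_p and can be emitted as a single instruction.  */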
4399 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4400 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4401 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4403 /* MEM is known to be an __ea qualified memory access. Emit a call to
4404 fetch the ppu memory to local store, and return its address in local
4405 store. */
4407 static void
4408 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4410 if (is_store)
4412 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4413 if (!cache_fetch_dirty)
4414 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4415 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4416 2, ea_addr, EAmode, ndirty, SImode);
4418 else
4420 if (!cache_fetch)
4421 cache_fetch = init_one_libfunc ("__cache_fetch");
4422 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4423 1, ea_addr, EAmode);
4427 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4428 dirty bit marking, inline.
4430 The cache control data structure is an array of
4432 struct __cache_tag_array
4434 unsigned int tag_lo[4];
4435 unsigned int tag_hi[4];
4436 void *data_pointer[4];
4437 int reserved[4];
4438 vector unsigned short dirty_bits[4];
4439 } */
4441 static void
4442 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4444 rtx ea_addr_si;
4445 HOST_WIDE_INT v;
4446 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4447 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4448 rtx index_mask = gen_reg_rtx (SImode);
4449 rtx tag_arr = gen_reg_rtx (Pmode);
4450 rtx splat_mask = gen_reg_rtx (TImode);
4451 rtx splat = gen_reg_rtx (V4SImode);
4452 rtx splat_hi = NULL_RTX;
4453 rtx tag_index = gen_reg_rtx (Pmode);
4454 rtx block_off = gen_reg_rtx (SImode);
4455 rtx tag_addr = gen_reg_rtx (Pmode);
4456 rtx tag = gen_reg_rtx (V4SImode);
4457 rtx cache_tag = gen_reg_rtx (V4SImode);
4458 rtx cache_tag_hi = NULL_RTX;
4459 rtx cache_ptrs = gen_reg_rtx (TImode);
4460 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4461 rtx tag_equal = gen_reg_rtx (V4SImode);
4462 rtx tag_equal_hi = NULL_RTX;
4463 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4464 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4465 rtx eq_index = gen_reg_rtx (SImode);
4466 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4468 if (spu_ea_model != 32)
4470 splat_hi = gen_reg_rtx (V4SImode);
4471 cache_tag_hi = gen_reg_rtx (V4SImode);
4472 tag_equal_hi = gen_reg_rtx (V4SImode);
4475 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4476 emit_move_insn (tag_arr, tag_arr_sym);
4477 v = 0x0001020300010203LL;
4478 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4479 ea_addr_si = ea_addr;
4480 if (spu_ea_model != 32)
4481 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4483 /* tag_index = ea_addr & (tag_array_size - 128) */
4484 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4486 /* splat ea_addr to all 4 slots. */
4487 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4488 /* Similarly for high 32 bits of ea_addr. */
4489 if (spu_ea_model != 32)
4490 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4492 /* block_off = ea_addr & 127 */
4493 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4495 /* tag_addr = tag_arr + tag_index */
4496 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4498 /* Read cache tags. */
4499 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4500 if (spu_ea_model != 32)
4501 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4502 plus_constant (tag_addr, 16)));
4504 /* tag = ea_addr & -128 */
4505 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4507 /* Read all four cache data pointers. */
4508 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4509 plus_constant (tag_addr, 32)));
4511 /* Compare tags. */
4512 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4513 if (spu_ea_model != 32)
4515 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4516 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4519 /* At most one of the tags compare equal, so tag_equal has one
4520 32-bit slot set to all 1's, with the other slots all zero.
4521 gbb picks off the low bit of each byte in the 128-bit register,
4522 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4523 we have a hit. */
4524 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4525 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4527 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4528 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4530 /* This allows us to rotate the corresponding cache data pointer into
4531 slot 0 (rotating by eq_index mod 16 bytes). */
4532 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4533 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4535 /* Add block offset to form final data address. */
4536 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
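  /* Worked example (added for illustration, not from the original source):
     if the tag matches in slot 2, tag_equal is { 0, 0, -1, 0 }, gbb packs
     that to 0x00f0, clz of 0x000000f0 sets eq_index to 24, and rotating
     cache_ptrs left by 24 mod 16 = 8 bytes moves data_pointer[2] into the
     preferred slot.  */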
4538 /* Check that we did hit. */
4539 hit_label = gen_label_rtx ();
4540 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4541 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4542 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4543 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4544 hit_ref, pc_rtx)));
4545 /* Say that this branch is very likely to happen. */
4546 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4547 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4549 ea_load_store (mem, is_store, ea_addr, data_addr);
4550 cont_label = gen_label_rtx ();
4551 emit_jump_insn (gen_jump (cont_label));
4552 emit_barrier ();
4554 emit_label (hit_label);
4556 if (is_store)
4558 HOST_WIDE_INT v_hi;
4559 rtx dirty_bits = gen_reg_rtx (TImode);
4560 rtx dirty_off = gen_reg_rtx (SImode);
4561 rtx dirty_128 = gen_reg_rtx (TImode);
4562 rtx neg_block_off = gen_reg_rtx (SImode);
4564 /* Set up mask with one dirty bit per byte of the mem we are
4565 writing, starting from top bit. */
4566 v_hi = v = -1;
4567 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4568 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4570 v_hi = v;
4571 v = 0;
4573 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
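      /* Worked example (added for illustration, not from the original
	 source): for a 4-byte store, 128 - 4 = 124 and 124 & 63 = 60, so
	 v = -1 << 60; since 124 >= 64 that value becomes the high half and
	 the low half is cleared, leaving only the top four bits of the
	 TImode mask set, i.e. one dirty bit per byte to be written,
	 starting from the most significant bit.  */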
4575 /* Form index into cache dirty_bits. eq_index is one of
4576 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4577 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4578 offset to each of the four dirty_bits elements. */
4579 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4581 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4583 /* Rotate bit mask to proper bit. */
4584 emit_insn (gen_negsi2 (neg_block_off, block_off));
4585 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4586 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4588 /* Or in the new dirty bits. */
4589 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4591 /* Store. */
4592 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4595 emit_label (cont_label);
4598 static rtx
4599 expand_ea_mem (rtx mem, bool is_store)
4601 rtx ea_addr;
4602 rtx data_addr = gen_reg_rtx (Pmode);
4603 rtx new_mem;
4605 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4606 if (optimize_size || optimize == 0)
4607 ea_load_store (mem, is_store, ea_addr, data_addr);
4608 else
4609 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4611 if (ea_alias_set == -1)
4612 ea_alias_set = new_alias_set ();
4614 /* We generate a new MEM RTX to refer to the copy of the data
4615 in the cache. We do not copy memory attributes (except the
4616 alignment) from the original MEM, as they may no longer apply
4617 to the cache copy. */
4618 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4619 set_mem_alias_set (new_mem, ea_alias_set);
4620 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4622 return new_mem;
4626 spu_expand_mov (rtx * ops, enum machine_mode mode)
4628 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4630 /* Perform the move in the destination SUBREG's inner mode. */
4631 ops[0] = SUBREG_REG (ops[0]);
4632 mode = GET_MODE (ops[0]);
4633 ops[1] = gen_lowpart_common (mode, ops[1]);
4634 gcc_assert (ops[1]);
4637 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4639 rtx from = SUBREG_REG (ops[1]);
4640 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4642 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4643 && GET_MODE_CLASS (imode) == MODE_INT
4644 && subreg_lowpart_p (ops[1]));
4646 if (GET_MODE_SIZE (imode) < 4)
4647 imode = SImode;
4648 if (imode != GET_MODE (from))
4649 from = gen_rtx_SUBREG (imode, from, 0);
4651 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4653 enum insn_code icode = convert_optab_handler (trunc_optab,
4654 mode, imode);
4655 emit_insn (GEN_FCN (icode) (ops[0], from));
4657 else
4658 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4659 return 1;
4662 /* At least one of the operands needs to be a register. */
4663 if ((reload_in_progress | reload_completed) == 0
4664 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4666 rtx temp = force_reg (mode, ops[1]);
4667 emit_move_insn (ops[0], temp);
4668 return 1;
4670 if (reload_in_progress || reload_completed)
4672 if (CONSTANT_P (ops[1]))
4673 return spu_split_immediate (ops);
4674 return 0;
4677 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4678 extend them. */
4679 if (GET_CODE (ops[1]) == CONST_INT)
4681 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4682 if (val != INTVAL (ops[1]))
4684 emit_move_insn (ops[0], GEN_INT (val));
4685 return 1;
4688 if (MEM_P (ops[0]))
4690 if (MEM_ADDR_SPACE (ops[0]))
4691 ops[0] = expand_ea_mem (ops[0], true);
4692 return spu_split_store (ops);
4694 if (MEM_P (ops[1]))
4696 if (MEM_ADDR_SPACE (ops[1]))
4697 ops[1] = expand_ea_mem (ops[1], false);
4698 return spu_split_load (ops);
4701 return 0;
4704 static void
4705 spu_convert_move (rtx dst, rtx src)
4707 enum machine_mode mode = GET_MODE (dst);
4708 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4709 rtx reg;
4710 gcc_assert (GET_MODE (src) == TImode);
4711 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4712 emit_insn (gen_rtx_SET (VOIDmode, reg,
4713 gen_rtx_TRUNCATE (int_mode,
4714 gen_rtx_LSHIFTRT (TImode, src,
4715 GEN_INT (int_mode == DImode ? 64 : 96)))));
4716 if (int_mode != mode)
4718 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4719 emit_move_insn (dst, reg);
4723 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4724 the address from SRC and SRC+16. Return a REG or CONST_INT that
4725 specifies how many bytes to rotate the loaded registers, plus any
4726 extra from EXTRA_ROTQBY. The address and rotate amounts are
4727 normalized to improve merging of loads and rotate computations. */
4728 static rtx
4729 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4731 rtx addr = XEXP (src, 0);
4732 rtx p0, p1, rot, addr0, addr1;
4733 int rot_amt;
4735 rot = 0;
4736 rot_amt = 0;
4738 if (MEM_ALIGN (src) >= 128)
4739 /* Address is already aligned; simply perform a TImode load. */ ;
4740 else if (GET_CODE (addr) == PLUS)
4742 /* 8 cases:
4743 aligned reg + aligned reg => lqx
4744 aligned reg + unaligned reg => lqx, rotqby
4745 aligned reg + aligned const => lqd
4746 aligned reg + unaligned const => lqd, rotqbyi
4747 unaligned reg + aligned reg => lqx, rotqby
4748 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4749 unaligned reg + aligned const => lqd, rotqby
4750 unaligned reg + unaligned const => not allowed by legitimate address
4751 */
4752 p0 = XEXP (addr, 0);
4753 p1 = XEXP (addr, 1);
4754 if (!reg_aligned_for_addr (p0))
4756 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4758 rot = gen_reg_rtx (SImode);
4759 emit_insn (gen_addsi3 (rot, p0, p1));
4761 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4763 if (INTVAL (p1) > 0
4764 && REG_POINTER (p0)
4765 && INTVAL (p1) * BITS_PER_UNIT
4766 < REGNO_POINTER_ALIGN (REGNO (p0)))
4768 rot = gen_reg_rtx (SImode);
4769 emit_insn (gen_addsi3 (rot, p0, p1));
4770 addr = p0;
4772 else
4774 rtx x = gen_reg_rtx (SImode);
4775 emit_move_insn (x, p1);
4776 if (!spu_arith_operand (p1, SImode))
4777 p1 = x;
4778 rot = gen_reg_rtx (SImode);
4779 emit_insn (gen_addsi3 (rot, p0, p1));
4780 addr = gen_rtx_PLUS (Pmode, p0, x);
4783 else
4784 rot = p0;
4786 else
4788 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4790 rot_amt = INTVAL (p1) & 15;
4791 if (INTVAL (p1) & -16)
4793 p1 = GEN_INT (INTVAL (p1) & -16);
4794 addr = gen_rtx_PLUS (SImode, p0, p1);
4796 else
4797 addr = p0;
4799 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4800 rot = p1;
4803 else if (REG_P (addr))
4805 if (!reg_aligned_for_addr (addr))
4806 rot = addr;
4808 else if (GET_CODE (addr) == CONST)
4810 if (GET_CODE (XEXP (addr, 0)) == PLUS
4811 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4812 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4814 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4815 if (rot_amt & -16)
4816 addr = gen_rtx_CONST (Pmode,
4817 gen_rtx_PLUS (Pmode,
4818 XEXP (XEXP (addr, 0), 0),
4819 GEN_INT (rot_amt & -16)));
4820 else
4821 addr = XEXP (XEXP (addr, 0), 0);
4823 else
4825 rot = gen_reg_rtx (Pmode);
4826 emit_move_insn (rot, addr);
4829 else if (GET_CODE (addr) == CONST_INT)
4831 rot_amt = INTVAL (addr);
4832 addr = GEN_INT (rot_amt & -16);
4834 else if (!ALIGNED_SYMBOL_REF_P (addr))
4836 rot = gen_reg_rtx (Pmode);
4837 emit_move_insn (rot, addr);
4840 rot_amt += extra_rotby;
4842 rot_amt &= 15;
4844 if (rot && rot_amt)
4846 rtx x = gen_reg_rtx (SImode);
4847 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4848 rot = x;
4849 rot_amt = 0;
4851 if (!rot && rot_amt)
4852 rot = GEN_INT (rot_amt);
4854 addr0 = copy_rtx (addr);
4855 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4856 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4858 if (dst1)
4860 addr1 = plus_constant (copy_rtx (addr), 16);
4861 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4862 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4865 return rot;
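/* Illustrative example (added, not from the original source), following the
   "aligned reg + unaligned const" case above: loading an SImode value from
   p0 + 7, where p0 is known to be 16-byte aligned, keeps p0 as the quadword
   address (7 & -16 == 0) and returns a constant rotate of 7 plus
   extra_rotby, so the caller can emit one lqd followed by a constant
   rotqbyi.  */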
4869 spu_split_load (rtx * ops)
4871 enum machine_mode mode = GET_MODE (ops[0]);
4872 rtx addr, load, rot;
4873 int rot_amt;
4875 if (GET_MODE_SIZE (mode) >= 16)
4876 return 0;
4878 addr = XEXP (ops[1], 0);
4879 gcc_assert (GET_CODE (addr) != AND);
4881 if (!address_needs_split (ops[1]))
4883 ops[1] = change_address (ops[1], TImode, addr);
4884 load = gen_reg_rtx (TImode);
4885 emit_insn (gen__movti (load, ops[1]));
4886 spu_convert_move (ops[0], load);
4887 return 1;
4890 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4892 load = gen_reg_rtx (TImode);
4893 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4895 if (rot)
4896 emit_insn (gen_rotqby_ti (load, load, rot));
4898 spu_convert_move (ops[0], load);
4899 return 1;
4903 spu_split_store (rtx * ops)
4905 enum machine_mode mode = GET_MODE (ops[0]);
4906 rtx reg;
4907 rtx addr, p0, p1, p1_lo, smem;
4908 int aform;
4909 int scalar;
4911 if (GET_MODE_SIZE (mode) >= 16)
4912 return 0;
4914 addr = XEXP (ops[0], 0);
4915 gcc_assert (GET_CODE (addr) != AND);
4917 if (!address_needs_split (ops[0]))
4919 reg = gen_reg_rtx (TImode);
4920 emit_insn (gen_spu_convert (reg, ops[1]));
4921 ops[0] = change_address (ops[0], TImode, addr);
4922 emit_move_insn (ops[0], reg);
4923 return 1;
4926 if (GET_CODE (addr) == PLUS)
4928 /* 8 cases:
4929 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4930 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4931 aligned reg + aligned const => lqd, c?d, shuf, stqx
4932 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4933 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4934 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4935 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4936 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4937 */
4938 aform = 0;
4939 p0 = XEXP (addr, 0);
4940 p1 = p1_lo = XEXP (addr, 1);
4941 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4943 p1_lo = GEN_INT (INTVAL (p1) & 15);
4944 if (reg_aligned_for_addr (p0))
4946 p1 = GEN_INT (INTVAL (p1) & -16);
4947 if (p1 == const0_rtx)
4948 addr = p0;
4949 else
4950 addr = gen_rtx_PLUS (SImode, p0, p1);
4952 else
4954 rtx x = gen_reg_rtx (SImode);
4955 emit_move_insn (x, p1);
4956 addr = gen_rtx_PLUS (SImode, p0, x);
4960 else if (REG_P (addr))
4962 aform = 0;
4963 p0 = addr;
4964 p1 = p1_lo = const0_rtx;
4966 else
4968 aform = 1;
4969 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4970 p1 = 0; /* aform doesn't use p1 */
4971 p1_lo = addr;
4972 if (ALIGNED_SYMBOL_REF_P (addr))
4973 p1_lo = const0_rtx;
4974 else if (GET_CODE (addr) == CONST
4975 && GET_CODE (XEXP (addr, 0)) == PLUS
4976 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4977 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4979 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4980 if ((v & -16) != 0)
4981 addr = gen_rtx_CONST (Pmode,
4982 gen_rtx_PLUS (Pmode,
4983 XEXP (XEXP (addr, 0), 0),
4984 GEN_INT (v & -16)));
4985 else
4986 addr = XEXP (XEXP (addr, 0), 0);
4987 p1_lo = GEN_INT (v & 15);
4989 else if (GET_CODE (addr) == CONST_INT)
4991 p1_lo = GEN_INT (INTVAL (addr) & 15);
4992 addr = GEN_INT (INTVAL (addr) & -16);
4994 else
4996 p1_lo = gen_reg_rtx (SImode);
4997 emit_move_insn (p1_lo, addr);
5001 gcc_assert (aform == 0 || aform == 1);
5002 reg = gen_reg_rtx (TImode);
5004 scalar = store_with_one_insn_p (ops[0]);
5005 if (!scalar)
5007 /* We could copy the flags from the ops[0] MEM to lmem here, but we
5008 don't because we want this load to be optimized away if possible,
5009 and copying the flags will prevent that in certain cases, e.g.
5010 consider the volatile flag. */
5012 rtx pat = gen_reg_rtx (TImode);
5013 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5014 set_mem_alias_set (lmem, 0);
5015 emit_insn (gen_movti (reg, lmem));
5017 if (!p0 || reg_aligned_for_addr (p0))
5018 p0 = stack_pointer_rtx;
5019 if (!p1_lo)
5020 p1_lo = const0_rtx;
5022 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5023 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5025 else
5027 if (GET_CODE (ops[1]) == REG)
5028 emit_insn (gen_spu_convert (reg, ops[1]));
5029 else if (GET_CODE (ops[1]) == SUBREG)
5030 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5031 else
5032 abort ();
5035 if (GET_MODE_SIZE (mode) < 4 && scalar)
5036 emit_insn (gen_ashlti3
5037 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5039 smem = change_address (ops[0], TImode, copy_rtx (addr));
5040 /* We can't use the previous alias set because the memory has changed
5041 size and can potentially overlap objects of other types. */
5042 set_mem_alias_set (smem, 0);
5044 emit_insn (gen_movti (smem, reg));
5045 return 1;
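/* Illustrative sketch of the generated sequence (added, not from the
   original source): storing an SImode value at p0 + 4, when the store is
   not known to cover a whole padded object, loads the enclosing quadword,
   builds an insertion mask with cpat for byte offset 4, shuffles the new
   word into the loaded quadword under that mask, and stores the quadword
   back, i.e. a read-modify-write of the surrounding 16 bytes as listed in
   the lqd/c?d/shuf/stqx cases above.  */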
5048 /* Return TRUE if X is MEM which is a struct member reference
5049 and the member can safely be loaded and stored with a single
5050 instruction because it is padded. */
5051 static int
5052 mem_is_padded_component_ref (rtx x)
5054 tree t = MEM_EXPR (x);
5055 tree r;
5056 if (!t || TREE_CODE (t) != COMPONENT_REF)
5057 return 0;
5058 t = TREE_OPERAND (t, 1);
5059 if (!t || TREE_CODE (t) != FIELD_DECL
5060 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5061 return 0;
5062 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5063 r = DECL_FIELD_CONTEXT (t);
5064 if (!r || TREE_CODE (r) != RECORD_TYPE)
5065 return 0;
5066 /* Make sure they are the same mode */
5067 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5068 return 0;
5069 /* If there are no following fields then the field alignment assures
5070 the structure is padded to the alignment which means this field is
5071 padded too. */
5072 if (TREE_CHAIN (t) == 0)
5073 return 1;
5074 /* If the following field is also aligned then this field will be
5075 padded. */
5076 t = TREE_CHAIN (t);
5077 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5078 return 1;
5079 return 0;
5082 /* Parse the -mfixed-range= option string. */
5083 static void
5084 fix_range (const char *const_str)
5086 int i, first, last;
5087 char *str, *dash, *comma;
5089 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5090 REG2 are either register names or register numbers. The effect
5091 of this option is to mark the registers in the range from REG1 to
5092 REG2 as ``fixed'' so they won't be used by the compiler. */
5094 i = strlen (const_str);
5095 str = (char *) alloca (i + 1);
5096 memcpy (str, const_str, i + 1);
5098 while (1)
5100 dash = strchr (str, '-');
5101 if (!dash)
5103 warning (0, "value of -mfixed-range must have form REG1-REG2");
5104 return;
5106 *dash = '\0';
5107 comma = strchr (dash + 1, ',');
5108 if (comma)
5109 *comma = '\0';
5111 first = decode_reg_name (str);
5112 if (first < 0)
5114 warning (0, "unknown register name: %s", str);
5115 return;
5118 last = decode_reg_name (dash + 1);
5119 if (last < 0)
5121 warning (0, "unknown register name: %s", dash + 1);
5122 return;
5125 *dash = '-';
5127 if (first > last)
5129 warning (0, "%s-%s is an empty range", str, dash + 1);
5130 return;
5133 for (i = first; i <= last; ++i)
5134 fixed_regs[i] = call_used_regs[i] = 1;
5136 if (!comma)
5137 break;
5139 *comma = ',';
5140 str = comma + 1;
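/* Usage sketch (assumed option syntax, not from the original source):
   -mfixed-range=$80-$85,$90-$92 would mark registers $80..$85 and
   $90..$92 as fixed and call-used, so the compiler never allocates
   them.  */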
5144 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5145 can be generated using the fsmbi instruction. */
5147 fsmbi_const_p (rtx x)
5149 if (CONSTANT_P (x))
5151 /* We can always choose TImode for CONST_INT because the high bits
5152 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5153 enum immediate_class c = classify_immediate (x, TImode);
5154 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5156 return 0;
5159 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5160 can be generated using the cbd, chd, cwd or cdd instruction. */
5162 cpat_const_p (rtx x, enum machine_mode mode)
5164 if (CONSTANT_P (x))
5166 enum immediate_class c = classify_immediate (x, mode);
5167 return c == IC_CPAT;
5169 return 0;
5173 gen_cpat_const (rtx * ops)
5175 unsigned char dst[16];
5176 int i, offset, shift, isize;
5177 if (GET_CODE (ops[3]) != CONST_INT
5178 || GET_CODE (ops[2]) != CONST_INT
5179 || (GET_CODE (ops[1]) != CONST_INT
5180 && GET_CODE (ops[1]) != REG))
5181 return 0;
5182 if (GET_CODE (ops[1]) == REG
5183 && (!REG_POINTER (ops[1])
5184 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5185 return 0;
5187 for (i = 0; i < 16; i++)
5188 dst[i] = i + 16;
5189 isize = INTVAL (ops[3]);
5190 if (isize == 1)
5191 shift = 3;
5192 else if (isize == 2)
5193 shift = 2;
5194 else
5195 shift = 0;
5196 offset = (INTVAL (ops[2]) +
5197 (GET_CODE (ops[1]) ==
5198 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5199 for (i = 0; i < isize; i++)
5200 dst[offset + i] = i + shift;
5201 return array_to_constant (TImode, dst);
5204 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5205 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5206 than 16 bytes, the value is repeated across the rest of the array. */
5207 void
5208 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5210 HOST_WIDE_INT val;
5211 int i, j, first;
5213 memset (arr, 0, 16);
5214 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5215 if (GET_CODE (x) == CONST_INT
5216 || (GET_CODE (x) == CONST_DOUBLE
5217 && (mode == SFmode || mode == DFmode)))
5219 gcc_assert (mode != VOIDmode && mode != BLKmode);
5221 if (GET_CODE (x) == CONST_DOUBLE)
5222 val = const_double_to_hwint (x);
5223 else
5224 val = INTVAL (x);
5225 first = GET_MODE_SIZE (mode) - 1;
5226 for (i = first; i >= 0; i--)
5228 arr[i] = val & 0xff;
5229 val >>= 8;
5231 /* Splat the constant across the whole array. */
5232 for (j = 0, i = first + 1; i < 16; i++)
5234 arr[i] = arr[j];
5235 j = (j == first) ? 0 : j + 1;
5238 else if (GET_CODE (x) == CONST_DOUBLE)
5240 val = CONST_DOUBLE_LOW (x);
5241 for (i = 15; i >= 8; i--)
5243 arr[i] = val & 0xff;
5244 val >>= 8;
5246 val = CONST_DOUBLE_HIGH (x);
5247 for (i = 7; i >= 0; i--)
5249 arr[i] = val & 0xff;
5250 val >>= 8;
5253 else if (GET_CODE (x) == CONST_VECTOR)
5255 int units;
5256 rtx elt;
5257 mode = GET_MODE_INNER (mode);
5258 units = CONST_VECTOR_NUNITS (x);
5259 for (i = 0; i < units; i++)
5261 elt = CONST_VECTOR_ELT (x, i);
5262 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5264 if (GET_CODE (elt) == CONST_DOUBLE)
5265 val = const_double_to_hwint (elt);
5266 else
5267 val = INTVAL (elt);
5268 first = GET_MODE_SIZE (mode) - 1;
5269 if (first + i * GET_MODE_SIZE (mode) > 16)
5270 abort ();
5271 for (j = first; j >= 0; j--)
5273 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5274 val >>= 8;
5279 else
5280 gcc_unreachable();
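/* Hypothetical usage sketch (added for illustration, not part of the
   original file), showing the splat behaviour described above for a
   constant narrower than 16 bytes.  */
#if 0
  unsigned char arr[16];
  constant_to_array (HImode, GEN_INT (0x1234), arr);
  /* arr now holds the byte pattern 0x12, 0x34 repeated eight times.  */
#endif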
5283 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5284 smaller than 16 bytes, use the bytes that would represent that value
5285 in a register, e.g., for QImode return the value of arr[3]. */
5287 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5289 enum machine_mode inner_mode;
5290 rtvec v;
5291 int units, size, i, j, k;
5292 HOST_WIDE_INT val;
5294 if (GET_MODE_CLASS (mode) == MODE_INT
5295 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5297 j = GET_MODE_SIZE (mode);
5298 i = j < 4 ? 4 - j : 0;
5299 for (val = 0; i < j; i++)
5300 val = (val << 8) | arr[i];
5301 val = trunc_int_for_mode (val, mode);
5302 return GEN_INT (val);
5305 if (mode == TImode)
5307 HOST_WIDE_INT high;
5308 for (i = high = 0; i < 8; i++)
5309 high = (high << 8) | arr[i];
5310 for (i = 8, val = 0; i < 16; i++)
5311 val = (val << 8) | arr[i];
5312 return immed_double_const (val, high, TImode);
5314 if (mode == SFmode)
5316 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5317 val = trunc_int_for_mode (val, SImode);
5318 return hwint_to_const_double (SFmode, val);
5320 if (mode == DFmode)
5322 for (i = 0, val = 0; i < 8; i++)
5323 val = (val << 8) | arr[i];
5324 return hwint_to_const_double (DFmode, val);
5327 if (!VECTOR_MODE_P (mode))
5328 abort ();
5330 units = GET_MODE_NUNITS (mode);
5331 size = GET_MODE_UNIT_SIZE (mode);
5332 inner_mode = GET_MODE_INNER (mode);
5333 v = rtvec_alloc (units);
5335 for (k = i = 0; i < units; ++i)
5337 val = 0;
5338 for (j = 0; j < size; j++, k++)
5339 val = (val << 8) | arr[k];
5341 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5342 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5343 else
5344 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5346 if (k > 16)
5347 abort ();
5349 return gen_rtx_CONST_VECTOR (mode, v);
5352 static void
5353 reloc_diagnostic (rtx x)
5355 tree decl = 0;
5356 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5357 return;
5359 if (GET_CODE (x) == SYMBOL_REF)
5360 decl = SYMBOL_REF_DECL (x);
5361 else if (GET_CODE (x) == CONST
5362 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5363 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5365 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5366 if (decl && !DECL_P (decl))
5367 decl = 0;
5369 /* The decl could be a string constant. */
5370 if (decl && DECL_P (decl))
5372 location_t loc;
5373 /* We use last_assemble_variable_decl to get line information. It's
5374 not always going to be right and might not even be close, but will
5375 be right for the more common cases. */
5376 if (!last_assemble_variable_decl || in_section == ctors_section)
5377 loc = DECL_SOURCE_LOCATION (decl);
5378 else
5379 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5381 if (TARGET_WARN_RELOC)
5382 warning_at (loc, 0,
5383 "creating run-time relocation for %qD", decl);
5384 else
5385 error_at (loc,
5386 "creating run-time relocation for %qD", decl);
5388 else
5390 if (TARGET_WARN_RELOC)
5391 warning_at (input_location, 0, "creating run-time relocation");
5392 else
5393 error_at (input_location, "creating run-time relocation");
5397 /* Hook into assemble_integer so we can generate an error for run-time
5398 relocations. The SPU ABI disallows them. */
5399 static bool
5400 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5402 /* By default run-time relocations aren't supported, but we allow them
5403 in case users support it in their own run-time loader. And we provide
5404 a warning for those users that don't. */
5405 if ((GET_CODE (x) == SYMBOL_REF)
5406 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5407 reloc_diagnostic (x);
5409 return default_assemble_integer (x, size, aligned_p);
5412 static void
5413 spu_asm_globalize_label (FILE * file, const char *name)
5415 fputs ("\t.global\t", file);
5416 assemble_name (file, name);
5417 fputs ("\n", file);
5420 static bool
5421 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5422 bool speed ATTRIBUTE_UNUSED)
5424 enum machine_mode mode = GET_MODE (x);
5425 int cost = COSTS_N_INSNS (2);
5427 /* Folding to a CONST_VECTOR will use extra space but there might
5428 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5429 only if it allows us to fold away multiple insns. Changing the cost
5430 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5431 because this cost will only be compared against a single insn.
5432 if (code == CONST_VECTOR)
5433 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5434 */
5436 /* Use defaults for float operations. Not accurate but good enough. */
5437 if (mode == DFmode)
5439 *total = COSTS_N_INSNS (13);
5440 return true;
5442 if (mode == SFmode)
5444 *total = COSTS_N_INSNS (6);
5445 return true;
5447 switch (code)
5449 case CONST_INT:
5450 if (satisfies_constraint_K (x))
5451 *total = 0;
5452 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5453 *total = COSTS_N_INSNS (1);
5454 else
5455 *total = COSTS_N_INSNS (3);
5456 return true;
5458 case CONST:
5459 *total = COSTS_N_INSNS (3);
5460 return true;
5462 case LABEL_REF:
5463 case SYMBOL_REF:
5464 *total = COSTS_N_INSNS (0);
5465 return true;
5467 case CONST_DOUBLE:
5468 *total = COSTS_N_INSNS (5);
5469 return true;
5471 case FLOAT_EXTEND:
5472 case FLOAT_TRUNCATE:
5473 case FLOAT:
5474 case UNSIGNED_FLOAT:
5475 case FIX:
5476 case UNSIGNED_FIX:
5477 *total = COSTS_N_INSNS (7);
5478 return true;
5480 case PLUS:
5481 if (mode == TImode)
5483 *total = COSTS_N_INSNS (9);
5484 return true;
5486 break;
5488 case MULT:
5489 cost =
5490 GET_CODE (XEXP (x, 0)) ==
5491 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5492 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5494 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5496 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5497 cost = COSTS_N_INSNS (14);
5498 if ((val & 0xffff) == 0)
5499 cost = COSTS_N_INSNS (9);
5500 else if (val > 0 && val < 0x10000)
5501 cost = COSTS_N_INSNS (11);
5504 *total = cost;
5505 return true;
5506 case DIV:
5507 case UDIV:
5508 case MOD:
5509 case UMOD:
5510 *total = COSTS_N_INSNS (20);
5511 return true;
5512 case ROTATE:
5513 case ROTATERT:
5514 case ASHIFT:
5515 case ASHIFTRT:
5516 case LSHIFTRT:
5517 *total = COSTS_N_INSNS (4);
5518 return true;
5519 case UNSPEC:
5520 if (XINT (x, 1) == UNSPEC_CONVERT)
5521 *total = COSTS_N_INSNS (0);
5522 else
5523 *total = COSTS_N_INSNS (4);
5524 return true;
5526 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5527 if (GET_MODE_CLASS (mode) == MODE_INT
5528 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5529 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5530 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5531 *total = cost;
5532 return true;
5535 static enum machine_mode
5536 spu_unwind_word_mode (void)
5538 return SImode;
5541 /* Decide whether we can make a sibling call to a function. DECL is the
5542 declaration of the function being targeted by the call and EXP is the
5543 CALL_EXPR representing the call. */
5544 static bool
5545 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5547 return decl && !TARGET_LARGE_MEM;
5550 /* We need to correctly update the back chain pointer and the Available
5551 Stack Size (which is in the second slot of the sp register). */
5552 void
5553 spu_allocate_stack (rtx op0, rtx op1)
5555 HOST_WIDE_INT v;
5556 rtx chain = gen_reg_rtx (V4SImode);
5557 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5558 rtx sp = gen_reg_rtx (V4SImode);
5559 rtx splatted = gen_reg_rtx (V4SImode);
5560 rtx pat = gen_reg_rtx (TImode);
5562 /* copy the back chain so we can save it back again. */
5563 emit_move_insn (chain, stack_bot);
5565 op1 = force_reg (SImode, op1);
5567 v = 0x1020300010203ll;
5568 emit_move_insn (pat, immed_double_const (v, v, TImode));
5569 emit_insn (gen_shufb (splatted, op1, op1, pat));
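  /* Note (added, not in the original source): the shuffle pattern above,
     bytes { 0,1,2,3, 0,1,2,3, ... }, replicates the requested size into all
     four word slots, so the subv4si3 below decrements the stack pointer in
     slot 0 and the Available Stack Size in slot 1 with one vector
     subtract.  */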
5571 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5572 emit_insn (gen_subv4si3 (sp, sp, splatted));
5574 if (flag_stack_check)
5576 rtx avail = gen_reg_rtx(SImode);
5577 rtx result = gen_reg_rtx(SImode);
5578 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5579 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5580 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5583 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5585 emit_move_insn (stack_bot, chain);
5587 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5590 void
5591 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5593 static unsigned char arr[16] =
5594 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5595 rtx temp = gen_reg_rtx (SImode);
5596 rtx temp2 = gen_reg_rtx (SImode);
5597 rtx temp3 = gen_reg_rtx (V4SImode);
5598 rtx temp4 = gen_reg_rtx (V4SImode);
5599 rtx pat = gen_reg_rtx (TImode);
5600 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5602 /* Restore the backchain from the first word, sp from the second. */
5603 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5604 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5606 emit_move_insn (pat, array_to_constant (TImode, arr));
5608 /* Compute Available Stack Size for sp */
5609 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5610 emit_insn (gen_shufb (temp3, temp, temp, pat));
5612 /* Compute Available Stack Size for back chain */
5613 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5614 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5615 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5617 emit_insn (gen_addv4si3 (sp, sp, temp3));
5618 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5621 static void
5622 spu_init_libfuncs (void)
5624 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5625 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5626 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5627 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5628 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5629 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5630 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5631 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5632 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5633 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5634 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5635 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5637 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5640 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5641 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5642 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5643 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5644 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5645 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5646 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5647 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5648 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5649 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5650 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5651 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5653 set_optab_libfunc (smul_optab, TImode, "__multi3");
5654 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5655 set_optab_libfunc (smod_optab, TImode, "__modti3");
5656 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5657 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5658 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5661 /* Make a subreg, stripping any existing subreg. We could possibly just
5662 call simplify_subreg, but in this case we know what we want. */
5664 spu_gen_subreg (enum machine_mode mode, rtx x)
5666 if (GET_CODE (x) == SUBREG)
5667 x = SUBREG_REG (x);
5668 if (GET_MODE (x) == mode)
5669 return x;
5670 return gen_rtx_SUBREG (mode, x, 0);
5673 static bool
5674 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5676 return (TYPE_MODE (type) == BLKmode
5677 && ((type) == 0
5678 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5679 || int_size_in_bytes (type) >
5680 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5683 /* Create the built-in types and functions */
5685 enum spu_function_code
5687 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5688 #include "spu-builtins.def"
5689 #undef DEF_BUILTIN
5690 NUM_SPU_BUILTINS
5693 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5695 struct spu_builtin_description spu_builtins[] = {
5696 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5697 {fcode, icode, name, type, params},
5698 #include "spu-builtins.def"
5699 #undef DEF_BUILTIN
5702 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5704 /* Returns the spu builtin decl for CODE. */
5706 static tree
5707 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5709 if (code >= NUM_SPU_BUILTINS)
5710 return error_mark_node;
5712 return spu_builtin_decls[code];
5716 static void
5717 spu_init_builtins (void)
5719 struct spu_builtin_description *d;
5720 unsigned int i;
5722 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5723 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5724 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5725 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5726 V4SF_type_node = build_vector_type (float_type_node, 4);
5727 V2DF_type_node = build_vector_type (double_type_node, 2);
5729 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5730 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5731 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5732 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5734 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5736 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5737 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5738 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5739 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5740 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5741 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5742 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5744 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5750 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5751 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5753 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5754 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5755 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5756 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5758 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5759 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5761 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5763 spu_builtin_types[SPU_BTI_PTR] =
5764 build_pointer_type (build_qualified_type
5765 (void_type_node,
5766 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5768 /* For each builtin we build a new prototype. The tree code will make
5769 sure nodes are shared. */
5770 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5772 tree p;
5773 char name[64]; /* build_function will make a copy. */
5774 int parm;
5776 if (d->name == 0)
5777 continue;
5779 /* Find last parm. */
5780 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5783 p = void_list_node;
5784 while (parm > 1)
5785 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5787 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5789 sprintf (name, "__builtin_%s", d->name);
5790 spu_builtin_decls[i] =
5791 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5792 if (d->fcode == SPU_MASK_FOR_LOAD)
5793 TREE_READONLY (spu_builtin_decls[i]) = 1;
5795 /* These builtins don't throw. */
5796 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5800 void
5801 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5803 static unsigned char arr[16] =
5804 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5806 rtx temp = gen_reg_rtx (Pmode);
5807 rtx temp2 = gen_reg_rtx (V4SImode);
5808 rtx temp3 = gen_reg_rtx (V4SImode);
5809 rtx pat = gen_reg_rtx (TImode);
5810 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5812 emit_move_insn (pat, array_to_constant (TImode, arr));
5814 /* Restore the sp. */
5815 emit_move_insn (temp, op1);
5816 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5818 /* Compute available stack size for sp. */
5819 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5820 emit_insn (gen_shufb (temp3, temp, temp, pat));
5822 emit_insn (gen_addv4si3 (sp, sp, temp3));
5823 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5827 spu_safe_dma (HOST_WIDE_INT channel)
5829 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5832 void
5833 spu_builtin_splats (rtx ops[])
5835 enum machine_mode mode = GET_MODE (ops[0]);
5836 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5838 unsigned char arr[16];
5839 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5840 emit_move_insn (ops[0], array_to_constant (mode, arr));
5842 else
5844 rtx reg = gen_reg_rtx (TImode);
5845 rtx shuf;
5846 if (GET_CODE (ops[1]) != REG
5847 && GET_CODE (ops[1]) != SUBREG)
5848 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5849 switch (mode)
5851 case V2DImode:
5852 case V2DFmode:
5853 shuf =
5854 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5855 TImode);
5856 break;
5857 case V4SImode:
5858 case V4SFmode:
5859 shuf =
5860 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5861 TImode);
5862 break;
5863 case V8HImode:
5864 shuf =
5865 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5866 TImode);
5867 break;
5868 case V16QImode:
5869 shuf =
5870 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5871 TImode);
5872 break;
5873 default:
5874 abort ();
5876 emit_move_insn (reg, shuf);
5877 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5881 void
5882 spu_builtin_extract (rtx ops[])
5884 enum machine_mode mode;
5885 rtx rot, from, tmp;
5887 mode = GET_MODE (ops[1]);
5889 if (GET_CODE (ops[2]) == CONST_INT)
5891 switch (mode)
5893 case V16QImode:
5894 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5895 break;
5896 case V8HImode:
5897 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5898 break;
5899 case V4SFmode:
5900 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5901 break;
5902 case V4SImode:
5903 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5904 break;
5905 case V2DImode:
5906 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5907 break;
5908 case V2DFmode:
5909 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5910 break;
5911 default:
5912 abort ();
5914 return;
5917 from = spu_gen_subreg (TImode, ops[1]);
5918 rot = gen_reg_rtx (TImode);
5919 tmp = gen_reg_rtx (SImode);
5921 switch (mode)
5923 case V16QImode:
5924 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5925 break;
5926 case V8HImode:
5927 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5928 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5929 break;
5930 case V4SFmode:
5931 case V4SImode:
5932 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5933 break;
5934 case V2DImode:
5935 case V2DFmode:
5936 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5937 break;
5938 default:
5939 abort ();
5941 emit_insn (gen_rotqby_ti (rot, from, tmp));
5943 emit_insn (gen_spu_convert (ops[0], rot));
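/* Worked example for the variable-index path above (added, not from the
   original source): for V4SImode, tmp = idx << 2 is the byte offset of
   element idx, and rotating the quadword left by that many bytes moves the
   element into the preferred slot (bytes 0-3); for V8HImode the extra -2
   accounts for the HImode preferred slot being bytes 2-3, and for
   V16QImode the -3 targets byte 3.  */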
5946 void
5947 spu_builtin_insert (rtx ops[])
5949 enum machine_mode mode = GET_MODE (ops[0]);
5950 enum machine_mode imode = GET_MODE_INNER (mode);
5951 rtx mask = gen_reg_rtx (TImode);
5952 rtx offset;
5954 if (GET_CODE (ops[3]) == CONST_INT)
5955 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5956 else
5958 offset = gen_reg_rtx (SImode);
5959 emit_insn (gen_mulsi3
5960 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5962 emit_insn (gen_cpat
5963 (mask, stack_pointer_rtx, offset,
5964 GEN_INT (GET_MODE_SIZE (imode))));
5965 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
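/* Editor's note (illustrative, not in the original source): a hedged sketch
   of the mask trick above.  gen_cpat is assumed to expand to the
   cbd/chd/cwd/cdd family (or an equivalent computed pattern), generating a
   shuffle control for inserting an element of the given size at the
   naturally aligned slot containing byte (base + offset) & 15.  The stack
   pointer is always 16-byte aligned on the SPU, so with stack_pointer_rtx
   as the base that is simply offset = index * element size.  The control
   selects the preferred slot of ops[1] (the new scalar, shufb's first
   input) at the target element and the corresponding bytes of ops[2] (the
   old vector) everywhere else, so the shufb performs the insertion.  */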
5968 void
5969 spu_builtin_promote (rtx ops[])
5971 enum machine_mode mode, imode;
5972 rtx rot, from, offset;
5973 HOST_WIDE_INT pos;
5975 mode = GET_MODE (ops[0]);
5976 imode = GET_MODE_INNER (mode);
5978 from = gen_reg_rtx (TImode);
5979 rot = spu_gen_subreg (TImode, ops[0]);
5981 emit_insn (gen_spu_convert (from, ops[1]));
5983 if (GET_CODE (ops[2]) == CONST_INT)
5985 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5986 if (GET_MODE_SIZE (imode) < 4)
5987 pos += 4 - GET_MODE_SIZE (imode);
5988 offset = GEN_INT (pos & 15);
5990 else
5992 offset = gen_reg_rtx (SImode);
5993 switch (mode)
5995 case V16QImode:
5996 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5997 break;
5998 case V8HImode:
5999 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6000 emit_insn (gen_addsi3 (offset, offset, offset));
6001 break;
6002 case V4SFmode:
6003 case V4SImode:
6004 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6005 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6006 break;
6007 case V2DImode:
6008 case V2DFmode:
6009 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6010 break;
6011 default:
6012 abort ();
6015 emit_insn (gen_rotqby_ti (rot, from, offset));
6018 static void
6019 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
6021 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
6022 rtx shuf = gen_reg_rtx (V4SImode);
6023 rtx insn = gen_reg_rtx (V4SImode);
6024 rtx shufc;
6025 rtx insnc;
6026 rtx mem;
6028 fnaddr = force_reg (SImode, fnaddr);
6029 cxt = force_reg (SImode, cxt);
6031 if (TARGET_LARGE_MEM)
6033 rtx rotl = gen_reg_rtx (V4SImode);
6034 rtx mask = gen_reg_rtx (V4SImode);
6035 rtx bi = gen_reg_rtx (SImode);
6036 static unsigned char const shufa[16] = {
6037 2, 3, 0, 1, 18, 19, 16, 17,
6038 0, 1, 2, 3, 16, 17, 18, 19
6040 static unsigned char const insna[16] = {
6041 0x41, 0, 0, 79,
6042 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6043 0x60, 0x80, 0, 79,
6044 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6047 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6048 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6050 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6051 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6052 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6053 emit_insn (gen_selb (insn, insnc, rotl, mask));
6055 mem = adjust_address (m_tramp, V4SImode, 0);
6056 emit_move_insn (mem, insn);
6058 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6059 mem = adjust_address (m_tramp, Pmode, 16);
6060 emit_move_insn (mem, bi);
6062 else
6064 rtx scxt = gen_reg_rtx (SImode);
6065 rtx sfnaddr = gen_reg_rtx (SImode);
6066 static unsigned char const insna[16] = {
6067 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6068 0x30, 0, 0, 0,
6069 0, 0, 0, 0,
6070 0, 0, 0, 0
6073 shufc = gen_reg_rtx (TImode);
6074 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6076 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6077 fits 18 bits and the last 4 are zeros. This will be true if
6078 the stack pointer is initialized to 0x3fff0 at program start,
6079 otherwise the ila instruction will be garbage. */
6081 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6082 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6083 emit_insn (gen_cpat
6084 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6085 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6086 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6088 mem = adjust_address (m_tramp, V4SImode, 0);
6089 emit_move_insn (mem, insn);
6091 emit_insn (gen_sync ());
6094 void
6095 spu_expand_sign_extend (rtx ops[])
6097 unsigned char arr[16];
6098 rtx pat = gen_reg_rtx (TImode);
6099 rtx sign, c;
6100 int i, last;
6101 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6102 if (GET_MODE (ops[1]) == QImode)
6104 sign = gen_reg_rtx (HImode);
6105 emit_insn (gen_extendqihi2 (sign, ops[1]));
6106 for (i = 0; i < 16; i++)
6107 arr[i] = 0x12;
6108 arr[last] = 0x13;
6110 else
6112 for (i = 0; i < 16; i++)
6113 arr[i] = 0x10;
6114 switch (GET_MODE (ops[1]))
6116 case HImode:
6117 sign = gen_reg_rtx (SImode);
6118 emit_insn (gen_extendhisi2 (sign, ops[1]));
6119 arr[last] = 0x03;
6120 arr[last - 1] = 0x02;
6121 break;
6122 case SImode:
6123 sign = gen_reg_rtx (SImode);
6124 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6125 for (i = 0; i < 4; i++)
6126 arr[last - i] = 3 - i;
6127 break;
6128 case DImode:
6129 sign = gen_reg_rtx (SImode);
6130 c = gen_reg_rtx (SImode);
6131 emit_insn (gen_spu_convert (c, ops[1]));
6132 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6133 for (i = 0; i < 8; i++)
6134 arr[last - i] = 7 - i;
6135 break;
6136 default:
6137 abort ();
6140 emit_move_insn (pat, array_to_constant (TImode, arr));
6141 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
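/* Editor's note (illustrative, not in the original source): how the control
   array above works, assuming the usual shufb semantics (0x00-0x0f picks
   bytes of the first input, here ops[1]; 0x10-0x1f picks bytes of the
   second input, here the SIGN register).  For example, extending SImode to
   DImode: LAST is 7, arr[4..7] = 00 01 02 03 keep the original word in the
   low half of the result, and every other byte is 0x10, i.e. byte 0 of
   SIGN, which ashrsi3 has set to all zeros or all ones.  The QImode and
   HImode cases use 0x12/0x13 and 0x02/0x03 because those narrower values
   live in bytes 2-3 of the preferred slot.  */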
6144 /* Expand vector initialization. If there are any constant parts,
6145 load the constant parts first, then load any non-constant parts. */
6146 void
6147 spu_expand_vector_init (rtx target, rtx vals)
6149 enum machine_mode mode = GET_MODE (target);
6150 int n_elts = GET_MODE_NUNITS (mode);
6151 int n_var = 0;
6152 bool all_same = true;
6153 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6154 int i;
6156 first = XVECEXP (vals, 0, 0);
6157 for (i = 0; i < n_elts; ++i)
6159 x = XVECEXP (vals, 0, i);
6160 if (!(CONST_INT_P (x)
6161 || GET_CODE (x) == CONST_DOUBLE
6162 || GET_CODE (x) == CONST_FIXED))
6163 ++n_var;
6164 else
6166 if (first_constant == NULL_RTX)
6167 first_constant = x;
6169 if (i > 0 && !rtx_equal_p (x, first))
6170 all_same = false;
6173 /* if all elements are the same, use splats to repeat elements */
6174 if (all_same)
6176 if (!CONSTANT_P (first)
6177 && !register_operand (first, GET_MODE (x)))
6178 first = force_reg (GET_MODE (first), first);
6179 emit_insn (gen_spu_splats (target, first));
6180 return;
6183 /* load constant parts */
6184 if (n_var != n_elts)
6186 if (n_var == 0)
6188 emit_move_insn (target,
6189 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6191 else
6193 rtx constant_parts_rtx = copy_rtx (vals);
6195 gcc_assert (first_constant != NULL_RTX);
6196 /* Fill empty slots with the first constant; this increases
6197 our chance of using splats in the recursive call below. */
6198 for (i = 0; i < n_elts; ++i)
6200 x = XVECEXP (constant_parts_rtx, 0, i);
6201 if (!(CONST_INT_P (x)
6202 || GET_CODE (x) == CONST_DOUBLE
6203 || GET_CODE (x) == CONST_FIXED))
6204 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6207 spu_expand_vector_init (target, constant_parts_rtx);
6211 /* load variable parts */
6212 if (n_var != 0)
6214 rtx insert_operands[4];
6216 insert_operands[0] = target;
6217 insert_operands[2] = target;
6218 for (i = 0; i < n_elts; ++i)
6220 x = XVECEXP (vals, 0, i);
6221 if (!(CONST_INT_P (x)
6222 || GET_CODE (x) == CONST_DOUBLE
6223 || GET_CODE (x) == CONST_FIXED))
6225 if (!register_operand (x, GET_MODE (x)))
6226 x = force_reg (GET_MODE (x), x);
6227 insert_operands[1] = x;
6228 insert_operands[3] = GEN_INT (i);
6229 spu_builtin_insert (insert_operands);
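/* Editor's note (illustrative, not in the original source): a walk-through
   of the routine above for a V4SImode initializer { 1, x, 3, y } where x
   and y are registers.  n_var is 2, so the constant pass builds
   { 1, 1, 3, 1 } (empty slots take first_constant) and loads it with a
   single move in the recursive call; the variable pass then uses
   spu_builtin_insert to insert x at index 1 and y at index 3.  */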
6235 /* Return the insn code of the vector compare instruction for the given CODE,
6236 DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6238 static int
6239 get_vec_cmp_insn (enum rtx_code code,
6240 enum machine_mode dest_mode,
6241 enum machine_mode op_mode)
6244 switch (code)
6246 case EQ:
6247 if (dest_mode == V16QImode && op_mode == V16QImode)
6248 return CODE_FOR_ceq_v16qi;
6249 if (dest_mode == V8HImode && op_mode == V8HImode)
6250 return CODE_FOR_ceq_v8hi;
6251 if (dest_mode == V4SImode && op_mode == V4SImode)
6252 return CODE_FOR_ceq_v4si;
6253 if (dest_mode == V4SImode && op_mode == V4SFmode)
6254 return CODE_FOR_ceq_v4sf;
6255 if (dest_mode == V2DImode && op_mode == V2DFmode)
6256 return CODE_FOR_ceq_v2df;
6257 break;
6258 case GT:
6259 if (dest_mode == V16QImode && op_mode == V16QImode)
6260 return CODE_FOR_cgt_v16qi;
6261 if (dest_mode == V8HImode && op_mode == V8HImode)
6262 return CODE_FOR_cgt_v8hi;
6263 if (dest_mode == V4SImode && op_mode == V4SImode)
6264 return CODE_FOR_cgt_v4si;
6265 if (dest_mode == V4SImode && op_mode == V4SFmode)
6266 return CODE_FOR_cgt_v4sf;
6267 if (dest_mode == V2DImode && op_mode == V2DFmode)
6268 return CODE_FOR_cgt_v2df;
6269 break;
6270 case GTU:
6271 if (dest_mode == V16QImode && op_mode == V16QImode)
6272 return CODE_FOR_clgt_v16qi;
6273 if (dest_mode == V8HImode && op_mode == V8HImode)
6274 return CODE_FOR_clgt_v8hi;
6275 if (dest_mode == V4SImode && op_mode == V4SImode)
6276 return CODE_FOR_clgt_v4si;
6277 break;
6278 default:
6279 break;
6281 return -1;
6284 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6285 DMODE is the expected destination mode. This is a recursive function. */
6287 static rtx
6288 spu_emit_vector_compare (enum rtx_code rcode,
6289 rtx op0, rtx op1,
6290 enum machine_mode dmode)
6292 int vec_cmp_insn;
6293 rtx mask;
6294 enum machine_mode dest_mode;
6295 enum machine_mode op_mode = GET_MODE (op1);
6297 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6299 /* Single-precision floating point vector compare instructions use destination
6300 mode V4SImode; double-precision ones use destination mode V2DImode.
6301 Move the result to the appropriate mode later. */
6302 if (dmode == V4SFmode)
6303 dest_mode = V4SImode;
6304 else if (dmode == V2DFmode)
6305 dest_mode = V2DImode;
6306 else
6307 dest_mode = dmode;
6309 mask = gen_reg_rtx (dest_mode);
6310 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6312 if (vec_cmp_insn == -1)
6314 bool swap_operands = false;
6315 bool try_again = false;
6316 switch (rcode)
6318 case LT:
6319 rcode = GT;
6320 swap_operands = true;
6321 try_again = true;
6322 break;
6323 case LTU:
6324 rcode = GTU;
6325 swap_operands = true;
6326 try_again = true;
6327 break;
6328 case NE:
6329 /* Treat A != B as ~(A==B). */
6331 enum insn_code nor_code;
6332 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6333 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6334 gcc_assert (nor_code != CODE_FOR_nothing);
6335 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6336 if (dmode != dest_mode)
6338 rtx temp = gen_reg_rtx (dest_mode);
6339 convert_move (temp, mask, 0);
6340 return temp;
6342 return mask;
6344 break;
6345 case GE:
6346 case GEU:
6347 case LE:
6348 case LEU:
6349 /* Try GT/GTU/LT/LTU OR EQ */
6351 rtx c_rtx, eq_rtx;
6352 enum insn_code ior_code;
6353 enum rtx_code new_code;
6355 switch (rcode)
6357 case GE: new_code = GT; break;
6358 case GEU: new_code = GTU; break;
6359 case LE: new_code = LT; break;
6360 case LEU: new_code = LTU; break;
6361 default:
6362 gcc_unreachable ();
6365 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6366 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6368 ior_code = optab_handler (ior_optab, dest_mode);
6369 gcc_assert (ior_code != CODE_FOR_nothing);
6370 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6371 if (dmode != dest_mode)
6373 rtx temp = gen_reg_rtx (dest_mode);
6374 convert_move (temp, mask, 0);
6375 return temp;
6377 return mask;
6379 break;
6380 default:
6381 gcc_unreachable ();
6384 /* You only get two chances. */
6385 if (try_again)
6386 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6388 gcc_assert (vec_cmp_insn != -1);
6390 if (swap_operands)
6392 rtx tmp;
6393 tmp = op0;
6394 op0 = op1;
6395 op1 = tmp;
6399 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6400 if (dmode != dest_mode)
6402 rtx temp = gen_reg_rtx (dest_mode);
6403 convert_move (temp, mask, 0);
6404 return temp;
6406 return mask;
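/* Editor's note (illustrative, not in the original source): an example of
   the fallback logic above.  For LEU on V4SImode there is no direct insn,
   so the code rewrites a <= b as (a < b) | (a == b); the recursive LTU
   compare in turn becomes a GTU with swapped operands.  The final mask is
   therefore (b GTU a) | (a EQ b), computed with the clgt and ceq patterns
   and a vector or.  */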
6410 /* Emit a vector conditional expression.
6411 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
6412 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6414 int
6415 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6416 rtx cond, rtx cc_op0, rtx cc_op1)
6418 enum machine_mode dest_mode = GET_MODE (dest);
6419 enum rtx_code rcode = GET_CODE (cond);
6420 rtx mask;
6422 /* Get the vector mask for the given relational operations. */
6423 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6425 emit_insn (gen_selb (dest, op2, op1, mask));
6427 return 1;
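/* Editor's note (illustrative, not in the original source): on the operand
   order of the selb above.  Assuming selb rt,ra,rb,rc selects bits of rb
   where rc is 1 and bits of ra where rc is 0, and given that the compare
   mask is all-ones exactly where COND holds, gen_selb (dest, op2, op1,
   mask) yields op1 in the lanes where the condition is true and op2
   elsewhere, which matches VEC_COND_EXPR semantics.  */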
6430 static rtx
6431 spu_force_reg (enum machine_mode mode, rtx op)
6433 rtx x, r;
6434 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6436 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6437 || GET_MODE (op) == BLKmode)
6438 return force_reg (mode, convert_to_mode (mode, op, 0));
6439 abort ();
6442 r = force_reg (GET_MODE (op), op);
6443 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6445 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6446 if (x)
6447 return x;
6450 x = gen_reg_rtx (mode);
6451 emit_insn (gen_spu_convert (x, r));
6452 return x;
6455 static void
6456 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6458 HOST_WIDE_INT v = 0;
6459 int lsbits;
6460 /* Check the range of immediate operands. */
6461 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6463 int range = p - SPU_BTI_7;
6465 if (!CONSTANT_P (op))
6466 error ("%s expects an integer literal in the range [%d, %d]",
6467 d->name,
6468 spu_builtin_range[range].low, spu_builtin_range[range].high);
6470 if (GET_CODE (op) == CONST
6471 && (GET_CODE (XEXP (op, 0)) == PLUS
6472 || GET_CODE (XEXP (op, 0)) == MINUS))
6474 v = INTVAL (XEXP (XEXP (op, 0), 1));
6475 op = XEXP (XEXP (op, 0), 0);
6477 else if (GET_CODE (op) == CONST_INT)
6478 v = INTVAL (op);
6479 else if (GET_CODE (op) == CONST_VECTOR
6480 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6481 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6483 /* The default for v is 0 which is valid in every range. */
6484 if (v < spu_builtin_range[range].low
6485 || v > spu_builtin_range[range].high)
6486 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6487 d->name,
6488 spu_builtin_range[range].low, spu_builtin_range[range].high,
6489 v);
6491 switch (p)
6493 case SPU_BTI_S10_4:
6494 lsbits = 4;
6495 break;
6496 case SPU_BTI_U16_2:
6497 /* This is only used in lqa and stqa. Even though the insns
6498 encode 16 bits of the address (all but the 2 least
6499 significant), only 14 bits are used because the address is
6500 masked to be 16-byte aligned. */
6501 lsbits = 4;
6502 break;
6503 case SPU_BTI_S16_2:
6504 /* This is used for lqr and stqr. */
6505 lsbits = 2;
6506 break;
6507 default:
6508 lsbits = 0;
6511 if (GET_CODE (op) == LABEL_REF
6512 || (GET_CODE (op) == SYMBOL_REF
6513 && SYMBOL_REF_FUNCTION_P (op))
6514 || (v & ((1 << lsbits) - 1)) != 0)
6515 warning (0, "%d least significant bits of %s are ignored", lsbits,
6516 d->name);
6521 static int
6522 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6523 rtx target, rtx ops[])
6525 enum insn_code icode = (enum insn_code) d->icode;
6526 int i = 0, a;
6528 /* Expand the arguments into rtl. */
6530 if (d->parm[0] != SPU_BTI_VOID)
6531 ops[i++] = target;
6533 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6535 tree arg = CALL_EXPR_ARG (exp, a);
6536 if (arg == 0)
6537 abort ();
6538 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6541 gcc_assert (i == insn_data[icode].n_generator_args);
6542 return i;
6545 static rtx
6546 spu_expand_builtin_1 (struct spu_builtin_description *d,
6547 tree exp, rtx target)
6549 rtx pat;
6550 rtx ops[8];
6551 enum insn_code icode = (enum insn_code) d->icode;
6552 enum machine_mode mode, tmode;
6553 int i, p;
6554 int n_operands;
6555 tree return_type;
6557 /* Set up ops[] with values from arglist. */
6558 n_operands = expand_builtin_args (d, exp, target, ops);
6560 /* Handle the target operand which must be operand 0. */
6561 i = 0;
6562 if (d->parm[0] != SPU_BTI_VOID)
6565 /* We prefer the mode specified for the match_operand; otherwise
6566 fall back to the mode from the builtin function prototype. */
6567 tmode = insn_data[d->icode].operand[0].mode;
6568 if (tmode == VOIDmode)
6569 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6571 /* Try to use TARGET, because not using it can lead to extra copies,
6572 and when all of the registers are in use those extra copies lead
6573 to extra spills. */
6574 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6575 ops[0] = target;
6576 else
6577 target = ops[0] = gen_reg_rtx (tmode);
6579 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6580 abort ();
6582 i++;
6585 if (d->fcode == SPU_MASK_FOR_LOAD)
6587 enum machine_mode mode = insn_data[icode].operand[1].mode;
6588 tree arg;
6589 rtx addr, op, pat;
6591 /* get addr */
6592 arg = CALL_EXPR_ARG (exp, 0);
6593 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6594 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6595 addr = memory_address (mode, op);
6597 /* negate addr */
6598 op = gen_reg_rtx (GET_MODE (addr));
6599 emit_insn (gen_rtx_SET (VOIDmode, op,
6600 gen_rtx_NEG (GET_MODE (addr), addr)));
6601 op = gen_rtx_MEM (mode, op);
6603 pat = GEN_FCN (icode) (target, op);
6604 if (!pat)
6605 return 0;
6606 emit_insn (pat);
6607 return target;
6610 /* Ignore align_hint, but still expand its args in case they have
6611 side effects. */
6612 if (icode == CODE_FOR_spu_align_hint)
6613 return 0;
6615 /* Handle the rest of the operands. */
6616 for (p = 1; i < n_operands; i++, p++)
6618 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6619 mode = insn_data[d->icode].operand[i].mode;
6620 else
6621 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6623 /* mode can be VOIDmode here for labels */
6625 /* For specific intrinsics with an immediate operand, e.g.,
6626 si_ai(), we sometimes need to convert the scalar argument to a
6627 vector argument by splatting the scalar. */
6628 if (VECTOR_MODE_P (mode)
6629 && (GET_CODE (ops[i]) == CONST_INT
6630 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6631 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6633 if (GET_CODE (ops[i]) == CONST_INT)
6634 ops[i] = spu_const (mode, INTVAL (ops[i]));
6635 else
6637 rtx reg = gen_reg_rtx (mode);
6638 enum machine_mode imode = GET_MODE_INNER (mode);
6639 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6640 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6641 if (imode != GET_MODE (ops[i]))
6642 ops[i] = convert_to_mode (imode, ops[i],
6643 TYPE_UNSIGNED (spu_builtin_types
6644 [d->parm[i]]));
6645 emit_insn (gen_spu_splats (reg, ops[i]));
6646 ops[i] = reg;
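/* Editor's note (illustrative, not in the original source): a sketch of the
   conversion above.  If a builtin's match_operand wants, say, a V4SImode
   operand but the call supplies the scalar constant 10, spu_const builds
   the splatted constant { 10, 10, 10, 10 }; a scalar register argument is
   instead converted to the vector's inner mode if necessary and broadcast
   with gen_spu_splats.  */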
6650 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6652 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6653 ops[i] = spu_force_reg (mode, ops[i]);
6656 switch (n_operands)
6658 case 0:
6659 pat = GEN_FCN (icode) (0);
6660 break;
6661 case 1:
6662 pat = GEN_FCN (icode) (ops[0]);
6663 break;
6664 case 2:
6665 pat = GEN_FCN (icode) (ops[0], ops[1]);
6666 break;
6667 case 3:
6668 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6669 break;
6670 case 4:
6671 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6672 break;
6673 case 5:
6674 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6675 break;
6676 case 6:
6677 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6678 break;
6679 default:
6680 abort ();
6683 if (!pat)
6684 abort ();
6686 if (d->type == B_CALL || d->type == B_BISLED)
6687 emit_call_insn (pat);
6688 else if (d->type == B_JUMP)
6690 emit_jump_insn (pat);
6691 emit_barrier ();
6693 else
6694 emit_insn (pat);
6696 return_type = spu_builtin_types[d->parm[0]];
6697 if (d->parm[0] != SPU_BTI_VOID
6698 && GET_MODE (target) != TYPE_MODE (return_type))
6700 /* TARGET is the return value. It should always have the mode of
6701 the builtin function prototype. */
6702 target = spu_force_reg (TYPE_MODE (return_type), target);
6705 return target;
6708 rtx
6709 spu_expand_builtin (tree exp,
6710 rtx target,
6711 rtx subtarget ATTRIBUTE_UNUSED,
6712 enum machine_mode mode ATTRIBUTE_UNUSED,
6713 int ignore ATTRIBUTE_UNUSED)
6715 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6716 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6717 struct spu_builtin_description *d;
6719 if (fcode < NUM_SPU_BUILTINS)
6721 d = &spu_builtins[fcode];
6723 return spu_expand_builtin_1 (d, exp, target);
6725 abort ();
6728 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6729 static tree
6730 spu_builtin_mul_widen_even (tree type)
6732 switch (TYPE_MODE (type))
6734 case V8HImode:
6735 if (TYPE_UNSIGNED (type))
6736 return spu_builtin_decls[SPU_MULE_0];
6737 else
6738 return spu_builtin_decls[SPU_MULE_1];
6739 break;
6740 default:
6741 return NULL_TREE;
6745 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6746 static tree
6747 spu_builtin_mul_widen_odd (tree type)
6749 switch (TYPE_MODE (type))
6751 case V8HImode:
6752 if (TYPE_UNSIGNED (type))
6753 return spu_builtin_decls[SPU_MULO_1];
6754 else
6755 return spu_builtin_decls[SPU_MULO_0];
6756 break;
6757 default:
6758 return NULL_TREE;
6762 /* Implement targetm.vectorize.builtin_mask_for_load. */
6763 static tree
6764 spu_builtin_mask_for_load (void)
6766 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6769 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6770 static int
6771 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6772 tree vectype ATTRIBUTE_UNUSED,
6773 int misalign ATTRIBUTE_UNUSED)
6775 switch (type_of_cost)
6777 case scalar_stmt:
6778 case vector_stmt:
6779 case vector_load:
6780 case vector_store:
6781 case vec_to_scalar:
6782 case scalar_to_vec:
6783 case cond_branch_not_taken:
6784 case vec_perm:
6785 return 1;
6787 case scalar_store:
6788 return 10;
6790 case scalar_load:
6791 /* Load + rotate. */
6792 return 2;
6794 case unaligned_load:
6795 return 2;
6797 case cond_branch_taken:
6798 return 6;
6800 default:
6801 gcc_unreachable ();
6805 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6806 after applying N iterations. This routine does not determine how many
6807 iterations are required to reach the desired alignment. */
6809 static bool
6810 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6812 if (is_packed)
6813 return false;
6815 /* All other types are naturally aligned. */
6816 return true;
6819 /* Implement targetm.vectorize.builtin_vec_perm. */
6820 tree
6821 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6823 *mask_element_type = unsigned_char_type_node;
6825 switch (TYPE_MODE (type))
6827 case V16QImode:
6828 if (TYPE_UNSIGNED (type))
6829 return spu_builtin_decls[SPU_SHUFFLE_0];
6830 else
6831 return spu_builtin_decls[SPU_SHUFFLE_1];
6833 case V8HImode:
6834 if (TYPE_UNSIGNED (type))
6835 return spu_builtin_decls[SPU_SHUFFLE_2];
6836 else
6837 return spu_builtin_decls[SPU_SHUFFLE_3];
6839 case V4SImode:
6840 if (TYPE_UNSIGNED (type))
6841 return spu_builtin_decls[SPU_SHUFFLE_4];
6842 else
6843 return spu_builtin_decls[SPU_SHUFFLE_5];
6845 case V2DImode:
6846 if (TYPE_UNSIGNED (type))
6847 return spu_builtin_decls[SPU_SHUFFLE_6];
6848 else
6849 return spu_builtin_decls[SPU_SHUFFLE_7];
6851 case V4SFmode:
6852 return spu_builtin_decls[SPU_SHUFFLE_8];
6854 case V2DFmode:
6855 return spu_builtin_decls[SPU_SHUFFLE_9];
6857 default:
6858 return NULL_TREE;
6862 /* Return the appropriate mode for a named address pointer. */
6863 static enum machine_mode
6864 spu_addr_space_pointer_mode (addr_space_t addrspace)
6866 switch (addrspace)
6868 case ADDR_SPACE_GENERIC:
6869 return ptr_mode;
6870 case ADDR_SPACE_EA:
6871 return EAmode;
6872 default:
6873 gcc_unreachable ();
6877 /* Return the appropriate mode for an address in a named address space. */
6878 static enum machine_mode
6879 spu_addr_space_address_mode (addr_space_t addrspace)
6881 switch (addrspace)
6883 case ADDR_SPACE_GENERIC:
6884 return Pmode;
6885 case ADDR_SPACE_EA:
6886 return EAmode;
6887 default:
6888 gcc_unreachable ();
6892 /* Determine if one named address space is a subset of another. */
6894 static bool
6895 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6897 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6898 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6900 if (subset == superset)
6901 return true;
6903 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6904 being subsets but instead as disjoint address spaces. */
6905 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6906 return false;
6908 else
6909 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6912 /* Convert from one address space to another. */
6913 static rtx
6914 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6916 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6917 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6919 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6920 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6922 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6924 rtx result, ls;
6926 ls = gen_const_mem (DImode,
6927 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6928 set_mem_align (ls, 128);
6930 result = gen_reg_rtx (Pmode);
6931 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6932 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6933 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6934 ls, const0_rtx, Pmode, 1);
6936 emit_insn (gen_subsi3 (result, op, ls));
6938 return result;
6941 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6943 rtx result, ls;
6945 ls = gen_const_mem (DImode,
6946 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6947 set_mem_align (ls, 128);
6949 result = gen_reg_rtx (EAmode);
6950 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6951 op = force_reg (Pmode, op);
6952 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6953 ls, const0_rtx, EAmode, 1);
6954 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6956 if (EAmode == SImode)
6957 emit_insn (gen_addsi3 (result, op, ls));
6958 else
6959 emit_insn (gen_adddi3 (result, op, ls));
6961 return result;
6964 else
6965 gcc_unreachable ();
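/* Editor's note (illustrative, not in the original source): a pseudo-C
   sketch of the two conversions above, where __ea_local_store is assumed to
   hold the effective address of this SPU's local store and null pointers
   are preserved by the conditional move:

     __ea -> generic:  ls = (ea == 0) ? 0 : ea - __ea_local_store;
     generic -> __ea:  ea = (ls == 0) ? 0 : ls + __ea_local_store;
*/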
6969 /* Count the total number of instructions in each pipe and return the
6970 maximum, which is used as the Minimum Iteration Interval (MII)
6971 in the modulo scheduler. get_pipe() returns -2, -1, 0, or 1;
6972 -2 marks instructions that can go in either pipe0 or pipe1. */
6973 static int
6974 spu_sms_res_mii (struct ddg *g)
6976 int i;
6977 unsigned t[4] = {0, 0, 0, 0};
6979 for (i = 0; i < g->num_nodes; i++)
6981 rtx insn = g->nodes[i].insn;
6982 int p = get_pipe (insn) + 2;
6984 gcc_assert (p >= 0);
6985 gcc_assert (p < 4);
6987 t[p]++;
6988 if (dump_file && INSN_P (insn))
6989 fprintf (dump_file, "i%d %s %d %d\n",
6990 INSN_UID (insn),
6991 insn_data[INSN_CODE(insn)].name,
6992 p, t[p]);
6994 if (dump_file)
6995 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6997 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
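/* Editor's note (illustrative, not in the original source): a worked example
   of the MII formula above.  t[0] counts insns that can go in either pipe
   (get_pipe() == -2), t[2] and t[3] count pipe0-only and pipe1-only insns,
   and t[1] (get_pipe() == -1) does not enter the formula.  With
   t = { 2, 0, 3, 1 } the result is
   MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */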
7001 void
7002 spu_init_expanders (void)
7004 if (cfun)
7006 rtx r0, r1;
7007 /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit
7008 aligned when frame_pointer_needed is true. We don't know that until
7009 we're expanding the prologue. */
7010 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7012 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7013 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7014 to be treated as aligned, so generate them here. */
7015 r0 = gen_reg_rtx (SImode);
7016 r1 = gen_reg_rtx (SImode);
7017 mark_reg_pointer (r0, 128);
7018 mark_reg_pointer (r1, 128);
7019 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7020 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7024 static enum machine_mode
7025 spu_libgcc_cmp_return_mode (void)
7028 /* For the SPU, word mode is TImode, so it is better to use SImode
7029 for compare returns. */
7030 return SImode;
7033 static enum machine_mode
7034 spu_libgcc_shift_count_mode (void)
7036 /* For the SPU, word mode is TImode, so it is better to use SImode
7037 for shift counts. */
7038 return SImode;
7041 /* Implement targetm.section_type_flags. */
7042 static unsigned int
7043 spu_section_type_flags (tree decl, const char *name, int reloc)
7045 /* .toe needs to have type @nobits. */
7046 if (strcmp (name, ".toe") == 0)
7047 return SECTION_BSS;
7048 /* Don't load _ea into the current address space. */
7049 if (strcmp (name, "._ea") == 0)
7050 return SECTION_WRITE | SECTION_DEBUG;
7051 return default_section_type_flags (decl, name, reloc);
7054 /* Implement targetm.select_section. */
7055 static section *
7056 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7058 /* Variables and constants defined in the __ea address space
7059 go into a special section named "._ea". */
7060 if (TREE_TYPE (decl) != error_mark_node
7061 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7063 /* We might get called with string constants, but get_named_section
7064 doesn't like them as they are not DECLs. Also, we need to set
7065 flags in that case. */
7066 if (!DECL_P (decl))
7067 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7069 return get_named_section (decl, "._ea", reloc);
7072 return default_elf_select_section (decl, reloc, align);
7075 /* Implement targetm.unique_section. */
7076 static void
7077 spu_unique_section (tree decl, int reloc)
7079 /* We don't support unique section names in the __ea address
7080 space for now. */
7081 if (TREE_TYPE (decl) != error_mark_node
7082 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7083 return;
7085 default_unique_section (decl, reloc);
7088 /* Generate a constant or register which contains 2^SCALE. We assume
7089 the result is valid for MODE. Currently, MODE must be V4SFmode and
7090 SCALE must be SImode or a constant integer. */
7091 rtx
7092 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7094 gcc_assert (mode == V4SFmode);
7095 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7096 if (GET_CODE (scale) != CONST_INT)
7098 /* unsigned int exp = (127 + scale) << 23;
7099 __vector float m = (__vector float) spu_splats (exp); */
7100 rtx reg = force_reg (SImode, scale);
7101 rtx exp = gen_reg_rtx (SImode);
7102 rtx mul = gen_reg_rtx (mode);
7103 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7104 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7105 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7106 return mul;
7108 else
7110 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7111 unsigned char arr[16];
7112 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7113 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7114 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7115 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7116 return array_to_constant (mode, arr);
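/* Editor's note (illustrative, not in the original source): a worked example
   of the constant path above.  For SCALE = 3, exp = 127 + 3 = 130 = 0x82,
   so each word of the constant gets 0x82 >> 1 = 0x41 in byte 0 and
   (0x82 << 7) & 0xff = 0x00 in byte 1, i.e. the word 0x41000000, which is
   the single-precision encoding of 8.0f = 2^3.  */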
7120 /* After reload, just change the convert into a move instruction
7121 or a dead instruction. */
7122 void
7123 spu_split_convert (rtx ops[])
7125 if (REGNO (ops[0]) == REGNO (ops[1]))
7126 emit_note (NOTE_INSN_DELETED);
7127 else
7129 /* Use TImode always as this might help hard reg copyprop. */
7130 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7131 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7132 emit_insn (gen_move_insn (op0, op1));
7136 void
7137 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7139 fprintf (file, "# profile\n");
7140 fprintf (file, "brsl $75, _mcount\n");
7143 /* Implement targetm.ref_may_alias_errno. */
7144 static bool
7145 spu_ref_may_alias_errno (ao_ref *ref)
7147 tree base = ao_ref_base (ref);
7149 /* With SPU newlib, errno is defined as something like
7150 _impure_data._errno
7151 The default implementation of this target macro does not
7152 recognize such expressions, so we special-case them here. */
7154 if (TREE_CODE (base) == VAR_DECL
7155 && !TREE_STATIC (base)
7156 && DECL_EXTERNAL (base)
7157 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7158 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7159 "_impure_data") == 0
7160 /* _errno is the first member of _impure_data. */
7161 && ref->offset == 0)
7162 return true;
7164 return default_ref_may_alias_errno (ref);
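/* Editor's note (illustrative, not in the original source): a hedged sketch
   of the newlib layout the check above assumes, roughly:

     struct _reent { int _errno; ... };
     extern struct _reent _impure_data;
     #define errno (_impure_data._errno)

   _errno being the first member of _impure_data is why the
   ref->offset == 0 test is sufficient.  */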
7167 /* Output thunk to FILE that implements a C++ virtual function call (with
7168 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7169 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7170 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7171 relative to the resulting this pointer. */
7173 static void
7174 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7175 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7176 tree function)
7178 rtx op[8];
7180 /* Make sure unwind info is emitted for the thunk if needed. */
7181 final_start_function (emit_barrier (), file, 1);
7183 /* Operand 0 is the target function. */
7184 op[0] = XEXP (DECL_RTL (function), 0);
7186 /* Operand 1 is the 'this' pointer. */
7187 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7188 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7189 else
7190 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7192 /* Operands 2/3 are the low/high halfwords of delta. */
7193 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7194 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7196 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7197 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7198 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7200 /* Operands 6/7 are temporary registers. */
7201 op[6] = gen_rtx_REG (Pmode, 79);
7202 op[7] = gen_rtx_REG (Pmode, 78);
7204 /* Add DELTA to this pointer. */
7205 if (delta)
7207 if (delta >= -0x200 && delta < 0x200)
7208 output_asm_insn ("ai\t%1,%1,%2", op);
7209 else if (delta >= -0x8000 && delta < 0x8000)
7211 output_asm_insn ("il\t%6,%2", op);
7212 output_asm_insn ("a\t%1,%1,%6", op);
7214 else
7216 output_asm_insn ("ilhu\t%6,%3", op);
7217 output_asm_insn ("iohl\t%6,%2", op);
7218 output_asm_insn ("a\t%1,%1,%6", op);
7222 /* Perform vcall adjustment. */
7223 if (vcall_offset)
7225 output_asm_insn ("lqd\t%7,0(%1)", op);
7226 output_asm_insn ("rotqby\t%7,%7,%1", op);
7228 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7229 output_asm_insn ("ai\t%7,%7,%4", op);
7230 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7232 output_asm_insn ("il\t%6,%4", op);
7233 output_asm_insn ("a\t%7,%7,%6", op);
7235 else
7237 output_asm_insn ("ilhu\t%6,%5", op);
7238 output_asm_insn ("iohl\t%6,%4", op);
7239 output_asm_insn ("a\t%7,%7,%6", op);
7242 output_asm_insn ("lqd\t%6,0(%7)", op);
7243 output_asm_insn ("rotqby\t%6,%6,%7", op);
7244 output_asm_insn ("a\t%1,%1,%6", op);
7247 /* Jump to target. */
7248 output_asm_insn ("br\t%0", op);
7250 final_end_function ();
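/* Editor's note (illustrative, not in the original source): an example of
   the assembly this thunk generator emits for a small adjustment, assuming
   the this pointer arrives in register $3 (FIRST_ARG_REGNUM) and
   DELTA = 16 with no vcall offset:

     ai   $3,$3,16
     br   <target function>

   Larger deltas use il or ilhu/iohl into the temporary register $79
   followed by an add, exactly as in the branches above.  */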
7253 #include "gt-spu.h"