gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "gimple.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "sbitmap.h"
57 #include "timevar.h"
58 #include "df.h"
60 /* Builtin types, data and prototypes. */
62 enum spu_builtin_type_index
64 SPU_BTI_END_OF_PARAMS,
66 /* We create new type nodes for these. */
67 SPU_BTI_V16QI,
68 SPU_BTI_V8HI,
69 SPU_BTI_V4SI,
70 SPU_BTI_V2DI,
71 SPU_BTI_V4SF,
72 SPU_BTI_V2DF,
73 SPU_BTI_UV16QI,
74 SPU_BTI_UV8HI,
75 SPU_BTI_UV4SI,
76 SPU_BTI_UV2DI,
78 /* A 16-byte type. (Implemented with V16QI_type_node) */
79 SPU_BTI_QUADWORD,
81 /* These all correspond to intSI_type_node */
82 SPU_BTI_7,
83 SPU_BTI_S7,
84 SPU_BTI_U7,
85 SPU_BTI_S10,
86 SPU_BTI_S10_4,
87 SPU_BTI_U14,
88 SPU_BTI_16,
89 SPU_BTI_S16,
90 SPU_BTI_S16_2,
91 SPU_BTI_U16,
92 SPU_BTI_U16_2,
93 SPU_BTI_U18,
95 /* These correspond to the standard types */
96 SPU_BTI_INTQI,
97 SPU_BTI_INTHI,
98 SPU_BTI_INTSI,
99 SPU_BTI_INTDI,
101 SPU_BTI_UINTQI,
102 SPU_BTI_UINTHI,
103 SPU_BTI_UINTSI,
104 SPU_BTI_UINTDI,
106 SPU_BTI_FLOAT,
107 SPU_BTI_DOUBLE,
109 SPU_BTI_VOID,
110 SPU_BTI_PTR,
112 SPU_BTI_MAX
115 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
116 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
117 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
118 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
119 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
120 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
121 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
122 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
123 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
124 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
126 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
128 struct spu_builtin_range
130 int low, high;
133 static struct spu_builtin_range spu_builtin_range[] = {
134 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
135 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
136 {0ll, 0x7fll}, /* SPU_BTI_U7 */
137 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
138 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
139 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
140 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
141 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
142 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
143 {0ll, 0xffffll}, /* SPU_BTI_U16 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
145 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
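/* Each entry above is the [low, high] range accepted for the
   corresponding immediate operand class; the builtin expansion code
   checks literal arguments against this table before emitting an
   instruction with an immediate field.  */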
149 /* Target specific attribute specifications. */
150 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
152 /* Prototypes and external defs. */
153 static void spu_init_builtins (void);
154 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
155 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
156 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157 static rtx get_pic_reg (void);
158 static int need_to_save_reg (int regno, int saving);
159 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163 static void emit_nop_for_insn (rtx insn);
164 static bool insn_clobbers_hbr (rtx insn);
165 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
166 int distance, sbitmap blocks);
167 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
169 static rtx get_branch_target (rtx branch);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174 static int get_pipe (rtx insn);
175 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
179 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185 static int spu_naked_function_p (tree func);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
188 static tree spu_build_builtin_va_list (void);
189 static void spu_va_start (tree, rtx);
190 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
192 static int regno_aligned_for_load (int regno);
193 static int store_with_one_insn_p (rtx mem);
194 static int mem_is_padded_component_ref (rtx x);
195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196 static void spu_asm_globalize_label (FILE * file, const char *name);
197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
198 int *total, bool speed);
199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type, const_tree fntype);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree, rtx, int);
204 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
205 static tree spu_builtin_mul_widen_even (tree);
206 static tree spu_builtin_mul_widen_odd (tree);
207 static tree spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree, bool);
210 static tree spu_builtin_vec_perm (tree, tree *);
211 static int spu_sms_res_mii (struct ddg *g);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree, const char *, int);
215 extern const char *reg_names[];
216 rtx spu_compare_op0, spu_compare_op1;
218 /* Which instruction set architecture to use. */
219 int spu_arch;
220 /* Which cpu are we tuning for. */
221 int spu_tune;
223 /* The hardware requires 8 insns between a hint and the branch it
 224   affects.  This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist = (8*4) - (2*4);
231 /* Determines whether we run variable tracking in machine dependent
232 reorganization. */
233 static int spu_flag_var_tracking;
235 enum spu_immediate {
236 SPU_NONE,
237 SPU_IL,
238 SPU_ILA,
239 SPU_ILH,
240 SPU_ILHU,
241 SPU_ORI,
242 SPU_ORHI,
243 SPU_ORBI,
244 SPU_IOHL
246 enum immediate_class
248 IC_POOL, /* constant pool */
249 IC_IL1, /* one il* instruction */
250 IC_IL2, /* both ilhu and iohl instructions */
251 IC_IL1s, /* one il* instruction */
252 IC_IL2s, /* both ilhu and iohl instructions */
253 IC_FSMBI, /* the fsmbi instruction */
254 IC_CPAT, /* one of the c*d instructions */
255 IC_FSMBI2 /* fsmbi plus 1 other instruction */
258 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
259 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
260 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
261 static enum immediate_class classify_immediate (rtx op,
262 enum machine_mode mode);
264 static enum machine_mode spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
273 /* TARGET overrides. */
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS spu_init_builtins
278 #undef TARGET_EXPAND_BUILTIN
279 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
281 #undef TARGET_UNWIND_WORD_MODE
282 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
284 #undef TARGET_LEGITIMIZE_ADDRESS
285 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
 287 /* The .8byte directive doesn't seem to work well for a 32-bit
288 architecture. */
289 #undef TARGET_ASM_UNALIGNED_DI_OP
290 #define TARGET_ASM_UNALIGNED_DI_OP NULL
292 #undef TARGET_RTX_COSTS
293 #define TARGET_RTX_COSTS spu_rtx_costs
295 #undef TARGET_ADDRESS_COST
296 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
298 #undef TARGET_SCHED_ISSUE_RATE
299 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
301 #undef TARGET_SCHED_INIT_GLOBAL
302 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
304 #undef TARGET_SCHED_INIT
305 #define TARGET_SCHED_INIT spu_sched_init
307 #undef TARGET_SCHED_VARIABLE_ISSUE
308 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
310 #undef TARGET_SCHED_REORDER
311 #define TARGET_SCHED_REORDER spu_sched_reorder
313 #undef TARGET_SCHED_REORDER2
314 #define TARGET_SCHED_REORDER2 spu_sched_reorder
316 #undef TARGET_SCHED_ADJUST_COST
317 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
319 const struct attribute_spec spu_attribute_table[];
320 #undef TARGET_ATTRIBUTE_TABLE
321 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER spu_assemble_integer
326 #undef TARGET_SCALAR_MODE_SUPPORTED_P
327 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
330 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
332 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
333 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
335 #undef TARGET_ASM_GLOBALIZE_LABEL
336 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
338 #undef TARGET_PASS_BY_REFERENCE
339 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
341 #undef TARGET_MUST_PASS_IN_STACK
342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_BUILD_BUILTIN_VA_LIST
345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
347 #undef TARGET_EXPAND_BUILTIN_VA_START
348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
350 #undef TARGET_SETUP_INCOMING_VARARGS
351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
353 #undef TARGET_MACHINE_DEPENDENT_REORG
354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
359 #undef TARGET_DEFAULT_TARGET_FLAGS
360 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
362 #undef TARGET_INIT_LIBFUNCS
363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
365 #undef TARGET_RETURN_IN_MEMORY
366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
371 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
372 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
374 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
375 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
377 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
378 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
380 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
381 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
383 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
384 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
386 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
387 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
395 #undef TARGET_SCHED_SMS_RES_MII
396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
398 #undef TARGET_ASM_FILE_START
399 #define TARGET_ASM_FILE_START asm_file_start
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
404 struct gcc_target targetm = TARGET_INITIALIZER;
406 void
407 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
409 /* Override some of the default param values. With so many registers
410 larger values are better for these params. */
411 MAX_PENDING_LIST_LENGTH = 128;
413 /* With so many registers this is better on by default. */
414 flag_rename_registers = 1;
417 /* Sometimes certain combinations of command options do not make sense
418 on a particular target machine. You can define a macro
419 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
420 executed once just after all the command options have been parsed. */
421 void
422 spu_override_options (void)
424 /* Small loops will be unpeeled at -O3. For SPU it is more important
425 to keep code small by default. */
426 if (!flag_unroll_loops && !flag_peel_loops
427 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
428 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
430 flag_omit_frame_pointer = 1;
 432   /* Functions must be 8-byte aligned so that we correctly handle dual issue.  */
433 if (align_functions < 8)
434 align_functions = 8;
436 spu_hint_dist = 8*4 - spu_max_nops*4;
437 if (spu_hint_dist < 0)
438 spu_hint_dist = 0;
440 if (spu_fixed_range_string)
441 fix_range (spu_fixed_range_string);
443 /* Determine processor architectural level. */
444 if (spu_arch_string)
446 if (strcmp (&spu_arch_string[0], "cell") == 0)
447 spu_arch = PROCESSOR_CELL;
448 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
449 spu_arch = PROCESSOR_CELLEDP;
450 else
451 error ("Unknown architecture '%s'", &spu_arch_string[0]);
454 /* Determine processor to tune for. */
455 if (spu_tune_string)
457 if (strcmp (&spu_tune_string[0], "cell") == 0)
458 spu_tune = PROCESSOR_CELL;
459 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
460 spu_tune = PROCESSOR_CELLEDP;
461 else
462 error ("Unknown architecture '%s'", &spu_tune_string[0]);
465 /* Change defaults according to the processor architecture. */
466 if (spu_arch == PROCESSOR_CELLEDP)
468 /* If no command line option has been otherwise specified, change
469 the default to -mno-safe-hints on celledp -- only the original
470 Cell/B.E. processors require this workaround. */
471 if (!(target_flags_explicit & MASK_SAFE_HINTS))
472 target_flags &= ~MASK_SAFE_HINTS;
475 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
478 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
479 struct attribute_spec.handler. */
481 /* Table of machine attributes. */
482 const struct attribute_spec spu_attribute_table[] =
484 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
485 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
486 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
487 { NULL, 0, 0, false, false, false, NULL }
490 /* True if MODE is valid for the target. By "valid", we mean able to
491 be manipulated in non-trivial ways. In particular, this means all
492 the arithmetic is supported. */
493 static bool
494 spu_scalar_mode_supported_p (enum machine_mode mode)
496 switch (mode)
498 case QImode:
499 case HImode:
500 case SImode:
501 case SFmode:
502 case DImode:
503 case TImode:
504 case DFmode:
505 return true;
507 default:
508 return false;
512 /* Similarly for vector modes. "Supported" here is less strict. At
513 least some operations are supported; need to check optabs or builtins
514 for further details. */
515 static bool
516 spu_vector_mode_supported_p (enum machine_mode mode)
518 switch (mode)
520 case V16QImode:
521 case V8HImode:
522 case V4SImode:
523 case V2DImode:
524 case V4SFmode:
525 case V2DFmode:
526 return true;
528 default:
529 return false;
533 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
534 least significant bytes of the outer mode. This function returns
 535    TRUE for the SUBREGs where this is correct.  */
537 valid_subreg (rtx op)
539 enum machine_mode om = GET_MODE (op);
540 enum machine_mode im = GET_MODE (SUBREG_REG (op));
541 return om != VOIDmode && im != VOIDmode
542 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
543 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
544 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
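/* Examples: (subreg:SI (reg:QI)) is valid (both modes fit in a 4-byte
   slot), and (subreg:V4SI (reg:TI)) is valid (both are full 16-byte
   modes), but (subreg:DI (reg:SI)) is not, because the 4-byte inner
   value does not occupy the least significant bytes of the 8-byte
   outer mode on SPU.  */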
 547 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
548 and adjust the start offset. */
549 static rtx
550 adjust_operand (rtx op, HOST_WIDE_INT * start)
552 enum machine_mode mode;
553 int op_size;
554 /* Strip any paradoxical SUBREG. */
555 if (GET_CODE (op) == SUBREG
556 && (GET_MODE_BITSIZE (GET_MODE (op))
557 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
559 if (start)
560 *start -=
561 GET_MODE_BITSIZE (GET_MODE (op)) -
562 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
563 op = SUBREG_REG (op);
 565   /* If it is smaller than SI, ensure a SUBREG.  */
566 op_size = GET_MODE_BITSIZE (GET_MODE (op));
567 if (op_size < 32)
569 if (start)
570 *start += 32 - op_size;
571 op_size = 32;
573 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
574 mode = mode_for_size (op_size, MODE_INT, 0);
575 if (mode != GET_MODE (op))
576 op = gen_rtx_SUBREG (mode, op, 0);
577 return op;
580 void
581 spu_expand_extv (rtx ops[], int unsignedp)
583 HOST_WIDE_INT width = INTVAL (ops[2]);
584 HOST_WIDE_INT start = INTVAL (ops[3]);
585 HOST_WIDE_INT src_size, dst_size;
586 enum machine_mode src_mode, dst_mode;
587 rtx dst = ops[0], src = ops[1];
588 rtx s;
590 dst = adjust_operand (ops[0], 0);
591 dst_mode = GET_MODE (dst);
592 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
594 src = adjust_operand (src, &start);
595 src_mode = GET_MODE (src);
596 src_size = GET_MODE_BITSIZE (GET_MODE (src));
598 if (start > 0)
600 s = gen_reg_rtx (src_mode);
601 switch (src_mode)
603 case SImode:
604 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
605 break;
606 case DImode:
607 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
608 break;
609 case TImode:
610 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
611 break;
612 default:
613 abort ();
615 src = s;
618 if (width < src_size)
620 rtx pat;
621 int icode;
622 switch (src_mode)
624 case SImode:
625 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
626 break;
627 case DImode:
628 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
629 break;
630 case TImode:
631 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
632 break;
633 default:
634 abort ();
636 s = gen_reg_rtx (src_mode);
637 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
638 emit_insn (pat);
639 src = s;
642 convert_move (dst, src, unsignedp);
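/* Summary of the extraction above: shift the source left by START to
   discard the bits in front of the field, then shift right (logical
   for zero extension, arithmetic for sign extension) by
   SRC_SIZE - WIDTH to right-justify the field, and finally convert to
   the destination mode.  */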
645 void
646 spu_expand_insv (rtx ops[])
648 HOST_WIDE_INT width = INTVAL (ops[1]);
649 HOST_WIDE_INT start = INTVAL (ops[2]);
650 HOST_WIDE_INT maskbits;
651 enum machine_mode dst_mode, src_mode;
652 rtx dst = ops[0], src = ops[3];
653 int dst_size, src_size;
654 rtx mask;
655 rtx shift_reg;
656 int shift;
659 if (GET_CODE (ops[0]) == MEM)
660 dst = gen_reg_rtx (TImode);
661 else
662 dst = adjust_operand (dst, &start);
663 dst_mode = GET_MODE (dst);
664 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
666 if (CONSTANT_P (src))
668 enum machine_mode m =
669 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
670 src = force_reg (m, convert_to_mode (m, src, 0));
672 src = adjust_operand (src, 0);
673 src_mode = GET_MODE (src);
674 src_size = GET_MODE_BITSIZE (GET_MODE (src));
676 mask = gen_reg_rtx (dst_mode);
677 shift_reg = gen_reg_rtx (dst_mode);
678 shift = dst_size - start - width;
680 /* It's not safe to use subreg here because the compiler assumes
681 that the SUBREG_REG is right justified in the SUBREG. */
682 convert_move (shift_reg, src, 1);
684 if (shift > 0)
686 switch (dst_mode)
688 case SImode:
689 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
690 break;
691 case DImode:
692 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
693 break;
694 case TImode:
695 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
696 break;
697 default:
698 abort ();
701 else if (shift < 0)
702 abort ();
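  /* Build a mask with 1s in the bit positions being inserted, counting
     from the most significant bit.  For example, with dst_size == 32,
     start == 8 and width == 16 the mask computed below is 0x00ffff00.  */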
704 switch (dst_size)
706 case 32:
707 maskbits = (-1ll << (32 - width - start));
708 if (start)
709 maskbits += (1ll << (32 - start));
710 emit_move_insn (mask, GEN_INT (maskbits));
711 break;
712 case 64:
713 maskbits = (-1ll << (64 - width - start));
714 if (start)
715 maskbits += (1ll << (64 - start));
716 emit_move_insn (mask, GEN_INT (maskbits));
717 break;
718 case 128:
720 unsigned char arr[16];
721 int i = start / 8;
722 memset (arr, 0, sizeof (arr));
723 arr[i] = 0xff >> (start & 7);
724 for (i++; i <= (start + width - 1) / 8; i++)
725 arr[i] = 0xff;
726 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
727 emit_move_insn (mask, array_to_constant (TImode, arr));
729 break;
730 default:
731 abort ();
733 if (GET_CODE (ops[0]) == MEM)
735 rtx aligned = gen_reg_rtx (SImode);
736 rtx low = gen_reg_rtx (SImode);
737 rtx addr = gen_reg_rtx (SImode);
738 rtx rotl = gen_reg_rtx (SImode);
739 rtx mask0 = gen_reg_rtx (TImode);
740 rtx mem;
742 emit_move_insn (addr, XEXP (ops[0], 0));
743 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
744 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
745 emit_insn (gen_negsi2 (rotl, low));
746 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
747 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
748 mem = change_address (ops[0], TImode, aligned);
749 set_mem_alias_set (mem, 0);
750 emit_move_insn (dst, mem);
751 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
752 emit_move_insn (mem, dst);
753 if (start + width > MEM_ALIGN (ops[0]))
755 rtx shl = gen_reg_rtx (SImode);
756 rtx mask1 = gen_reg_rtx (TImode);
757 rtx dst1 = gen_reg_rtx (TImode);
758 rtx mem1;
759 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
760 emit_insn (gen_shlqby_ti (mask1, mask, shl));
761 mem1 = adjust_address (mem, TImode, 16);
762 set_mem_alias_set (mem1, 0);
763 emit_move_insn (dst1, mem1);
764 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
765 emit_move_insn (mem1, dst1);
768 else
769 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
774 spu_expand_block_move (rtx ops[])
776 HOST_WIDE_INT bytes, align, offset;
777 rtx src, dst, sreg, dreg, target;
778 int i;
779 if (GET_CODE (ops[2]) != CONST_INT
780 || GET_CODE (ops[3]) != CONST_INT
781 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
782 return 0;
784 bytes = INTVAL (ops[2]);
785 align = INTVAL (ops[3]);
787 if (bytes <= 0)
788 return 1;
790 dst = ops[0];
791 src = ops[1];
793 if (align == 16)
795 for (offset = 0; offset + 16 <= bytes; offset += 16)
797 dst = adjust_address (ops[0], V16QImode, offset);
798 src = adjust_address (ops[1], V16QImode, offset);
799 emit_move_insn (dst, src);
801 if (offset < bytes)
803 rtx mask;
804 unsigned char arr[16] = { 0 };
805 for (i = 0; i < bytes - offset; i++)
806 arr[i] = 0xff;
807 dst = adjust_address (ops[0], V16QImode, offset);
808 src = adjust_address (ops[1], V16QImode, offset);
809 mask = gen_reg_rtx (V16QImode);
810 sreg = gen_reg_rtx (V16QImode);
811 dreg = gen_reg_rtx (V16QImode);
812 target = gen_reg_rtx (V16QImode);
813 emit_move_insn (mask, array_to_constant (V16QImode, arr));
814 emit_move_insn (dreg, dst);
815 emit_move_insn (sreg, src);
816 emit_insn (gen_selb (target, dreg, sreg, mask));
817 emit_move_insn (dst, target);
819 return 1;
821 return 0;
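/* Only the fully 16-byte-aligned case is handled above (whole quadword
   copies plus a selb-merged tail); returning 0 lets the caller fall
   back to the generic block-move expansion.  */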
824 enum spu_comp_code
825 { SPU_EQ, SPU_GT, SPU_GTU };
827 int spu_comp_icode[12][3] = {
828 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
829 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
830 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
831 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
832 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
833 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
834 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
835 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
836 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
837 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
838 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
839 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
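/* Rows of spu_comp_icode are indexed by operand mode in the same order
   as the switch in spu_emit_branch_or_set below (QI, HI, SI, DI, TI,
   SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF); columns are SPU_EQ, SPU_GT,
   SPU_GTU.  A zero entry means there is no unsigned compare for that
   (floating-point) mode.  */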
842 /* Generate a compare for CODE. Return a brand-new rtx that represents
843 the result of the compare. GCC can figure this out too if we don't
 844    provide all variations of compares, but since GCC always wants to use
 845    WORD_MODE, we can generate better code in most cases if we do it
 846    ourselves.  */
847 void
848 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
850 int reverse_compare = 0;
851 int reverse_test = 0;
852 rtx compare_result, eq_result;
853 rtx comp_rtx, eq_rtx;
854 rtx target = operands[0];
855 enum machine_mode comp_mode;
856 enum machine_mode op_mode;
857 enum spu_comp_code scode, eq_code;
858 enum insn_code ior_code;
859 int index;
860 int eq_test = 0;
862 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
863 and so on, to keep the constant in operand 1. */
864 if (GET_CODE (spu_compare_op1) == CONST_INT)
866 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
867 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
868 switch (code)
870 case GE:
871 spu_compare_op1 = GEN_INT (val);
872 code = GT;
873 break;
874 case LT:
875 spu_compare_op1 = GEN_INT (val);
876 code = LE;
877 break;
878 case GEU:
879 spu_compare_op1 = GEN_INT (val);
880 code = GTU;
881 break;
882 case LTU:
883 spu_compare_op1 = GEN_INT (val);
884 code = LEU;
885 break;
886 default:
887 break;
891 comp_mode = SImode;
892 op_mode = GET_MODE (spu_compare_op0);
894 switch (code)
896 case GE:
897 scode = SPU_GT;
898 if (HONOR_NANS (op_mode))
900 reverse_compare = 0;
901 reverse_test = 0;
902 eq_test = 1;
903 eq_code = SPU_EQ;
905 else
907 reverse_compare = 1;
908 reverse_test = 1;
910 break;
911 case LE:
912 scode = SPU_GT;
913 if (HONOR_NANS (op_mode))
915 reverse_compare = 1;
916 reverse_test = 0;
917 eq_test = 1;
918 eq_code = SPU_EQ;
920 else
922 reverse_compare = 0;
923 reverse_test = 1;
925 break;
926 case LT:
927 reverse_compare = 1;
928 reverse_test = 0;
929 scode = SPU_GT;
930 break;
931 case GEU:
932 reverse_compare = 1;
933 reverse_test = 1;
934 scode = SPU_GTU;
935 break;
936 case LEU:
937 reverse_compare = 0;
938 reverse_test = 1;
939 scode = SPU_GTU;
940 break;
941 case LTU:
942 reverse_compare = 1;
943 reverse_test = 0;
944 scode = SPU_GTU;
945 break;
946 case NE:
947 reverse_compare = 0;
948 reverse_test = 1;
949 scode = SPU_EQ;
950 break;
952 case EQ:
953 scode = SPU_EQ;
954 break;
955 case GT:
956 scode = SPU_GT;
957 break;
958 case GTU:
959 scode = SPU_GTU;
960 break;
961 default:
962 scode = SPU_EQ;
963 break;
966 switch (op_mode)
968 case QImode:
969 index = 0;
970 comp_mode = QImode;
971 break;
972 case HImode:
973 index = 1;
974 comp_mode = HImode;
975 break;
976 case SImode:
977 index = 2;
978 break;
979 case DImode:
980 index = 3;
981 break;
982 case TImode:
983 index = 4;
984 break;
985 case SFmode:
986 index = 5;
987 break;
988 case DFmode:
989 index = 6;
990 break;
991 case V16QImode:
992 index = 7;
993 comp_mode = op_mode;
994 break;
995 case V8HImode:
996 index = 8;
997 comp_mode = op_mode;
998 break;
999 case V4SImode:
1000 index = 9;
1001 comp_mode = op_mode;
1002 break;
1003 case V4SFmode:
1004 index = 10;
1005 comp_mode = V4SImode;
1006 break;
1007 case V2DFmode:
1008 index = 11;
1009 comp_mode = V2DImode;
1010 break;
1011 case V2DImode:
1012 default:
1013 abort ();
1016 if (GET_MODE (spu_compare_op1) == DFmode
1017 && (scode != SPU_GT && scode != SPU_EQ))
1018 abort ();
1020 if (is_set == 0 && spu_compare_op1 == const0_rtx
1021 && (GET_MODE (spu_compare_op0) == SImode
1022 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
1024 /* Don't need to set a register with the result when we are
1025 comparing against zero and branching. */
1026 reverse_test = !reverse_test;
1027 compare_result = spu_compare_op0;
1029 else
1031 compare_result = gen_reg_rtx (comp_mode);
1033 if (reverse_compare)
1035 rtx t = spu_compare_op1;
1036 spu_compare_op1 = spu_compare_op0;
1037 spu_compare_op0 = t;
1040 if (spu_comp_icode[index][scode] == 0)
1041 abort ();
1043 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1044 (spu_compare_op0, op_mode))
1045 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
1046 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1047 (spu_compare_op1, op_mode))
1048 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
1049 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1050 spu_compare_op0,
1051 spu_compare_op1);
1052 if (comp_rtx == 0)
1053 abort ();
1054 emit_insn (comp_rtx);
1056 if (eq_test)
1058 eq_result = gen_reg_rtx (comp_mode);
1059 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1060 spu_compare_op0,
1061 spu_compare_op1);
1062 if (eq_rtx == 0)
1063 abort ();
1064 emit_insn (eq_rtx);
1065 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1066 gcc_assert (ior_code != CODE_FOR_nothing);
1067 emit_insn (GEN_FCN (ior_code)
1068 (compare_result, compare_result, eq_result));
1072 if (is_set == 0)
1074 rtx bcomp;
1075 rtx loc_ref;
1077 /* We don't have branch on QI compare insns, so we convert the
1078 QI compare result to a HI result. */
1079 if (comp_mode == QImode)
1081 rtx old_res = compare_result;
1082 compare_result = gen_reg_rtx (HImode);
1083 comp_mode = HImode;
1084 emit_insn (gen_extendqihi2 (compare_result, old_res));
1087 if (reverse_test)
1088 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1089 else
1090 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1092 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1093 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1094 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1095 loc_ref, pc_rtx)));
1097 else if (is_set == 2)
1099 int compare_size = GET_MODE_BITSIZE (comp_mode);
1100 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1101 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1102 rtx select_mask;
1103 rtx op_t = operands[2];
1104 rtx op_f = operands[3];
1106 /* The result of the comparison can be SI, HI or QI mode. Create a
1107 mask based on that result. */
1108 if (target_size > compare_size)
1110 select_mask = gen_reg_rtx (mode);
1111 emit_insn (gen_extend_compare (select_mask, compare_result));
1113 else if (target_size < compare_size)
1114 select_mask =
1115 gen_rtx_SUBREG (mode, compare_result,
1116 (compare_size - target_size) / BITS_PER_UNIT);
1117 else if (comp_mode != mode)
1118 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1119 else
1120 select_mask = compare_result;
1122 if (GET_MODE (target) != GET_MODE (op_t)
1123 || GET_MODE (target) != GET_MODE (op_f))
1124 abort ();
1126 if (reverse_test)
1127 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1128 else
1129 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1131 else
1133 if (reverse_test)
1134 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1135 gen_rtx_NOT (comp_mode, compare_result)));
1136 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1137 emit_insn (gen_extendhisi2 (target, compare_result));
1138 else if (GET_MODE (target) == SImode
1139 && GET_MODE (compare_result) == QImode)
1140 emit_insn (gen_extend_compare (target, compare_result));
1141 else
1142 emit_move_insn (target, compare_result);
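/* Above: IS_SET == 0 emits a conditional branch to OPERANDS[0],
   IS_SET == 2 emits a selb-based conditional move selecting between
   OPERANDS[2] and OPERANDS[3], and any other value sets OPERANDS[0]
   to the (possibly inverted and extended) comparison result.  */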
1146 HOST_WIDE_INT
1147 const_double_to_hwint (rtx x)
1149 HOST_WIDE_INT val;
1150 REAL_VALUE_TYPE rv;
1151 if (GET_MODE (x) == SFmode)
1153 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1154 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1156 else if (GET_MODE (x) == DFmode)
1158 long l[2];
1159 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1160 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1161 val = l[0];
1162 val = (val << 32) | (l[1] & 0xffffffff);
1164 else
1165 abort ();
1166 return val;
1170 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1172 long tv[2];
1173 REAL_VALUE_TYPE rv;
1174 gcc_assert (mode == SFmode || mode == DFmode);
1176 if (mode == SFmode)
1177 tv[0] = (v << 32) >> 32;
1178 else if (mode == DFmode)
1180 tv[1] = (v << 32) >> 32;
1181 tv[0] = v >> 32;
1183 real_from_target (&rv, tv, mode);
1184 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1187 void
1188 print_operand_address (FILE * file, register rtx addr)
1190 rtx reg;
1191 rtx offset;
1193 if (GET_CODE (addr) == AND
1194 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1195 && INTVAL (XEXP (addr, 1)) == -16)
1196 addr = XEXP (addr, 0);
1198 switch (GET_CODE (addr))
1200 case REG:
1201 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1202 break;
1204 case PLUS:
1205 reg = XEXP (addr, 0);
1206 offset = XEXP (addr, 1);
1207 if (GET_CODE (offset) == REG)
1209 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1210 reg_names[REGNO (offset)]);
1212 else if (GET_CODE (offset) == CONST_INT)
1214 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1215 INTVAL (offset), reg_names[REGNO (reg)]);
1217 else
1218 abort ();
1219 break;
1221 case CONST:
1222 case LABEL_REF:
1223 case SYMBOL_REF:
1224 case CONST_INT:
1225 output_addr_const (file, addr);
1226 break;
1228 default:
1229 debug_rtx (addr);
1230 abort ();
1234 void
1235 print_operand (FILE * file, rtx x, int code)
1237 enum machine_mode mode = GET_MODE (x);
1238 HOST_WIDE_INT val;
1239 unsigned char arr[16];
1240 int xcode = GET_CODE (x);
1241 int i, info;
1242 if (GET_MODE (x) == VOIDmode)
1243 switch (code)
1245 case 'L': /* 128 bits, signed */
1246 case 'm': /* 128 bits, signed */
1247 case 'T': /* 128 bits, signed */
1248 case 't': /* 128 bits, signed */
1249 mode = TImode;
1250 break;
1251 case 'K': /* 64 bits, signed */
1252 case 'k': /* 64 bits, signed */
1253 case 'D': /* 64 bits, signed */
1254 case 'd': /* 64 bits, signed */
1255 mode = DImode;
1256 break;
1257 case 'J': /* 32 bits, signed */
1258 case 'j': /* 32 bits, signed */
1259 case 's': /* 32 bits, signed */
1260 case 'S': /* 32 bits, signed */
1261 mode = SImode;
1262 break;
1264 switch (code)
1267 case 'j': /* 32 bits, signed */
1268 case 'k': /* 64 bits, signed */
1269 case 'm': /* 128 bits, signed */
1270 if (xcode == CONST_INT
1271 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1273 gcc_assert (logical_immediate_p (x, mode));
1274 constant_to_array (mode, x, arr);
1275 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1276 val = trunc_int_for_mode (val, SImode);
1277 switch (which_logical_immediate (val))
1279 case SPU_ORI:
1280 break;
1281 case SPU_ORHI:
1282 fprintf (file, "h");
1283 break;
1284 case SPU_ORBI:
1285 fprintf (file, "b");
1286 break;
1287 default:
1288 gcc_unreachable();
1291 else
1292 gcc_unreachable();
1293 return;
1295 case 'J': /* 32 bits, signed */
1296 case 'K': /* 64 bits, signed */
1297 case 'L': /* 128 bits, signed */
1298 if (xcode == CONST_INT
1299 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1301 gcc_assert (logical_immediate_p (x, mode)
1302 || iohl_immediate_p (x, mode));
1303 constant_to_array (mode, x, arr);
1304 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1305 val = trunc_int_for_mode (val, SImode);
1306 switch (which_logical_immediate (val))
1308 case SPU_ORI:
1309 case SPU_IOHL:
1310 break;
1311 case SPU_ORHI:
1312 val = trunc_int_for_mode (val, HImode);
1313 break;
1314 case SPU_ORBI:
1315 val = trunc_int_for_mode (val, QImode);
1316 break;
1317 default:
1318 gcc_unreachable();
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1322 else
1323 gcc_unreachable();
1324 return;
1326 case 't': /* 128 bits, signed */
1327 case 'd': /* 64 bits, signed */
1328 case 's': /* 32 bits, signed */
1329 if (CONSTANT_P (x))
1331 enum immediate_class c = classify_immediate (x, mode);
1332 switch (c)
1334 case IC_IL1:
1335 constant_to_array (mode, x, arr);
1336 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1337 val = trunc_int_for_mode (val, SImode);
1338 switch (which_immediate_load (val))
1340 case SPU_IL:
1341 break;
1342 case SPU_ILA:
1343 fprintf (file, "a");
1344 break;
1345 case SPU_ILH:
1346 fprintf (file, "h");
1347 break;
1348 case SPU_ILHU:
1349 fprintf (file, "hu");
1350 break;
1351 default:
1352 gcc_unreachable ();
1354 break;
1355 case IC_CPAT:
1356 constant_to_array (mode, x, arr);
1357 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1358 if (info == 1)
1359 fprintf (file, "b");
1360 else if (info == 2)
1361 fprintf (file, "h");
1362 else if (info == 4)
1363 fprintf (file, "w");
1364 else if (info == 8)
1365 fprintf (file, "d");
1366 break;
1367 case IC_IL1s:
1368 if (xcode == CONST_VECTOR)
1370 x = CONST_VECTOR_ELT (x, 0);
1371 xcode = GET_CODE (x);
1373 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1374 fprintf (file, "a");
1375 else if (xcode == HIGH)
1376 fprintf (file, "hu");
1377 break;
1378 case IC_FSMBI:
1379 case IC_FSMBI2:
1380 case IC_IL2:
1381 case IC_IL2s:
1382 case IC_POOL:
1383 abort ();
1386 else
1387 gcc_unreachable ();
1388 return;
1390 case 'T': /* 128 bits, signed */
1391 case 'D': /* 64 bits, signed */
1392 case 'S': /* 32 bits, signed */
1393 if (CONSTANT_P (x))
1395 enum immediate_class c = classify_immediate (x, mode);
1396 switch (c)
1398 case IC_IL1:
1399 constant_to_array (mode, x, arr);
1400 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1401 val = trunc_int_for_mode (val, SImode);
1402 switch (which_immediate_load (val))
1404 case SPU_IL:
1405 case SPU_ILA:
1406 break;
1407 case SPU_ILH:
1408 case SPU_ILHU:
1409 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1410 break;
1411 default:
1412 gcc_unreachable ();
1414 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1415 break;
1416 case IC_FSMBI:
1417 constant_to_array (mode, x, arr);
1418 val = 0;
1419 for (i = 0; i < 16; i++)
1421 val <<= 1;
1422 val |= arr[i] & 1;
1424 print_operand (file, GEN_INT (val), 0);
1425 break;
1426 case IC_CPAT:
1427 constant_to_array (mode, x, arr);
1428 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1430 break;
1431 case IC_IL1s:
1432 if (xcode == HIGH)
1433 x = XEXP (x, 0);
1434 if (GET_CODE (x) == CONST_VECTOR)
1435 x = CONST_VECTOR_ELT (x, 0);
1436 output_addr_const (file, x);
1437 if (xcode == HIGH)
1438 fprintf (file, "@h");
1439 break;
1440 case IC_IL2:
1441 case IC_IL2s:
1442 case IC_FSMBI2:
1443 case IC_POOL:
1444 abort ();
1447 else
1448 gcc_unreachable ();
1449 return;
1451 case 'C':
1452 if (xcode == CONST_INT)
1454         /* Only the 4 least significant bits are relevant for generating
1455 control word instructions. */
1456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1457 return;
1459 break;
1461 case 'M': /* print code for c*d */
1462 if (GET_CODE (x) == CONST_INT)
1463 switch (INTVAL (x))
1465 case 1:
1466 fprintf (file, "b");
1467 break;
1468 case 2:
1469 fprintf (file, "h");
1470 break;
1471 case 4:
1472 fprintf (file, "w");
1473 break;
1474 case 8:
1475 fprintf (file, "d");
1476 break;
1477 default:
1478 gcc_unreachable();
1480 else
1481 gcc_unreachable();
1482 return;
1484 case 'N': /* Negate the operand */
1485 if (xcode == CONST_INT)
1486 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1487 else if (xcode == CONST_VECTOR)
1488 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1489 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1490 return;
1492 case 'I': /* enable/disable interrupts */
1493 if (xcode == CONST_INT)
1494 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1495 return;
1497 case 'b': /* branch modifiers */
1498 if (xcode == REG)
1499 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1500 else if (COMPARISON_P (x))
1501 fprintf (file, "%s", xcode == NE ? "n" : "");
1502 return;
1504 case 'i': /* indirect call */
1505 if (xcode == MEM)
1507 if (GET_CODE (XEXP (x, 0)) == REG)
1508 /* Used in indirect function calls. */
1509 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1510 else
1511 output_address (XEXP (x, 0));
1513 return;
1515 case 'p': /* load/store */
1516 if (xcode == MEM)
1518 x = XEXP (x, 0);
1519 xcode = GET_CODE (x);
1521 if (xcode == AND)
1523 x = XEXP (x, 0);
1524 xcode = GET_CODE (x);
1526 if (xcode == REG)
1527 fprintf (file, "d");
1528 else if (xcode == CONST_INT)
1529 fprintf (file, "a");
1530 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1531 fprintf (file, "r");
1532 else if (xcode == PLUS || xcode == LO_SUM)
1534 if (GET_CODE (XEXP (x, 1)) == REG)
1535 fprintf (file, "x");
1536 else
1537 fprintf (file, "d");
1539 return;
1541 case 'e':
1542 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1543 val &= 0x7;
1544 output_addr_const (file, GEN_INT (val));
1545 return;
1547 case 'f':
1548 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1549 val &= 0x1f;
1550 output_addr_const (file, GEN_INT (val));
1551 return;
1553 case 'g':
1554 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1555 val &= 0x3f;
1556 output_addr_const (file, GEN_INT (val));
1557 return;
1559 case 'h':
1560 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1561 val = (val >> 3) & 0x1f;
1562 output_addr_const (file, GEN_INT (val));
1563 return;
1565 case 'E':
1566 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1567 val = -val;
1568 val &= 0x7;
1569 output_addr_const (file, GEN_INT (val));
1570 return;
1572 case 'F':
1573 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1574 val = -val;
1575 val &= 0x1f;
1576 output_addr_const (file, GEN_INT (val));
1577 return;
1579 case 'G':
1580 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1581 val = -val;
1582 val &= 0x3f;
1583 output_addr_const (file, GEN_INT (val));
1584 return;
1586 case 'H':
1587 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1588 val = -(val & -8ll);
1589 val = (val >> 3) & 0x1f;
1590 output_addr_const (file, GEN_INT (val));
1591 return;
1593 case 'v':
1594 case 'w':
1595 constant_to_array (mode, x, arr);
1596 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1597 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1598 return;
1600 case 0:
1601 if (xcode == REG)
1602 fprintf (file, "%s", reg_names[REGNO (x)]);
1603 else if (xcode == MEM)
1604 output_address (XEXP (x, 0));
1605 else if (xcode == CONST_VECTOR)
1606 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1607 else
1608 output_addr_const (file, x);
1609 return;
1611 /* unused letters
1612 o qr u yz
1613 AB OPQR UVWXYZ */
1614 default:
1615 output_operand_lossage ("invalid %%xn code");
1617 gcc_unreachable ();
1620 extern char call_used_regs[];
1622 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1623 caller saved register. For leaf functions it is more efficient to
1624 use a volatile register because we won't need to save and restore the
1625 pic register. This routine is only valid after register allocation
1626 is completed, so we can pick an unused register. */
1627 static rtx
1628 get_pic_reg (void)
1630 rtx pic_reg = pic_offset_table_rtx;
1631 if (!reload_completed && !reload_in_progress)
1632 abort ();
1633 return pic_reg;
1636 /* Split constant addresses to handle cases that are too large.
1637 Add in the pic register when in PIC mode.
1638 Split immediates that require more than 1 instruction. */
1640 spu_split_immediate (rtx * ops)
1642 enum machine_mode mode = GET_MODE (ops[0]);
1643 enum immediate_class c = classify_immediate (ops[1], mode);
1645 switch (c)
1647 case IC_IL2:
1649 unsigned char arrhi[16];
1650 unsigned char arrlo[16];
1651 rtx to, temp, hi, lo;
1652 int i;
1653 enum machine_mode imode = mode;
1654 /* We need to do reals as ints because the constant used in the
1655 IOR might not be a legitimate real constant. */
1656 imode = int_mode_for_mode (mode);
1657 constant_to_array (mode, ops[1], arrhi);
1658 if (imode != mode)
1659 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1660 else
1661 to = ops[0];
1662 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
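        /* Split each 32-bit word of the constant: the high halfword
           (bytes 0-1) stays in ARRHI and is loaded first (ilhu), while
           the low halfword (bytes 2-3) moves to ARRLO and is merged in
           with the IOR below (iohl).  */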
1663 for (i = 0; i < 16; i += 4)
1665 arrlo[i + 2] = arrhi[i + 2];
1666 arrlo[i + 3] = arrhi[i + 3];
1667 arrlo[i + 0] = arrlo[i + 1] = 0;
1668 arrhi[i + 2] = arrhi[i + 3] = 0;
1670 hi = array_to_constant (imode, arrhi);
1671 lo = array_to_constant (imode, arrlo);
1672 emit_move_insn (temp, hi);
1673 emit_insn (gen_rtx_SET
1674 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1675 return 1;
1677 case IC_FSMBI2:
1679 unsigned char arr_fsmbi[16];
1680 unsigned char arr_andbi[16];
1681 rtx to, reg_fsmbi, reg_and;
1682 int i;
1683 enum machine_mode imode = mode;
1684 /* We need to do reals as ints because the constant used in the
1685 * AND might not be a legitimate real constant. */
1686 imode = int_mode_for_mode (mode);
1687 constant_to_array (mode, ops[1], arr_fsmbi);
1688 if (imode != mode)
1689 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1690 else
1691 to = ops[0];
1692 for (i = 0; i < 16; i++)
1693 if (arr_fsmbi[i] != 0)
1695 arr_andbi[0] = arr_fsmbi[i];
1696 arr_fsmbi[i] = 0xff;
1698 for (i = 1; i < 16; i++)
1699 arr_andbi[i] = arr_andbi[0];
1700 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1701 reg_and = array_to_constant (imode, arr_andbi);
1702 emit_move_insn (to, reg_fsmbi);
1703 emit_insn (gen_rtx_SET
1704 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1705 return 1;
1707 case IC_POOL:
1708 if (reload_in_progress || reload_completed)
1710 rtx mem = force_const_mem (mode, ops[1]);
1711 if (TARGET_LARGE_MEM)
1713 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1714 emit_move_insn (addr, XEXP (mem, 0));
1715 mem = replace_equiv_address (mem, addr);
1717 emit_move_insn (ops[0], mem);
1718 return 1;
1720 break;
1721 case IC_IL1s:
1722 case IC_IL2s:
1723 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1725 if (c == IC_IL2s)
1727 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1728 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1730 else if (flag_pic)
1731 emit_insn (gen_pic (ops[0], ops[1]));
1732 if (flag_pic)
1734 rtx pic_reg = get_pic_reg ();
1735 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1736 crtl->uses_pic_offset_table = 1;
1738 return flag_pic || c == IC_IL2s;
1740 break;
1741 case IC_IL1:
1742 case IC_FSMBI:
1743 case IC_CPAT:
1744 break;
1746 return 0;
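/* spu_split_immediate returns nonzero when it has emitted the expanded
   sequence itself, in which case the caller is expected to drop its own
   move; it returns zero when the constant can be handled by a single
   instruction or by the normal move patterns.  */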
1749 /* SAVING is TRUE when we are generating the actual load and store
1750 instructions for REGNO. When determining the size of the stack
1751    needed for saving registers we must allocate enough space for the
1752 worst case, because we don't always have the information early enough
1753 to not allocate it. But we can at least eliminate the actual loads
1754 and stores during the prologue/epilogue. */
1755 static int
1756 need_to_save_reg (int regno, int saving)
1758 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1759 return 1;
1760 if (flag_pic
1761 && regno == PIC_OFFSET_TABLE_REGNUM
1762 && (!saving || crtl->uses_pic_offset_table)
1763 && (!saving
1764 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1765 return 1;
1766 return 0;
1769 /* This function is only correct starting with local register
1770 allocation */
1772 spu_saved_regs_size (void)
1774 int reg_save_size = 0;
1775 int regno;
1777 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1778 if (need_to_save_reg (regno, 0))
1779 reg_save_size += 0x10;
1780 return reg_save_size;
1783 static rtx
1784 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1786 rtx reg = gen_rtx_REG (V4SImode, regno);
1787 rtx mem =
1788 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1789 return emit_insn (gen_movv4si (mem, reg));
1792 static rtx
1793 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1795 rtx reg = gen_rtx_REG (V4SImode, regno);
1796 rtx mem =
1797 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1798 return emit_insn (gen_movv4si (reg, mem));
1801 /* This happens after reload, so we need to expand it. */
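/* If IMM does not satisfy constraint K it is loaded into SCRATCH first,
   so SCRATCH must be a different register from SRC (checked below with
   an abort).  */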
1802 static rtx
1803 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1805 rtx insn;
1806 if (satisfies_constraint_K (GEN_INT (imm)))
1808 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1810 else
1812 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1813 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1814 if (REGNO (src) == REGNO (scratch))
1815 abort ();
1817 return insn;
1820 /* Return nonzero if this function is known to have a null epilogue. */
1823 direct_return (void)
1825 if (reload_completed)
1827 if (cfun->static_chain_decl == 0
1828 && (spu_saved_regs_size ()
1829 + get_frame_size ()
1830 + crtl->outgoing_args_size
1831 + crtl->args.pretend_args_size == 0)
1832 && current_function_is_leaf)
1833 return 1;
1835 return 0;
1839 The stack frame looks like this:
1840 +-------------+
1841 | incoming |
1842 | args |
1843 AP -> +-------------+
1844 | $lr save |
1845 +-------------+
1846 prev SP | back chain |
1847 +-------------+
1848 | var args |
1849 | reg save | crtl->args.pretend_args_size bytes
1850 +-------------+
1851 | ... |
1852 | saved regs | spu_saved_regs_size() bytes
1853 FP -> +-------------+
1854 | ... |
1855 | vars | get_frame_size() bytes
1856 HFP -> +-------------+
1857 | ... |
1858 | outgoing |
1859 | args | crtl->outgoing_args_size bytes
1860 +-------------+
1861 | $lr of next |
1862 | frame |
1863 +-------------+
1864 | back chain |
1865 SP -> +-------------+
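/* In the diagram above, AP, FP, HFP and SP denote the argument pointer,
   frame pointer, hard frame pointer and stack pointer respectively;
   "prev SP" is the caller's stack pointer value, which is saved as the
   back chain.  */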
1868 void
1869 spu_expand_prologue (void)
1871 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1872 HOST_WIDE_INT total_size;
1873 HOST_WIDE_INT saved_regs_size;
1874 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1875 rtx scratch_reg_0, scratch_reg_1;
1876 rtx insn, real;
1878 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1879 the "toplevel" insn chain. */
1880 emit_note (NOTE_INSN_DELETED);
1882 if (flag_pic && optimize == 0)
1883 crtl->uses_pic_offset_table = 1;
1885 if (spu_naked_function_p (current_function_decl))
1886 return;
1888 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1889 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1891 saved_regs_size = spu_saved_regs_size ();
1892 total_size = size + saved_regs_size
1893 + crtl->outgoing_args_size
1894 + crtl->args.pretend_args_size;
1896 if (!current_function_is_leaf
1897 || cfun->calls_alloca || total_size > 0)
1898 total_size += STACK_POINTER_OFFSET;
1900 /* Save this first because code after this might use the link
1901 register as a scratch register. */
1902 if (!current_function_is_leaf)
1904 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1905 RTX_FRAME_RELATED_P (insn) = 1;
1908 if (total_size > 0)
1910 offset = -crtl->args.pretend_args_size;
1911 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1912 if (need_to_save_reg (regno, 1))
1914 offset -= 16;
1915 insn = frame_emit_store (regno, sp_reg, offset);
1916 RTX_FRAME_RELATED_P (insn) = 1;
1920 if (flag_pic && crtl->uses_pic_offset_table)
1922 rtx pic_reg = get_pic_reg ();
1923 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1924 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1927 if (total_size > 0)
1929 if (flag_stack_check)
1931 /* We compare against total_size-1 because
1932 ($sp >= total_size) <=> ($sp > total_size-1) */
1933 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1934 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1935 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1936 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1938 emit_move_insn (scratch_v4si, size_v4si);
1939 size_v4si = scratch_v4si;
1941 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1942 emit_insn (gen_vec_extractv4si
1943 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1944 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1947 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1948 the value of the previous $sp because we save it as the back
1949 chain. */
1950 if (total_size <= 2000)
1952 /* In this case we save the back chain first. */
1953 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1954 insn =
1955 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1957 else
1959 insn = emit_move_insn (scratch_reg_0, sp_reg);
1960 insn =
1961 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1963 RTX_FRAME_RELATED_P (insn) = 1;
1964 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1965 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1967 if (total_size > 2000)
1969 /* Save the back chain ptr */
1970 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1973 if (frame_pointer_needed)
1975 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1976 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1977 + crtl->outgoing_args_size;
1978 /* Set the new frame_pointer */
1979 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1980 RTX_FRAME_RELATED_P (insn) = 1;
1981 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1982 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1983 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1987 emit_note (NOTE_INSN_DELETED);
1990 void
1991 spu_expand_epilogue (bool sibcall_p)
1993 int size = get_frame_size (), offset, regno;
1994 HOST_WIDE_INT saved_regs_size, total_size;
1995 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1996 rtx jump, scratch_reg_0;
1998 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1999 the "toplevel" insn chain. */
2000 emit_note (NOTE_INSN_DELETED);
2002 if (spu_naked_function_p (current_function_decl))
2003 return;
2005 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2007 saved_regs_size = spu_saved_regs_size ();
2008 total_size = size + saved_regs_size
2009 + crtl->outgoing_args_size
2010 + crtl->args.pretend_args_size;
2012 if (!current_function_is_leaf
2013 || cfun->calls_alloca || total_size > 0)
2014 total_size += STACK_POINTER_OFFSET;
2016 if (total_size > 0)
2018 if (cfun->calls_alloca)
2019 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2020 else
2021 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2024 if (saved_regs_size > 0)
2026 offset = -crtl->args.pretend_args_size;
2027 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2028 if (need_to_save_reg (regno, 1))
2030 offset -= 0x10;
2031 frame_emit_load (regno, sp_reg, offset);
2036 if (!current_function_is_leaf)
2037 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2039 if (!sibcall_p)
2041 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2042 jump = emit_jump_insn (gen__return ());
2043 emit_barrier_after (jump);
2046 emit_note (NOTE_INSN_DELETED);
2050 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2052 if (count != 0)
2053 return 0;
2054 /* This is inefficient because it ends up copying to a save-register
2055 which then gets saved even though $lr has already been saved. But
2056 it does generate better code for leaf functions and we don't need
2057 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2058 used for __builtin_return_address anyway, so maybe we don't care if
2059 it's inefficient. */
2060 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2064 /* Given VAL, generate a constant appropriate for MODE.
2065 If MODE is a vector mode, every element will be VAL.
2066 For TImode, VAL will be zero extended to 128 bits. */
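/* Example (added for illustration): spu_const (V4SImode, 1) returns a
   CONST_VECTOR of four SImode 1s, while spu_const (SImode, -1) is simply
   (const_int -1).  */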
2068 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2070 rtx inner;
2071 rtvec v;
2072 int units, i;
2074 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2075 || GET_MODE_CLASS (mode) == MODE_FLOAT
2076 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2077 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2079 if (GET_MODE_CLASS (mode) == MODE_INT)
2080 return immed_double_const (val, 0, mode);
2082 /* val is the bit representation of the float */
2083 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2084 return hwint_to_const_double (mode, val);
2086 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2087 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2088 else
2089 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2091 units = GET_MODE_NUNITS (mode);
2093 v = rtvec_alloc (units);
2095 for (i = 0; i < units; ++i)
2096 RTVEC_ELT (v, i) = inner;
2098 return gen_rtx_CONST_VECTOR (mode, v);
2101 /* Create a MODE vector constant from 4 ints. */
2103 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2105 unsigned char arr[16];
2106 arr[0] = (a >> 24) & 0xff;
2107 arr[1] = (a >> 16) & 0xff;
2108 arr[2] = (a >> 8) & 0xff;
2109 arr[3] = (a >> 0) & 0xff;
2110 arr[4] = (b >> 24) & 0xff;
2111 arr[5] = (b >> 16) & 0xff;
2112 arr[6] = (b >> 8) & 0xff;
2113 arr[7] = (b >> 0) & 0xff;
2114 arr[8] = (c >> 24) & 0xff;
2115 arr[9] = (c >> 16) & 0xff;
2116 arr[10] = (c >> 8) & 0xff;
2117 arr[11] = (c >> 0) & 0xff;
2118 arr[12] = (d >> 24) & 0xff;
2119 arr[13] = (d >> 16) & 0xff;
2120 arr[14] = (d >> 8) & 0xff;
2121 arr[15] = (d >> 0) & 0xff;
2122 return array_to_constant(mode, arr);
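/* For example (illustration only): spu_const_from_ints (V4SImode,
   0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f10) packs the four ints
   big-endian into the 16-byte array (byte 0 = 0x01, byte 15 = 0x10) and
   hands it to array_to_constant to build the vector constant.  */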
2125 /* branch hint stuff */
2127 /* An array of these is used to propagate hints to predecessor blocks. */
2128 struct spu_bb_info
2130 rtx prop_jump; /* propagated from another block */
2131 int bb_index; /* the original block. */
2133 static struct spu_bb_info *spu_bb_info;
2135 #define STOP_HINT_P(INSN) \
2136 (GET_CODE(INSN) == CALL_INSN \
2137 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2138 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2140 /* 1 when RTX is a hinted branch or its target. We keep track of
2141 what has been hinted so the safe-hint code can test it easily. */
2142 #define HINTED_P(RTX) \
2143 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2145 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2146 #define SCHED_ON_EVEN_P(RTX) \
2147 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2149 /* Emit a nop for INSN such that the two will dual issue. This assumes
2150 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2151 We check for TImode to handle a MULTI1 insn which has dual issued its
2152 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2153 ADDR_VEC insns. */
2154 static void
2155 emit_nop_for_insn (rtx insn)
2157 int p;
2158 rtx new_insn;
2159 p = get_pipe (insn);
2160 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2161 new_insn = emit_insn_after (gen_lnop (), insn);
2162 else if (p == 1 && GET_MODE (insn) == TImode)
2164 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2165 PUT_MODE (new_insn, TImode);
2166 PUT_MODE (insn, VOIDmode);
2168 else
2169 new_insn = emit_insn_after (gen_lnop (), insn);
2170 recog_memoized (new_insn);
2173 /* Insert nops in basic blocks to meet dual issue alignment
2174 requirements. Also make sure hbrp and hint instructions are at least
2175 one cycle apart, possibly inserting a nop. */
2176 static void
2177 pad_bb(void)
2179 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2180 int length;
2181 int addr;
2183 /* This sets up INSN_ADDRESSES. */
2184 shorten_branches (get_insns ());
2186 /* Keep track of length added by nops. */
2187 length = 0;
2189 prev_insn = 0;
2190 insn = get_insns ();
2191 if (!active_insn_p (insn))
2192 insn = next_active_insn (insn);
2193 for (; insn; insn = next_insn)
2195 next_insn = next_active_insn (insn);
2196 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2197 || INSN_CODE (insn) == CODE_FOR_hbr)
2199 if (hbr_insn)
2201 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2202 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2203 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2204 || (a1 - a0 == 4))
2206 prev_insn = emit_insn_before (gen_lnop (), insn);
2207 PUT_MODE (prev_insn, GET_MODE (insn));
2208 PUT_MODE (insn, TImode);
2209 length += 4;
2212 hbr_insn = insn;
2214 if (INSN_CODE (insn) == CODE_FOR_blockage)
2216 if (GET_MODE (insn) == TImode)
2217 PUT_MODE (next_insn, TImode);
2218 insn = next_insn;
2219 next_insn = next_active_insn (insn);
2221 addr = INSN_ADDRESSES (INSN_UID (insn));
2222 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2224 if (((addr + length) & 7) != 0)
2226 emit_nop_for_insn (prev_insn);
2227 length += 4;
2230 else if (GET_MODE (insn) == TImode
2231 && ((next_insn && GET_MODE (next_insn) != TImode)
2232 || get_attr_type (insn) == TYPE_MULTI0)
2233 && ((addr + length) & 7) != 0)
2235 /* prev_insn will always be set because the first insn is
2236 always 8-byte aligned. */
2237 emit_nop_for_insn (prev_insn);
2238 length += 4;
2240 prev_insn = insn;
2245 /* Routines for branch hints. */
2247 static void
2248 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2249 int distance, sbitmap blocks)
2251 rtx branch_label = 0;
2252 rtx hint;
2253 rtx insn;
2254 rtx table;
2256 if (before == 0 || branch == 0 || target == 0)
2257 return;
2259 /* While scheduling we require hints to be no further than 600 bytes
2260 away, so we need to enforce that here too. */

2261 if (distance > 600)
2262 return;
2264 /* If BEFORE is a basic block note, emit the hint after that note. */
2265 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2266 before = NEXT_INSN (before);
2268 branch_label = gen_label_rtx ();
2269 LABEL_NUSES (branch_label)++;
2270 LABEL_PRESERVE_P (branch_label) = 1;
2271 insn = emit_label_before (branch_label, branch);
2272 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2273 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2275 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2276 recog_memoized (hint);
2277 HINTED_P (branch) = 1;
2279 if (GET_CODE (target) == LABEL_REF)
2280 HINTED_P (XEXP (target, 0)) = 1;
2281 else if (tablejump_p (branch, 0, &table))
2283 rtvec vec;
2284 int j;
2285 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2286 vec = XVEC (PATTERN (table), 0);
2287 else
2288 vec = XVEC (PATTERN (table), 1);
2289 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2290 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2293 if (distance >= 588)
2295 /* Make sure the hint isn't scheduled any earlier than this point,
2296 which could make it too far for the branch offset to fit. */
2297 recog_memoized (emit_insn_before (gen_blockage (), hint));
2299 else if (distance <= 8 * 4)
2301 /* To guarantee at least 8 insns between the hint and branch we
2302 insert nops. */
2303 int d;
2304 for (d = distance; d < 8 * 4; d += 4)
2306 insn =
2307 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2308 recog_memoized (insn);
2311 /* Make sure any nops inserted aren't scheduled before the hint. */
2312 recog_memoized (emit_insn_after (gen_blockage (), hint));
2314 /* Make sure any nops inserted aren't scheduled after the call. */
2315 if (CALL_P (branch) && distance < 8 * 4)
2316 recog_memoized (emit_insn_before (gen_blockage (), branch));
2320 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2321 the rtx for the branch target. */
2322 static rtx
2323 get_branch_target (rtx branch)
2325 if (GET_CODE (branch) == JUMP_INSN)
2327 rtx set, src;
2329 /* Return statements */
2330 if (GET_CODE (PATTERN (branch)) == RETURN)
2331 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2333 /* jump table */
2334 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2335 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2336 return 0;
2338 set = single_set (branch);
2339 src = SET_SRC (set);
2340 if (GET_CODE (SET_DEST (set)) != PC)
2341 abort ();
2343 if (GET_CODE (src) == IF_THEN_ELSE)
2345 rtx lab = 0;
2346 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2347 if (note)
2349 /* If the more probable case is not a fall through, then
2350 try a branch hint. */
2351 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2352 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2353 && GET_CODE (XEXP (src, 1)) != PC)
2354 lab = XEXP (src, 1);
2355 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2356 && GET_CODE (XEXP (src, 2)) != PC)
2357 lab = XEXP (src, 2);
2359 if (lab)
2361 if (GET_CODE (lab) == RETURN)
2362 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2363 return lab;
2365 return 0;
2368 return src;
2370 else if (GET_CODE (branch) == CALL_INSN)
2372 rtx call;
2373 /* All of our call patterns are in a PARALLEL and the CALL is
2374 the first pattern in the PARALLEL. */
2375 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2376 abort ();
2377 call = XVECEXP (PATTERN (branch), 0, 0);
2378 if (GET_CODE (call) == SET)
2379 call = SET_SRC (call);
2380 if (GET_CODE (call) != CALL)
2381 abort ();
2382 return XEXP (XEXP (call, 0), 0);
2384 return 0;
2387 /* The special $hbr register is used to prevent the insn scheduler from
2388 moving hbr insns across instructions which invalidate them. It
2389 should only be used in a clobber, and this function searches for
2390 insns which clobber it. */
2391 static bool
2392 insn_clobbers_hbr (rtx insn)
2394 if (INSN_P (insn)
2395 && GET_CODE (PATTERN (insn)) == PARALLEL)
2397 rtx parallel = PATTERN (insn);
2398 rtx clobber;
2399 int j;
2400 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2402 clobber = XVECEXP (parallel, 0, j);
2403 if (GET_CODE (clobber) == CLOBBER
2404 && GET_CODE (XEXP (clobber, 0)) == REG
2405 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2406 return 1;
2409 return 0;
2412 /* Search up to 32 insns starting at FIRST:
2413 - at any kind of hinted branch, just return
2414 - at any unconditional branch in the first 15 insns, just return
2415 - at a call or indirect branch, after the first 15 insns, force it to
2416 an even address and return
2417 - at any unconditional branch, after the first 15 insns, force it to
2418 an even address.
2419 At the end of the search, insert an hbrp within 4 insns of FIRST,
2420 and an hbrp within 16 instructions of FIRST. */
2422 static void
2423 insert_hbrp_for_ilb_runout (rtx first)
2425 rtx insn, before_4 = 0, before_16 = 0;
2426 int addr = 0, length, first_addr = -1;
2427 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2428 int insert_lnop_after = 0;
2429 for (insn = first; insn; insn = NEXT_INSN (insn))
2430 if (INSN_P (insn))
2432 if (first_addr == -1)
2433 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2434 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2435 length = get_attr_length (insn);
2437 if (before_4 == 0 && addr + length >= 4 * 4)
2438 before_4 = insn;
2439 /* We test for 14 instructions because the first hbrp will add
2440 up to 2 instructions. */
2441 if (before_16 == 0 && addr + length >= 14 * 4)
2442 before_16 = insn;
2444 if (INSN_CODE (insn) == CODE_FOR_hbr)
2446 /* Make sure an hbrp is at least 2 cycles away from a hint.
2447 Insert an lnop after the hbrp when necessary. */
2448 if (before_4 == 0 && addr > 0)
2450 before_4 = insn;
2451 insert_lnop_after |= 1;
2453 else if (before_4 && addr <= 4 * 4)
2454 insert_lnop_after |= 1;
2455 if (before_16 == 0 && addr > 10 * 4)
2457 before_16 = insn;
2458 insert_lnop_after |= 2;
2460 else if (before_16 && addr <= 14 * 4)
2461 insert_lnop_after |= 2;
2464 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2466 if (addr < hbrp_addr0)
2467 hbrp_addr0 = addr;
2468 else if (addr < hbrp_addr1)
2469 hbrp_addr1 = addr;
2472 if (CALL_P (insn) || JUMP_P (insn))
2474 if (HINTED_P (insn))
2475 return;
2477 /* Any branch after the first 15 insns should be on an even
2478 address to avoid a special case branch. There might be
2479 some nops and/or hbrps inserted, so we test after 10
2480 insns. */
2481 if (addr > 10 * 4)
2482 SCHED_ON_EVEN_P (insn) = 1;
2485 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2486 return;
2489 if (addr + length >= 32 * 4)
2491 gcc_assert (before_4 && before_16);
2492 if (hbrp_addr0 > 4 * 4)
2494 insn =
2495 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2496 recog_memoized (insn);
2497 INSN_ADDRESSES_NEW (insn,
2498 INSN_ADDRESSES (INSN_UID (before_4)));
2499 PUT_MODE (insn, GET_MODE (before_4));
2500 PUT_MODE (before_4, TImode);
2501 if (insert_lnop_after & 1)
2503 insn = emit_insn_before (gen_lnop (), before_4);
2504 recog_memoized (insn);
2505 INSN_ADDRESSES_NEW (insn,
2506 INSN_ADDRESSES (INSN_UID (before_4)));
2507 PUT_MODE (insn, TImode);
2510 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2511 && hbrp_addr1 > 16 * 4)
2513 insn =
2514 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2515 recog_memoized (insn);
2516 INSN_ADDRESSES_NEW (insn,
2517 INSN_ADDRESSES (INSN_UID (before_16)));
2518 PUT_MODE (insn, GET_MODE (before_16));
2519 PUT_MODE (before_16, TImode);
2520 if (insert_lnop_after & 2)
2522 insn = emit_insn_before (gen_lnop (), before_16);
2523 recog_memoized (insn);
2524 INSN_ADDRESSES_NEW (insn,
2525 INSN_ADDRESSES (INSN_UID
2526 (before_16)));
2527 PUT_MODE (insn, TImode);
2530 return;
2533 else if (BARRIER_P (insn))
2534 return;
2538 /* The SPU might hang when it executes 48 inline instructions after a
2539 hinted branch jumps to its hinted target. The beginning of a
2540 function and the return from a call might have been hinted, and must
2541 be handled as well. To prevent a hang we insert 2 hbrps. The first
2542 should be within 6 insns of the branch target. The second should be
2543 within 22 insns of the branch target. When determining if hbrps are
2544 necessary, we look for only 32 inline instructions, because up to
2545 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2546 new hbrps, we insert them within 4 and 16 insns of the target. */
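/* Added note: the "within 4 and 16 insns" targets above correspond to the
   4 * 4 and 14 * 4 byte thresholds used in insert_hbrp_for_ilb_runout when
   choosing before_4 and before_16; the second is tested two insns early
   because the first hbrp can add up to 2 instructions.  */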
2547 static void
2548 insert_hbrp (void)
2550 rtx insn;
2551 if (TARGET_SAFE_HINTS)
2553 shorten_branches (get_insns ());
2554 /* Insert hbrp at beginning of function */
2555 insn = next_active_insn (get_insns ());
2556 if (insn)
2557 insert_hbrp_for_ilb_runout (insn);
2558 /* Insert hbrp after hinted targets. */
2559 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2560 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2561 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2565 static int in_spu_reorg;
2567 /* Insert branch hints. There are no branch optimizations after this
2568 pass, so it's safe to set our branch hints now. */
2569 static void
2570 spu_machine_dependent_reorg (void)
2572 sbitmap blocks;
2573 basic_block bb;
2574 rtx branch, insn;
2575 rtx branch_target = 0;
2576 int branch_addr = 0, insn_addr, required_dist = 0;
2577 int i;
2578 unsigned int j;
2580 if (!TARGET_BRANCH_HINTS || optimize == 0)
2582 /* We still do it for unoptimized code because an external
2583 function might have hinted a call or return. */
2584 insert_hbrp ();
2585 pad_bb ();
2586 return;
2589 blocks = sbitmap_alloc (last_basic_block);
2590 sbitmap_zero (blocks);
2592 in_spu_reorg = 1;
2593 compute_bb_for_insn ();
2595 compact_blocks ();
2597 spu_bb_info =
2598 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2599 sizeof (struct spu_bb_info));
2601 /* We need exact insn addresses and lengths. */
2602 shorten_branches (get_insns ());
2604 for (i = n_basic_blocks - 1; i >= 0; i--)
2606 bb = BASIC_BLOCK (i);
2607 branch = 0;
2608 if (spu_bb_info[i].prop_jump)
2610 branch = spu_bb_info[i].prop_jump;
2611 branch_target = get_branch_target (branch);
2612 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2613 required_dist = spu_hint_dist;
2615 /* Search from end of a block to beginning. In this loop, find
2616 jumps which need a branch hint and emit the hint only when:
2617 - it's an indirect branch and we're at the insn which sets
2618 the register
2619 - we're at an insn that will invalidate the hint. e.g., a
2620 call, another hint insn, inline asm that clobbers $hbr, and
2621 some inlined operations (divmodsi4). Don't consider jumps
2622 because they are only at the end of a block and are
2623 considered when we are deciding whether to propagate
2624 - we're getting too far away from the branch. The hbr insns
2625 only have a signed 10 bit offset
2626 We go back as far as possible so the branch will be considered
2627 for propagation when we get to the beginning of the block. */
2628 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2630 if (INSN_P (insn))
2632 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2633 if (branch
2634 && ((GET_CODE (branch_target) == REG
2635 && set_of (branch_target, insn) != NULL_RTX)
2636 || insn_clobbers_hbr (insn)
2637 || branch_addr - insn_addr > 600))
2639 rtx next = NEXT_INSN (insn);
2640 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2641 if (insn != BB_END (bb)
2642 && branch_addr - next_addr >= required_dist)
2644 if (dump_file)
2645 fprintf (dump_file,
2646 "hint for %i in block %i before %i\n",
2647 INSN_UID (branch), bb->index,
2648 INSN_UID (next));
2649 spu_emit_branch_hint (next, branch, branch_target,
2650 branch_addr - next_addr, blocks);
2652 branch = 0;
2655 /* JUMP_P will only be true at the end of a block. When
2656 branch is already set it means we've previously decided
2657 to propagate a hint for that branch into this block. */
2658 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2660 branch = 0;
2661 if ((branch_target = get_branch_target (insn)))
2663 branch = insn;
2664 branch_addr = insn_addr;
2665 required_dist = spu_hint_dist;
2669 if (insn == BB_HEAD (bb))
2670 break;
2673 if (branch)
2675 /* If we haven't emitted a hint for this branch yet, it might
2676 be profitable to emit it in one of the predecessor blocks,
2677 especially for loops. */
2678 rtx bbend;
2679 basic_block prev = 0, prop = 0, prev2 = 0;
2680 int loop_exit = 0, simple_loop = 0;
2681 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2683 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2684 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2685 prev = EDGE_PRED (bb, j)->src;
2686 else
2687 prev2 = EDGE_PRED (bb, j)->src;
2689 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2690 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2691 loop_exit = 1;
2692 else if (EDGE_SUCC (bb, j)->dest == bb)
2693 simple_loop = 1;
2695 /* If this branch is a loop exit then propagate to previous
2696 fallthru block. This catches the cases when it is a simple
2697 loop or when there is an initial branch into the loop. */
2698 if (prev && (loop_exit || simple_loop)
2699 && prev->loop_depth <= bb->loop_depth)
2700 prop = prev;
2702 /* If there is only one adjacent predecessor, don't propagate
2703 outside this loop. This loop_depth test isn't perfect, but
2704 I'm not sure the loop_father member is valid at this point. */
2705 else if (prev && single_pred_p (bb)
2706 && prev->loop_depth == bb->loop_depth)
2707 prop = prev;
2709 /* If this is the JOIN block of a simple IF-THEN then
2710 propagate the hint to the HEADER block. */
2711 else if (prev && prev2
2712 && EDGE_COUNT (bb->preds) == 2
2713 && EDGE_COUNT (prev->preds) == 1
2714 && EDGE_PRED (prev, 0)->src == prev2
2715 && prev2->loop_depth == bb->loop_depth
2716 && GET_CODE (branch_target) != REG)
2717 prop = prev;
2719 /* Don't propagate when:
2720 - this is a simple loop and the hint would be too far
2721 - this is not a simple loop and there are 16 insns in
2722 this block already
2723 - the predecessor block ends in a branch that will be
2724 hinted
2725 - the predecessor block ends in an insn that invalidates
2726 the hint */
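/* Added note: the condition below encodes those rules in order: the
   distance check uses 600 bytes for a simple loop and 16 * 4 bytes
   otherwise, get_branch_target (bbend) == 0 rejects a predecessor ending
   in a branch we would hint, and insn_clobbers_hbr (bbend) rejects one
   ending in a non-jump insn that invalidates the hint.  */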
2727 if (prop
2728 && prop->index >= 0
2729 && (bbend = BB_END (prop))
2730 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2731 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2732 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2734 if (dump_file)
2735 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2736 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2737 bb->index, prop->index, bb->loop_depth,
2738 INSN_UID (branch), loop_exit, simple_loop,
2739 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2741 spu_bb_info[prop->index].prop_jump = branch;
2742 spu_bb_info[prop->index].bb_index = i;
2744 else if (branch_addr - next_addr >= required_dist)
2746 if (dump_file)
2747 fprintf (dump_file, "hint for %i in block %i before %i\n",
2748 INSN_UID (branch), bb->index,
2749 INSN_UID (NEXT_INSN (insn)));
2750 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2751 branch_addr - next_addr, blocks);
2753 branch = 0;
2756 free (spu_bb_info);
2758 if (!sbitmap_empty_p (blocks))
2759 find_many_sub_basic_blocks (blocks);
2761 /* We have to schedule to make sure alignment is ok. */
2762 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2764 /* The hints need to be scheduled, so call it again. */
2765 schedule_insns ();
2767 insert_hbrp ();
2769 pad_bb ();
2771 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2772 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2774 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2775 between its branch label and the branch. We don't move the
2776 label because GCC expects it at the beginning of the block. */
2777 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2778 rtx label_ref = XVECEXP (unspec, 0, 0);
2779 rtx label = XEXP (label_ref, 0);
2780 rtx branch;
2781 int offset = 0;
2782 for (branch = NEXT_INSN (label);
2783 !JUMP_P (branch) && !CALL_P (branch);
2784 branch = NEXT_INSN (branch))
2785 if (NONJUMP_INSN_P (branch))
2786 offset += get_attr_length (branch);
2787 if (offset > 0)
2788 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2791 if (spu_flag_var_tracking)
2793 df_analyze ();
2794 timevar_push (TV_VAR_TRACKING);
2795 variable_tracking_main ();
2796 timevar_pop (TV_VAR_TRACKING);
2797 df_finish_pass (false);
2800 free_bb_for_insn ();
2802 in_spu_reorg = 0;
2806 /* Insn scheduling routines, primarily for dual issue. */
2807 static int
2808 spu_sched_issue_rate (void)
2810 return 2;
2813 static int
2814 uses_ls_unit(rtx insn)
2816 rtx set = single_set (insn);
2817 if (set != 0
2818 && (GET_CODE (SET_DEST (set)) == MEM
2819 || GET_CODE (SET_SRC (set)) == MEM))
2820 return 1;
2821 return 0;
2824 static int
2825 get_pipe (rtx insn)
2827 enum attr_type t;
2828 /* Handle inline asm */
2829 if (INSN_CODE (insn) == -1)
2830 return -1;
2831 t = get_attr_type (insn);
2832 switch (t)
2834 case TYPE_CONVERT:
2835 return -2;
2836 case TYPE_MULTI0:
2837 return -1;
2839 case TYPE_FX2:
2840 case TYPE_FX3:
2841 case TYPE_SPR:
2842 case TYPE_NOP:
2843 case TYPE_FXB:
2844 case TYPE_FPD:
2845 case TYPE_FP6:
2846 case TYPE_FP7:
2847 return 0;
2849 case TYPE_LNOP:
2850 case TYPE_SHUF:
2851 case TYPE_LOAD:
2852 case TYPE_STORE:
2853 case TYPE_BR:
2854 case TYPE_MULTI1:
2855 case TYPE_HBR:
2856 case TYPE_IPREFETCH:
2857 return 1;
2858 default:
2859 abort ();
2864 /* haifa-sched.c has a static variable that keeps track of the current
2865 cycle. It is passed to spu_sched_reorder, and we record it here for
2866 use by spu_sched_variable_issue. It won't be accurate if the
2867 scheduler updates its clock_var between the two calls. */
2868 static int clock_var;
2870 /* This is used to keep track of insn alignment. Set to 0 at the
2871 beginning of each block and increased by the "length" attr of each
2872 insn scheduled. */
2873 static int spu_sched_length;
2875 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2876 ready list appropriately in spu_sched_reorder(). */
2877 static int pipe0_clock;
2878 static int pipe1_clock;
2880 static int prev_clock_var;
2882 static int prev_priority;
2884 /* The SPU needs to load the next ilb sometime during the execution of
2885 the previous ilb. There is a potential conflict if every cycle has a
2886 load or store. To avoid the conflict we make sure the load/store
2887 unit is free for at least one cycle during the execution of insns in
2888 the previous ilb. */
2889 static int spu_ls_first;
2890 static int prev_ls_clock;
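/* Concretely (added note): spu_ls_first records spu_sched_length at the
   start of a run of consecutive cycles that all use the load/store unit;
   spu_sched_reorder emits an iprefetch instead of yet another load/store
   once spu_sched_length - spu_ls_first reaches 4 * 15 bytes (15 insns).  */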
2892 static void
2893 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2894 int max_ready ATTRIBUTE_UNUSED)
2896 spu_sched_length = 0;
2899 static void
2900 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2901 int max_ready ATTRIBUTE_UNUSED)
2903 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2905 /* When any block might be at least 8-byte aligned, assume they
2906 will all be at least 8-byte aligned to make sure dual issue
2907 works out correctly. */
2908 spu_sched_length = 0;
2910 spu_ls_first = INT_MAX;
2911 clock_var = -1;
2912 prev_ls_clock = -1;
2913 pipe0_clock = -1;
2914 pipe1_clock = -1;
2915 prev_clock_var = -1;
2916 prev_priority = -1;
2919 static int
2920 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2921 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2923 int len;
2924 int p;
2925 if (GET_CODE (PATTERN (insn)) == USE
2926 || GET_CODE (PATTERN (insn)) == CLOBBER
2927 || (len = get_attr_length (insn)) == 0)
2928 return more;
2930 spu_sched_length += len;
2932 /* Reset on inline asm */
2933 if (INSN_CODE (insn) == -1)
2935 spu_ls_first = INT_MAX;
2936 pipe0_clock = -1;
2937 pipe1_clock = -1;
2938 return 0;
2940 p = get_pipe (insn);
2941 if (p == 0)
2942 pipe0_clock = clock_var;
2943 else
2944 pipe1_clock = clock_var;
2946 if (in_spu_reorg)
2948 if (clock_var - prev_ls_clock > 1
2949 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2950 spu_ls_first = INT_MAX;
2951 if (uses_ls_unit (insn))
2953 if (spu_ls_first == INT_MAX)
2954 spu_ls_first = spu_sched_length;
2955 prev_ls_clock = clock_var;
2958 /* The scheduler hasn't inserted the nop, but we will later on.
2959 Include those nops in spu_sched_length. */
2960 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2961 spu_sched_length += 4;
2962 prev_clock_var = clock_var;
2964 /* more is -1 when called from spu_sched_reorder for new insns
2965 that don't have INSN_PRIORITY */
2966 if (more >= 0)
2967 prev_priority = INSN_PRIORITY (insn);
2970 /* Always try issuing more insns. spu_sched_reorder will decide
2971 when the cycle should be advanced. */
2972 return 1;
2975 /* This function is called for both TARGET_SCHED_REORDER and
2976 TARGET_SCHED_REORDER2. */
2977 static int
2978 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2979 rtx *ready, int *nreadyp, int clock)
2981 int i, nready = *nreadyp;
2982 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2983 rtx insn;
2985 clock_var = clock;
2987 if (nready <= 0 || pipe1_clock >= clock)
2988 return 0;
2990 /* Find any rtl insns that don't generate assembly insns and schedule
2991 them first. */
2992 for (i = nready - 1; i >= 0; i--)
2994 insn = ready[i];
2995 if (INSN_CODE (insn) == -1
2996 || INSN_CODE (insn) == CODE_FOR_blockage
2997 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2999 ready[i] = ready[nready - 1];
3000 ready[nready - 1] = insn;
3001 return 1;
3005 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3006 for (i = 0; i < nready; i++)
3007 if (INSN_CODE (ready[i]) != -1)
3009 insn = ready[i];
3010 switch (get_attr_type (insn))
3012 default:
3013 case TYPE_MULTI0:
3014 case TYPE_CONVERT:
3015 case TYPE_FX2:
3016 case TYPE_FX3:
3017 case TYPE_SPR:
3018 case TYPE_NOP:
3019 case TYPE_FXB:
3020 case TYPE_FPD:
3021 case TYPE_FP6:
3022 case TYPE_FP7:
3023 pipe_0 = i;
3024 break;
3025 case TYPE_LOAD:
3026 case TYPE_STORE:
3027 pipe_ls = i;
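/* Fall through: loads and stores also issue on pipe 1, so record
   them as a pipe_1 candidate as well.  */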
3028 case TYPE_LNOP:
3029 case TYPE_SHUF:
3030 case TYPE_BR:
3031 case TYPE_MULTI1:
3032 case TYPE_HBR:
3033 pipe_1 = i;
3034 break;
3035 case TYPE_IPREFETCH:
3036 pipe_hbrp = i;
3037 break;
3041 /* In the first scheduling phase, schedule loads and stores together
3042 to increase the chance they will get merged during postreload CSE. */
3043 if (!reload_completed && pipe_ls >= 0)
3045 insn = ready[pipe_ls];
3046 ready[pipe_ls] = ready[nready - 1];
3047 ready[nready - 1] = insn;
3048 return 1;
3051 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3052 if (pipe_hbrp >= 0)
3053 pipe_1 = pipe_hbrp;
3055 /* When we have loads/stores in every cycle of the last 15 insns and
3056 we are about to schedule another load/store, emit an hbrp insn
3057 instead. */
3058 if (in_spu_reorg
3059 && spu_sched_length - spu_ls_first >= 4 * 15
3060 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3062 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3063 recog_memoized (insn);
3064 if (pipe0_clock < clock)
3065 PUT_MODE (insn, TImode);
3066 spu_sched_variable_issue (file, verbose, insn, -1);
3067 return 0;
3070 /* In general, we want to emit nops to increase dual issue, but dual
3071 issue isn't faster when one of the insns could be scheduled later
3072 without affecting the critical path. We look at INSN_PRIORITY to
3073 make a good guess, but it isn't perfect so -mdual-nops=n can be
3074 used to affect it. */
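/* Added note: the two early returns below only apply when spu_dual_nops is
   less than 10.  With -mdual-nops=0 they always trigger, so no nops are
   inserted purely for dual issue; with values 1-9 they trigger only when
   the priority gap (prev_priority vs. the candidate's INSN_PRIORITY)
   exceeds the option value.  */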
3075 if (in_spu_reorg && spu_dual_nops < 10)
3077 /* When we are at an even address and we are not issuing nops to
3078 improve scheduling then we need to advance the cycle. */
3079 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3080 && (spu_dual_nops == 0
3081 || (pipe_1 != -1
3082 && prev_priority >
3083 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3084 return 0;
3086 /* When at an odd address, schedule the highest priority insn
3087 without considering pipeline. */
3088 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3089 && (spu_dual_nops == 0
3090 || (prev_priority >
3091 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3092 return 1;
3096 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3097 pipe0 insn in the ready list, schedule it. */
3098 if (pipe0_clock < clock && pipe_0 >= 0)
3099 schedule_i = pipe_0;
3101 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3102 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3103 else
3104 schedule_i = pipe_1;
3106 if (schedule_i > -1)
3108 insn = ready[schedule_i];
3109 ready[schedule_i] = ready[nready - 1];
3110 ready[nready - 1] = insn;
3111 return 1;
3113 return 0;
3116 /* INSN is dependent on DEP_INSN. */
3117 static int
3118 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3120 rtx set;
3122 /* The blockage pattern is used to prevent instructions from being
3123 moved across it and has no cost. */
3124 if (INSN_CODE (insn) == CODE_FOR_blockage
3125 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3126 return 0;
3128 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3129 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3130 return 0;
3132 /* Make sure hbrps are spread out. */
3133 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3134 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3135 return 8;
3137 /* Make sure hints and hbrps are 2 cycles apart. */
3138 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3139 || INSN_CODE (insn) == CODE_FOR_hbr)
3140 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3141 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3142 return 2;
3144 /* An hbrp has no real dependency on other insns. */
3145 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3146 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3147 return 0;
3149 /* Assuming that it is unlikely an argument register will be used in
3150 the first cycle of the called function, we reduce the cost for
3151 slightly better scheduling of dep_insn. When not hinted, the
3152 mispredicted branch would hide the cost as well. */
3153 if (CALL_P (insn))
3155 rtx target = get_branch_target (insn);
3156 if (GET_CODE (target) != REG || !set_of (target, insn))
3157 return cost - 2;
3158 return cost;
3161 /* And when returning from a function, let's assume the return values
3162 are completed sooner too. */
3163 if (CALL_P (dep_insn))
3164 return cost - 2;
3166 /* Make sure an instruction that loads from the back chain is scheduled
3167 away from the return instruction so a hint is more likely to get
3168 issued. */
3169 if (INSN_CODE (insn) == CODE_FOR__return
3170 && (set = single_set (dep_insn))
3171 && GET_CODE (SET_DEST (set)) == REG
3172 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3173 return 20;
3175 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3176 scheduler makes every insn in a block anti-dependent on the final
3177 jump_insn. We adjust here so higher cost insns will get scheduled
3178 earlier. */
3179 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3180 return insn_cost (dep_insn) - 3;
3182 return cost;
3185 /* Create a CONST_DOUBLE from a string. */
3186 struct rtx_def *
3187 spu_float_const (const char *string, enum machine_mode mode)
3189 REAL_VALUE_TYPE value;
3190 value = REAL_VALUE_ATOF (string, mode);
3191 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3195 spu_constant_address_p (rtx x)
3197 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3198 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3199 || GET_CODE (x) == HIGH);
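/* Added illustration for which_immediate_load below: 0x1234 fits the
   16-bit signed il range (SPU_IL), 0x30000 fits the 18-bit unsigned ila
   range (SPU_ILA), 0x12341234 repeats the same halfword (SPU_ILH), and
   0x12340000 has a zero low halfword (SPU_ILHU).  */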
3202 static enum spu_immediate
3203 which_immediate_load (HOST_WIDE_INT val)
3205 gcc_assert (val == trunc_int_for_mode (val, SImode));
3207 if (val >= -0x8000 && val <= 0x7fff)
3208 return SPU_IL;
3209 if (val >= 0 && val <= 0x3ffff)
3210 return SPU_ILA;
3211 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3212 return SPU_ILH;
3213 if ((val & 0xffff) == 0)
3214 return SPU_ILHU;
3216 return SPU_NONE;
3219 /* Return true when OP can be loaded by one of the il instructions, or
3220 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3222 immediate_load_p (rtx op, enum machine_mode mode)
3224 if (CONSTANT_P (op))
3226 enum immediate_class c = classify_immediate (op, mode);
3227 return c == IC_IL1 || c == IC_IL1s
3228 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3230 return 0;
3233 /* Return true if the first SIZE bytes of ARR form a constant that can be
3234 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3235 represent the size and offset of the instruction to use. */
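/* Worked example (added): a cwd-style pattern inserting a word at offset 4
   looks like { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18, ... 0x1f };
   cpat_info recognizes it, returns 1 and sets *prun = 4 and *pstart = 4.
   Every byte outside the run must be the identity value i + 16.  */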
3236 static int
3237 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3239 int cpat, run, i, start;
3240 cpat = 1;
3241 run = 0;
3242 start = -1;
3243 for (i = 0; i < size && cpat; i++)
3244 if (arr[i] != i+16)
3246 if (!run)
3248 start = i;
3249 if (arr[i] == 3)
3250 run = 1;
3251 else if (arr[i] == 2 && arr[i+1] == 3)
3252 run = 2;
3253 else if (arr[i] == 0)
3255 while (arr[i+run] == run && i+run < 16)
3256 run++;
3257 if (run != 4 && run != 8)
3258 cpat = 0;
3260 else
3261 cpat = 0;
3262 if ((i & (run-1)) != 0)
3263 cpat = 0;
3264 i += run;
3266 else
3267 cpat = 0;
3269 if (cpat && (run || size < 16))
3271 if (run == 0)
3272 run = 1;
3273 if (prun)
3274 *prun = run;
3275 if (pstart)
3276 *pstart = start == -1 ? 16-run : start;
3277 return 1;
3279 return 0;
3282 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3283 it into a register. MODE is only valid when OP is a CONST_INT. */
3284 static enum immediate_class
3285 classify_immediate (rtx op, enum machine_mode mode)
3287 HOST_WIDE_INT val;
3288 unsigned char arr[16];
3289 int i, j, repeated, fsmbi, repeat;
3291 gcc_assert (CONSTANT_P (op));
3293 if (GET_MODE (op) != VOIDmode)
3294 mode = GET_MODE (op);
3296 /* A V4SI const_vector with all identical symbols is ok. */
3297 if (!flag_pic
3298 && mode == V4SImode
3299 && GET_CODE (op) == CONST_VECTOR
3300 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3301 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3302 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3303 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3304 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3305 op = CONST_VECTOR_ELT (op, 0);
3307 switch (GET_CODE (op))
3309 case SYMBOL_REF:
3310 case LABEL_REF:
3311 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3313 case CONST:
3314 /* We can never know if the resulting address fits in 18 bits and can be
3315 loaded with ila. For now, assume the address will not overflow if
3316 the displacement is "small" (fits 'K' constraint). */
3317 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3319 rtx sym = XEXP (XEXP (op, 0), 0);
3320 rtx cst = XEXP (XEXP (op, 0), 1);
3322 if (GET_CODE (sym) == SYMBOL_REF
3323 && GET_CODE (cst) == CONST_INT
3324 && satisfies_constraint_K (cst))
3325 return IC_IL1s;
3327 return IC_IL2s;
3329 case HIGH:
3330 return IC_IL1s;
3332 case CONST_VECTOR:
3333 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3334 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3335 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3336 return IC_POOL;
3337 /* Fall through. */
3339 case CONST_INT:
3340 case CONST_DOUBLE:
3341 constant_to_array (mode, op, arr);
3343 /* Check that each 4-byte slot is identical. */
3344 repeated = 1;
3345 for (i = 4; i < 16; i += 4)
3346 for (j = 0; j < 4; j++)
3347 if (arr[j] != arr[i + j])
3348 repeated = 0;
3350 if (repeated)
3352 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3353 val = trunc_int_for_mode (val, SImode);
3355 if (which_immediate_load (val) != SPU_NONE)
3356 return IC_IL1;
3359 /* Any mode of 2 bytes or smaller can be loaded with an il
3360 instruction. */
3361 gcc_assert (GET_MODE_SIZE (mode) > 2);
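/* Added note: the loop below accepts any constant whose bytes are either
   zero or a single repeated nonzero value; when that value is 0xff a plain
   fsmbi can build it (IC_FSMBI), otherwise it is classified IC_FSMBI2.  */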
3363 fsmbi = 1;
3364 repeat = 0;
3365 for (i = 0; i < 16 && fsmbi; i++)
3366 if (arr[i] != 0 && repeat == 0)
3367 repeat = arr[i];
3368 else if (arr[i] != 0 && arr[i] != repeat)
3369 fsmbi = 0;
3370 if (fsmbi)
3371 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3373 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3374 return IC_CPAT;
3376 if (repeated)
3377 return IC_IL2;
3379 return IC_POOL;
3380 default:
3381 break;
3383 gcc_unreachable ();
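/* Added illustration for which_logical_immediate below: 0x1ff is in the
   10-bit signed ori range (SPU_ORI), 0x1234 needs iohl (SPU_IOHL), and
   0xfff0fff0 repeats the halfword 0xfff0 == -16, so it maps to SPU_ORHI.  */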
3386 static enum spu_immediate
3387 which_logical_immediate (HOST_WIDE_INT val)
3389 gcc_assert (val == trunc_int_for_mode (val, SImode));
3391 if (val >= -0x200 && val <= 0x1ff)
3392 return SPU_ORI;
3393 if (val >= 0 && val <= 0xffff)
3394 return SPU_IOHL;
3395 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3397 val = trunc_int_for_mode (val, HImode);
3398 if (val >= -0x200 && val <= 0x1ff)
3399 return SPU_ORHI;
3400 if ((val & 0xff) == ((val >> 8) & 0xff))
3402 val = trunc_int_for_mode (val, QImode);
3403 if (val >= -0x200 && val <= 0x1ff)
3404 return SPU_ORBI;
3407 return SPU_NONE;
3410 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3411 CONST_DOUBLEs. */
3412 static int
3413 const_vector_immediate_p (rtx x)
3415 int i;
3416 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3417 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3418 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3419 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3420 return 0;
3421 return 1;
3425 logical_immediate_p (rtx op, enum machine_mode mode)
3427 HOST_WIDE_INT val;
3428 unsigned char arr[16];
3429 int i, j;
3431 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3432 || GET_CODE (op) == CONST_VECTOR);
3434 if (GET_CODE (op) == CONST_VECTOR
3435 && !const_vector_immediate_p (op))
3436 return 0;
3438 if (GET_MODE (op) != VOIDmode)
3439 mode = GET_MODE (op);
3441 constant_to_array (mode, op, arr);
3443 /* Check that bytes are repeated. */
3444 for (i = 4; i < 16; i += 4)
3445 for (j = 0; j < 4; j++)
3446 if (arr[j] != arr[i + j])
3447 return 0;
3449 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3450 val = trunc_int_for_mode (val, SImode);
3452 i = which_logical_immediate (val);
3453 return i != SPU_NONE && i != SPU_IOHL;
3457 iohl_immediate_p (rtx op, enum machine_mode mode)
3459 HOST_WIDE_INT val;
3460 unsigned char arr[16];
3461 int i, j;
3463 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3464 || GET_CODE (op) == CONST_VECTOR);
3466 if (GET_CODE (op) == CONST_VECTOR
3467 && !const_vector_immediate_p (op))
3468 return 0;
3470 if (GET_MODE (op) != VOIDmode)
3471 mode = GET_MODE (op);
3473 constant_to_array (mode, op, arr);
3475 /* Check that bytes are repeated. */
3476 for (i = 4; i < 16; i += 4)
3477 for (j = 0; j < 4; j++)
3478 if (arr[j] != arr[i + j])
3479 return 0;
3481 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3482 val = trunc_int_for_mode (val, SImode);
3484 return val >= 0 && val <= 0xffff;
3488 arith_immediate_p (rtx op, enum machine_mode mode,
3489 HOST_WIDE_INT low, HOST_WIDE_INT high)
3491 HOST_WIDE_INT val;
3492 unsigned char arr[16];
3493 int bytes, i, j;
3495 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3496 || GET_CODE (op) == CONST_VECTOR);
3498 if (GET_CODE (op) == CONST_VECTOR
3499 && !const_vector_immediate_p (op))
3500 return 0;
3502 if (GET_MODE (op) != VOIDmode)
3503 mode = GET_MODE (op);
3505 constant_to_array (mode, op, arr);
3507 if (VECTOR_MODE_P (mode))
3508 mode = GET_MODE_INNER (mode);
3510 bytes = GET_MODE_SIZE (mode);
3511 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3513 /* Check that bytes are repeated. */
3514 for (i = bytes; i < 16; i += bytes)
3515 for (j = 0; j < bytes; j++)
3516 if (arr[j] != arr[i + j])
3517 return 0;
3519 val = arr[0];
3520 for (j = 1; j < bytes; j++)
3521 val = (val << 8) | arr[j];
3523 val = trunc_int_for_mode (val, mode);
3525 return val >= low && val <= high;
3528 /* TRUE when op is an immediate and an exact power of 2, and given that
3529 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3530 all entries must be the same. */
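/* Added example: the SFmode constant 8.0f has the bit pattern 0x41000000,
   i.e. a zero mantissa and a biased exponent of 130, so exp = 3 and
   exp2_immediate_p accepts it whenever 3 lies within [LOW, HIGH].  */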
3531 bool
3532 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3534 enum machine_mode int_mode;
3535 HOST_WIDE_INT val;
3536 unsigned char arr[16];
3537 int bytes, i, j;
3539 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3540 || GET_CODE (op) == CONST_VECTOR);
3542 if (GET_CODE (op) == CONST_VECTOR
3543 && !const_vector_immediate_p (op))
3544 return 0;
3546 if (GET_MODE (op) != VOIDmode)
3547 mode = GET_MODE (op);
3549 constant_to_array (mode, op, arr);
3551 if (VECTOR_MODE_P (mode))
3552 mode = GET_MODE_INNER (mode);
3554 bytes = GET_MODE_SIZE (mode);
3555 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3557 /* Check that bytes are repeated. */
3558 for (i = bytes; i < 16; i += bytes)
3559 for (j = 0; j < bytes; j++)
3560 if (arr[j] != arr[i + j])
3561 return 0;
3563 val = arr[0];
3564 for (j = 1; j < bytes; j++)
3565 val = (val << 8) | arr[j];
3567 val = trunc_int_for_mode (val, int_mode);
3569 /* Currently, we only handle SFmode */
3570 gcc_assert (mode == SFmode);
3571 if (mode == SFmode)
3573 int exp = (val >> 23) - 127;
3574 return val > 0 && (val & 0x007fffff) == 0
3575 && exp >= low && exp <= high;
3577 return FALSE;
3580 /* We accept:
3581 - any 32-bit constant (SImode, SFmode)
3582 - any constant that can be generated with fsmbi (any mode)
3583 - a 64-bit constant where the high and low bits are identical
3584 (DImode, DFmode)
3585 - a 128-bit constant where the four 32-bit words match. */
3587 spu_legitimate_constant_p (rtx x)
3589 if (GET_CODE (x) == HIGH)
3590 x = XEXP (x, 0);
3591 /* V4SI with all identical symbols is valid. */
3592 if (!flag_pic
3593 && GET_MODE (x) == V4SImode
3594 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3595 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3596 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3597 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3598 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3599 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3601 if (GET_CODE (x) == CONST_VECTOR
3602 && !const_vector_immediate_p (x))
3603 return 0;
3604 return 1;
3607 /* Valid addresses are:
3608 - symbol_ref, label_ref, const
3609 - reg
3610 - reg + const, where either reg or const is 16-byte aligned
3611 - reg + reg, alignment doesn't matter
3612 The alignment matters in the reg+const case because lqd and stqd
3613 ignore the 4 least significant bits of the const. (TODO: It might be
3614 preferable to allow any alignment and fix it up when splitting.) */
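/* Added examples: (plus (reg) (const_int 32)) is accepted for any base
   register because the offset is a multiple of 16, (plus (reg) (const_int 4))
   is accepted only when the base is one of the always-aligned registers
   recognized by regno_aligned_for_load, and (plus (reg) (reg)) is accepted
   whenever both registers are valid base/index registers.  */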
3616 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3617 rtx x, int reg_ok_strict)
3619 if (mode == TImode && GET_CODE (x) == AND
3620 && GET_CODE (XEXP (x, 1)) == CONST_INT
3621 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3622 x = XEXP (x, 0);
3623 switch (GET_CODE (x))
3625 case SYMBOL_REF:
3626 case LABEL_REF:
3627 return !TARGET_LARGE_MEM;
3629 case CONST:
3630 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3632 rtx sym = XEXP (XEXP (x, 0), 0);
3633 rtx cst = XEXP (XEXP (x, 0), 1);
3635 /* Accept any symbol_ref + constant, assuming it does not
3636 wrap around the local store addressability limit. */
3637 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3638 return 1;
3640 return 0;
3642 case CONST_INT:
3643 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3645 case SUBREG:
3646 x = XEXP (x, 0);
3647 gcc_assert (GET_CODE (x) == REG);
3649 case REG:
3650 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3652 case PLUS:
3653 case LO_SUM:
3655 rtx op0 = XEXP (x, 0);
3656 rtx op1 = XEXP (x, 1);
3657 if (GET_CODE (op0) == SUBREG)
3658 op0 = XEXP (op0, 0);
3659 if (GET_CODE (op1) == SUBREG)
3660 op1 = XEXP (op1, 0);
3661 /* We can't just accept any aligned register because CSE can
3662 change it to a register that is not marked aligned and then
3663 recog will fail. So we only accept frame registers because
3664 they will only be changed to other frame registers. */
3665 if (GET_CODE (op0) == REG
3666 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3667 && GET_CODE (op1) == CONST_INT
3668 && INTVAL (op1) >= -0x2000
3669 && INTVAL (op1) <= 0x1fff
3670 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3671 return 1;
3672 if (GET_CODE (op0) == REG
3673 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3674 && GET_CODE (op1) == REG
3675 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3676 return 1;
3678 break;
3680 default:
3681 break;
3683 return 0;
3686 /* When the address is reg + const_int, force the const_int into a
3687 register. */
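/* Added example: an address such as (plus (reg) (const_int 5)) that fails
   the alignment check above is rewritten as (plus (reg) (reg')) with the
   constant 5 forced into a fresh register reg', turning it into a reg + reg
   form that spu_legitimate_address accepts.  */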
3689 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3690 enum machine_mode mode ATTRIBUTE_UNUSED)
3692 rtx op0, op1;
3693 /* Make sure both operands are registers. */
3694 if (GET_CODE (x) == PLUS)
3696 op0 = XEXP (x, 0);
3697 op1 = XEXP (x, 1);
3698 if (ALIGNED_SYMBOL_REF_P (op0))
3700 op0 = force_reg (Pmode, op0);
3701 mark_reg_pointer (op0, 128);
3703 else if (GET_CODE (op0) != REG)
3704 op0 = force_reg (Pmode, op0);
3705 if (ALIGNED_SYMBOL_REF_P (op1))
3707 op1 = force_reg (Pmode, op1);
3708 mark_reg_pointer (op1, 128);
3710 else if (GET_CODE (op1) != REG)
3711 op1 = force_reg (Pmode, op1);
3712 x = gen_rtx_PLUS (Pmode, op0, op1);
3714 return x;
3717 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3718 struct attribute_spec.handler. */
3719 static tree
3720 spu_handle_fndecl_attribute (tree * node,
3721 tree name,
3722 tree args ATTRIBUTE_UNUSED,
3723 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3725 if (TREE_CODE (*node) != FUNCTION_DECL)
3727 warning (0, "`%s' attribute only applies to functions",
3728 IDENTIFIER_POINTER (name));
3729 *no_add_attrs = true;
3732 return NULL_TREE;
3735 /* Handle the "vector" attribute. */
3736 static tree
3737 spu_handle_vector_attribute (tree * node, tree name,
3738 tree args ATTRIBUTE_UNUSED,
3739 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3741 tree type = *node, result = NULL_TREE;
3742 enum machine_mode mode;
3743 int unsigned_p;
3745 while (POINTER_TYPE_P (type)
3746 || TREE_CODE (type) == FUNCTION_TYPE
3747 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3748 type = TREE_TYPE (type);
3750 mode = TYPE_MODE (type);
3752 unsigned_p = TYPE_UNSIGNED (type);
3753 switch (mode)
3755 case DImode:
3756 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3757 break;
3758 case SImode:
3759 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3760 break;
3761 case HImode:
3762 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3763 break;
3764 case QImode:
3765 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3766 break;
3767 case SFmode:
3768 result = V4SF_type_node;
3769 break;
3770 case DFmode:
3771 result = V2DF_type_node;
3772 break;
3773 default:
3774 break;
3777 /* Propagate qualifiers attached to the element type
3778 onto the vector type. */
3779 if (result && result != type && TYPE_QUALS (type))
3780 result = build_qualified_type (result, TYPE_QUALS (type));
3782 *no_add_attrs = true; /* No need to hang on to the attribute. */
3784 if (!result)
3785 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3786 else
3787 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3789 return NULL_TREE;
3792 /* Return nonzero if FUNC is a naked function. */
3793 static int
3794 spu_naked_function_p (tree func)
3796 tree a;
3798 if (TREE_CODE (func) != FUNCTION_DECL)
3799 abort ();
3801 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3802 return a != NULL_TREE;
3806 spu_initial_elimination_offset (int from, int to)
3808 int saved_regs_size = spu_saved_regs_size ();
3809 int sp_offset = 0;
3810 if (!current_function_is_leaf || crtl->outgoing_args_size
3811 || get_frame_size () || saved_regs_size)
3812 sp_offset = STACK_POINTER_OFFSET;
3813 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3814 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3815 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3816 return get_frame_size ();
3817 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3818 return sp_offset + crtl->outgoing_args_size
3819 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3820 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3821 return get_frame_size () + saved_regs_size + sp_offset;
3822 else
3823 gcc_unreachable ();
3827 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3829 enum machine_mode mode = TYPE_MODE (type);
3830 int byte_size = ((mode == BLKmode)
3831 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3833 /* Make sure small structs are left justified in a register. */
3834 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3835 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3837 enum machine_mode smode;
3838 rtvec v;
3839 int i;
3840 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3841 int n = byte_size / UNITS_PER_WORD;
3842 v = rtvec_alloc (nregs);
3843 for (i = 0; i < n; i++)
3845 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3846 gen_rtx_REG (TImode,
3847 FIRST_RETURN_REGNUM
3848 + i),
3849 GEN_INT (UNITS_PER_WORD * i));
3850 byte_size -= UNITS_PER_WORD;
3853 if (n < nregs)
3855 if (byte_size < 4)
3856 byte_size = 4;
3857 smode =
3858 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3859 RTVEC_ELT (v, n) =
3860 gen_rtx_EXPR_LIST (VOIDmode,
3861 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3862 GEN_INT (UNITS_PER_WORD * n));
3864 return gen_rtx_PARALLEL (mode, v);
3866 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3870 spu_function_arg (CUMULATIVE_ARGS cum,
3871 enum machine_mode mode,
3872 tree type, int named ATTRIBUTE_UNUSED)
3874 int byte_size;
3876 if (cum >= MAX_REGISTER_ARGS)
3877 return 0;
3879 byte_size = ((mode == BLKmode)
3880 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3882 /* The ABI does not allow parameters to be passed partially in
3883 reg and partially in stack. */
3884 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3885 return 0;
3887 /* Make sure small structs are left justified in a register. */
3888 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3889 && byte_size < UNITS_PER_WORD && byte_size > 0)
3891 enum machine_mode smode;
3892 rtx gr_reg;
3893 if (byte_size < 4)
3894 byte_size = 4;
3895 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3896 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3897 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3898 const0_rtx);
3899 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3901 else
3902 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3905 /* Variable sized types are passed by reference. */
3906 static bool
3907 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3908 enum machine_mode mode ATTRIBUTE_UNUSED,
3909 const_tree type, bool named ATTRIBUTE_UNUSED)
3911 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3915 /* Var args. */
3917 /* Create and return the va_list datatype.
3919 On SPU, va_list is an array type equivalent to
3921 typedef struct __va_list_tag
3923 void *__args __attribute__((__aligned(16)));
3924 void *__skip __attribute__((__aligned(16)));
3926 } va_list[1];
3928 where __args points to the arg that will be returned by the next
3929 va_arg(), and __skip points to the previous stack frame such that
3930 when __args == __skip we should advance __args by 32 bytes. */
3931 static tree
3932 spu_build_builtin_va_list (void)
3934 tree f_args, f_skip, record, type_decl;
3935 bool owp;
3937 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3939 type_decl =
3940 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3942 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3943 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3945 DECL_FIELD_CONTEXT (f_args) = record;
3946 DECL_ALIGN (f_args) = 128;
3947 DECL_USER_ALIGN (f_args) = 1;
3949 DECL_FIELD_CONTEXT (f_skip) = record;
3950 DECL_ALIGN (f_skip) = 128;
3951 DECL_USER_ALIGN (f_skip) = 1;
3953 TREE_CHAIN (record) = type_decl;
3954 TYPE_NAME (record) = type_decl;
3955 TYPE_FIELDS (record) = f_args;
3956 TREE_CHAIN (f_args) = f_skip;
3958 /* We know this is being padded and that is what we want. It is an internal
3959 type, so hide the warnings from the user. */
3960 owp = warn_padded;
3961 warn_padded = false;
3963 layout_type (record);
3965 warn_padded = owp;
3967 /* The correct type is an array type of one element. */
3968 return build_array_type (record, build_index_type (size_zero_node));
3971 /* Implement va_start by filling the va_list structure VALIST.
3972 NEXTARG points to the first anonymous stack argument.
3974 The following global variables are used to initialize
3975 the va_list structure:
3977 crtl->args.info;
3978 the CUMULATIVE_ARGS for this function
3980 crtl->args.arg_offset_rtx:
3981 holds the offset of the first anonymous stack argument
3982 (relative to the virtual arg pointer). */
3984 static void
3985 spu_va_start (tree valist, rtx nextarg)
3987 tree f_args, f_skip;
3988 tree args, skip, t;
3990 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3991 f_skip = TREE_CHAIN (f_args);
3993 valist = build_va_arg_indirect_ref (valist);
3994 args =
3995 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3996 skip =
3997 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3999 /* Find the __args area. */
4000 t = make_tree (TREE_TYPE (args), nextarg);
4001 if (crtl->args.pretend_args_size > 0)
4002 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4003 size_int (-STACK_POINTER_OFFSET));
4004 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4005 TREE_SIDE_EFFECTS (t) = 1;
4006 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4008 /* Find the __skip area. */
4009 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4010 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4011 size_int (crtl->args.pretend_args_size
4012 - STACK_POINTER_OFFSET));
4013 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4014 TREE_SIDE_EFFECTS (t) = 1;
4015 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4018 /* Gimplify va_arg by updating the va_list structure
4019 VALIST as required to retrieve an argument of type
4020 TYPE, and returning that argument.
4022 ret = va_arg(VALIST, TYPE);
4024 generates code equivalent to:
4026 paddedsize = (sizeof(TYPE) + 15) & -16;
4027 if (VALIST.__args + paddedsize > VALIST.__skip
4028 && VALIST.__args <= VALIST.__skip)
4029 addr = VALIST.__skip + 32;
4030 else
4031 addr = VALIST.__args;
4032 VALIST.__args = addr + paddedsize;
4033 ret = *(TYPE *)addr;
4035 static tree
4036 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4037 gimple_seq * post_p ATTRIBUTE_UNUSED)
4039 tree f_args, f_skip;
4040 tree args, skip;
4041 HOST_WIDE_INT size, rsize;
4042 tree paddedsize, addr, tmp;
4043 bool pass_by_reference_p;
4045 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4046 f_skip = TREE_CHAIN (f_args);
4048 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4049 args =
4050 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4051 skip =
4052 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4054 addr = create_tmp_var (ptr_type_node, "va_arg");
4055 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4057 /* if an object is dynamically sized, a pointer to it is passed
4058 instead of the object itself. */
4059 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4060 false);
4061 if (pass_by_reference_p)
4062 type = build_pointer_type (type);
4063 size = int_size_in_bytes (type);
4064 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4066 /* build conditional expression to calculate addr. The expression
4067 will be gimplified later. */
4068 paddedsize = size_int (rsize);
4069 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4070 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4071 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4072 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4073 unshare_expr (skip)));
4075 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4076 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4077 size_int (32)), unshare_expr (args));
4079 gimplify_assign (addr, tmp, pre_p);
4081 /* update VALIST.__args */
4082 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4083 gimplify_assign (unshare_expr (args), tmp, pre_p);
4085 addr = fold_convert (build_pointer_type (type), addr);
4087 if (pass_by_reference_p)
4088 addr = build_va_arg_indirect_ref (addr);
4090 return build_va_arg_indirect_ref (addr);
4093 /* Save parameter registers starting with the register that corresponds
4094 to the first unnamed parameter. If the first unnamed parameter is
4095 on the stack then save no registers. Set pretend_args_size to the
4096 amount of space needed to save the registers. */
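/* For instance (an informal sketch, not a guaranteed figure): if three
   argument registers remain after the last named parameter, the loop
   below stores three V4SImode quadwords and sets

     *pretend_size = 3 * 16 = 48 bytes.  */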
4097 void
4098 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4099 tree type, int *pretend_size, int no_rtl)
4101 if (!no_rtl)
4103 rtx tmp;
4104 int regno;
4105 int offset;
4106 int ncum = *cum;
4108 /* cum currently points to the last named argument; we want to
4109 start at the next argument. */
4110 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4112 offset = -STACK_POINTER_OFFSET;
4113 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4115 tmp = gen_frame_mem (V4SImode,
4116 plus_constant (virtual_incoming_args_rtx,
4117 offset));
4118 emit_move_insn (tmp,
4119 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4120 offset += 16;
4122 *pretend_size = offset + STACK_POINTER_OFFSET;
4126 void
4127 spu_conditional_register_usage (void)
4129 if (flag_pic)
4131 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4132 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4136 /* This is called to decide when we can simplify a load instruction. We
4137 must only return true for registers which we know will always be
4138 aligned, taking into account that CSE might replace this reg with
4139 another one that has not been marked aligned.
4140 So this is really only true for frame, stack and virtual registers,
4141 which we know are always aligned and should not be adversely affected
4142 by CSE. */
4143 static int
4144 regno_aligned_for_load (int regno)
4146 return regno == FRAME_POINTER_REGNUM
4147 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4148 || regno == ARG_POINTER_REGNUM
4149 || regno == STACK_POINTER_REGNUM
4150 || (regno >= FIRST_VIRTUAL_REGISTER
4151 && regno <= LAST_VIRTUAL_REGISTER);
4154 /* Return TRUE when mem is known to be 16-byte aligned. */
4156 aligned_mem_p (rtx mem)
4158 if (MEM_ALIGN (mem) >= 128)
4159 return 1;
4160 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4161 return 1;
4162 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4164 rtx p0 = XEXP (XEXP (mem, 0), 0);
4165 rtx p1 = XEXP (XEXP (mem, 0), 1);
4166 if (regno_aligned_for_load (REGNO (p0)))
4168 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4169 return 1;
4170 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4171 return 1;
4174 else if (GET_CODE (XEXP (mem, 0)) == REG)
4176 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4177 return 1;
4179 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4180 return 1;
4181 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4183 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4184 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4185 if (GET_CODE (p0) == SYMBOL_REF
4186 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4187 return 1;
4189 return 0;
4192 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4193 into its SYMBOL_REF_FLAGS. */
4194 static void
4195 spu_encode_section_info (tree decl, rtx rtl, int first)
4197 default_encode_section_info (decl, rtl, first);
4199 /* If a variable has a forced alignment to < 16 bytes, mark it with
4200 SYMBOL_FLAG_ALIGN1. */
4201 if (TREE_CODE (decl) == VAR_DECL
4202 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4203 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4206 /* Return TRUE if we are certain the mem refers to a complete object
4207 which is both 16-byte aligned and padded to a 16-byte boundary. This
4208 would make it safe to store with a single instruction.
4209 We guarantee the alignment and padding for static objects by aligning
4210 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4211 FIXME: We currently cannot guarantee this for objects on the stack
4212 because assign_parm_setup_stack calls assign_stack_local with the
4213 alignment of the parameter mode and in that case the alignment never
4214 gets adjusted by LOCAL_ALIGNMENT. */
4215 static int
4216 store_with_one_insn_p (rtx mem)
4218 rtx addr = XEXP (mem, 0);
4219 if (GET_MODE (mem) == BLKmode)
4220 return 0;
4221 /* Only static objects. */
4222 if (GET_CODE (addr) == SYMBOL_REF)
4224 /* We use the associated declaration to make sure the access is
4225 referring to the whole object.
4226 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4227 if it is necessary. Will there be cases where one exists, and
4228 the other does not? Will there be cases where both exist, but
4229 have different types? */
4230 tree decl = MEM_EXPR (mem);
4231 if (decl
4232 && TREE_CODE (decl) == VAR_DECL
4233 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4234 return 1;
4235 decl = SYMBOL_REF_DECL (addr);
4236 if (decl
4237 && TREE_CODE (decl) == VAR_DECL
4238 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4239 return 1;
4241 return 0;
4245 spu_expand_mov (rtx * ops, enum machine_mode mode)
4247 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4248 abort ();
4250 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4252 rtx from = SUBREG_REG (ops[1]);
4253 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4255 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4256 && GET_MODE_CLASS (imode) == MODE_INT
4257 && subreg_lowpart_p (ops[1]));
4259 if (GET_MODE_SIZE (imode) < 4)
4260 imode = SImode;
4261 if (imode != GET_MODE (from))
4262 from = gen_rtx_SUBREG (imode, from, 0);
4264 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4266 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4267 emit_insn (GEN_FCN (icode) (ops[0], from));
4269 else
4270 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4271 return 1;
4274 /* At least one of the operands needs to be a register. */
4275 if ((reload_in_progress | reload_completed) == 0
4276 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4278 rtx temp = force_reg (mode, ops[1]);
4279 emit_move_insn (ops[0], temp);
4280 return 1;
4282 if (reload_in_progress || reload_completed)
4284 if (CONSTANT_P (ops[1]))
4285 return spu_split_immediate (ops);
4286 return 0;
4288 else
4290 if (GET_CODE (ops[0]) == MEM)
4292 if (!spu_valid_move (ops))
4294 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4295 gen_reg_rtx (TImode)));
4296 return 1;
4299 else if (GET_CODE (ops[1]) == MEM)
4301 if (!spu_valid_move (ops))
4303 emit_insn (gen_load
4304 (ops[0], ops[1], gen_reg_rtx (TImode),
4305 gen_reg_rtx (SImode)));
4306 return 1;
4309 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4310 extend them. */
4311 if (GET_CODE (ops[1]) == CONST_INT)
4313 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4314 if (val != INTVAL (ops[1]))
4316 emit_move_insn (ops[0], GEN_INT (val));
4317 return 1;
4321 return 0;
4324 void
4325 spu_split_load (rtx * ops)
4327 enum machine_mode mode = GET_MODE (ops[0]);
4328 rtx addr, load, rot, mem, p0, p1;
4329 int rot_amt;
4331 addr = XEXP (ops[1], 0);
4333 rot = 0;
4334 rot_amt = 0;
4335 if (GET_CODE (addr) == PLUS)
4337 /* 8 cases:
4338 aligned reg + aligned reg => lqx
4339 aligned reg + unaligned reg => lqx, rotqby
4340 aligned reg + aligned const => lqd
4341 aligned reg + unaligned const => lqd, rotqbyi
4342 unaligned reg + aligned reg => lqx, rotqby
4343 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4344 unaligned reg + aligned const => lqd, rotqby
4345 unaligned reg + unaligned const => not allowed by legitimate address
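      /* As an informal illustration of the `aligned reg + unaligned const'
         case above (not actual compiler output), an SImode load from ra+4,
         where ra is known to be 16-byte aligned, is expected to expand
         roughly to

           lqd     rt, 0(ra)       load the enclosing quadword
           rotqbyi rt, rt, 4       rotate the wanted word into the
                                   preferred slot

         the exact registers and rotate amount depend on the operands.  */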
4347 p0 = XEXP (addr, 0);
4348 p1 = XEXP (addr, 1);
4349 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4351 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4353 emit_insn (gen_addsi3 (ops[3], p0, p1));
4354 rot = ops[3];
4356 else
4357 rot = p0;
4359 else
4361 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4363 rot_amt = INTVAL (p1) & 15;
4364 p1 = GEN_INT (INTVAL (p1) & -16);
4365 addr = gen_rtx_PLUS (SImode, p0, p1);
4367 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4368 rot = p1;
4371 else if (GET_CODE (addr) == REG)
4373 if (!regno_aligned_for_load (REGNO (addr)))
4374 rot = addr;
4376 else if (GET_CODE (addr) == CONST)
4378 if (GET_CODE (XEXP (addr, 0)) == PLUS
4379 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4380 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4382 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4383 if (rot_amt & -16)
4384 addr = gen_rtx_CONST (Pmode,
4385 gen_rtx_PLUS (Pmode,
4386 XEXP (XEXP (addr, 0), 0),
4387 GEN_INT (rot_amt & -16)));
4388 else
4389 addr = XEXP (XEXP (addr, 0), 0);
4391 else
4392 rot = addr;
4394 else if (GET_CODE (addr) == CONST_INT)
4396 rot_amt = INTVAL (addr);
4397 addr = GEN_INT (rot_amt & -16);
4399 else if (!ALIGNED_SYMBOL_REF_P (addr))
4400 rot = addr;
4402 if (GET_MODE_SIZE (mode) < 4)
4403 rot_amt += GET_MODE_SIZE (mode) - 4;
4405 rot_amt &= 15;
4407 if (rot && rot_amt)
4409 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4410 rot = ops[3];
4411 rot_amt = 0;
4414 load = ops[2];
4416 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4417 mem = change_address (ops[1], TImode, addr);
4419 emit_insn (gen_movti (load, mem));
4421 if (rot)
4422 emit_insn (gen_rotqby_ti (load, load, rot));
4423 else if (rot_amt)
4424 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4426 if (reload_completed)
4427 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4428 else
4429 emit_insn (gen_spu_convert (ops[0], load));
4432 void
4433 spu_split_store (rtx * ops)
4435 enum machine_mode mode = GET_MODE (ops[0]);
4436 rtx pat = ops[2];
4437 rtx reg = ops[3];
4438 rtx addr, p0, p1, p1_lo, smem;
4439 int aform;
4440 int scalar;
4442 addr = XEXP (ops[0], 0);
4444 if (GET_CODE (addr) == PLUS)
4446 /* 8 cases:
4447 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4448 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4449 aligned reg + aligned const => lqd, c?d, shuf, stqx
4450 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4451 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4452 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4453 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4454 unaligned reg + unaligned const => not allowed by legitimate address
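      /* Informally, each of these store sequences is a read-modify-write of
         the enclosing quadword (an illustrative sketch, not actual output):

           lqd/lqx          load the quadword containing the slot
           cbd/chd/cwd/cdd  build an insertion mask for the access size
           shufb            merge the new value into the loaded quadword
           stqx             store the quadword back

         which the code below emits as a TImode load, gen_cpat, gen_shufb
         and a TImode store.  */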
4456 aform = 0;
4457 p0 = XEXP (addr, 0);
4458 p1 = p1_lo = XEXP (addr, 1);
4459 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4461 p1_lo = GEN_INT (INTVAL (p1) & 15);
4462 p1 = GEN_INT (INTVAL (p1) & -16);
4463 addr = gen_rtx_PLUS (SImode, p0, p1);
4466 else if (GET_CODE (addr) == REG)
4468 aform = 0;
4469 p0 = addr;
4470 p1 = p1_lo = const0_rtx;
4472 else
4474 aform = 1;
4475 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4476 p1 = 0; /* aform doesn't use p1 */
4477 p1_lo = addr;
4478 if (ALIGNED_SYMBOL_REF_P (addr))
4479 p1_lo = const0_rtx;
4480 else if (GET_CODE (addr) == CONST)
4482 if (GET_CODE (XEXP (addr, 0)) == PLUS
4483 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4484 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4486 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4487 if ((v & -16) != 0)
4488 addr = gen_rtx_CONST (Pmode,
4489 gen_rtx_PLUS (Pmode,
4490 XEXP (XEXP (addr, 0), 0),
4491 GEN_INT (v & -16)));
4492 else
4493 addr = XEXP (XEXP (addr, 0), 0);
4494 p1_lo = GEN_INT (v & 15);
4497 else if (GET_CODE (addr) == CONST_INT)
4499 p1_lo = GEN_INT (INTVAL (addr) & 15);
4500 addr = GEN_INT (INTVAL (addr) & -16);
4504 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4506 scalar = store_with_one_insn_p (ops[0]);
4507 if (!scalar)
4509 /* We could copy the flags from the ops[0] MEM to lmem here,
4510 but we don't because we want this load to be optimized away if
4511 possible, and copying the flags would prevent that in certain
4512 cases, e.g. consider the volatile flag. */
4514 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4515 set_mem_alias_set (lmem, 0);
4516 emit_insn (gen_movti (reg, lmem));
4518 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4519 p0 = stack_pointer_rtx;
4520 if (!p1_lo)
4521 p1_lo = const0_rtx;
4523 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4524 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4526 else if (reload_completed)
4528 if (GET_CODE (ops[1]) == REG)
4529 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4530 else if (GET_CODE (ops[1]) == SUBREG)
4531 emit_move_insn (reg,
4532 gen_rtx_REG (GET_MODE (reg),
4533 REGNO (SUBREG_REG (ops[1]))));
4534 else
4535 abort ();
4537 else
4539 if (GET_CODE (ops[1]) == REG)
4540 emit_insn (gen_spu_convert (reg, ops[1]));
4541 else if (GET_CODE (ops[1]) == SUBREG)
4542 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4543 else
4544 abort ();
4547 if (GET_MODE_SIZE (mode) < 4 && scalar)
4548 emit_insn (gen_shlqby_ti
4549 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4551 smem = change_address (ops[0], TImode, addr);
4552 /* We can't use the previous alias set because the memory has changed
4553 size and can potentially overlap objects of other types. */
4554 set_mem_alias_set (smem, 0);
4556 emit_insn (gen_movti (smem, reg));
4559 /* Return TRUE if X is MEM which is a struct member reference
4560 and the member can safely be loaded and stored with a single
4561 instruction because it is padded. */
4562 static int
4563 mem_is_padded_component_ref (rtx x)
4565 tree t = MEM_EXPR (x);
4566 tree r;
4567 if (!t || TREE_CODE (t) != COMPONENT_REF)
4568 return 0;
4569 t = TREE_OPERAND (t, 1);
4570 if (!t || TREE_CODE (t) != FIELD_DECL
4571 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4572 return 0;
4573 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4574 r = DECL_FIELD_CONTEXT (t);
4575 if (!r || TREE_CODE (r) != RECORD_TYPE)
4576 return 0;
4577 /* Make sure they are the same mode */
4578 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4579 return 0;
4580 /* If there are no following fields then the field alignment assures
4581 that the structure is padded to that alignment, which means this field
4582 is padded too. */
4583 if (TREE_CHAIN (t) == 0)
4584 return 1;
4585 /* If the following field is also aligned then this field will be
4586 padded. */
4587 t = TREE_CHAIN (t);
4588 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4589 return 1;
4590 return 0;
4593 /* Parse the -mfixed-range= option string. */
4594 static void
4595 fix_range (const char *const_str)
4597 int i, first, last;
4598 char *str, *dash, *comma;
4600 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4601 REG2 are either register names or register numbers. The effect
4602 of this option is to mark the registers in the range from REG1 to
4603 REG2 as ``fixed'' so they won't be used by the compiler. */
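  /* For example, a (hypothetical) option of the form

       -mfixed-range=80-90,100-110

     where 80, 90, 100 and 110 stand for whatever register names or numbers
     decode_reg_name accepts, marks registers 80..90 and 100..110 as fixed
     and call-used.  */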
4605 i = strlen (const_str);
4606 str = (char *) alloca (i + 1);
4607 memcpy (str, const_str, i + 1);
4609 while (1)
4611 dash = strchr (str, '-');
4612 if (!dash)
4614 warning (0, "value of -mfixed-range must have form REG1-REG2");
4615 return;
4617 *dash = '\0';
4618 comma = strchr (dash + 1, ',');
4619 if (comma)
4620 *comma = '\0';
4622 first = decode_reg_name (str);
4623 if (first < 0)
4625 warning (0, "unknown register name: %s", str);
4626 return;
4629 last = decode_reg_name (dash + 1);
4630 if (last < 0)
4632 warning (0, "unknown register name: %s", dash + 1);
4633 return;
4636 *dash = '-';
4638 if (first > last)
4640 warning (0, "%s-%s is an empty range", str, dash + 1);
4641 return;
4644 for (i = first; i <= last; ++i)
4645 fixed_regs[i] = call_used_regs[i] = 1;
4647 if (!comma)
4648 break;
4650 *comma = ',';
4651 str = comma + 1;
4656 spu_valid_move (rtx * ops)
4658 enum machine_mode mode = GET_MODE (ops[0]);
4659 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4660 return 0;
4662 /* init_expr_once tries to recog against load and store insns to set
4663 the direct_load[] and direct_store[] arrays. We always want to
4664 consider those loads and stores valid. init_expr_once is called in
4665 the context of a dummy function which does not have a decl. */
4666 if (cfun->decl == 0)
4667 return 1;
4669 /* Don't allow loads/stores which would require more than 1 insn.
4670 During and after reload we assume loads and stores only take 1
4671 insn. */
4672 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4674 if (GET_CODE (ops[0]) == MEM
4675 && (GET_MODE_SIZE (mode) < 4
4676 || !(store_with_one_insn_p (ops[0])
4677 || mem_is_padded_component_ref (ops[0]))))
4678 return 0;
4679 if (GET_CODE (ops[1]) == MEM
4680 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4681 return 0;
4683 return 1;
4686 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4687 can be generated using the fsmbi instruction. */
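/* Roughly speaking, fsmbi expands each bit of a 16-bit immediate into one
   byte of 0x00 or 0xff, so any constant whose 16 bytes are each either
   0x00 or 0xff qualifies; e.g. (vector unsigned int){ 0xffffffff, 0, 0,
   0xff00ff00 }.  (Informal illustration.)  */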
4689 fsmbi_const_p (rtx x)
4691 if (CONSTANT_P (x))
4693 /* We can always choose TImode for CONST_INT because the high bits
4694 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4695 enum immediate_class c = classify_immediate (x, TImode);
4696 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4698 return 0;
4701 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4702 can be generated using the cbd, chd, cwd or cdd instruction. */
4704 cpat_const_p (rtx x, enum machine_mode mode)
4706 if (CONSTANT_P (x))
4708 enum immediate_class c = classify_immediate (x, mode);
4709 return c == IC_CPAT;
4711 return 0;
4715 gen_cpat_const (rtx * ops)
4717 unsigned char dst[16];
4718 int i, offset, shift, isize;
4719 if (GET_CODE (ops[3]) != CONST_INT
4720 || GET_CODE (ops[2]) != CONST_INT
4721 || (GET_CODE (ops[1]) != CONST_INT
4722 && GET_CODE (ops[1]) != REG))
4723 return 0;
4724 if (GET_CODE (ops[1]) == REG
4725 && (!REG_POINTER (ops[1])
4726 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4727 return 0;
4729 for (i = 0; i < 16; i++)
4730 dst[i] = i + 16;
4731 isize = INTVAL (ops[3]);
4732 if (isize == 1)
4733 shift = 3;
4734 else if (isize == 2)
4735 shift = 2;
4736 else
4737 shift = 0;
4738 offset = (INTVAL (ops[2]) +
4739 (GET_CODE (ops[1]) ==
4740 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4741 for (i = 0; i < isize; i++)
4742 dst[offset + i] = i + shift;
4743 return array_to_constant (TImode, dst);
4746 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4747 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4748 than 16 bytes, the value is repeated across the rest of the array. */
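/* For example (informal): an SImode constant 0x12345678 produces the byte
   array 12 34 56 78 repeated four times, most significant byte first.  */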
4749 void
4750 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4752 HOST_WIDE_INT val;
4753 int i, j, first;
4755 memset (arr, 0, 16);
4756 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4757 if (GET_CODE (x) == CONST_INT
4758 || (GET_CODE (x) == CONST_DOUBLE
4759 && (mode == SFmode || mode == DFmode)))
4761 gcc_assert (mode != VOIDmode && mode != BLKmode);
4763 if (GET_CODE (x) == CONST_DOUBLE)
4764 val = const_double_to_hwint (x);
4765 else
4766 val = INTVAL (x);
4767 first = GET_MODE_SIZE (mode) - 1;
4768 for (i = first; i >= 0; i--)
4770 arr[i] = val & 0xff;
4771 val >>= 8;
4773 /* Splat the constant across the whole array. */
4774 for (j = 0, i = first + 1; i < 16; i++)
4776 arr[i] = arr[j];
4777 j = (j == first) ? 0 : j + 1;
4780 else if (GET_CODE (x) == CONST_DOUBLE)
4782 val = CONST_DOUBLE_LOW (x);
4783 for (i = 15; i >= 8; i--)
4785 arr[i] = val & 0xff;
4786 val >>= 8;
4788 val = CONST_DOUBLE_HIGH (x);
4789 for (i = 7; i >= 0; i--)
4791 arr[i] = val & 0xff;
4792 val >>= 8;
4795 else if (GET_CODE (x) == CONST_VECTOR)
4797 int units;
4798 rtx elt;
4799 mode = GET_MODE_INNER (mode);
4800 units = CONST_VECTOR_NUNITS (x);
4801 for (i = 0; i < units; i++)
4803 elt = CONST_VECTOR_ELT (x, i);
4804 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4806 if (GET_CODE (elt) == CONST_DOUBLE)
4807 val = const_double_to_hwint (elt);
4808 else
4809 val = INTVAL (elt);
4810 first = GET_MODE_SIZE (mode) - 1;
4811 if (first + i * GET_MODE_SIZE (mode) > 16)
4812 abort ();
4813 for (j = first; j >= 0; j--)
4815 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4816 val >>= 8;
4821 else
4822 gcc_unreachable();
4825 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4826 smaller than 16 bytes, use the bytes that would represent that value
4827 in a register, e.g., for QImode return the value of arr[3]. */
4829 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4831 enum machine_mode inner_mode;
4832 rtvec v;
4833 int units, size, i, j, k;
4834 HOST_WIDE_INT val;
4836 if (GET_MODE_CLASS (mode) == MODE_INT
4837 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4839 j = GET_MODE_SIZE (mode);
4840 i = j < 4 ? 4 - j : 0;
4841 for (val = 0; i < j; i++)
4842 val = (val << 8) | arr[i];
4843 val = trunc_int_for_mode (val, mode);
4844 return GEN_INT (val);
4847 if (mode == TImode)
4849 HOST_WIDE_INT high;
4850 for (i = high = 0; i < 8; i++)
4851 high = (high << 8) | arr[i];
4852 for (i = 8, val = 0; i < 16; i++)
4853 val = (val << 8) | arr[i];
4854 return immed_double_const (val, high, TImode);
4856 if (mode == SFmode)
4858 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4859 val = trunc_int_for_mode (val, SImode);
4860 return hwint_to_const_double (SFmode, val);
4862 if (mode == DFmode)
4864 for (i = 0, val = 0; i < 8; i++)
4865 val = (val << 8) | arr[i];
4866 return hwint_to_const_double (DFmode, val);
4869 if (!VECTOR_MODE_P (mode))
4870 abort ();
4872 units = GET_MODE_NUNITS (mode);
4873 size = GET_MODE_UNIT_SIZE (mode);
4874 inner_mode = GET_MODE_INNER (mode);
4875 v = rtvec_alloc (units);
4877 for (k = i = 0; i < units; ++i)
4879 val = 0;
4880 for (j = 0; j < size; j++, k++)
4881 val = (val << 8) | arr[k];
4883 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4884 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4885 else
4886 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4888 if (k > 16)
4889 abort ();
4891 return gen_rtx_CONST_VECTOR (mode, v);
4894 static void
4895 reloc_diagnostic (rtx x)
4897 tree loc_decl, decl = 0;
4898 const char *msg;
4899 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4900 return;
4902 if (GET_CODE (x) == SYMBOL_REF)
4903 decl = SYMBOL_REF_DECL (x);
4904 else if (GET_CODE (x) == CONST
4905 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4906 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4908 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4909 if (decl && !DECL_P (decl))
4910 decl = 0;
4912 /* We use last_assemble_variable_decl to get line information. It's
4913 not always going to be right and might not even be close, but will
4914 be right for the more common cases. */
4915 if (!last_assemble_variable_decl || in_section == ctors_section)
4916 loc_decl = decl;
4917 else
4918 loc_decl = last_assemble_variable_decl;
4920 /* The decl could be a string constant. */
4921 if (decl && DECL_P (decl))
4922 msg = "%Jcreating run-time relocation for %qD";
4923 else
4924 msg = "creating run-time relocation";
4926 if (TARGET_WARN_RELOC)
4927 warning (0, msg, loc_decl, decl);
4928 else
4929 error (msg, loc_decl, decl);
4932 /* Hook into assemble_integer so we can generate an error for run-time
4933 relocations. The SPU ABI disallows them. */
4934 static bool
4935 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4937 /* By default run-time relocations aren't supported, but we allow them
4938 in case users support them in their own run-time loader, and we provide
4939 a warning for those users that don't. */
4940 if ((GET_CODE (x) == SYMBOL_REF)
4941 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4942 reloc_diagnostic (x);
4944 return default_assemble_integer (x, size, aligned_p);
4947 static void
4948 spu_asm_globalize_label (FILE * file, const char *name)
4950 fputs ("\t.global\t", file);
4951 assemble_name (file, name);
4952 fputs ("\n", file);
4955 static bool
4956 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4957 bool speed ATTRIBUTE_UNUSED)
4959 enum machine_mode mode = GET_MODE (x);
4960 int cost = COSTS_N_INSNS (2);
4962 /* Folding to a CONST_VECTOR will use extra space but there might
4963 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4964 only if it allows us to fold away multiple insns. Changing the cost
4965 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4966 because this cost will only be compared against a single insn.
4967 if (code == CONST_VECTOR)
4968 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4971 /* Use defaults for float operations. Not accurate but good enough. */
4972 if (mode == DFmode)
4974 *total = COSTS_N_INSNS (13);
4975 return true;
4977 if (mode == SFmode)
4979 *total = COSTS_N_INSNS (6);
4980 return true;
4982 switch (code)
4984 case CONST_INT:
4985 if (satisfies_constraint_K (x))
4986 *total = 0;
4987 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4988 *total = COSTS_N_INSNS (1);
4989 else
4990 *total = COSTS_N_INSNS (3);
4991 return true;
4993 case CONST:
4994 *total = COSTS_N_INSNS (3);
4995 return true;
4997 case LABEL_REF:
4998 case SYMBOL_REF:
4999 *total = COSTS_N_INSNS (0);
5000 return true;
5002 case CONST_DOUBLE:
5003 *total = COSTS_N_INSNS (5);
5004 return true;
5006 case FLOAT_EXTEND:
5007 case FLOAT_TRUNCATE:
5008 case FLOAT:
5009 case UNSIGNED_FLOAT:
5010 case FIX:
5011 case UNSIGNED_FIX:
5012 *total = COSTS_N_INSNS (7);
5013 return true;
5015 case PLUS:
5016 if (mode == TImode)
5018 *total = COSTS_N_INSNS (9);
5019 return true;
5021 break;
5023 case MULT:
5024 cost =
5025 GET_CODE (XEXP (x, 0)) ==
5026 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5027 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5029 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5031 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5032 cost = COSTS_N_INSNS (14);
5033 if ((val & 0xffff) == 0)
5034 cost = COSTS_N_INSNS (9);
5035 else if (val > 0 && val < 0x10000)
5036 cost = COSTS_N_INSNS (11);
5039 *total = cost;
5040 return true;
5041 case DIV:
5042 case UDIV:
5043 case MOD:
5044 case UMOD:
5045 *total = COSTS_N_INSNS (20);
5046 return true;
5047 case ROTATE:
5048 case ROTATERT:
5049 case ASHIFT:
5050 case ASHIFTRT:
5051 case LSHIFTRT:
5052 *total = COSTS_N_INSNS (4);
5053 return true;
5054 case UNSPEC:
5055 if (XINT (x, 1) == UNSPEC_CONVERT)
5056 *total = COSTS_N_INSNS (0);
5057 else
5058 *total = COSTS_N_INSNS (4);
5059 return true;
5061 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5062 if (GET_MODE_CLASS (mode) == MODE_INT
5063 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5064 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5065 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5066 *total = cost;
5067 return true;
5070 static enum machine_mode
5071 spu_unwind_word_mode (void)
5073 return SImode;
5076 /* Decide whether we can make a sibling call to a function. DECL is the
5077 declaration of the function being targeted by the call and EXP is the
5078 CALL_EXPR representing the call. */
5079 static bool
5080 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5082 return decl && !TARGET_LARGE_MEM;
5085 /* We need to correctly update the back chain pointer and the Available
5086 Stack Size (which is in the second slot of the sp register). */
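/* In vector terms (an informal sketch): after the allocation size N has
   been splatted across a V4SImode value, the single vector subtract below
   computes

     sp' = { sp[0] - N, sp[1] - N, sp[2] - N, sp[3] - N }

   so the stack pointer (slot 0) and the Available Stack Size (slot 1) are
   both updated in one operation.  */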
5087 void
5088 spu_allocate_stack (rtx op0, rtx op1)
5090 HOST_WIDE_INT v;
5091 rtx chain = gen_reg_rtx (V4SImode);
5092 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5093 rtx sp = gen_reg_rtx (V4SImode);
5094 rtx splatted = gen_reg_rtx (V4SImode);
5095 rtx pat = gen_reg_rtx (TImode);
5097 /* copy the back chain so we can save it back again. */
5098 emit_move_insn (chain, stack_bot);
5100 op1 = force_reg (SImode, op1);
5102 v = 0x1020300010203ll;
5103 emit_move_insn (pat, immed_double_const (v, v, TImode));
5104 emit_insn (gen_shufb (splatted, op1, op1, pat));
5106 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5107 emit_insn (gen_subv4si3 (sp, sp, splatted));
5109 if (flag_stack_check)
5111 rtx avail = gen_reg_rtx(SImode);
5112 rtx result = gen_reg_rtx(SImode);
5113 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5114 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5115 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5118 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5120 emit_move_insn (stack_bot, chain);
5122 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5125 void
5126 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5128 static unsigned char arr[16] =
5129 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5130 rtx temp = gen_reg_rtx (SImode);
5131 rtx temp2 = gen_reg_rtx (SImode);
5132 rtx temp3 = gen_reg_rtx (V4SImode);
5133 rtx temp4 = gen_reg_rtx (V4SImode);
5134 rtx pat = gen_reg_rtx (TImode);
5135 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5137 /* Restore the backchain from the first word, sp from the second. */
5138 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5139 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5141 emit_move_insn (pat, array_to_constant (TImode, arr));
5143 /* Compute Available Stack Size for sp */
5144 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5145 emit_insn (gen_shufb (temp3, temp, temp, pat));
5147 /* Compute Available Stack Size for back chain */
5148 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5149 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5150 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5152 emit_insn (gen_addv4si3 (sp, sp, temp3));
5153 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5156 static void
5157 spu_init_libfuncs (void)
5159 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5160 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5161 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5162 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5163 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5164 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5165 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5166 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5167 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5168 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5169 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5171 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5172 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5174 set_optab_libfunc (smul_optab, TImode, "__multi3");
5175 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5176 set_optab_libfunc (smod_optab, TImode, "__modti3");
5177 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5178 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5179 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5182 /* Make a subreg, stripping any existing subreg. We could possibly just
5183 call simplify_subreg, but in this case we know what we want. */
5185 spu_gen_subreg (enum machine_mode mode, rtx x)
5187 if (GET_CODE (x) == SUBREG)
5188 x = SUBREG_REG (x);
5189 if (GET_MODE (x) == mode)
5190 return x;
5191 return gen_rtx_SUBREG (mode, x, 0);
5194 static bool
5195 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5197 return (TYPE_MODE (type) == BLKmode
5198 && ((type) == 0
5199 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5200 || int_size_in_bytes (type) >
5201 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5204 /* Create the built-in types and functions */
5206 enum spu_function_code
5208 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5209 #include "spu-builtins.def"
5210 #undef DEF_BUILTIN
5211 NUM_SPU_BUILTINS
5214 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5216 struct spu_builtin_description spu_builtins[] = {
5217 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5218 {fcode, icode, name, type, params, NULL_TREE},
5219 #include "spu-builtins.def"
5220 #undef DEF_BUILTIN
5223 static void
5224 spu_init_builtins (void)
5226 struct spu_builtin_description *d;
5227 unsigned int i;
5229 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5230 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5231 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5232 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5233 V4SF_type_node = build_vector_type (float_type_node, 4);
5234 V2DF_type_node = build_vector_type (double_type_node, 2);
5236 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5237 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5238 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5239 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5241 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5243 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5244 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5245 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5246 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5247 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5248 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5249 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5250 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5251 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5252 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5253 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5254 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5256 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5257 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5258 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5259 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5260 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5261 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5262 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5263 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5265 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5266 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5268 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5270 spu_builtin_types[SPU_BTI_PTR] =
5271 build_pointer_type (build_qualified_type
5272 (void_type_node,
5273 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5275 /* For each builtin we build a new prototype. The tree code will make
5276 sure nodes are shared. */
5277 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5279 tree p;
5280 char name[64]; /* build_function will make a copy. */
5281 int parm;
5283 if (d->name == 0)
5284 continue;
5286 /* Find last parm. */
5287 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5290 p = void_list_node;
5291 while (parm > 1)
5292 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5294 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5296 sprintf (name, "__builtin_%s", d->name);
5297 d->fndecl =
5298 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5299 NULL, NULL_TREE);
5300 if (d->fcode == SPU_MASK_FOR_LOAD)
5301 TREE_READONLY (d->fndecl) = 1;
5303 /* These builtins don't throw. */
5304 TREE_NOTHROW (d->fndecl) = 1;
5308 void
5309 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5311 static unsigned char arr[16] =
5312 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5314 rtx temp = gen_reg_rtx (Pmode);
5315 rtx temp2 = gen_reg_rtx (V4SImode);
5316 rtx temp3 = gen_reg_rtx (V4SImode);
5317 rtx pat = gen_reg_rtx (TImode);
5318 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5320 emit_move_insn (pat, array_to_constant (TImode, arr));
5322 /* Restore the sp. */
5323 emit_move_insn (temp, op1);
5324 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5326 /* Compute available stack size for sp. */
5327 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5328 emit_insn (gen_shufb (temp3, temp, temp, pat));
5330 emit_insn (gen_addv4si3 (sp, sp, temp3));
5331 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5335 spu_safe_dma (HOST_WIDE_INT channel)
5337 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5340 void
5341 spu_builtin_splats (rtx ops[])
5343 enum machine_mode mode = GET_MODE (ops[0]);
5344 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5346 unsigned char arr[16];
5347 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5348 emit_move_insn (ops[0], array_to_constant (mode, arr));
5350 else
5352 rtx reg = gen_reg_rtx (TImode);
5353 rtx shuf;
5354 if (GET_CODE (ops[1]) != REG
5355 && GET_CODE (ops[1]) != SUBREG)
5356 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5357 switch (mode)
5359 case V2DImode:
5360 case V2DFmode:
5361 shuf =
5362 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5363 TImode);
5364 break;
5365 case V4SImode:
5366 case V4SFmode:
5367 shuf =
5368 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5369 TImode);
5370 break;
5371 case V8HImode:
5372 shuf =
5373 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5374 TImode);
5375 break;
5376 case V16QImode:
5377 shuf =
5378 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5379 TImode);
5380 break;
5381 default:
5382 abort ();
5384 emit_move_insn (reg, shuf);
5385 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5389 void
5390 spu_builtin_extract (rtx ops[])
5392 enum machine_mode mode;
5393 rtx rot, from, tmp;
5395 mode = GET_MODE (ops[1]);
5397 if (GET_CODE (ops[2]) == CONST_INT)
5399 switch (mode)
5401 case V16QImode:
5402 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5403 break;
5404 case V8HImode:
5405 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5406 break;
5407 case V4SFmode:
5408 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5409 break;
5410 case V4SImode:
5411 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5412 break;
5413 case V2DImode:
5414 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5415 break;
5416 case V2DFmode:
5417 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5418 break;
5419 default:
5420 abort ();
5422 return;
5425 from = spu_gen_subreg (TImode, ops[1]);
5426 rot = gen_reg_rtx (TImode);
5427 tmp = gen_reg_rtx (SImode);
5429 switch (mode)
5431 case V16QImode:
5432 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5433 break;
5434 case V8HImode:
5435 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5436 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5437 break;
5438 case V4SFmode:
5439 case V4SImode:
5440 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5441 break;
5442 case V2DImode:
5443 case V2DFmode:
5444 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5445 break;
5446 default:
5447 abort ();
5449 emit_insn (gen_rotqby_ti (rot, from, tmp));
5451 emit_insn (gen_spu_convert (ops[0], rot));
5454 void
5455 spu_builtin_insert (rtx ops[])
5457 enum machine_mode mode = GET_MODE (ops[0]);
5458 enum machine_mode imode = GET_MODE_INNER (mode);
5459 rtx mask = gen_reg_rtx (TImode);
5460 rtx offset;
5462 if (GET_CODE (ops[3]) == CONST_INT)
5463 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5464 else
5466 offset = gen_reg_rtx (SImode);
5467 emit_insn (gen_mulsi3
5468 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5470 emit_insn (gen_cpat
5471 (mask, stack_pointer_rtx, offset,
5472 GEN_INT (GET_MODE_SIZE (imode))));
5473 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5476 void
5477 spu_builtin_promote (rtx ops[])
5479 enum machine_mode mode, imode;
5480 rtx rot, from, offset;
5481 HOST_WIDE_INT pos;
5483 mode = GET_MODE (ops[0]);
5484 imode = GET_MODE_INNER (mode);
5486 from = gen_reg_rtx (TImode);
5487 rot = spu_gen_subreg (TImode, ops[0]);
5489 emit_insn (gen_spu_convert (from, ops[1]));
5491 if (GET_CODE (ops[2]) == CONST_INT)
5493 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5494 if (GET_MODE_SIZE (imode) < 4)
5495 pos += 4 - GET_MODE_SIZE (imode);
5496 offset = GEN_INT (pos & 15);
5498 else
5500 offset = gen_reg_rtx (SImode);
5501 switch (mode)
5503 case V16QImode:
5504 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5505 break;
5506 case V8HImode:
5507 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5508 emit_insn (gen_addsi3 (offset, offset, offset));
5509 break;
5510 case V4SFmode:
5511 case V4SImode:
5512 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5513 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5514 break;
5515 case V2DImode:
5516 case V2DFmode:
5517 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5518 break;
5519 default:
5520 abort ();
5523 emit_insn (gen_rotqby_ti (rot, from, offset));
5526 void
5527 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5529 rtx shuf = gen_reg_rtx (V4SImode);
5530 rtx insn = gen_reg_rtx (V4SImode);
5531 rtx shufc;
5532 rtx insnc;
5533 rtx mem;
5535 fnaddr = force_reg (SImode, fnaddr);
5536 cxt = force_reg (SImode, cxt);
5538 if (TARGET_LARGE_MEM)
5540 rtx rotl = gen_reg_rtx (V4SImode);
5541 rtx mask = gen_reg_rtx (V4SImode);
5542 rtx bi = gen_reg_rtx (SImode);
5543 unsigned char shufa[16] = {
5544 2, 3, 0, 1, 18, 19, 16, 17,
5545 0, 1, 2, 3, 16, 17, 18, 19
5547 unsigned char insna[16] = {
5548 0x41, 0, 0, 79,
5549 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5550 0x60, 0x80, 0, 79,
5551 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5554 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5555 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5557 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5558 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5559 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5560 emit_insn (gen_selb (insn, insnc, rotl, mask));
5562 mem = memory_address (Pmode, tramp);
5563 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5565 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5566 mem = memory_address (Pmode, plus_constant (tramp, 16));
5567 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5569 else
5571 rtx scxt = gen_reg_rtx (SImode);
5572 rtx sfnaddr = gen_reg_rtx (SImode);
5573 unsigned char insna[16] = {
5574 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5575 0x30, 0, 0, 0,
5576 0, 0, 0, 0,
5577 0, 0, 0, 0
5580 shufc = gen_reg_rtx (TImode);
5581 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5583 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5584 fits 18 bits and the last 4 are zeros. This will be true if
5585 the stack pointer is initialized to 0x3fff0 at program start;
5586 otherwise the ila instruction will be garbage. */
5588 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5589 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5590 emit_insn (gen_cpat
5591 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5592 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5593 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5595 mem = memory_address (Pmode, tramp);
5596 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5599 emit_insn (gen_sync ());
5602 void
5603 spu_expand_sign_extend (rtx ops[])
5605 unsigned char arr[16];
5606 rtx pat = gen_reg_rtx (TImode);
5607 rtx sign, c;
5608 int i, last;
5609 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5610 if (GET_MODE (ops[1]) == QImode)
5612 sign = gen_reg_rtx (HImode);
5613 emit_insn (gen_extendqihi2 (sign, ops[1]));
5614 for (i = 0; i < 16; i++)
5615 arr[i] = 0x12;
5616 arr[last] = 0x13;
5618 else
5620 for (i = 0; i < 16; i++)
5621 arr[i] = 0x10;
5622 switch (GET_MODE (ops[1]))
5624 case HImode:
5625 sign = gen_reg_rtx (SImode);
5626 emit_insn (gen_extendhisi2 (sign, ops[1]));
5627 arr[last] = 0x03;
5628 arr[last - 1] = 0x02;
5629 break;
5630 case SImode:
5631 sign = gen_reg_rtx (SImode);
5632 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5633 for (i = 0; i < 4; i++)
5634 arr[last - i] = 3 - i;
5635 break;
5636 case DImode:
5637 sign = gen_reg_rtx (SImode);
5638 c = gen_reg_rtx (SImode);
5639 emit_insn (gen_spu_convert (c, ops[1]));
5640 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5641 for (i = 0; i < 8; i++)
5642 arr[last - i] = 7 - i;
5643 break;
5644 default:
5645 abort ();
5648 emit_move_insn (pat, array_to_constant (TImode, arr));
5649 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5652 /* Expand vector initialization. If there are any constant parts,
5653 load the constant parts first, then load any non-constant parts. */
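/* For example (informal): initializing a V4SImode vector with { 1, 2, x, 4 },
   where x is not constant, first loads the constant vector { 1, 2, 1, 4 }
   (the variable slot is temporarily filled with the first constant found)
   and then inserts x into element 2.  */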
5654 void
5655 spu_expand_vector_init (rtx target, rtx vals)
5657 enum machine_mode mode = GET_MODE (target);
5658 int n_elts = GET_MODE_NUNITS (mode);
5659 int n_var = 0;
5660 bool all_same = true;
5661 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5662 int i;
5664 first = XVECEXP (vals, 0, 0);
5665 for (i = 0; i < n_elts; ++i)
5667 x = XVECEXP (vals, 0, i);
5668 if (!(CONST_INT_P (x)
5669 || GET_CODE (x) == CONST_DOUBLE
5670 || GET_CODE (x) == CONST_FIXED))
5671 ++n_var;
5672 else
5674 if (first_constant == NULL_RTX)
5675 first_constant = x;
5677 if (i > 0 && !rtx_equal_p (x, first))
5678 all_same = false;
5681 /* if all elements are the same, use splats to repeat elements */
5682 if (all_same)
5684 if (!CONSTANT_P (first)
5685 && !register_operand (first, GET_MODE (x)))
5686 first = force_reg (GET_MODE (first), first);
5687 emit_insn (gen_spu_splats (target, first));
5688 return;
5691 /* load constant parts */
5692 if (n_var != n_elts)
5694 if (n_var == 0)
5696 emit_move_insn (target,
5697 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5699 else
5701 rtx constant_parts_rtx = copy_rtx (vals);
5703 gcc_assert (first_constant != NULL_RTX);
5704 /* Fill empty slots with the first constant; this increases
5705 our chance of using splats in the recursive call below. */
5706 for (i = 0; i < n_elts; ++i)
5708 x = XVECEXP (constant_parts_rtx, 0, i);
5709 if (!(CONST_INT_P (x)
5710 || GET_CODE (x) == CONST_DOUBLE
5711 || GET_CODE (x) == CONST_FIXED))
5712 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5715 spu_expand_vector_init (target, constant_parts_rtx);
5719 /* load variable parts */
5720 if (n_var != 0)
5722 rtx insert_operands[4];
5724 insert_operands[0] = target;
5725 insert_operands[2] = target;
5726 for (i = 0; i < n_elts; ++i)
5728 x = XVECEXP (vals, 0, i);
5729 if (!(CONST_INT_P (x)
5730 || GET_CODE (x) == CONST_DOUBLE
5731 || GET_CODE (x) == CONST_FIXED))
5733 if (!register_operand (x, GET_MODE (x)))
5734 x = force_reg (GET_MODE (x), x);
5735 insert_operands[1] = x;
5736 insert_operands[3] = GEN_INT (i);
5737 spu_builtin_insert (insert_operands);
5743 /* Return insn index for the vector compare instruction for given CODE,
5744 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5746 static int
5747 get_vec_cmp_insn (enum rtx_code code,
5748 enum machine_mode dest_mode,
5749 enum machine_mode op_mode)
5752 switch (code)
5754 case EQ:
5755 if (dest_mode == V16QImode && op_mode == V16QImode)
5756 return CODE_FOR_ceq_v16qi;
5757 if (dest_mode == V8HImode && op_mode == V8HImode)
5758 return CODE_FOR_ceq_v8hi;
5759 if (dest_mode == V4SImode && op_mode == V4SImode)
5760 return CODE_FOR_ceq_v4si;
5761 if (dest_mode == V4SImode && op_mode == V4SFmode)
5762 return CODE_FOR_ceq_v4sf;
5763 if (dest_mode == V2DImode && op_mode == V2DFmode)
5764 return CODE_FOR_ceq_v2df;
5765 break;
5766 case GT:
5767 if (dest_mode == V16QImode && op_mode == V16QImode)
5768 return CODE_FOR_cgt_v16qi;
5769 if (dest_mode == V8HImode && op_mode == V8HImode)
5770 return CODE_FOR_cgt_v8hi;
5771 if (dest_mode == V4SImode && op_mode == V4SImode)
5772 return CODE_FOR_cgt_v4si;
5773 if (dest_mode == V4SImode && op_mode == V4SFmode)
5774 return CODE_FOR_cgt_v4sf;
5775 if (dest_mode == V2DImode && op_mode == V2DFmode)
5776 return CODE_FOR_cgt_v2df;
5777 break;
5778 case GTU:
5779 if (dest_mode == V16QImode && op_mode == V16QImode)
5780 return CODE_FOR_clgt_v16qi;
5781 if (dest_mode == V8HImode && op_mode == V8HImode)
5782 return CODE_FOR_clgt_v8hi;
5783 if (dest_mode == V4SImode && op_mode == V4SImode)
5784 return CODE_FOR_clgt_v4si;
5785 break;
5786 default:
5787 break;
5789 return -1;
5792 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5793 DMODE is expected destination mode. This is a recursive function. */
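/* For instance (an informal sketch): there is no direct V4SImode LE
   instruction, so an LE comparison is built recursively from the
   comparisons that do exist, roughly

     (A <= B)  ==>  (B > A) | (A == B)

   i.e. a cgt with swapped operands, a ceq, and an OR of the two masks.  */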
5795 static rtx
5796 spu_emit_vector_compare (enum rtx_code rcode,
5797 rtx op0, rtx op1,
5798 enum machine_mode dmode)
5800 int vec_cmp_insn;
5801 rtx mask;
5802 enum machine_mode dest_mode;
5803 enum machine_mode op_mode = GET_MODE (op1);
5805 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5807 /* Floating point vector compare instructions use destination mode V4SImode.
5808 Double floating point vector compare instructions use destination mode V2DImode.
5809 Move the destination to the appropriate mode later. */
5810 if (dmode == V4SFmode)
5811 dest_mode = V4SImode;
5812 else if (dmode == V2DFmode)
5813 dest_mode = V2DImode;
5814 else
5815 dest_mode = dmode;
5817 mask = gen_reg_rtx (dest_mode);
5818 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5820 if (vec_cmp_insn == -1)
5822 bool swap_operands = false;
5823 bool try_again = false;
5824 switch (rcode)
5826 case LT:
5827 rcode = GT;
5828 swap_operands = true;
5829 try_again = true;
5830 break;
5831 case LTU:
5832 rcode = GTU;
5833 swap_operands = true;
5834 try_again = true;
5835 break;
5836 case NE:
5837 /* Treat A != B as ~(A==B). */
5839 enum insn_code nor_code;
5840 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5841 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5842 gcc_assert (nor_code != CODE_FOR_nothing);
5843 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5844 if (dmode != dest_mode)
5846 rtx temp = gen_reg_rtx (dest_mode);
5847 convert_move (temp, mask, 0);
5848 return temp;
5850 return mask;
5852 break;
5853 case GE:
5854 case GEU:
5855 case LE:
5856 case LEU:
5857 /* Try GT/GTU/LT/LTU OR EQ */
5859 rtx c_rtx, eq_rtx;
5860 enum insn_code ior_code;
5861 enum rtx_code new_code;
5863 switch (rcode)
5865 case GE: new_code = GT; break;
5866 case GEU: new_code = GTU; break;
5867 case LE: new_code = LT; break;
5868 case LEU: new_code = LTU; break;
5869 default:
5870 gcc_unreachable ();
5873 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5874 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5876 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5877 gcc_assert (ior_code != CODE_FOR_nothing);
5878 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5879 if (dmode != dest_mode)
5881 rtx temp = gen_reg_rtx (dest_mode);
5882 convert_move (temp, mask, 0);
5883 return temp;
5885 return mask;
5887 break;
5888 default:
5889 gcc_unreachable ();
5892 /* You only get two chances. */
5893 if (try_again)
5894 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5896 gcc_assert (vec_cmp_insn != -1);
5898 if (swap_operands)
5900 rtx tmp;
5901 tmp = op0;
5902 op0 = op1;
5903 op1 = tmp;
5907 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5908 if (dmode != dest_mode)
5910 rtx temp = gen_reg_rtx (dest_mode);
5911 convert_move (temp, mask, 0);
5912 return temp;
5914 return mask;
5918 /* Emit vector conditional expression.
5919 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5920 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
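/* Informally, the selb below picks, element by element, OP1 where the
   comparison mask is all ones and OP2 where it is all zeros, i.e.

     DEST[i] = COND (CC_OP0[i], CC_OP1[i]) ? OP1[i] : OP2[i]

   (a sketch only; see the selb pattern for the exact byte-level
   semantics).  */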
5923 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5924 rtx cond, rtx cc_op0, rtx cc_op1)
5926 enum machine_mode dest_mode = GET_MODE (dest);
5927 enum rtx_code rcode = GET_CODE (cond);
5928 rtx mask;
5930 /* Get the vector mask for the given relational operations. */
5931 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5933 emit_insn(gen_selb (dest, op2, op1, mask));
5935 return 1;
5938 static rtx
5939 spu_force_reg (enum machine_mode mode, rtx op)
5941 rtx x, r;
5942 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5944 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5945 || GET_MODE (op) == BLKmode)
5946 return force_reg (mode, convert_to_mode (mode, op, 0));
5947 abort ();
5950 r = force_reg (GET_MODE (op), op);
5951 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5953 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5954 if (x)
5955 return x;
5958 x = gen_reg_rtx (mode);
5959 emit_insn (gen_spu_convert (x, r));
5960 return x;
5963 static void
5964 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5966 HOST_WIDE_INT v = 0;
5967 int lsbits;
5968 /* Check the range of immediate operands. */
5969 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5971 int range = p - SPU_BTI_7;
5973 if (!CONSTANT_P (op))
5974 error ("%s expects an integer literal in the range [%d, %d].",
5975 d->name,
5976 spu_builtin_range[range].low, spu_builtin_range[range].high);
5978 if (GET_CODE (op) == CONST
5979 && (GET_CODE (XEXP (op, 0)) == PLUS
5980 || GET_CODE (XEXP (op, 0)) == MINUS))
5982 v = INTVAL (XEXP (XEXP (op, 0), 1));
5983 op = XEXP (XEXP (op, 0), 0);
5985 else if (GET_CODE (op) == CONST_INT)
5986 v = INTVAL (op);
5987 else if (GET_CODE (op) == CONST_VECTOR
5988 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5989 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5991 /* The default for v is 0 which is valid in every range. */
5992 if (v < spu_builtin_range[range].low
5993 || v > spu_builtin_range[range].high)
5994 error ("%s expects an integer literal in the range [%d, %d]. ("
5995 HOST_WIDE_INT_PRINT_DEC ")",
5996 d->name,
5997 spu_builtin_range[range].low, spu_builtin_range[range].high,
6000 switch (p)
6002 case SPU_BTI_S10_4:
6003 lsbits = 4;
6004 break;
6005 case SPU_BTI_U16_2:
6006 /* This is only used in lqa and stqa. Even though the insns
6007 encode 16 bits of the address (all but the 2 least
6008 significant), only 14 bits are used because it is masked to
6009 be 16 byte aligned. */
6010 lsbits = 4;
6011 break;
6012 case SPU_BTI_S16_2:
6013 /* This is used for lqr and stqr. */
6014 lsbits = 2;
6015 break;
6016 default:
6017 lsbits = 0;
6020 if (GET_CODE (op) == LABEL_REF
6021 || (GET_CODE (op) == SYMBOL_REF
6022 && SYMBOL_REF_FUNCTION_P (op))
6023 || (v & ((1 << lsbits) - 1)) != 0)
6024 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6025 d->name);
6030 static int
6031 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6032 rtx target, rtx ops[])
6034 enum insn_code icode = (enum insn_code) d->icode;
6035 int i = 0, a;
6037 /* Expand the arguments into rtl. */
6039 if (d->parm[0] != SPU_BTI_VOID)
6040 ops[i++] = target;
6042 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6044 tree arg = CALL_EXPR_ARG (exp, a);
6045 if (arg == 0)
6046 abort ();
6047 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6050 /* The insn pattern may have additional operands (SCRATCH).
6051 Return the number of actual non-SCRATCH operands. */
6052 gcc_assert (i <= insn_data[icode].n_operands);
6053 return i;
6056 static rtx
6057 spu_expand_builtin_1 (struct spu_builtin_description *d,
6058 tree exp, rtx target)
6060 rtx pat;
6061 rtx ops[8];
6062 enum insn_code icode = (enum insn_code) d->icode;
6063 enum machine_mode mode, tmode;
6064 int i, p;
6065 int n_operands;
6066 tree return_type;
6068 /* Set up ops[] with values from arglist. */
6069 n_operands = expand_builtin_args (d, exp, target, ops);
6071 /* Handle the target operand which must be operand 0. */
6072 i = 0;
6073 if (d->parm[0] != SPU_BTI_VOID)
6076 /* We prefer the mode specified for the match_operand; otherwise
6077 use the mode from the builtin function prototype. */
6078 tmode = insn_data[d->icode].operand[0].mode;
6079 if (tmode == VOIDmode)
6080 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6082 /* Try to use target, because not using it can lead to extra copies,
6083 and when all of the registers are in use, extra copies lead
6084 to extra spills. */
6085 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6086 ops[0] = target;
6087 else
6088 target = ops[0] = gen_reg_rtx (tmode);
6090 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6091 abort ();
6093 i++;
6096 if (d->fcode == SPU_MASK_FOR_LOAD)
6098 enum machine_mode mode = insn_data[icode].operand[1].mode;
6099 tree arg;
6100 rtx addr, op, pat;
6102 /* get addr */
6103 arg = CALL_EXPR_ARG (exp, 0);
6104 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6105 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6106 addr = memory_address (mode, op);
6108 /* negate addr */
6109 op = gen_reg_rtx (GET_MODE (addr));
6110 emit_insn (gen_rtx_SET (VOIDmode, op,
6111 gen_rtx_NEG (GET_MODE (addr), addr)));
6112 op = gen_rtx_MEM (mode, op);
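/* Generating the mask from the negated address appears to mirror the
   PowerPC realign-load scheme, where a permute control derived from
   -addr merges the two aligned quadwords surrounding a misaligned
   access; the insn pattern behind this icode defines the exact mask
   (an inference from the structure of this code, not stated in the
   original comments).  */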
6114 pat = GEN_FCN (icode) (target, op);
6115 if (!pat)
6116 return 0;
6117 emit_insn (pat);
6118 return target;
6121 /* Ignore align_hint, but still expand its args in case they have
6122 side effects. */
6123 if (icode == CODE_FOR_spu_align_hint)
6124 return 0;
6126 /* Handle the rest of the operands. */
6127 for (p = 1; i < n_operands; i++, p++)
6129 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6130 mode = insn_data[d->icode].operand[i].mode;
6131 else
6132 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6134 /* mode can be VOIDmode here for labels */
6136 /* For specific intrinsics with an immediate operand, e.g.,
6137 si_ai(), we sometimes need to convert the scalar argument to a
6138 vector argument by splatting the scalar. */
6139 if (VECTOR_MODE_P (mode)
6140 && (GET_CODE (ops[i]) == CONST_INT
6141 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6142 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6144 if (GET_CODE (ops[i]) == CONST_INT)
6145 ops[i] = spu_const (mode, INTVAL (ops[i]));
6146 else
6148 rtx reg = gen_reg_rtx (mode);
6149 enum machine_mode imode = GET_MODE_INNER (mode);
6150 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6151 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6152 if (imode != GET_MODE (ops[i]))
6153 ops[i] = convert_to_mode (imode, ops[i],
6154 TYPE_UNSIGNED (spu_builtin_types
6155 [d->parm[i]]));
6156 emit_insn (gen_spu_splats (reg, ops[i]));
6157 ops[i] = reg;
6161 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6163 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6164 ops[i] = spu_force_reg (mode, ops[i]);
6167 switch (n_operands)
6169 case 0:
6170 pat = GEN_FCN (icode) (0);
6171 break;
6172 case 1:
6173 pat = GEN_FCN (icode) (ops[0]);
6174 break;
6175 case 2:
6176 pat = GEN_FCN (icode) (ops[0], ops[1]);
6177 break;
6178 case 3:
6179 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6180 break;
6181 case 4:
6182 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6183 break;
6184 case 5:
6185 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6186 break;
6187 case 6:
6188 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6189 break;
6190 default:
6191 abort ();
6194 if (!pat)
6195 abort ();
6197 if (d->type == B_CALL || d->type == B_BISLED)
6198 emit_call_insn (pat);
6199 else if (d->type == B_JUMP)
6201 emit_jump_insn (pat);
6202 emit_barrier ();
6204 else
6205 emit_insn (pat);
6207 return_type = spu_builtin_types[d->parm[0]];
6208 if (d->parm[0] != SPU_BTI_VOID
6209 && GET_MODE (target) != TYPE_MODE (return_type))
6211 /* target is the return value.  It should always have the mode of
6212 the builtin function prototype. */
6213 target = spu_force_reg (TYPE_MODE (return_type), target);
6216 return target;
6219 rtx
6220 spu_expand_builtin (tree exp,
6221 rtx target,
6222 rtx subtarget ATTRIBUTE_UNUSED,
6223 enum machine_mode mode ATTRIBUTE_UNUSED,
6224 int ignore ATTRIBUTE_UNUSED)
6226 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6227 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
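/* The subtraction undoes the END_BUILTINS bias that was applied to the
   function codes when the SPU builtins were registered, leaving an index
   into spu_builtins[].  */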
6228 struct spu_builtin_description *d;
6230 if (fcode < NUM_SPU_BUILTINS)
6232 d = &spu_builtins[fcode];
6234 return spu_expand_builtin_1 (d, exp, target);
6236 abort ();
6239 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6240 static tree
6241 spu_builtin_mul_widen_even (tree type)
6243 switch (TYPE_MODE (type))
6245 case V8HImode:
6246 if (TYPE_UNSIGNED (type))
6247 return spu_builtins[SPU_MULE_0].fndecl;
6248 else
6249 return spu_builtins[SPU_MULE_1].fndecl;
6250 break;
6251 default:
6252 return NULL_TREE;
6256 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6257 static tree
6258 spu_builtin_mul_widen_odd (tree type)
6260 switch (TYPE_MODE (type))
6262 case V8HImode:
6263 if (TYPE_UNSIGNED (type))
6264 return spu_builtins[SPU_MULO_1].fndecl;
6265 else
6266 return spu_builtins[SPU_MULO_0].fndecl;
6267 break;
6268 default:
6269 return NULL_TREE;
6273 /* Implement targetm.vectorize.builtin_mask_for_load. */
6274 static tree
6275 spu_builtin_mask_for_load (void)
6277 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6278 gcc_assert (d);
6279 return d->fndecl;
6282 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6283 static int
6284 spu_builtin_vectorization_cost (bool runtime_test)
6286 /* If the branch of the runtime test is taken, i.e. the vectorized
6287 version is skipped, this incurs a misprediction cost (because the
6288 vectorized version is expected to be the fall-through).  So we subtract
6289 the latency of a mispredicted branch from the costs that are incurred
6290 when the vectorized version is executed. */
6291 if (runtime_test)
6292 return -19;
6293 else
6294 return 0;
6297 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6298 after applying N iterations.  This routine does not determine
6299 how many iterations are required to reach the desired alignment. */
6301 static bool
6302 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6304 if (is_packed)
6305 return false;
6307 /* All other types are naturally aligned. */
6308 return true;
6311 /* Implement targetm.vectorize.builtin_vec_perm. */
6312 tree
6313 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6315 struct spu_builtin_description *d;
6317 *mask_element_type = unsigned_char_type_node;
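/* The shufb control vector selects individual bytes, so the mask elements
   are unsigned chars regardless of the element type of TYPE.  */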
6319 switch (TYPE_MODE (type))
6321 case V16QImode:
6322 if (TYPE_UNSIGNED (type))
6323 d = &spu_builtins[SPU_SHUFFLE_0];
6324 else
6325 d = &spu_builtins[SPU_SHUFFLE_1];
6326 break;
6328 case V8HImode:
6329 if (TYPE_UNSIGNED (type))
6330 d = &spu_builtins[SPU_SHUFFLE_2];
6331 else
6332 d = &spu_builtins[SPU_SHUFFLE_3];
6333 break;
6335 case V4SImode:
6336 if (TYPE_UNSIGNED (type))
6337 d = &spu_builtins[SPU_SHUFFLE_4];
6338 else
6339 d = &spu_builtins[SPU_SHUFFLE_5];
6340 break;
6342 case V2DImode:
6343 if (TYPE_UNSIGNED (type))
6344 d = &spu_builtins[SPU_SHUFFLE_6];
6345 else
6346 d = &spu_builtins[SPU_SHUFFLE_7];
6347 break;
6349 case V4SFmode:
6350 d = &spu_builtins[SPU_SHUFFLE_8];
6351 break;
6353 case V2DFmode:
6354 d = &spu_builtins[SPU_SHUFFLE_9];
6355 break;
6357 default:
6358 return NULL_TREE;
6361 gcc_assert (d);
6362 return d->fndecl;
6365 /* Count the total number of instructions in each pipe and return the
6366 maximum, which is used as the Minimum Iteration Interval (MII)
6367 in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1;
6368 -2 marks instructions that can go in either pipe0 or pipe1. */
6369 static int
6370 spu_sms_res_mii (struct ddg *g)
6372 int i;
6373 unsigned t[4] = {0, 0, 0, 0};
6375 for (i = 0; i < g->num_nodes; i++)
6377 rtx insn = g->nodes[i].insn;
6378 int p = get_pipe (insn) + 2;
6380 assert (p >= 0);
6381 assert (p < 4);
6383 t[p]++;
6384 if (dump_file && INSN_P (insn))
6385 fprintf (dump_file, "i%d %s %d %d\n",
6386 INSN_UID (insn),
6387 insn_data[INSN_CODE (insn)].name,
6388 p, t[p]);
6390 if (dump_file)
6391 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
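/* Pipe-agnostic instructions (t[0]) can be distributed over both pipes
   together with the pipe-specific ones, hence the rounded-up average
   (t[0] + t[2] + t[3] + 1) / 2; the MII can never be smaller than the
   busier of the two dedicated pipes.  Instructions with no pipe
   classification (t[1]) do not constrain the MII.  */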
6393 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6397 void
6398 spu_init_expanders (void)
6400 /* The hard frame pointer is only 128-bit aligned when
6401 frame_pointer_needed is true.  We don't know that until we're
6402 expanding the prologue. */
6403 if (cfun)
6404 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
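/* REGNO_POINTER_ALIGN is measured in bits, so 8 records that only byte
   alignment may be assumed for the hard frame pointer at this point.  */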
6407 static enum machine_mode
6408 spu_libgcc_cmp_return_mode (void)
6411 /* On SPU, word_mode is TImode, so it is better to use SImode
6412 for compare returns. */
6413 return SImode;
6416 static enum machine_mode
6417 spu_libgcc_shift_count_mode (void)
6419 /* On SPU, word_mode is TImode, so it is better to use SImode
6420 for shift counts. */
6421 return SImode;
6424 /* An early place to adjust some flags after GCC has finished processing
6425 them. */
6426 static void
6427 asm_file_start (void)
6429 /* Variable tracking should be run after all optimizations that
6430 change the order of insns.  It also needs a valid CFG. */
6431 spu_flag_var_tracking = flag_var_tracking;
6432 flag_var_tracking = 0;
6434 default_file_start ();
6437 /* Implement targetm.section_type_flags. */
6438 static unsigned int
6439 spu_section_type_flags (tree decl, const char *name, int reloc)
6441 /* .toe needs to have type @nobits. */
6442 if (strcmp (name, ".toe") == 0)
6443 return SECTION_BSS;
6444 return default_section_type_flags (decl, name, reloc);
6447 /* Generate a constant or register which contains 2^SCALE. We assume
6448 the result is valid for MODE. Currently, MODE must be V4SFmode and
6449 SCALE must be SImode. */
6450 rtx
6451 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6453 gcc_assert (mode == V4SFmode);
6454 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6455 if (GET_CODE (scale) != CONST_INT)
6457 /* unsigned int exp = (127 + scale) << 23;
6458 __vector float m = (__vector float) spu_splats (exp); */
6459 rtx reg = force_reg (SImode, scale);
6460 rtx exp = gen_reg_rtx (SImode);
6461 rtx mul = gen_reg_rtx (mode);
6462 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6463 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6464 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6465 return mul;
6467 else
6469 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6470 unsigned char arr[16];
6471 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6472 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6473 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6474 arr[3] = arr[7] = arr[11] = arr[15] = 0;
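/* For example, spu_gen_exp2 (V4SFmode, GEN_INT (3)) gives exp = 130, so
   every 4-byte group becomes 0x41 0x00 0x00 0x00, the big-endian IEEE-754
   encoding of 8.0f (exponent field 127 + 3, zero mantissa), i.e. 2^3 in
   each element.  */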
6475 return array_to_constant (mode, arr);
6479 #include "gt-spu.h"