1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "machmode.h"
52 #include "gimple.h"
53 #include "tm-constrs.h"
54 #include "ddg.h"
55 #include "sbitmap.h"
56 #include "timevar.h"
57 #include "df.h"
59 /* Builtin types, data and prototypes. */
61 enum spu_builtin_type_index
63 SPU_BTI_END_OF_PARAMS,
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
111 SPU_BTI_MAX
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
127 struct spu_builtin_range
129 int low, high;
132 static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
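/* The bounds above follow directly from the field widths: a signed
   10-bit field (SPU_BTI_S10) spans -2^9 .. 2^9-1 = -0x200 .. 0x1ff, and an
   unsigned 7-bit field (SPU_BTI_U7) spans 0 .. 0x7f.  The SPU_BTI_7 entry,
   -0x40 .. 0x7f, accepts either a signed or an unsigned reading of a
   7-bit field.  */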
148 /* Target specific attribute specifications. */
149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
151 /* Prototypes and external defs. */
152 static void spu_init_builtins (void);
153 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
155 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
156 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157 static rtx get_pic_reg (void);
158 static int need_to_save_reg (int regno, int saving);
159 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163 static void emit_nop_for_insn (rtx insn);
164 static bool insn_clobbers_hbr (rtx insn);
165 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
166 int distance, sbitmap blocks);
167 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
169 static rtx get_branch_target (rtx branch);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174 static int get_pipe (rtx insn);
175 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
179 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185 static int spu_naked_function_p (tree func);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
188 static tree spu_build_builtin_va_list (void);
189 static void spu_va_start (tree, rtx);
190 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
192 static int store_with_one_insn_p (rtx mem);
193 static int mem_is_padded_component_ref (rtx x);
194 static int reg_aligned_for_addr (rtx x);
195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196 static void spu_asm_globalize_label (FILE * file, const char *name);
197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
198 int *total, bool speed);
199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type, const_tree fntype);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree, rtx, int);
204 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
205 static tree spu_builtin_mul_widen_even (tree);
206 static tree spu_builtin_mul_widen_odd (tree);
207 static tree spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree, bool);
210 static tree spu_builtin_vec_perm (tree, tree *);
211 static int spu_sms_res_mii (struct ddg *g);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree, const char *, int);
214 static rtx spu_expand_load (rtx, rtx, rtx, int);
216 extern const char *reg_names[];
218 /* Which instruction set architecture to use. */
219 int spu_arch;
220 /* Which cpu are we tuning for. */
221 int spu_tune;
223 /* The hardware requires 8 insns between a hint and the branch it
224 affects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops to be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist = (8*4) - (2*4);
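/* For illustration: each insn is 4 bytes, so the required 8 insns span
   8*4 = 32 bytes; allowing up to 2 nops of padding (2*4 = 8 bytes) gives
   the default hint distance of 32 - 8 = 24 bytes.  */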
231 /* Determines whether we run variable tracking in machine dependent
232 reorganization. */
233 static int spu_flag_var_tracking;
235 enum spu_immediate {
236 SPU_NONE,
237 SPU_IL,
238 SPU_ILA,
239 SPU_ILH,
240 SPU_ILHU,
241 SPU_ORI,
242 SPU_ORHI,
243 SPU_ORBI,
244 SPU_IOHL
246 enum immediate_class
248 IC_POOL, /* constant pool */
249 IC_IL1, /* one il* instruction */
250 IC_IL2, /* both ilhu and iohl instructions */
251 IC_IL1s, /* one il* instruction */
252 IC_IL2s, /* both ilhu and iohl instructions */
253 IC_FSMBI, /* the fsmbi instruction */
254 IC_CPAT, /* one of the c*d instructions */
255 IC_FSMBI2 /* fsmbi plus 1 other instruction */
258 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
259 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
260 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
261 static enum immediate_class classify_immediate (rtx op,
262 enum machine_mode mode);
264 static enum machine_mode spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
272 /* Table of machine attributes. */
273 static const struct attribute_spec spu_attribute_table[] =
275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
276 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
277 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
278 { NULL, 0, 0, false, false, false, NULL }
281 /* TARGET overrides. */
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS spu_init_builtins
286 #undef TARGET_EXPAND_BUILTIN
287 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
289 #undef TARGET_UNWIND_WORD_MODE
290 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
292 #undef TARGET_LEGITIMIZE_ADDRESS
293 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
295 /* The .8byte directive doesn't seem to work well for a 32-bit
296 architecture. */
297 #undef TARGET_ASM_UNALIGNED_DI_OP
298 #define TARGET_ASM_UNALIGNED_DI_OP NULL
300 #undef TARGET_RTX_COSTS
301 #define TARGET_RTX_COSTS spu_rtx_costs
303 #undef TARGET_ADDRESS_COST
304 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
306 #undef TARGET_SCHED_ISSUE_RATE
307 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
309 #undef TARGET_SCHED_INIT_GLOBAL
310 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
312 #undef TARGET_SCHED_INIT
313 #define TARGET_SCHED_INIT spu_sched_init
315 #undef TARGET_SCHED_VARIABLE_ISSUE
316 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
318 #undef TARGET_SCHED_REORDER
319 #define TARGET_SCHED_REORDER spu_sched_reorder
321 #undef TARGET_SCHED_REORDER2
322 #define TARGET_SCHED_REORDER2 spu_sched_reorder
324 #undef TARGET_SCHED_ADJUST_COST
325 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
327 #undef TARGET_ATTRIBUTE_TABLE
328 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
330 #undef TARGET_ASM_INTEGER
331 #define TARGET_ASM_INTEGER spu_assemble_integer
333 #undef TARGET_SCALAR_MODE_SUPPORTED_P
334 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
336 #undef TARGET_VECTOR_MODE_SUPPORTED_P
337 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
339 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
340 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
342 #undef TARGET_ASM_GLOBALIZE_LABEL
343 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
345 #undef TARGET_PASS_BY_REFERENCE
346 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
348 #undef TARGET_MUST_PASS_IN_STACK
349 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
351 #undef TARGET_BUILD_BUILTIN_VA_LIST
352 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
354 #undef TARGET_EXPAND_BUILTIN_VA_START
355 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
357 #undef TARGET_SETUP_INCOMING_VARARGS
358 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
360 #undef TARGET_MACHINE_DEPENDENT_REORG
361 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
366 #undef TARGET_DEFAULT_TARGET_FLAGS
367 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
369 #undef TARGET_INIT_LIBFUNCS
370 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
372 #undef TARGET_RETURN_IN_MEMORY
373 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
375 #undef TARGET_ENCODE_SECTION_INFO
376 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
378 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
379 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
381 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
382 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
384 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
385 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
387 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
388 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
390 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
391 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
393 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
394 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
396 #undef TARGET_LIBGCC_CMP_RETURN_MODE
397 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
399 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
400 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
402 #undef TARGET_SCHED_SMS_RES_MII
403 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
405 #undef TARGET_ASM_FILE_START
406 #define TARGET_ASM_FILE_START asm_file_start
408 #undef TARGET_SECTION_TYPE_FLAGS
409 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
411 #undef TARGET_LEGITIMATE_ADDRESS_P
412 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
414 struct gcc_target targetm = TARGET_INITIALIZER;
416 void
417 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
419 /* Override some of the default param values. With so many registers
420 larger values are better for these params. */
421 MAX_PENDING_LIST_LENGTH = 128;
423 /* With so many registers, register renaming is better enabled by default. */
424 flag_rename_registers = 1;
427 /* Sometimes certain combinations of command options do not make sense
428 on a particular target machine. You can define a macro
429 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
430 executed once just after all the command options have been parsed. */
431 void
432 spu_override_options (void)
434 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it is
435 more important to keep code small by default. */
436 if (!flag_unroll_loops && !flag_peel_loops
437 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
438 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
440 flag_omit_frame_pointer = 1;
442 /* Functions must be 8 byte aligned so we correctly handle dual issue */
443 if (align_functions < 8)
444 align_functions = 8;
446 spu_hint_dist = 8*4 - spu_max_nops*4;
447 if (spu_hint_dist < 0)
448 spu_hint_dist = 0;
450 if (spu_fixed_range_string)
451 fix_range (spu_fixed_range_string);
453 /* Determine processor architectural level. */
454 if (spu_arch_string)
456 if (strcmp (&spu_arch_string[0], "cell") == 0)
457 spu_arch = PROCESSOR_CELL;
458 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
459 spu_arch = PROCESSOR_CELLEDP;
460 else
461 error ("Unknown architecture '%s'", &spu_arch_string[0]);
464 /* Determine processor to tune for. */
465 if (spu_tune_string)
467 if (strcmp (&spu_tune_string[0], "cell") == 0)
468 spu_tune = PROCESSOR_CELL;
469 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
470 spu_tune = PROCESSOR_CELLEDP;
471 else
472 error ("Unknown architecture '%s'", &spu_tune_string[0]);
475 /* Change defaults according to the processor architecture. */
476 if (spu_arch == PROCESSOR_CELLEDP)
478 /* If no command line option has been otherwise specified, change
479 the default to -mno-safe-hints on celledp -- only the original
480 Cell/B.E. processors require this workaround. */
481 if (!(target_flags_explicit & MASK_SAFE_HINTS))
482 target_flags &= ~MASK_SAFE_HINTS;
485 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
488 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
489 struct attribute_spec.handler. */
491 /* True if MODE is valid for the target. By "valid", we mean able to
492 be manipulated in non-trivial ways. In particular, this means all
493 the arithmetic is supported. */
494 static bool
495 spu_scalar_mode_supported_p (enum machine_mode mode)
497 switch (mode)
499 case QImode:
500 case HImode:
501 case SImode:
502 case SFmode:
503 case DImode:
504 case TImode:
505 case DFmode:
506 return true;
508 default:
509 return false;
513 /* Similarly for vector modes. "Supported" here is less strict. At
514 least some operations are supported; need to check optabs or builtins
515 for further details. */
516 static bool
517 spu_vector_mode_supported_p (enum machine_mode mode)
519 switch (mode)
521 case V16QImode:
522 case V8HImode:
523 case V4SImode:
524 case V2DImode:
525 case V4SFmode:
526 case V2DFmode:
527 return true;
529 default:
530 return false;
534 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
535 least significant bytes of the outer mode. This function returns
536 TRUE for the SUBREG's where this is correct. */
538 valid_subreg (rtx op)
540 enum machine_mode om = GET_MODE (op);
541 enum machine_mode im = GET_MODE (SUBREG_REG (op));
542 return om != VOIDmode && im != VOIDmode
543 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
544 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
545 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
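/* For example (illustrative, not exhaustive): (subreg:SI (reg:HI ...) 0)
   is accepted because both modes are at most 4 bytes, and
   (subreg:TI (reg:V4SI ...) 0) is accepted because the sizes are equal
   (16 bytes each), but (subreg:DI (reg:SI ...) 0) is rejected because a
   4-byte inner mode and an 8-byte outer mode satisfy none of the cases
   above.  */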
548 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
549 and adjust the start offset. */
550 static rtx
551 adjust_operand (rtx op, HOST_WIDE_INT * start)
553 enum machine_mode mode;
554 int op_size;
555 /* Strip any paradoxical SUBREG. */
556 if (GET_CODE (op) == SUBREG
557 && (GET_MODE_BITSIZE (GET_MODE (op))
558 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
560 if (start)
561 *start -=
562 GET_MODE_BITSIZE (GET_MODE (op)) -
563 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
564 op = SUBREG_REG (op);
566 /* If it is smaller than SI, widen it to SImode via a SUBREG. */
567 op_size = GET_MODE_BITSIZE (GET_MODE (op));
568 if (op_size < 32)
570 if (start)
571 *start += 32 - op_size;
572 op_size = 32;
574 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
575 mode = mode_for_size (op_size, MODE_INT, 0);
576 if (mode != GET_MODE (op))
577 op = gen_rtx_SUBREG (mode, op, 0);
578 return op;
581 void
582 spu_expand_extv (rtx ops[], int unsignedp)
584 rtx dst = ops[0], src = ops[1];
585 HOST_WIDE_INT width = INTVAL (ops[2]);
586 HOST_WIDE_INT start = INTVAL (ops[3]);
587 HOST_WIDE_INT align_mask;
588 rtx s0, s1, mask, r0;
590 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
592 if (MEM_P (src))
594 /* First, determine if we need 1 TImode load or 2. We need only 1
595 if the bits being extracted do not cross the alignment boundary
596 as determined by the MEM and its address. */
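/* For instance, when the MEM is known to be 128-bit aligned
   (MEM_ALIGN (src) == 128), align_mask below is -128, so the test simply
   asks whether the first and last extracted bits fall within the same
   16-byte quadword.  */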
598 align_mask = -MEM_ALIGN (src);
599 if ((start & align_mask) == ((start + width - 1) & align_mask))
601 /* Alignment is sufficient for 1 load. */
602 s0 = gen_reg_rtx (TImode);
603 r0 = spu_expand_load (s0, 0, src, start / 8);
604 start &= 7;
605 if (r0)
606 emit_insn (gen_rotqby_ti (s0, s0, r0));
608 else
610 /* Need 2 loads. */
611 s0 = gen_reg_rtx (TImode);
612 s1 = gen_reg_rtx (TImode);
613 r0 = spu_expand_load (s0, s1, src, start / 8);
614 start &= 7;
616 gcc_assert (start + width <= 128);
617 if (r0)
619 rtx r1 = gen_reg_rtx (SImode);
620 mask = gen_reg_rtx (TImode);
621 emit_move_insn (mask, GEN_INT (-1));
622 emit_insn (gen_rotqby_ti (s0, s0, r0));
623 emit_insn (gen_rotqby_ti (s1, s1, r0));
624 if (GET_CODE (r0) == CONST_INT)
625 r1 = GEN_INT (INTVAL (r0) & 15);
626 else
627 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
628 emit_insn (gen_shlqby_ti (mask, mask, r1));
629 emit_insn (gen_selb (s0, s1, s0, mask));
634 else if (GET_CODE (src) == SUBREG)
636 rtx r = SUBREG_REG (src);
637 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
638 s0 = gen_reg_rtx (TImode);
639 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
640 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
641 else
642 emit_move_insn (s0, src);
644 else
646 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
647 s0 = gen_reg_rtx (TImode);
648 emit_move_insn (s0, src);
651 /* Now s0 is TImode and contains the bits to extract at start. */
653 if (start)
654 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
656 if (128 - width)
658 tree c = build_int_cst (NULL_TREE, 128 - width);
659 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
662 emit_move_insn (dst, s0);
665 void
666 spu_expand_insv (rtx ops[])
668 HOST_WIDE_INT width = INTVAL (ops[1]);
669 HOST_WIDE_INT start = INTVAL (ops[2]);
670 HOST_WIDE_INT maskbits;
671 enum machine_mode dst_mode, src_mode;
672 rtx dst = ops[0], src = ops[3];
673 int dst_size, src_size;
674 rtx mask;
675 rtx shift_reg;
676 int shift;
679 if (GET_CODE (ops[0]) == MEM)
680 dst = gen_reg_rtx (TImode);
681 else
682 dst = adjust_operand (dst, &start);
683 dst_mode = GET_MODE (dst);
684 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
686 if (CONSTANT_P (src))
688 enum machine_mode m =
689 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
690 src = force_reg (m, convert_to_mode (m, src, 0));
692 src = adjust_operand (src, 0);
693 src_mode = GET_MODE (src);
694 src_size = GET_MODE_BITSIZE (GET_MODE (src));
696 mask = gen_reg_rtx (dst_mode);
697 shift_reg = gen_reg_rtx (dst_mode);
698 shift = dst_size - start - width;
700 /* It's not safe to use subreg here because the compiler assumes
701 that the SUBREG_REG is right justified in the SUBREG. */
702 convert_move (shift_reg, src, 1);
704 if (shift > 0)
706 switch (dst_mode)
708 case SImode:
709 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
710 break;
711 case DImode:
712 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
713 break;
714 case TImode:
715 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
716 break;
717 default:
718 abort ();
721 else if (shift < 0)
722 abort ();
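/* Example of the mask computed below (illustrative): for dst_size == 32,
   start == 8 and width == 8, (-1ll << 16) is ...ffff0000 and adding
   (1ll << 24) clears the top byte, leaving 0x00ff0000 -- a mask over
   bits 8..15 counted from the most significant bit, exactly the field
   being inserted.  */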
724 switch (dst_size)
726 case 32:
727 maskbits = (-1ll << (32 - width - start));
728 if (start)
729 maskbits += (1ll << (32 - start));
730 emit_move_insn (mask, GEN_INT (maskbits));
731 break;
732 case 64:
733 maskbits = (-1ll << (64 - width - start));
734 if (start)
735 maskbits += (1ll << (64 - start));
736 emit_move_insn (mask, GEN_INT (maskbits));
737 break;
738 case 128:
740 unsigned char arr[16];
741 int i = start / 8;
742 memset (arr, 0, sizeof (arr));
743 arr[i] = 0xff >> (start & 7);
744 for (i++; i <= (start + width - 1) / 8; i++)
745 arr[i] = 0xff;
746 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
747 emit_move_insn (mask, array_to_constant (TImode, arr));
749 break;
750 default:
751 abort ();
753 if (GET_CODE (ops[0]) == MEM)
755 rtx low = gen_reg_rtx (SImode);
756 rtx rotl = gen_reg_rtx (SImode);
757 rtx mask0 = gen_reg_rtx (TImode);
758 rtx addr;
759 rtx addr0;
760 rtx addr1;
761 rtx mem;
763 addr = force_reg (Pmode, XEXP (ops[0], 0));
764 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
765 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
766 emit_insn (gen_negsi2 (rotl, low));
767 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
768 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
769 mem = change_address (ops[0], TImode, addr0);
770 set_mem_alias_set (mem, 0);
771 emit_move_insn (dst, mem);
772 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
773 if (start + width > MEM_ALIGN (ops[0]))
775 rtx shl = gen_reg_rtx (SImode);
776 rtx mask1 = gen_reg_rtx (TImode);
777 rtx dst1 = gen_reg_rtx (TImode);
778 rtx mem1;
779 addr1 = plus_constant (addr, 16);
780 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
781 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
782 emit_insn (gen_shlqby_ti (mask1, mask, shl));
783 mem1 = change_address (ops[0], TImode, addr1);
784 set_mem_alias_set (mem1, 0);
785 emit_move_insn (dst1, mem1);
786 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
787 emit_move_insn (mem1, dst1);
789 emit_move_insn (mem, dst);
791 else
792 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
797 spu_expand_block_move (rtx ops[])
799 HOST_WIDE_INT bytes, align, offset;
800 rtx src, dst, sreg, dreg, target;
801 int i;
802 if (GET_CODE (ops[2]) != CONST_INT
803 || GET_CODE (ops[3]) != CONST_INT
804 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
805 return 0;
807 bytes = INTVAL (ops[2]);
808 align = INTVAL (ops[3]);
810 if (bytes <= 0)
811 return 1;
813 dst = ops[0];
814 src = ops[1];
816 if (align == 16)
818 for (offset = 0; offset + 16 <= bytes; offset += 16)
820 dst = adjust_address (ops[0], V16QImode, offset);
821 src = adjust_address (ops[1], V16QImode, offset);
822 emit_move_insn (dst, src);
824 if (offset < bytes)
826 rtx mask;
827 unsigned char arr[16] = { 0 };
828 for (i = 0; i < bytes - offset; i++)
829 arr[i] = 0xff;
830 dst = adjust_address (ops[0], V16QImode, offset);
831 src = adjust_address (ops[1], V16QImode, offset);
832 mask = gen_reg_rtx (V16QImode);
833 sreg = gen_reg_rtx (V16QImode);
834 dreg = gen_reg_rtx (V16QImode);
835 target = gen_reg_rtx (V16QImode);
836 emit_move_insn (mask, array_to_constant (V16QImode, arr));
837 emit_move_insn (dreg, dst);
838 emit_move_insn (sreg, src);
839 emit_insn (gen_selb (target, dreg, sreg, mask));
840 emit_move_insn (dst, target);
842 return 1;
844 return 0;
847 enum spu_comp_code
848 { SPU_EQ, SPU_GT, SPU_GTU };
850 int spu_comp_icode[12][3] = {
851 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
852 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
853 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
854 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
855 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
856 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
857 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
858 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
859 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
860 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
861 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
862 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
865 /* Generate a compare for CODE. Return a brand-new rtx that represents
866 the result of the compare. GCC can figure this out too if we don't
867 provide all variations of compares, but since GCC always wants to use
868 WORD_MODE, we can generate better code in most cases if we do it
869 ourselves. */
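/* A note on the IS_SET argument as used below: 0 emits a conditional
   branch to operands[3], 2 emits a select (selb) between operands[2]
   and operands[3], and any other value materializes the comparison
   result directly into operands[0].  */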
870 void
871 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
873 int reverse_compare = 0;
874 int reverse_test = 0;
875 rtx compare_result, eq_result;
876 rtx comp_rtx, eq_rtx;
877 enum machine_mode comp_mode;
878 enum machine_mode op_mode;
879 enum spu_comp_code scode, eq_code;
880 enum insn_code ior_code;
881 enum rtx_code code = GET_CODE (cmp);
882 rtx op0 = XEXP (cmp, 0);
883 rtx op1 = XEXP (cmp, 1);
884 int index;
885 int eq_test = 0;
887 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
888 and so on, to keep the constant in operand 1. */
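/* For example, (X >= 5) becomes (X > 4) and (X ltu 5) becomes
   (X leu 4).  */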
889 if (GET_CODE (op1) == CONST_INT)
891 HOST_WIDE_INT val = INTVAL (op1) - 1;
892 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
893 switch (code)
895 case GE:
896 op1 = GEN_INT (val);
897 code = GT;
898 break;
899 case LT:
900 op1 = GEN_INT (val);
901 code = LE;
902 break;
903 case GEU:
904 op1 = GEN_INT (val);
905 code = GTU;
906 break;
907 case LTU:
908 op1 = GEN_INT (val);
909 code = LEU;
910 break;
911 default:
912 break;
916 comp_mode = SImode;
917 op_mode = GET_MODE (op0);
919 switch (code)
921 case GE:
922 scode = SPU_GT;
923 if (HONOR_NANS (op_mode))
925 reverse_compare = 0;
926 reverse_test = 0;
927 eq_test = 1;
928 eq_code = SPU_EQ;
930 else
932 reverse_compare = 1;
933 reverse_test = 1;
935 break;
936 case LE:
937 scode = SPU_GT;
938 if (HONOR_NANS (op_mode))
940 reverse_compare = 1;
941 reverse_test = 0;
942 eq_test = 1;
943 eq_code = SPU_EQ;
945 else
947 reverse_compare = 0;
948 reverse_test = 1;
950 break;
951 case LT:
952 reverse_compare = 1;
953 reverse_test = 0;
954 scode = SPU_GT;
955 break;
956 case GEU:
957 reverse_compare = 1;
958 reverse_test = 1;
959 scode = SPU_GTU;
960 break;
961 case LEU:
962 reverse_compare = 0;
963 reverse_test = 1;
964 scode = SPU_GTU;
965 break;
966 case LTU:
967 reverse_compare = 1;
968 reverse_test = 0;
969 scode = SPU_GTU;
970 break;
971 case NE:
972 reverse_compare = 0;
973 reverse_test = 1;
974 scode = SPU_EQ;
975 break;
977 case EQ:
978 scode = SPU_EQ;
979 break;
980 case GT:
981 scode = SPU_GT;
982 break;
983 case GTU:
984 scode = SPU_GTU;
985 break;
986 default:
987 scode = SPU_EQ;
988 break;
991 switch (op_mode)
993 case QImode:
994 index = 0;
995 comp_mode = QImode;
996 break;
997 case HImode:
998 index = 1;
999 comp_mode = HImode;
1000 break;
1001 case SImode:
1002 index = 2;
1003 break;
1004 case DImode:
1005 index = 3;
1006 break;
1007 case TImode:
1008 index = 4;
1009 break;
1010 case SFmode:
1011 index = 5;
1012 break;
1013 case DFmode:
1014 index = 6;
1015 break;
1016 case V16QImode:
1017 index = 7;
1018 comp_mode = op_mode;
1019 break;
1020 case V8HImode:
1021 index = 8;
1022 comp_mode = op_mode;
1023 break;
1024 case V4SImode:
1025 index = 9;
1026 comp_mode = op_mode;
1027 break;
1028 case V4SFmode:
1029 index = 10;
1030 comp_mode = V4SImode;
1031 break;
1032 case V2DFmode:
1033 index = 11;
1034 comp_mode = V2DImode;
1035 break;
1036 case V2DImode:
1037 default:
1038 abort ();
1041 if (GET_MODE (op1) == DFmode
1042 && (scode != SPU_GT && scode != SPU_EQ))
1043 abort ();
1045 if (is_set == 0 && op1 == const0_rtx
1046 && (GET_MODE (op0) == SImode
1047 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1049 /* Don't need to set a register with the result when we are
1050 comparing against zero and branching. */
1051 reverse_test = !reverse_test;
1052 compare_result = op0;
1054 else
1056 compare_result = gen_reg_rtx (comp_mode);
1058 if (reverse_compare)
1060 rtx t = op1;
1061 op1 = op0;
1062 op0 = t;
1065 if (spu_comp_icode[index][scode] == 0)
1066 abort ();
1068 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1069 (op0, op_mode))
1070 op0 = force_reg (op_mode, op0);
1071 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1072 (op1, op_mode))
1073 op1 = force_reg (op_mode, op1);
1074 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1075 op0, op1);
1076 if (comp_rtx == 0)
1077 abort ();
1078 emit_insn (comp_rtx);
1080 if (eq_test)
1082 eq_result = gen_reg_rtx (comp_mode);
1083 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1084 op0, op1);
1085 if (eq_rtx == 0)
1086 abort ();
1087 emit_insn (eq_rtx);
1088 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1089 gcc_assert (ior_code != CODE_FOR_nothing);
1090 emit_insn (GEN_FCN (ior_code)
1091 (compare_result, compare_result, eq_result));
1095 if (is_set == 0)
1097 rtx bcomp;
1098 rtx loc_ref;
1100 /* We don't have branch on QI compare insns, so we convert the
1101 QI compare result to a HI result. */
1102 if (comp_mode == QImode)
1104 rtx old_res = compare_result;
1105 compare_result = gen_reg_rtx (HImode);
1106 comp_mode = HImode;
1107 emit_insn (gen_extendqihi2 (compare_result, old_res));
1110 if (reverse_test)
1111 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1112 else
1113 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1115 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1116 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1117 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1118 loc_ref, pc_rtx)));
1120 else if (is_set == 2)
1122 rtx target = operands[0];
1123 int compare_size = GET_MODE_BITSIZE (comp_mode);
1124 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1125 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1126 rtx select_mask;
1127 rtx op_t = operands[2];
1128 rtx op_f = operands[3];
1130 /* The result of the comparison can be SI, HI or QI mode. Create a
1131 mask based on that result. */
1132 if (target_size > compare_size)
1134 select_mask = gen_reg_rtx (mode);
1135 emit_insn (gen_extend_compare (select_mask, compare_result));
1137 else if (target_size < compare_size)
1138 select_mask =
1139 gen_rtx_SUBREG (mode, compare_result,
1140 (compare_size - target_size) / BITS_PER_UNIT);
1141 else if (comp_mode != mode)
1142 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1143 else
1144 select_mask = compare_result;
1146 if (GET_MODE (target) != GET_MODE (op_t)
1147 || GET_MODE (target) != GET_MODE (op_f))
1148 abort ();
1150 if (reverse_test)
1151 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1152 else
1153 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1155 else
1157 rtx target = operands[0];
1158 if (reverse_test)
1159 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1160 gen_rtx_NOT (comp_mode, compare_result)));
1161 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1162 emit_insn (gen_extendhisi2 (target, compare_result));
1163 else if (GET_MODE (target) == SImode
1164 && GET_MODE (compare_result) == QImode)
1165 emit_insn (gen_extend_compare (target, compare_result));
1166 else
1167 emit_move_insn (target, compare_result);
1171 HOST_WIDE_INT
1172 const_double_to_hwint (rtx x)
1174 HOST_WIDE_INT val;
1175 REAL_VALUE_TYPE rv;
1176 if (GET_MODE (x) == SFmode)
1178 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1179 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1181 else if (GET_MODE (x) == DFmode)
1183 long l[2];
1184 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1185 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1186 val = l[0];
1187 val = (val << 32) | (l[1] & 0xffffffff);
1189 else
1190 abort ();
1191 return val;
1195 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1197 long tv[2];
1198 REAL_VALUE_TYPE rv;
1199 gcc_assert (mode == SFmode || mode == DFmode);
1201 if (mode == SFmode)
1202 tv[0] = (v << 32) >> 32;
1203 else if (mode == DFmode)
1205 tv[1] = (v << 32) >> 32;
1206 tv[0] = v >> 32;
1208 real_from_target (&rv, tv, mode);
1209 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
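/* As an illustration of the two helpers above: for the SFmode constant
   1.0 the IEEE-754 single-precision image is 0x3f800000, so
   const_double_to_hwint returns that value, and
   hwint_to_const_double (SFmode, 0x3f800000) reconstructs the same
   CONST_DOUBLE.  */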
1212 void
1213 print_operand_address (FILE * file, register rtx addr)
1215 rtx reg;
1216 rtx offset;
1218 if (GET_CODE (addr) == AND
1219 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1220 && INTVAL (XEXP (addr, 1)) == -16)
1221 addr = XEXP (addr, 0);
1223 switch (GET_CODE (addr))
1225 case REG:
1226 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1227 break;
1229 case PLUS:
1230 reg = XEXP (addr, 0);
1231 offset = XEXP (addr, 1);
1232 if (GET_CODE (offset) == REG)
1234 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1235 reg_names[REGNO (offset)]);
1237 else if (GET_CODE (offset) == CONST_INT)
1239 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1240 INTVAL (offset), reg_names[REGNO (reg)]);
1242 else
1243 abort ();
1244 break;
1246 case CONST:
1247 case LABEL_REF:
1248 case SYMBOL_REF:
1249 case CONST_INT:
1250 output_addr_const (file, addr);
1251 break;
1253 default:
1254 debug_rtx (addr);
1255 abort ();
1259 void
1260 print_operand (FILE * file, rtx x, int code)
1262 enum machine_mode mode = GET_MODE (x);
1263 HOST_WIDE_INT val;
1264 unsigned char arr[16];
1265 int xcode = GET_CODE (x);
1266 int i, info;
1267 if (GET_MODE (x) == VOIDmode)
1268 switch (code)
1270 case 'L': /* 128 bits, signed */
1271 case 'm': /* 128 bits, signed */
1272 case 'T': /* 128 bits, signed */
1273 case 't': /* 128 bits, signed */
1274 mode = TImode;
1275 break;
1276 case 'K': /* 64 bits, signed */
1277 case 'k': /* 64 bits, signed */
1278 case 'D': /* 64 bits, signed */
1279 case 'd': /* 64 bits, signed */
1280 mode = DImode;
1281 break;
1282 case 'J': /* 32 bits, signed */
1283 case 'j': /* 32 bits, signed */
1284 case 's': /* 32 bits, signed */
1285 case 'S': /* 32 bits, signed */
1286 mode = SImode;
1287 break;
1289 switch (code)
1292 case 'j': /* 32 bits, signed */
1293 case 'k': /* 64 bits, signed */
1294 case 'm': /* 128 bits, signed */
1295 if (xcode == CONST_INT
1296 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1298 gcc_assert (logical_immediate_p (x, mode));
1299 constant_to_array (mode, x, arr);
1300 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1301 val = trunc_int_for_mode (val, SImode);
1302 switch (which_logical_immediate (val))
1304 case SPU_ORI:
1305 break;
1306 case SPU_ORHI:
1307 fprintf (file, "h");
1308 break;
1309 case SPU_ORBI:
1310 fprintf (file, "b");
1311 break;
1312 default:
1313 gcc_unreachable();
1316 else
1317 gcc_unreachable();
1318 return;
1320 case 'J': /* 32 bits, signed */
1321 case 'K': /* 64 bits, signed */
1322 case 'L': /* 128 bits, signed */
1323 if (xcode == CONST_INT
1324 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1326 gcc_assert (logical_immediate_p (x, mode)
1327 || iohl_immediate_p (x, mode));
1328 constant_to_array (mode, x, arr);
1329 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1330 val = trunc_int_for_mode (val, SImode);
1331 switch (which_logical_immediate (val))
1333 case SPU_ORI:
1334 case SPU_IOHL:
1335 break;
1336 case SPU_ORHI:
1337 val = trunc_int_for_mode (val, HImode);
1338 break;
1339 case SPU_ORBI:
1340 val = trunc_int_for_mode (val, QImode);
1341 break;
1342 default:
1343 gcc_unreachable();
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1347 else
1348 gcc_unreachable();
1349 return;
1351 case 't': /* 128 bits, signed */
1352 case 'd': /* 64 bits, signed */
1353 case 's': /* 32 bits, signed */
1354 if (CONSTANT_P (x))
1356 enum immediate_class c = classify_immediate (x, mode);
1357 switch (c)
1359 case IC_IL1:
1360 constant_to_array (mode, x, arr);
1361 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1362 val = trunc_int_for_mode (val, SImode);
1363 switch (which_immediate_load (val))
1365 case SPU_IL:
1366 break;
1367 case SPU_ILA:
1368 fprintf (file, "a");
1369 break;
1370 case SPU_ILH:
1371 fprintf (file, "h");
1372 break;
1373 case SPU_ILHU:
1374 fprintf (file, "hu");
1375 break;
1376 default:
1377 gcc_unreachable ();
1379 break;
1380 case IC_CPAT:
1381 constant_to_array (mode, x, arr);
1382 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1383 if (info == 1)
1384 fprintf (file, "b");
1385 else if (info == 2)
1386 fprintf (file, "h");
1387 else if (info == 4)
1388 fprintf (file, "w");
1389 else if (info == 8)
1390 fprintf (file, "d");
1391 break;
1392 case IC_IL1s:
1393 if (xcode == CONST_VECTOR)
1395 x = CONST_VECTOR_ELT (x, 0);
1396 xcode = GET_CODE (x);
1398 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1399 fprintf (file, "a");
1400 else if (xcode == HIGH)
1401 fprintf (file, "hu");
1402 break;
1403 case IC_FSMBI:
1404 case IC_FSMBI2:
1405 case IC_IL2:
1406 case IC_IL2s:
1407 case IC_POOL:
1408 abort ();
1411 else
1412 gcc_unreachable ();
1413 return;
1415 case 'T': /* 128 bits, signed */
1416 case 'D': /* 64 bits, signed */
1417 case 'S': /* 32 bits, signed */
1418 if (CONSTANT_P (x))
1420 enum immediate_class c = classify_immediate (x, mode);
1421 switch (c)
1423 case IC_IL1:
1424 constant_to_array (mode, x, arr);
1425 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1426 val = trunc_int_for_mode (val, SImode);
1427 switch (which_immediate_load (val))
1429 case SPU_IL:
1430 case SPU_ILA:
1431 break;
1432 case SPU_ILH:
1433 case SPU_ILHU:
1434 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1435 break;
1436 default:
1437 gcc_unreachable ();
1439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1440 break;
1441 case IC_FSMBI:
1442 constant_to_array (mode, x, arr);
1443 val = 0;
1444 for (i = 0; i < 16; i++)
1446 val <<= 1;
1447 val |= arr[i] & 1;
1449 print_operand (file, GEN_INT (val), 0);
1450 break;
1451 case IC_CPAT:
1452 constant_to_array (mode, x, arr);
1453 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1454 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1455 break;
1456 case IC_IL1s:
1457 if (xcode == HIGH)
1458 x = XEXP (x, 0);
1459 if (GET_CODE (x) == CONST_VECTOR)
1460 x = CONST_VECTOR_ELT (x, 0);
1461 output_addr_const (file, x);
1462 if (xcode == HIGH)
1463 fprintf (file, "@h");
1464 break;
1465 case IC_IL2:
1466 case IC_IL2s:
1467 case IC_FSMBI2:
1468 case IC_POOL:
1469 abort ();
1472 else
1473 gcc_unreachable ();
1474 return;
1476 case 'C':
1477 if (xcode == CONST_INT)
1479 /* Only the 4 least significant bits are relevant for generating
1480 control word instructions. */
1481 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1482 return;
1484 break;
1486 case 'M': /* print code for c*d */
1487 if (GET_CODE (x) == CONST_INT)
1488 switch (INTVAL (x))
1490 case 1:
1491 fprintf (file, "b");
1492 break;
1493 case 2:
1494 fprintf (file, "h");
1495 break;
1496 case 4:
1497 fprintf (file, "w");
1498 break;
1499 case 8:
1500 fprintf (file, "d");
1501 break;
1502 default:
1503 gcc_unreachable();
1505 else
1506 gcc_unreachable();
1507 return;
1509 case 'N': /* Negate the operand */
1510 if (xcode == CONST_INT)
1511 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1512 else if (xcode == CONST_VECTOR)
1513 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1514 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1515 return;
1517 case 'I': /* enable/disable interrupts */
1518 if (xcode == CONST_INT)
1519 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1520 return;
1522 case 'b': /* branch modifiers */
1523 if (xcode == REG)
1524 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1525 else if (COMPARISON_P (x))
1526 fprintf (file, "%s", xcode == NE ? "n" : "");
1527 return;
1529 case 'i': /* indirect call */
1530 if (xcode == MEM)
1532 if (GET_CODE (XEXP (x, 0)) == REG)
1533 /* Used in indirect function calls. */
1534 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1535 else
1536 output_address (XEXP (x, 0));
1538 return;
1540 case 'p': /* load/store */
1541 if (xcode == MEM)
1543 x = XEXP (x, 0);
1544 xcode = GET_CODE (x);
1546 if (xcode == AND)
1548 x = XEXP (x, 0);
1549 xcode = GET_CODE (x);
1551 if (xcode == REG)
1552 fprintf (file, "d");
1553 else if (xcode == CONST_INT)
1554 fprintf (file, "a");
1555 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1556 fprintf (file, "r");
1557 else if (xcode == PLUS || xcode == LO_SUM)
1559 if (GET_CODE (XEXP (x, 1)) == REG)
1560 fprintf (file, "x");
1561 else
1562 fprintf (file, "d");
1564 return;
1566 case 'e':
1567 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1568 val &= 0x7;
1569 output_addr_const (file, GEN_INT (val));
1570 return;
1572 case 'f':
1573 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1574 val &= 0x1f;
1575 output_addr_const (file, GEN_INT (val));
1576 return;
1578 case 'g':
1579 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1580 val &= 0x3f;
1581 output_addr_const (file, GEN_INT (val));
1582 return;
1584 case 'h':
1585 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1586 val = (val >> 3) & 0x1f;
1587 output_addr_const (file, GEN_INT (val));
1588 return;
1590 case 'E':
1591 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1592 val = -val;
1593 val &= 0x7;
1594 output_addr_const (file, GEN_INT (val));
1595 return;
1597 case 'F':
1598 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1599 val = -val;
1600 val &= 0x1f;
1601 output_addr_const (file, GEN_INT (val));
1602 return;
1604 case 'G':
1605 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1606 val = -val;
1607 val &= 0x3f;
1608 output_addr_const (file, GEN_INT (val));
1609 return;
1611 case 'H':
1612 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1613 val = -(val & -8ll);
1614 val = (val >> 3) & 0x1f;
1615 output_addr_const (file, GEN_INT (val));
1616 return;
1618 case 'v':
1619 case 'w':
1620 constant_to_array (mode, x, arr);
1621 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1622 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1623 return;
1625 case 0:
1626 if (xcode == REG)
1627 fprintf (file, "%s", reg_names[REGNO (x)]);
1628 else if (xcode == MEM)
1629 output_address (XEXP (x, 0));
1630 else if (xcode == CONST_VECTOR)
1631 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1632 else
1633 output_addr_const (file, x);
1634 return;
1636 /* unused letters
1637 o qr u yz
1638 AB OPQR UVWXYZ */
1639 default:
1640 output_operand_lossage ("invalid %%xn code");
1642 gcc_unreachable ();
1645 extern char call_used_regs[];
1647 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1648 caller saved register. For leaf functions it is more efficient to
1649 use a volatile register because we won't need to save and restore the
1650 pic register. This routine is only valid after register allocation
1651 is completed, so we can pick an unused register. */
1652 static rtx
1653 get_pic_reg (void)
1655 rtx pic_reg = pic_offset_table_rtx;
1656 if (!reload_completed && !reload_in_progress)
1657 abort ();
1658 return pic_reg;
1661 /* Split constant addresses to handle cases that are too large.
1662 Add in the pic register when in PIC mode.
1663 Split immediates that require more than 1 instruction. */
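/* Illustrative example of the IC_IL2 case below: an SImode constant such
   as 0x12345678 cannot be loaded with a single il/ila/ilh/ilhu, so it is
   split into a move of 0x12340000 (which assembles to ilhu 0x1234)
   followed by an IOR with 0x5678 (iohl 0x5678).  */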
1665 spu_split_immediate (rtx * ops)
1667 enum machine_mode mode = GET_MODE (ops[0]);
1668 enum immediate_class c = classify_immediate (ops[1], mode);
1670 switch (c)
1672 case IC_IL2:
1674 unsigned char arrhi[16];
1675 unsigned char arrlo[16];
1676 rtx to, temp, hi, lo;
1677 int i;
1678 enum machine_mode imode = mode;
1679 /* We need to do reals as ints because the constant used in the
1680 IOR might not be a legitimate real constant. */
1681 imode = int_mode_for_mode (mode);
1682 constant_to_array (mode, ops[1], arrhi);
1683 if (imode != mode)
1684 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1685 else
1686 to = ops[0];
1687 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1688 for (i = 0; i < 16; i += 4)
1690 arrlo[i + 2] = arrhi[i + 2];
1691 arrlo[i + 3] = arrhi[i + 3];
1692 arrlo[i + 0] = arrlo[i + 1] = 0;
1693 arrhi[i + 2] = arrhi[i + 3] = 0;
1695 hi = array_to_constant (imode, arrhi);
1696 lo = array_to_constant (imode, arrlo);
1697 emit_move_insn (temp, hi);
1698 emit_insn (gen_rtx_SET
1699 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1700 return 1;
1702 case IC_FSMBI2:
1704 unsigned char arr_fsmbi[16];
1705 unsigned char arr_andbi[16];
1706 rtx to, reg_fsmbi, reg_and;
1707 int i;
1708 enum machine_mode imode = mode;
1709 /* We need to do reals as ints because the constant used in the
1710 * AND might not be a legitimate real constant. */
1711 imode = int_mode_for_mode (mode);
1712 constant_to_array (mode, ops[1], arr_fsmbi);
1713 if (imode != mode)
1714 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1715 else
1716 to = ops[0];
1717 for (i = 0; i < 16; i++)
1718 if (arr_fsmbi[i] != 0)
1720 arr_andbi[0] = arr_fsmbi[i];
1721 arr_fsmbi[i] = 0xff;
1723 for (i = 1; i < 16; i++)
1724 arr_andbi[i] = arr_andbi[0];
1725 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1726 reg_and = array_to_constant (imode, arr_andbi);
1727 emit_move_insn (to, reg_fsmbi);
1728 emit_insn (gen_rtx_SET
1729 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1730 return 1;
1732 case IC_POOL:
1733 if (reload_in_progress || reload_completed)
1735 rtx mem = force_const_mem (mode, ops[1]);
1736 if (TARGET_LARGE_MEM)
1738 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1739 emit_move_insn (addr, XEXP (mem, 0));
1740 mem = replace_equiv_address (mem, addr);
1742 emit_move_insn (ops[0], mem);
1743 return 1;
1745 break;
1746 case IC_IL1s:
1747 case IC_IL2s:
1748 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1750 if (c == IC_IL2s)
1752 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1753 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1755 else if (flag_pic)
1756 emit_insn (gen_pic (ops[0], ops[1]));
1757 if (flag_pic)
1759 rtx pic_reg = get_pic_reg ();
1760 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1761 crtl->uses_pic_offset_table = 1;
1763 return flag_pic || c == IC_IL2s;
1765 break;
1766 case IC_IL1:
1767 case IC_FSMBI:
1768 case IC_CPAT:
1769 break;
1771 return 0;
1774 /* SAVING is TRUE when we are generating the actual load and store
1775 instructions for REGNO. When determining the size of the stack
1776 needed for saving registers, we must allocate enough space for the
1777 worst case, because we don't always have the information early enough
1778 to not allocate it. But we can at least eliminate the actual loads
1779 and stores during the prologue/epilogue. */
1780 static int
1781 need_to_save_reg (int regno, int saving)
1783 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1784 return 1;
1785 if (flag_pic
1786 && regno == PIC_OFFSET_TABLE_REGNUM
1787 && (!saving || crtl->uses_pic_offset_table)
1788 && (!saving
1789 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1790 return 1;
1791 return 0;
1794 /* This function is only correct starting with local register
1795 allocation */
1797 spu_saved_regs_size (void)
1799 int reg_save_size = 0;
1800 int regno;
1802 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1803 if (need_to_save_reg (regno, 0))
1804 reg_save_size += 0x10;
1805 return reg_save_size;
1808 static rtx
1809 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1811 rtx reg = gen_rtx_REG (V4SImode, regno);
1812 rtx mem =
1813 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1814 return emit_insn (gen_movv4si (mem, reg));
1817 static rtx
1818 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1820 rtx reg = gen_rtx_REG (V4SImode, regno);
1821 rtx mem =
1822 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1823 return emit_insn (gen_movv4si (reg, mem));
1826 /* This happens after reload, so we need to expand it. */
1827 static rtx
1828 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1830 rtx insn;
1831 if (satisfies_constraint_K (GEN_INT (imm)))
1833 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1835 else
1837 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1838 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1839 if (REGNO (src) == REGNO (scratch))
1840 abort ();
1842 return insn;
1845 /* Return nonzero if this function is known to have a null epilogue. */
1848 direct_return (void)
1850 if (reload_completed)
1852 if (cfun->static_chain_decl == 0
1853 && (spu_saved_regs_size ()
1854 + get_frame_size ()
1855 + crtl->outgoing_args_size
1856 + crtl->args.pretend_args_size == 0)
1857 && current_function_is_leaf)
1858 return 1;
1860 return 0;
1864 The stack frame looks like this:
1865 +-------------+
1866 | incoming |
1867 | args |
1868 AP -> +-------------+
1869 | $lr save |
1870 +-------------+
1871 prev SP | back chain |
1872 +-------------+
1873 | var args |
1874 | reg save | crtl->args.pretend_args_size bytes
1875 +-------------+
1876 | ... |
1877 | saved regs | spu_saved_regs_size() bytes
1878 FP -> +-------------+
1879 | ... |
1880 | vars | get_frame_size() bytes
1881 HFP -> +-------------+
1882 | ... |
1883 | outgoing |
1884 | args | crtl->outgoing_args_size bytes
1885 +-------------+
1886 | $lr of next |
1887 | frame |
1888 +-------------+
1889 | back chain |
1890 SP -> +-------------+
1893 void
1894 spu_expand_prologue (void)
1896 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1897 HOST_WIDE_INT total_size;
1898 HOST_WIDE_INT saved_regs_size;
1899 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1900 rtx scratch_reg_0, scratch_reg_1;
1901 rtx insn, real;
1903 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1904 the "toplevel" insn chain. */
1905 emit_note (NOTE_INSN_DELETED);
1907 if (flag_pic && optimize == 0)
1908 crtl->uses_pic_offset_table = 1;
1910 if (spu_naked_function_p (current_function_decl))
1911 return;
1913 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1914 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1916 saved_regs_size = spu_saved_regs_size ();
1917 total_size = size + saved_regs_size
1918 + crtl->outgoing_args_size
1919 + crtl->args.pretend_args_size;
1921 if (!current_function_is_leaf
1922 || cfun->calls_alloca || total_size > 0)
1923 total_size += STACK_POINTER_OFFSET;
1925 /* Save this first because code after this might use the link
1926 register as a scratch register. */
1927 if (!current_function_is_leaf)
1929 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1930 RTX_FRAME_RELATED_P (insn) = 1;
1933 if (total_size > 0)
1935 offset = -crtl->args.pretend_args_size;
1936 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1937 if (need_to_save_reg (regno, 1))
1939 offset -= 16;
1940 insn = frame_emit_store (regno, sp_reg, offset);
1941 RTX_FRAME_RELATED_P (insn) = 1;
1945 if (flag_pic && crtl->uses_pic_offset_table)
1947 rtx pic_reg = get_pic_reg ();
1948 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1949 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1952 if (total_size > 0)
1954 if (flag_stack_check)
1956 /* We compare against total_size-1 because
1957 ($sp >= total_size) <=> ($sp > total_size-1) */
1958 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1959 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1960 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1961 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1963 emit_move_insn (scratch_v4si, size_v4si);
1964 size_v4si = scratch_v4si;
1966 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1967 emit_insn (gen_vec_extractv4si
1968 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1969 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1972 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1973 the value of the previous $sp because we save it as the back
1974 chain. */
1975 if (total_size <= 2000)
1977 /* In this case we save the back chain first. */
1978 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1979 insn =
1980 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1982 else
1984 insn = emit_move_insn (scratch_reg_0, sp_reg);
1985 insn =
1986 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1988 RTX_FRAME_RELATED_P (insn) = 1;
1989 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1990 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1992 if (total_size > 2000)
1994 /* Save the back chain ptr */
1995 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1998 if (frame_pointer_needed)
2000 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2001 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2002 + crtl->outgoing_args_size;
2003 /* Set the new frame_pointer */
2004 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2005 RTX_FRAME_RELATED_P (insn) = 1;
2006 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2007 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2008 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2012 emit_note (NOTE_INSN_DELETED);
2015 void
2016 spu_expand_epilogue (bool sibcall_p)
2018 int size = get_frame_size (), offset, regno;
2019 HOST_WIDE_INT saved_regs_size, total_size;
2020 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2021 rtx jump, scratch_reg_0;
2023 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2024 the "toplevel" insn chain. */
2025 emit_note (NOTE_INSN_DELETED);
2027 if (spu_naked_function_p (current_function_decl))
2028 return;
2030 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2032 saved_regs_size = spu_saved_regs_size ();
2033 total_size = size + saved_regs_size
2034 + crtl->outgoing_args_size
2035 + crtl->args.pretend_args_size;
2037 if (!current_function_is_leaf
2038 || cfun->calls_alloca || total_size > 0)
2039 total_size += STACK_POINTER_OFFSET;
2041 if (total_size > 0)
2043 if (cfun->calls_alloca)
2044 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2045 else
2046 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2049 if (saved_regs_size > 0)
2051 offset = -crtl->args.pretend_args_size;
2052 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2053 if (need_to_save_reg (regno, 1))
2055 offset -= 0x10;
2056 frame_emit_load (regno, sp_reg, offset);
2061 if (!current_function_is_leaf)
2062 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2064 if (!sibcall_p)
2066 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2067 jump = emit_jump_insn (gen__return ());
2068 emit_barrier_after (jump);
2071 emit_note (NOTE_INSN_DELETED);
2075 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2077 if (count != 0)
2078 return 0;
2079 /* This is inefficient because it ends up copying to a save-register
2080 which then gets saved even though $lr has already been saved. But
2081 it does generate better code for leaf functions and we don't need
2082 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2083 used for __builtin_return_address anyway, so maybe we don't care if
2084 it's inefficient. */
2085 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2089 /* Given VAL, generate a constant appropriate for MODE.
2090 If MODE is a vector mode, every element will be VAL.
2091 For TImode, VAL will be zero extended to 128 bits. */
2093 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2095 rtx inner;
2096 rtvec v;
2097 int units, i;
2099 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2100 || GET_MODE_CLASS (mode) == MODE_FLOAT
2101 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2102 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2104 if (GET_MODE_CLASS (mode) == MODE_INT)
2105 return immed_double_const (val, 0, mode);
2107 /* val is the bit representation of the float */
2108 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2109 return hwint_to_const_double (mode, val);
2111 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2112 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2113 else
2114 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2116 units = GET_MODE_NUNITS (mode);
2118 v = rtvec_alloc (units);
2120 for (i = 0; i < units; ++i)
2121 RTVEC_ELT (v, i) = inner;
2123 return gen_rtx_CONST_VECTOR (mode, v);
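/* Illustrative note (not in the original source): for example,
   spu_const (V4SImode, 1) builds a CONST_VECTOR whose four SImode
   elements are all (const_int 1), while spu_const (SImode, 1) simply
   returns (const_int 1).  */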
2126 /* Create a MODE vector constant from 4 ints. */
2128 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2130 unsigned char arr[16];
2131 arr[0] = (a >> 24) & 0xff;
2132 arr[1] = (a >> 16) & 0xff;
2133 arr[2] = (a >> 8) & 0xff;
2134 arr[3] = (a >> 0) & 0xff;
2135 arr[4] = (b >> 24) & 0xff;
2136 arr[5] = (b >> 16) & 0xff;
2137 arr[6] = (b >> 8) & 0xff;
2138 arr[7] = (b >> 0) & 0xff;
2139 arr[8] = (c >> 24) & 0xff;
2140 arr[9] = (c >> 16) & 0xff;
2141 arr[10] = (c >> 8) & 0xff;
2142 arr[11] = (c >> 0) & 0xff;
2143 arr[12] = (d >> 24) & 0xff;
2144 arr[13] = (d >> 16) & 0xff;
2145 arr[14] = (d >> 8) & 0xff;
2146 arr[15] = (d >> 0) & 0xff;
2147 return array_to_constant(mode, arr);
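/* Illustrative note (not in the original source): with the big-endian
   byte packing above, spu_const_from_ints (V4SImode, 0x00010203,
   0x04050607, 0x08090a0b, 0x0c0d0e0f) produces the 16-byte constant
   { 0x00, 0x01, ..., 0x0f }; each int argument fills one 32-bit slot
   in order.  */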
2150 /* branch hint stuff */
2152 /* An array of these is used to propagate hints to predecessor blocks. */
2153 struct spu_bb_info
2155 rtx prop_jump; /* propagated from another block */
2156 int bb_index; /* the original block. */
2158 static struct spu_bb_info *spu_bb_info;
2160 #define STOP_HINT_P(INSN) \
2161 (GET_CODE(INSN) == CALL_INSN \
2162 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2163 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2165 /* 1 when RTX is a hinted branch or its target. We keep track of
2166 what has been hinted so the safe-hint code can test it easily. */
2167 #define HINTED_P(RTX) \
2168 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2170 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2171 #define SCHED_ON_EVEN_P(RTX) \
2172 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2174 /* Emit a nop for INSN such that the two will dual issue. This assumes
2175 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2176 We check for TImode to handle a MULTI1 insn which has dual issued its
2177 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2178 ADDR_VEC insns. */
2179 static void
2180 emit_nop_for_insn (rtx insn)
2182 int p;
2183 rtx new_insn;
2184 p = get_pipe (insn);
2185 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2186 new_insn = emit_insn_after (gen_lnop (), insn);
2187 else if (p == 1 && GET_MODE (insn) == TImode)
2189 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2190 PUT_MODE (new_insn, TImode);
2191 PUT_MODE (insn, VOIDmode);
2193 else
2194 new_insn = emit_insn_after (gen_lnop (), insn);
2195 recog_memoized (new_insn);
2198 /* Insert nops in basic blocks to meet dual issue alignment
2199 requirements. Also make sure hbrp and hint instructions are at least
2200 one cycle apart, possibly inserting a nop. */
2201 static void
2202 pad_bb(void)
2204 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2205 int length;
2206 int addr;
2208 /* This sets up INSN_ADDRESSES. */
2209 shorten_branches (get_insns ());
2211 /* Keep track of length added by nops. */
2212 length = 0;
2214 prev_insn = 0;
2215 insn = get_insns ();
2216 if (!active_insn_p (insn))
2217 insn = next_active_insn (insn);
2218 for (; insn; insn = next_insn)
2220 next_insn = next_active_insn (insn);
2221 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2222 || INSN_CODE (insn) == CODE_FOR_hbr)
2224 if (hbr_insn)
2226 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2227 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2228 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2229 || (a1 - a0 == 4))
2231 prev_insn = emit_insn_before (gen_lnop (), insn);
2232 PUT_MODE (prev_insn, GET_MODE (insn));
2233 PUT_MODE (insn, TImode);
2234 length += 4;
2237 hbr_insn = insn;
2239 if (INSN_CODE (insn) == CODE_FOR_blockage)
2241 if (GET_MODE (insn) == TImode)
2242 PUT_MODE (next_insn, TImode);
2243 insn = next_insn;
2244 next_insn = next_active_insn (insn);
2246 addr = INSN_ADDRESSES (INSN_UID (insn));
2247 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2249 if (((addr + length) & 7) != 0)
2251 emit_nop_for_insn (prev_insn);
2252 length += 4;
2255 else if (GET_MODE (insn) == TImode
2256 && ((next_insn && GET_MODE (next_insn) != TImode)
2257 || get_attr_type (insn) == TYPE_MULTI0)
2258 && ((addr + length) & 7) != 0)
2260 /* prev_insn will always be set because the first insn is
2261 always 8-byte aligned. */
2262 emit_nop_for_insn (prev_insn);
2263 length += 4;
2265 prev_insn = insn;
2270 /* Routines for branch hints. */
2272 static void
2273 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2274 int distance, sbitmap blocks)
2276 rtx branch_label = 0;
2277 rtx hint;
2278 rtx insn;
2279 rtx table;
2281 if (before == 0 || branch == 0 || target == 0)
2282 return;
2284 /* While scheduling we require hints to be no further than 600, so
2285 we need to enforce that here too */
2286 if (distance > 600)
2287 return;
2289 /* If BEFORE is a basic block note, emit the hint after the note. */
2290 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2291 before = NEXT_INSN (before);
2293 branch_label = gen_label_rtx ();
2294 LABEL_NUSES (branch_label)++;
2295 LABEL_PRESERVE_P (branch_label) = 1;
2296 insn = emit_label_before (branch_label, branch);
2297 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2298 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2300 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2301 recog_memoized (hint);
2302 HINTED_P (branch) = 1;
2304 if (GET_CODE (target) == LABEL_REF)
2305 HINTED_P (XEXP (target, 0)) = 1;
2306 else if (tablejump_p (branch, 0, &table))
2308 rtvec vec;
2309 int j;
2310 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2311 vec = XVEC (PATTERN (table), 0);
2312 else
2313 vec = XVEC (PATTERN (table), 1);
2314 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2315 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2318 if (distance >= 588)
2320 /* Make sure the hint isn't scheduled any earlier than this point,
2321 which could make it too far for the branch offset to fit */
2322 recog_memoized (emit_insn_before (gen_blockage (), hint));
2324 else if (distance <= 8 * 4)
2326 /* To guarantee at least 8 insns between the hint and branch we
2327 insert nops. */
2328 int d;
2329 for (d = distance; d < 8 * 4; d += 4)
2331 insn =
2332 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2333 recog_memoized (insn);
2336 /* Make sure any nops inserted aren't scheduled before the hint. */
2337 recog_memoized (emit_insn_after (gen_blockage (), hint));
2339 /* Make sure any nops inserted aren't scheduled after the call. */
2340 if (CALL_P (branch) && distance < 8 * 4)
2341 recog_memoized (emit_insn_before (gen_blockage (), branch));
2345 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2346 the rtx for the branch target. */
2347 static rtx
2348 get_branch_target (rtx branch)
2350 if (GET_CODE (branch) == JUMP_INSN)
2352 rtx set, src;
2354 /* Return statements */
2355 if (GET_CODE (PATTERN (branch)) == RETURN)
2356 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2358 /* jump table */
2359 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2360 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2361 return 0;
2363 set = single_set (branch);
2364 src = SET_SRC (set);
2365 if (GET_CODE (SET_DEST (set)) != PC)
2366 abort ();
2368 if (GET_CODE (src) == IF_THEN_ELSE)
2370 rtx lab = 0;
2371 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2372 if (note)
2374 /* If the more probable case is not a fall through, then
2375 try a branch hint. */
2376 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2377 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2378 && GET_CODE (XEXP (src, 1)) != PC)
2379 lab = XEXP (src, 1);
2380 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2381 && GET_CODE (XEXP (src, 2)) != PC)
2382 lab = XEXP (src, 2);
2384 if (lab)
2386 if (GET_CODE (lab) == RETURN)
2387 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2388 return lab;
2390 return 0;
2393 return src;
2395 else if (GET_CODE (branch) == CALL_INSN)
2397 rtx call;
2398 /* All of our call patterns are in a PARALLEL and the CALL is
2399 the first pattern in the PARALLEL. */
2400 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2401 abort ();
2402 call = XVECEXP (PATTERN (branch), 0, 0);
2403 if (GET_CODE (call) == SET)
2404 call = SET_SRC (call);
2405 if (GET_CODE (call) != CALL)
2406 abort ();
2407 return XEXP (XEXP (call, 0), 0);
2409 return 0;
2412 /* The special $hbr register is used to prevent the insn scheduler from
2413 moving hbr insns across instructions which invalidate them. It
2414 should only be used in a clobber, and this function searches for
2415 insns which clobber it. */
2416 static bool
2417 insn_clobbers_hbr (rtx insn)
2419 if (INSN_P (insn)
2420 && GET_CODE (PATTERN (insn)) == PARALLEL)
2422 rtx parallel = PATTERN (insn);
2423 rtx clobber;
2424 int j;
2425 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2427 clobber = XVECEXP (parallel, 0, j);
2428 if (GET_CODE (clobber) == CLOBBER
2429 && GET_CODE (XEXP (clobber, 0)) == REG
2430 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2431 return 1;
2434 return 0;
2437 /* Search up to 32 insns starting at FIRST:
2438 - at any kind of hinted branch, just return
2439 - at any unconditional branch in the first 15 insns, just return
2440 - at a call or indirect branch, after the first 15 insns, force it to
2441 an even address and return
2442 - at any unconditional branch, after the first 15 insns, force it to
2443 an even address.
2444 At the end of the search, insert an hbrp within 4 insns of FIRST,
2445 and an hbrp within 16 instructions of FIRST.
2447 static void
2448 insert_hbrp_for_ilb_runout (rtx first)
2450 rtx insn, before_4 = 0, before_16 = 0;
2451 int addr = 0, length, first_addr = -1;
2452 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2453 int insert_lnop_after = 0;
2454 for (insn = first; insn; insn = NEXT_INSN (insn))
2455 if (INSN_P (insn))
2457 if (first_addr == -1)
2458 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2459 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2460 length = get_attr_length (insn);
2462 if (before_4 == 0 && addr + length >= 4 * 4)
2463 before_4 = insn;
2464 /* We test for 14 instructions because the first hbrp will add
2465 up to 2 instructions. */
2466 if (before_16 == 0 && addr + length >= 14 * 4)
2467 before_16 = insn;
2469 if (INSN_CODE (insn) == CODE_FOR_hbr)
2471 /* Make sure an hbrp is at least 2 cycles away from a hint.
2472 Insert an lnop after the hbrp when necessary. */
2473 if (before_4 == 0 && addr > 0)
2475 before_4 = insn;
2476 insert_lnop_after |= 1;
2478 else if (before_4 && addr <= 4 * 4)
2479 insert_lnop_after |= 1;
2480 if (before_16 == 0 && addr > 10 * 4)
2482 before_16 = insn;
2483 insert_lnop_after |= 2;
2485 else if (before_16 && addr <= 14 * 4)
2486 insert_lnop_after |= 2;
2489 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2491 if (addr < hbrp_addr0)
2492 hbrp_addr0 = addr;
2493 else if (addr < hbrp_addr1)
2494 hbrp_addr1 = addr;
2497 if (CALL_P (insn) || JUMP_P (insn))
2499 if (HINTED_P (insn))
2500 return;
2502 /* Any branch after the first 15 insns should be on an even
2503 address to avoid a special case branch. There might be
2504 some nops and/or hbrps inserted, so we test after 10
2505 insns. */
2506 if (addr > 10 * 4)
2507 SCHED_ON_EVEN_P (insn) = 1;
2510 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2511 return;
2514 if (addr + length >= 32 * 4)
2516 gcc_assert (before_4 && before_16);
2517 if (hbrp_addr0 > 4 * 4)
2519 insn =
2520 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2521 recog_memoized (insn);
2522 INSN_ADDRESSES_NEW (insn,
2523 INSN_ADDRESSES (INSN_UID (before_4)));
2524 PUT_MODE (insn, GET_MODE (before_4));
2525 PUT_MODE (before_4, TImode);
2526 if (insert_lnop_after & 1)
2528 insn = emit_insn_before (gen_lnop (), before_4);
2529 recog_memoized (insn);
2530 INSN_ADDRESSES_NEW (insn,
2531 INSN_ADDRESSES (INSN_UID (before_4)));
2532 PUT_MODE (insn, TImode);
2535 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2536 && hbrp_addr1 > 16 * 4)
2538 insn =
2539 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2540 recog_memoized (insn);
2541 INSN_ADDRESSES_NEW (insn,
2542 INSN_ADDRESSES (INSN_UID (before_16)));
2543 PUT_MODE (insn, GET_MODE (before_16));
2544 PUT_MODE (before_16, TImode);
2545 if (insert_lnop_after & 2)
2547 insn = emit_insn_before (gen_lnop (), before_16);
2548 recog_memoized (insn);
2549 INSN_ADDRESSES_NEW (insn,
2550 INSN_ADDRESSES (INSN_UID
2551 (before_16)));
2552 PUT_MODE (insn, TImode);
2555 return;
2558 else if (BARRIER_P (insn))
2559 return;
2563 /* The SPU might hang when it executes 48 inline instructions after a
2564 hinted branch jumps to its hinted target. The beginning of a
2565 function and the return from a call might have been hinted, and must
2566 be handled as well. To prevent a hang we insert 2 hbrps. The first
2567 should be within 6 insns of the branch target. The second should be
2568 within 22 insns of the branch target. When determining if hbrps are
2569 necessary, we look for only 32 inline instructions, because up to
2570 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2571 new hbrps, we insert them within 4 and 16 insns of the target. */
2572 static void
2573 insert_hbrp (void)
2575 rtx insn;
2576 if (TARGET_SAFE_HINTS)
2578 shorten_branches (get_insns ());
2579 /* Insert hbrp at beginning of function */
2580 insn = next_active_insn (get_insns ());
2581 if (insn)
2582 insert_hbrp_for_ilb_runout (insn);
2583 /* Insert hbrp after hinted targets. */
2584 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2585 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2586 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2590 static int in_spu_reorg;
2592 /* Insert branch hints. There are no branch optimizations after this
2593 pass, so it's safe to set our branch hints now. */
2594 static void
2595 spu_machine_dependent_reorg (void)
2597 sbitmap blocks;
2598 basic_block bb;
2599 rtx branch, insn;
2600 rtx branch_target = 0;
2601 int branch_addr = 0, insn_addr, required_dist = 0;
2602 int i;
2603 unsigned int j;
2605 if (!TARGET_BRANCH_HINTS || optimize == 0)
2607 /* We still do it for unoptimized code because an external
2608 function might have hinted a call or return. */
2609 insert_hbrp ();
2610 pad_bb ();
2611 return;
2614 blocks = sbitmap_alloc (last_basic_block);
2615 sbitmap_zero (blocks);
2617 in_spu_reorg = 1;
2618 compute_bb_for_insn ();
2620 compact_blocks ();
2622 spu_bb_info =
2623 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2624 sizeof (struct spu_bb_info));
2626 /* We need exact insn addresses and lengths. */
2627 shorten_branches (get_insns ());
2629 for (i = n_basic_blocks - 1; i >= 0; i--)
2631 bb = BASIC_BLOCK (i);
2632 branch = 0;
2633 if (spu_bb_info[i].prop_jump)
2635 branch = spu_bb_info[i].prop_jump;
2636 branch_target = get_branch_target (branch);
2637 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2638 required_dist = spu_hint_dist;
2640 /* Search from end of a block to beginning. In this loop, find
2641 jumps which need a branch hint and emit the hint only when:
2642 - it's an indirect branch and we're at the insn which sets
2643 the register
2644 - we're at an insn that will invalidate the hint. e.g., a
2645 call, another hint insn, inline asm that clobbers $hbr, and
2646 some inlined operations (divmodsi4). Don't consider jumps
2647 because they are only at the end of a block and are
2648 considered when we are deciding whether to propagate
2649 - we're getting too far away from the branch. The hbr insns
2650 only have a signed 10 bit offset
2651 We go back as far as possible so the branch will be considered
2652 for propagation when we get to the beginning of the block. */
2653 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2655 if (INSN_P (insn))
2657 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2658 if (branch
2659 && ((GET_CODE (branch_target) == REG
2660 && set_of (branch_target, insn) != NULL_RTX)
2661 || insn_clobbers_hbr (insn)
2662 || branch_addr - insn_addr > 600))
2664 rtx next = NEXT_INSN (insn);
2665 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2666 if (insn != BB_END (bb)
2667 && branch_addr - next_addr >= required_dist)
2669 if (dump_file)
2670 fprintf (dump_file,
2671 "hint for %i in block %i before %i\n",
2672 INSN_UID (branch), bb->index,
2673 INSN_UID (next));
2674 spu_emit_branch_hint (next, branch, branch_target,
2675 branch_addr - next_addr, blocks);
2677 branch = 0;
2680 /* JUMP_P will only be true at the end of a block. When
2681 branch is already set it means we've previously decided
2682 to propagate a hint for that branch into this block. */
2683 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2685 branch = 0;
2686 if ((branch_target = get_branch_target (insn)))
2688 branch = insn;
2689 branch_addr = insn_addr;
2690 required_dist = spu_hint_dist;
2694 if (insn == BB_HEAD (bb))
2695 break;
2698 if (branch)
2700 /* If we haven't emitted a hint for this branch yet, it might
2701 be profitable to emit it in one of the predecessor blocks,
2702 especially for loops. */
2703 rtx bbend;
2704 basic_block prev = 0, prop = 0, prev2 = 0;
2705 int loop_exit = 0, simple_loop = 0;
2706 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2708 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2709 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2710 prev = EDGE_PRED (bb, j)->src;
2711 else
2712 prev2 = EDGE_PRED (bb, j)->src;
2714 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2715 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2716 loop_exit = 1;
2717 else if (EDGE_SUCC (bb, j)->dest == bb)
2718 simple_loop = 1;
2720 /* If this branch is a loop exit then propagate to previous
2721 fallthru block. This catches the cases when it is a simple
2722 loop or when there is an initial branch into the loop. */
2723 if (prev && (loop_exit || simple_loop)
2724 && prev->loop_depth <= bb->loop_depth)
2725 prop = prev;
2727 /* If there is only one adjacent predecessor, propagate to it, but not
2728 outside this loop. This loop_depth test isn't perfect, but
2729 I'm not sure the loop_father member is valid at this point. */
2730 else if (prev && single_pred_p (bb)
2731 && prev->loop_depth == bb->loop_depth)
2732 prop = prev;
2734 /* If this is the JOIN block of a simple IF-THEN then
2735 propagate the hint to the HEADER block. */
2736 else if (prev && prev2
2737 && EDGE_COUNT (bb->preds) == 2
2738 && EDGE_COUNT (prev->preds) == 1
2739 && EDGE_PRED (prev, 0)->src == prev2
2740 && prev2->loop_depth == bb->loop_depth
2741 && GET_CODE (branch_target) != REG)
2742 prop = prev;
2744 /* Don't propagate when:
2745 - this is a simple loop and the hint would be too far
2746 - this is not a simple loop and there are 16 insns in
2747 this block already
2748 - the predecessor block ends in a branch that will be
2749 hinted
2750 - the predecessor block ends in an insn that invalidates
2751 the hint */
2752 if (prop
2753 && prop->index >= 0
2754 && (bbend = BB_END (prop))
2755 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2756 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2757 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2759 if (dump_file)
2760 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2761 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2762 bb->index, prop->index, bb->loop_depth,
2763 INSN_UID (branch), loop_exit, simple_loop,
2764 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2766 spu_bb_info[prop->index].prop_jump = branch;
2767 spu_bb_info[prop->index].bb_index = i;
2769 else if (branch_addr - next_addr >= required_dist)
2771 if (dump_file)
2772 fprintf (dump_file, "hint for %i in block %i before %i\n",
2773 INSN_UID (branch), bb->index,
2774 INSN_UID (NEXT_INSN (insn)));
2775 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2776 branch_addr - next_addr, blocks);
2778 branch = 0;
2781 free (spu_bb_info);
2783 if (!sbitmap_empty_p (blocks))
2784 find_many_sub_basic_blocks (blocks);
2786 /* We have to schedule to make sure alignment is ok. */
2787 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2789 /* The hints need to be scheduled, so call it again. */
2790 schedule_insns ();
2792 insert_hbrp ();
2794 pad_bb ();
2796 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2797 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2799 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2800 between its branch label and the branch. We don't move the
2801 label because GCC expects it at the beginning of the block. */
2802 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2803 rtx label_ref = XVECEXP (unspec, 0, 0);
2804 rtx label = XEXP (label_ref, 0);
2805 rtx branch;
2806 int offset = 0;
2807 for (branch = NEXT_INSN (label);
2808 !JUMP_P (branch) && !CALL_P (branch);
2809 branch = NEXT_INSN (branch))
2810 if (NONJUMP_INSN_P (branch))
2811 offset += get_attr_length (branch);
2812 if (offset > 0)
2813 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2816 if (spu_flag_var_tracking)
2818 df_analyze ();
2819 timevar_push (TV_VAR_TRACKING);
2820 variable_tracking_main ();
2821 timevar_pop (TV_VAR_TRACKING);
2822 df_finish_pass (false);
2825 free_bb_for_insn ();
2827 in_spu_reorg = 0;
2831 /* Insn scheduling routines, primarily for dual issue. */
2832 static int
2833 spu_sched_issue_rate (void)
2835 return 2;
2838 static int
2839 uses_ls_unit(rtx insn)
2841 rtx set = single_set (insn);
2842 if (set != 0
2843 && (GET_CODE (SET_DEST (set)) == MEM
2844 || GET_CODE (SET_SRC (set)) == MEM))
2845 return 1;
2846 return 0;
2849 static int
2850 get_pipe (rtx insn)
2852 enum attr_type t;
2853 /* Handle inline asm */
2854 if (INSN_CODE (insn) == -1)
2855 return -1;
2856 t = get_attr_type (insn);
2857 switch (t)
2859 case TYPE_CONVERT:
2860 return -2;
2861 case TYPE_MULTI0:
2862 return -1;
2864 case TYPE_FX2:
2865 case TYPE_FX3:
2866 case TYPE_SPR:
2867 case TYPE_NOP:
2868 case TYPE_FXB:
2869 case TYPE_FPD:
2870 case TYPE_FP6:
2871 case TYPE_FP7:
2872 return 0;
2874 case TYPE_LNOP:
2875 case TYPE_SHUF:
2876 case TYPE_LOAD:
2877 case TYPE_STORE:
2878 case TYPE_BR:
2879 case TYPE_MULTI1:
2880 case TYPE_HBR:
2881 case TYPE_IPREFETCH:
2882 return 1;
2883 default:
2884 abort ();
2889 /* haifa-sched.c has a static variable that keeps track of the current
2890 cycle. It is passed to spu_sched_reorder, and we record it here for
2891 use by spu_sched_variable_issue. It won't be accurate if the
2892 scheduler updates its clock_var between the two calls. */
2893 static int clock_var;
2895 /* This is used to keep track of insn alignment. Set to 0 at the
2896 beginning of each block and increased by the "length" attr of each
2897 insn scheduled. */
2898 static int spu_sched_length;
2900 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2901 ready list appropriately in spu_sched_reorder(). */
2902 static int pipe0_clock;
2903 static int pipe1_clock;
2905 static int prev_clock_var;
2907 static int prev_priority;
2909 /* The SPU needs to load the next ilb sometime during the execution of
2910 the previous ilb. There is a potential conflict if every cycle has a
2911 load or store. To avoid the conflict we make sure the load/store
2912 unit is free for at least one cycle during the execution of insns in
2913 the previous ilb. */
2914 static int spu_ls_first;
2915 static int prev_ls_clock;
2917 static void
2918 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2919 int max_ready ATTRIBUTE_UNUSED)
2921 spu_sched_length = 0;
2924 static void
2925 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2926 int max_ready ATTRIBUTE_UNUSED)
2928 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2930 /* When any block might be at least 8-byte aligned, assume all blocks
2931 are at least 8-byte aligned to make sure dual issue
2932 works out correctly. */
2933 spu_sched_length = 0;
2935 spu_ls_first = INT_MAX;
2936 clock_var = -1;
2937 prev_ls_clock = -1;
2938 pipe0_clock = -1;
2939 pipe1_clock = -1;
2940 prev_clock_var = -1;
2941 prev_priority = -1;
2944 static int
2945 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2946 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2948 int len;
2949 int p;
2950 if (GET_CODE (PATTERN (insn)) == USE
2951 || GET_CODE (PATTERN (insn)) == CLOBBER
2952 || (len = get_attr_length (insn)) == 0)
2953 return more;
2955 spu_sched_length += len;
2957 /* Reset on inline asm */
2958 if (INSN_CODE (insn) == -1)
2960 spu_ls_first = INT_MAX;
2961 pipe0_clock = -1;
2962 pipe1_clock = -1;
2963 return 0;
2965 p = get_pipe (insn);
2966 if (p == 0)
2967 pipe0_clock = clock_var;
2968 else
2969 pipe1_clock = clock_var;
2971 if (in_spu_reorg)
2973 if (clock_var - prev_ls_clock > 1
2974 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2975 spu_ls_first = INT_MAX;
2976 if (uses_ls_unit (insn))
2978 if (spu_ls_first == INT_MAX)
2979 spu_ls_first = spu_sched_length;
2980 prev_ls_clock = clock_var;
2983 /* The scheduler hasn't inserted the nop, but we will later on.
2984 Include those nops in spu_sched_length. */
2985 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2986 spu_sched_length += 4;
2987 prev_clock_var = clock_var;
2989 /* more is -1 when called from spu_sched_reorder for new insns
2990 that don't have INSN_PRIORITY */
2991 if (more >= 0)
2992 prev_priority = INSN_PRIORITY (insn);
2995 /* Always try issuing more insns. spu_sched_reorder will decide
2996 when the cycle should be advanced. */
2997 return 1;
3000 /* This function is called for both TARGET_SCHED_REORDER and
3001 TARGET_SCHED_REORDER2. */
3002 static int
3003 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3004 rtx *ready, int *nreadyp, int clock)
3006 int i, nready = *nreadyp;
3007 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3008 rtx insn;
3010 clock_var = clock;
3012 if (nready <= 0 || pipe1_clock >= clock)
3013 return 0;
3015 /* Find any rtl insns that don't generate assembly insns and schedule
3016 them first. */
3017 for (i = nready - 1; i >= 0; i--)
3019 insn = ready[i];
3020 if (INSN_CODE (insn) == -1
3021 || INSN_CODE (insn) == CODE_FOR_blockage
3022 || (INSN_P (insn) && get_attr_length (insn) == 0))
3024 ready[i] = ready[nready - 1];
3025 ready[nready - 1] = insn;
3026 return 1;
3030 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3031 for (i = 0; i < nready; i++)
3032 if (INSN_CODE (ready[i]) != -1)
3034 insn = ready[i];
3035 switch (get_attr_type (insn))
3037 default:
3038 case TYPE_MULTI0:
3039 case TYPE_CONVERT:
3040 case TYPE_FX2:
3041 case TYPE_FX3:
3042 case TYPE_SPR:
3043 case TYPE_NOP:
3044 case TYPE_FXB:
3045 case TYPE_FPD:
3046 case TYPE_FP6:
3047 case TYPE_FP7:
3048 pipe_0 = i;
3049 break;
3050 case TYPE_LOAD:
3051 case TYPE_STORE:
3052 pipe_ls = i;
3053 case TYPE_LNOP:
3054 case TYPE_SHUF:
3055 case TYPE_BR:
3056 case TYPE_MULTI1:
3057 case TYPE_HBR:
3058 pipe_1 = i;
3059 break;
3060 case TYPE_IPREFETCH:
3061 pipe_hbrp = i;
3062 break;
3066 /* In the first scheduling phase, schedule loads and stores together
3067 to increase the chance they will get merged during postreload CSE. */
3068 if (!reload_completed && pipe_ls >= 0)
3070 insn = ready[pipe_ls];
3071 ready[pipe_ls] = ready[nready - 1];
3072 ready[nready - 1] = insn;
3073 return 1;
3076 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3077 if (pipe_hbrp >= 0)
3078 pipe_1 = pipe_hbrp;
3080 /* When we have loads/stores in every cycle of the last 15 insns and
3081 we are about to schedule another load/store, emit an hbrp insn
3082 instead. */
3083 if (in_spu_reorg
3084 && spu_sched_length - spu_ls_first >= 4 * 15
3085 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3087 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3088 recog_memoized (insn);
3089 if (pipe0_clock < clock)
3090 PUT_MODE (insn, TImode);
3091 spu_sched_variable_issue (file, verbose, insn, -1);
3092 return 0;
3095 /* In general, we want to emit nops to increase dual issue, but dual
3096 issue isn't faster when one of the insns could be scheduled later
3097 without affecting the critical path. We look at INSN_PRIORITY to
3098 make a good guess, but it isn't perfect so -mdual-nops=n can be
3099 used to affect it. */
3100 if (in_spu_reorg && spu_dual_nops < 10)
3102 /* When we are at an even address and we are not issuing nops to
3103 improve scheduling then we need to advance the cycle. */
3104 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3105 && (spu_dual_nops == 0
3106 || (pipe_1 != -1
3107 && prev_priority >
3108 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3109 return 0;
3111 /* When at an odd address, schedule the highest priority insn
3112 without considering pipeline. */
3113 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3114 && (spu_dual_nops == 0
3115 || (prev_priority >
3116 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3117 return 1;
3121 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3122 pipe0 insn in the ready list, schedule it. */
3123 if (pipe0_clock < clock && pipe_0 >= 0)
3124 schedule_i = pipe_0;
3126 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3127 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3128 else
3129 schedule_i = pipe_1;
3131 if (schedule_i > -1)
3133 insn = ready[schedule_i];
3134 ready[schedule_i] = ready[nready - 1];
3135 ready[nready - 1] = insn;
3136 return 1;
3138 return 0;
3141 /* INSN is dependent on DEP_INSN. */
3142 static int
3143 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3145 rtx set;
3147 /* The blockage pattern is used to prevent instructions from being
3148 moved across it and has no cost. */
3149 if (INSN_CODE (insn) == CODE_FOR_blockage
3150 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3151 return 0;
3153 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3154 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3155 return 0;
3157 /* Make sure hbrps are spread out. */
3158 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3159 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3160 return 8;
3162 /* Make sure hints and hbrps are 2 cycles apart. */
3163 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3164 || INSN_CODE (insn) == CODE_FOR_hbr)
3165 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3166 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3167 return 2;
3169 /* An hbrp has no real dependency on other insns. */
3170 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3171 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3172 return 0;
3174 /* Assuming that it is unlikely an argument register will be used in
3175 the first cycle of the called function, we reduce the cost for
3176 slightly better scheduling of dep_insn. When not hinted, the
3177 mispredicted branch would hide the cost as well. */
3178 if (CALL_P (insn))
3180 rtx target = get_branch_target (insn);
3181 if (GET_CODE (target) != REG || !set_of (target, insn))
3182 return cost - 2;
3183 return cost;
3186 /* And when returning from a function, let's assume the return values
3187 are completed sooner too. */
3188 if (CALL_P (dep_insn))
3189 return cost - 2;
3191 /* Make sure an instruction that loads from the back chain is scheduled
3192 away from the return instruction so a hint is more likely to get
3193 issued. */
3194 if (INSN_CODE (insn) == CODE_FOR__return
3195 && (set = single_set (dep_insn))
3196 && GET_CODE (SET_DEST (set)) == REG
3197 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3198 return 20;
3200 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3201 scheduler makes every insn in a block anti-dependent on the final
3202 jump_insn. We adjust here so higher cost insns will get scheduled
3203 earlier. */
3204 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3205 return insn_cost (dep_insn) - 3;
3207 return cost;
3210 /* Create a CONST_DOUBLE from a string. */
3211 struct rtx_def *
3212 spu_float_const (const char *string, enum machine_mode mode)
3214 REAL_VALUE_TYPE value;
3215 value = REAL_VALUE_ATOF (string, mode);
3216 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3220 spu_constant_address_p (rtx x)
3222 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3223 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3224 || GET_CODE (x) == HIGH);
3227 static enum spu_immediate
3228 which_immediate_load (HOST_WIDE_INT val)
3230 gcc_assert (val == trunc_int_for_mode (val, SImode));
3232 if (val >= -0x8000 && val <= 0x7fff)
3233 return SPU_IL;
3234 if (val >= 0 && val <= 0x3ffff)
3235 return SPU_ILA;
3236 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3237 return SPU_ILH;
3238 if ((val & 0xffff) == 0)
3239 return SPU_ILHU;
3241 return SPU_NONE;
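/* Illustrative note (not in the original source): sample classifications
   from the tests above:
     0x1234     -> SPU_IL    (fits the signed 16-bit il immediate)
     0x23456    -> SPU_ILA   (fits the unsigned 18-bit ila immediate)
     0x12341234 -> SPU_ILH   (both halfwords identical)
     0x12340000 -> SPU_ILHU  (low halfword is zero)
     0x12345678 -> SPU_NONE  (needs an ilhu/iohl pair)  */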
3244 /* Return true when OP can be loaded by one of the il instructions, or
3245 when epilogue_completed is not yet set and OP can be loaded using ilhu and iohl. */
3247 immediate_load_p (rtx op, enum machine_mode mode)
3249 if (CONSTANT_P (op))
3251 enum immediate_class c = classify_immediate (op, mode);
3252 return c == IC_IL1 || c == IC_IL1s
3253 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3255 return 0;
3258 /* Return true if the first SIZE bytes of ARR are a constant that can be
3259 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3260 represent the size and offset of the instruction to use. */
3261 static int
3262 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3264 int cpat, run, i, start;
3265 cpat = 1;
3266 run = 0;
3267 start = -1;
3268 for (i = 0; i < size && cpat; i++)
3269 if (arr[i] != i+16)
3271 if (!run)
3273 start = i;
3274 if (arr[i] == 3)
3275 run = 1;
3276 else if (arr[i] == 2 && arr[i+1] == 3)
3277 run = 2;
3278 else if (arr[i] == 0)
3280 while (arr[i+run] == run && i+run < 16)
3281 run++;
3282 if (run != 4 && run != 8)
3283 cpat = 0;
3285 else
3286 cpat = 0;
3287 if ((i & (run-1)) != 0)
3288 cpat = 0;
3289 i += run;
3291 else
3292 cpat = 0;
3294 if (cpat && (run || size < 16))
3296 if (run == 0)
3297 run = 1;
3298 if (prun)
3299 *prun = run;
3300 if (pstart)
3301 *pstart = start == -1 ? 16-run : start;
3302 return 1;
3304 return 0;
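/* Illustrative note (not in the original source): the patterns accepted
   here are the identity sequence 0x10..0x1f with a run of 0, 1, ...,
   run-1 inserted at an aligned offset.  For instance the array
     { 0x10, 0x11, 0x12, 0x13, 0x00, 0x01, 0x02, 0x03,
       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }
   makes cpat_info return 1 with *prun == 4 and *pstart == 4, i.e. a
   word-sized (cwd-style) insertion at byte offset 4.  */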
3307 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3308 it into a register. MODE is only valid when OP is a CONST_INT. */
3309 static enum immediate_class
3310 classify_immediate (rtx op, enum machine_mode mode)
3312 HOST_WIDE_INT val;
3313 unsigned char arr[16];
3314 int i, j, repeated, fsmbi, repeat;
3316 gcc_assert (CONSTANT_P (op));
3318 if (GET_MODE (op) != VOIDmode)
3319 mode = GET_MODE (op);
3321 /* A V4SI const_vector with all identical symbols is ok. */
3322 if (!flag_pic
3323 && mode == V4SImode
3324 && GET_CODE (op) == CONST_VECTOR
3325 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3326 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3327 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3328 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3329 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3330 op = CONST_VECTOR_ELT (op, 0);
3332 switch (GET_CODE (op))
3334 case SYMBOL_REF:
3335 case LABEL_REF:
3336 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3338 case CONST:
3339 /* We can never know if the resulting address fits in 18 bits and can be
3340 loaded with ila. For now, assume the address will not overflow if
3341 the displacement is "small" (fits 'K' constraint). */
3342 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3344 rtx sym = XEXP (XEXP (op, 0), 0);
3345 rtx cst = XEXP (XEXP (op, 0), 1);
3347 if (GET_CODE (sym) == SYMBOL_REF
3348 && GET_CODE (cst) == CONST_INT
3349 && satisfies_constraint_K (cst))
3350 return IC_IL1s;
3352 return IC_IL2s;
3354 case HIGH:
3355 return IC_IL1s;
3357 case CONST_VECTOR:
3358 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3359 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3360 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3361 return IC_POOL;
3362 /* Fall through. */
3364 case CONST_INT:
3365 case CONST_DOUBLE:
3366 constant_to_array (mode, op, arr);
3368 /* Check that each 4-byte slot is identical. */
3369 repeated = 1;
3370 for (i = 4; i < 16; i += 4)
3371 for (j = 0; j < 4; j++)
3372 if (arr[j] != arr[i + j])
3373 repeated = 0;
3375 if (repeated)
3377 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3378 val = trunc_int_for_mode (val, SImode);
3380 if (which_immediate_load (val) != SPU_NONE)
3381 return IC_IL1;
3384 /* Any mode of 2 bytes or smaller can be loaded with an il
3385 instruction. */
3386 gcc_assert (GET_MODE_SIZE (mode) > 2);
3388 fsmbi = 1;
3389 repeat = 0;
3390 for (i = 0; i < 16 && fsmbi; i++)
3391 if (arr[i] != 0 && repeat == 0)
3392 repeat = arr[i];
3393 else if (arr[i] != 0 && arr[i] != repeat)
3394 fsmbi = 0;
3395 if (fsmbi)
3396 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3398 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3399 return IC_CPAT;
3401 if (repeated)
3402 return IC_IL2;
3404 return IC_POOL;
3405 default:
3406 break;
3408 gcc_unreachable ();
3411 static enum spu_immediate
3412 which_logical_immediate (HOST_WIDE_INT val)
3414 gcc_assert (val == trunc_int_for_mode (val, SImode));
3416 if (val >= -0x200 && val <= 0x1ff)
3417 return SPU_ORI;
3418 if (val >= 0 && val <= 0xffff)
3419 return SPU_IOHL;
3420 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3422 val = trunc_int_for_mode (val, HImode);
3423 if (val >= -0x200 && val <= 0x1ff)
3424 return SPU_ORHI;
3425 if ((val & 0xff) == ((val >> 8) & 0xff))
3427 val = trunc_int_for_mode (val, QImode);
3428 if (val >= -0x200 && val <= 0x1ff)
3429 return SPU_ORBI;
3432 return SPU_NONE;
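/* Illustrative note (not in the original source): sample results from
   the tests above:
     -1 (0xffffffff) -> SPU_ORI   (fits the signed 10-bit ori range)
     0x00001234      -> SPU_IOHL  (unsigned 16-bit value)
     0x01010101      -> SPU_ORHI  (halfwords repeat, 0x0101 fits orhi)
     0x40404040      -> SPU_ORBI  (all bytes repeat, 0x40 fits orbi)  */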
3435 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3436 CONST_DOUBLEs. */
3437 static int
3438 const_vector_immediate_p (rtx x)
3440 int i;
3441 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3442 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3443 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3445 return 0;
3446 return 1;
3450 logical_immediate_p (rtx op, enum machine_mode mode)
3452 HOST_WIDE_INT val;
3453 unsigned char arr[16];
3454 int i, j;
3456 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3457 || GET_CODE (op) == CONST_VECTOR);
3459 if (GET_CODE (op) == CONST_VECTOR
3460 && !const_vector_immediate_p (op))
3461 return 0;
3463 if (GET_MODE (op) != VOIDmode)
3464 mode = GET_MODE (op);
3466 constant_to_array (mode, op, arr);
3468 /* Check that bytes are repeated. */
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 return 0;
3474 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3475 val = trunc_int_for_mode (val, SImode);
3477 i = which_logical_immediate (val);
3478 return i != SPU_NONE && i != SPU_IOHL;
3482 iohl_immediate_p (rtx op, enum machine_mode mode)
3484 HOST_WIDE_INT val;
3485 unsigned char arr[16];
3486 int i, j;
3488 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3489 || GET_CODE (op) == CONST_VECTOR);
3491 if (GET_CODE (op) == CONST_VECTOR
3492 && !const_vector_immediate_p (op))
3493 return 0;
3495 if (GET_MODE (op) != VOIDmode)
3496 mode = GET_MODE (op);
3498 constant_to_array (mode, op, arr);
3500 /* Check that bytes are repeated. */
3501 for (i = 4; i < 16; i += 4)
3502 for (j = 0; j < 4; j++)
3503 if (arr[j] != arr[i + j])
3504 return 0;
3506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3507 val = trunc_int_for_mode (val, SImode);
3509 return val >= 0 && val <= 0xffff;
3513 arith_immediate_p (rtx op, enum machine_mode mode,
3514 HOST_WIDE_INT low, HOST_WIDE_INT high)
3516 HOST_WIDE_INT val;
3517 unsigned char arr[16];
3518 int bytes, i, j;
3520 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3521 || GET_CODE (op) == CONST_VECTOR);
3523 if (GET_CODE (op) == CONST_VECTOR
3524 && !const_vector_immediate_p (op))
3525 return 0;
3527 if (GET_MODE (op) != VOIDmode)
3528 mode = GET_MODE (op);
3530 constant_to_array (mode, op, arr);
3532 if (VECTOR_MODE_P (mode))
3533 mode = GET_MODE_INNER (mode);
3535 bytes = GET_MODE_SIZE (mode);
3536 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3538 /* Check that bytes are repeated. */
3539 for (i = bytes; i < 16; i += bytes)
3540 for (j = 0; j < bytes; j++)
3541 if (arr[j] != arr[i + j])
3542 return 0;
3544 val = arr[0];
3545 for (j = 1; j < bytes; j++)
3546 val = (val << 8) | arr[j];
3548 val = trunc_int_for_mode (val, mode);
3550 return val >= low && val <= high;
3553 /* TRUE when op is an immediate and an exact power of 2, and given that
3554 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3555 all entries must be the same. */
3556 bool
3557 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3559 enum machine_mode int_mode;
3560 HOST_WIDE_INT val;
3561 unsigned char arr[16];
3562 int bytes, i, j;
3564 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3565 || GET_CODE (op) == CONST_VECTOR);
3567 if (GET_CODE (op) == CONST_VECTOR
3568 && !const_vector_immediate_p (op))
3569 return 0;
3571 if (GET_MODE (op) != VOIDmode)
3572 mode = GET_MODE (op);
3574 constant_to_array (mode, op, arr);
3576 if (VECTOR_MODE_P (mode))
3577 mode = GET_MODE_INNER (mode);
3579 bytes = GET_MODE_SIZE (mode);
3580 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3582 /* Check that bytes are repeated. */
3583 for (i = bytes; i < 16; i += bytes)
3584 for (j = 0; j < bytes; j++)
3585 if (arr[j] != arr[i + j])
3586 return 0;
3588 val = arr[0];
3589 for (j = 1; j < bytes; j++)
3590 val = (val << 8) | arr[j];
3592 val = trunc_int_for_mode (val, int_mode);
3594 /* Currently, we only handle SFmode */
3595 gcc_assert (mode == SFmode);
3596 if (mode == SFmode)
3598 int exp = (val >> 23) - 127;
3599 return val > 0 && (val & 0x007fffff) == 0
3600 && exp >= low && exp <= high;
3602 return FALSE;
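/* Illustrative note (not in the original source): for SFmode, 8.0f has
   the bit pattern 0x41000000, so the exponent field is 0x82, exp is
   0x82 - 127 == 3, and the mantissa bits are zero; exp2_immediate_p
   therefore accepts it whenever LOW <= 3 and 3 <= HIGH.  */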
3605 /* We accept:
3606 - any 32-bit constant (SImode, SFmode)
3607 - any constant that can be generated with fsmbi (any mode)
3608 - a 64-bit constant where the high and low bits are identical
3609 (DImode, DFmode)
3610 - a 128-bit constant where the four 32-bit words match. */
3612 spu_legitimate_constant_p (rtx x)
3614 if (GET_CODE (x) == HIGH)
3615 x = XEXP (x, 0);
3616 /* V4SI with all identical symbols is valid. */
3617 if (!flag_pic
3618 && GET_MODE (x) == V4SImode
3619 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3620 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3621 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3622 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3623 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3624 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3626 if (GET_CODE (x) == CONST_VECTOR
3627 && !const_vector_immediate_p (x))
3628 return 0;
3629 return 1;
3632 /* Valid addresses are:
3633 - symbol_ref, label_ref, const
3634 - reg
3635 - reg + const_int, where const_int is 16 byte aligned
3636 - reg + reg, alignment doesn't matter
3637 The alignment matters in the reg+const case because lqd and stqd
3638 ignore the 4 least significant bits of the const. We only care about
3639 16 byte modes because the expand phase will change all smaller MEM
3640 references to TImode. */
3641 static bool
3642 spu_legitimate_address_p (enum machine_mode mode,
3643 rtx x, bool reg_ok_strict)
3645 int aligned = GET_MODE_SIZE (mode) >= 16;
3646 if (aligned
3647 && GET_CODE (x) == AND
3648 && GET_CODE (XEXP (x, 1)) == CONST_INT
3649 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3650 x = XEXP (x, 0);
3651 switch (GET_CODE (x))
3653 case LABEL_REF:
3654 case SYMBOL_REF:
3655 case CONST:
3656 return !TARGET_LARGE_MEM;
3658 case CONST_INT:
3659 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3661 case SUBREG:
3662 x = XEXP (x, 0);
3663 if (REG_P (x))
3664 return 0;
3666 case REG:
3667 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3669 case PLUS:
3670 case LO_SUM:
3672 rtx op0 = XEXP (x, 0);
3673 rtx op1 = XEXP (x, 1);
3674 if (GET_CODE (op0) == SUBREG)
3675 op0 = XEXP (op0, 0);
3676 if (GET_CODE (op1) == SUBREG)
3677 op1 = XEXP (op1, 0);
3678 if (GET_CODE (op0) == REG
3679 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3680 && GET_CODE (op1) == CONST_INT
3681 && INTVAL (op1) >= -0x2000
3682 && INTVAL (op1) <= 0x1fff
3683 && (!aligned || (INTVAL (op1) & 15) == 0))
3684 return TRUE;
3685 if (GET_CODE (op0) == REG
3686 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3687 && GET_CODE (op1) == REG
3688 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3689 return TRUE;
3691 break;
3693 default:
3694 break;
3696 return FALSE;
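/* Illustrative note (not in the original source): for a 16-byte mode
   such as TImode, (plus (reg) (const_int 32)) satisfies the checks
   above (assuming the base register is valid), while
   (plus (reg) (const_int 5)) does not, because lqd/stqd would silently
   drop the low 4 bits of that displacement.  */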
3699 /* When the address is reg + const_int, force the const_int into a
3700 register. */
3702 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3703 enum machine_mode mode ATTRIBUTE_UNUSED)
3705 rtx op0, op1;
3706 /* Make sure both operands are registers. */
3707 if (GET_CODE (x) == PLUS)
3709 op0 = XEXP (x, 0);
3710 op1 = XEXP (x, 1);
3711 if (ALIGNED_SYMBOL_REF_P (op0))
3713 op0 = force_reg (Pmode, op0);
3714 mark_reg_pointer (op0, 128);
3716 else if (GET_CODE (op0) != REG)
3717 op0 = force_reg (Pmode, op0);
3718 if (ALIGNED_SYMBOL_REF_P (op1))
3720 op1 = force_reg (Pmode, op1);
3721 mark_reg_pointer (op1, 128);
3723 else if (GET_CODE (op1) != REG)
3724 op1 = force_reg (Pmode, op1);
3725 x = gen_rtx_PLUS (Pmode, op0, op1);
3727 return x;
3730 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3731 struct attribute_spec.handler. */
3732 static tree
3733 spu_handle_fndecl_attribute (tree * node,
3734 tree name,
3735 tree args ATTRIBUTE_UNUSED,
3736 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3738 if (TREE_CODE (*node) != FUNCTION_DECL)
3740 warning (0, "%qE attribute only applies to functions",
3741 name);
3742 *no_add_attrs = true;
3745 return NULL_TREE;
3748 /* Handle the "vector" attribute. */
3749 static tree
3750 spu_handle_vector_attribute (tree * node, tree name,
3751 tree args ATTRIBUTE_UNUSED,
3752 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3754 tree type = *node, result = NULL_TREE;
3755 enum machine_mode mode;
3756 int unsigned_p;
3758 while (POINTER_TYPE_P (type)
3759 || TREE_CODE (type) == FUNCTION_TYPE
3760 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3761 type = TREE_TYPE (type);
3763 mode = TYPE_MODE (type);
3765 unsigned_p = TYPE_UNSIGNED (type);
3766 switch (mode)
3768 case DImode:
3769 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3770 break;
3771 case SImode:
3772 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3773 break;
3774 case HImode:
3775 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3776 break;
3777 case QImode:
3778 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3779 break;
3780 case SFmode:
3781 result = V4SF_type_node;
3782 break;
3783 case DFmode:
3784 result = V2DF_type_node;
3785 break;
3786 default:
3787 break;
3790 /* Propagate qualifiers attached to the element type
3791 onto the vector type. */
3792 if (result && result != type && TYPE_QUALS (type))
3793 result = build_qualified_type (result, TYPE_QUALS (type));
3795 *no_add_attrs = true; /* No need to hang on to the attribute. */
3797 if (!result)
3798 warning (0, "%qE attribute ignored", name);
3799 else
3800 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3802 return NULL_TREE;
3805 /* Return nonzero if FUNC is a naked function. */
3806 static int
3807 spu_naked_function_p (tree func)
3809 tree a;
3811 if (TREE_CODE (func) != FUNCTION_DECL)
3812 abort ();
3814 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3815 return a != NULL_TREE;
3819 spu_initial_elimination_offset (int from, int to)
3821 int saved_regs_size = spu_saved_regs_size ();
3822 int sp_offset = 0;
3823 if (!current_function_is_leaf || crtl->outgoing_args_size
3824 || get_frame_size () || saved_regs_size)
3825 sp_offset = STACK_POINTER_OFFSET;
3826 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3827 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3828 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3829 return get_frame_size ();
3830 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3831 return sp_offset + crtl->outgoing_args_size
3832 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3833 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3834 return get_frame_size () + saved_regs_size + sp_offset;
3835 else
3836 gcc_unreachable ();
3840 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3842 enum machine_mode mode = TYPE_MODE (type);
3843 int byte_size = ((mode == BLKmode)
3844 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3848 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3850 enum machine_mode smode;
3851 rtvec v;
3852 int i;
3853 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3854 int n = byte_size / UNITS_PER_WORD;
3855 v = rtvec_alloc (nregs);
3856 for (i = 0; i < n; i++)
3858 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3859 gen_rtx_REG (TImode,
3860 FIRST_RETURN_REGNUM
3861 + i),
3862 GEN_INT (UNITS_PER_WORD * i));
3863 byte_size -= UNITS_PER_WORD;
3866 if (n < nregs)
3868 if (byte_size < 4)
3869 byte_size = 4;
3870 smode =
3871 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3872 RTVEC_ELT (v, n) =
3873 gen_rtx_EXPR_LIST (VOIDmode,
3874 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3875 GEN_INT (UNITS_PER_WORD * n));
3877 return gen_rtx_PARALLEL (mode, v);
3879 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3883 spu_function_arg (CUMULATIVE_ARGS cum,
3884 enum machine_mode mode,
3885 tree type, int named ATTRIBUTE_UNUSED)
3887 int byte_size;
3889 if (cum >= MAX_REGISTER_ARGS)
3890 return 0;
3892 byte_size = ((mode == BLKmode)
3893 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3895 /* The ABI does not allow parameters to be passed partially in
3896 reg and partially in stack. */
3897 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3898 return 0;
3900 /* Make sure small structs are left justified in a register. */
3901 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3902 && byte_size < UNITS_PER_WORD && byte_size > 0)
3904 enum machine_mode smode;
3905 rtx gr_reg;
3906 if (byte_size < 4)
3907 byte_size = 4;
3908 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3909 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3910 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3911 const0_rtx);
3912 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3914 else
3915 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3918 /* Variable sized types are passed by reference. */
3919 static bool
3920 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3921 enum machine_mode mode ATTRIBUTE_UNUSED,
3922 const_tree type, bool named ATTRIBUTE_UNUSED)
3924 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3928 /* Var args. */
3930 /* Create and return the va_list datatype.
3932 On SPU, va_list is an array type equivalent to
3934 typedef struct __va_list_tag
3936 void *__args __attribute__((__aligned(16)));
3937 void *__skip __attribute__((__aligned(16)));
3939 } va_list[1];
3941 where __args points to the arg that will be returned by the next
3942 va_arg(), and __skip points to the previous stack frame such that
3943 when __args == __skip we should advance __args by 32 bytes. */
3944 static tree
3945 spu_build_builtin_va_list (void)
3947 tree f_args, f_skip, record, type_decl;
3948 bool owp;
3950 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3952 type_decl =
3953 build_decl (BUILTINS_LOCATION,
3954 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3956 f_args = build_decl (BUILTINS_LOCATION,
3957 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3958 f_skip = build_decl (BUILTINS_LOCATION,
3959 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3961 DECL_FIELD_CONTEXT (f_args) = record;
3962 DECL_ALIGN (f_args) = 128;
3963 DECL_USER_ALIGN (f_args) = 1;
3965 DECL_FIELD_CONTEXT (f_skip) = record;
3966 DECL_ALIGN (f_skip) = 128;
3967 DECL_USER_ALIGN (f_skip) = 1;
3969 TREE_CHAIN (record) = type_decl;
3970 TYPE_NAME (record) = type_decl;
3971 TYPE_FIELDS (record) = f_args;
3972 TREE_CHAIN (f_args) = f_skip;
3974 /* We know this is being padded and that is what we want. It is an internal
3975 type so hide the warnings from the user. */
3976 owp = warn_padded;
3977 warn_padded = false;
3979 layout_type (record);
3981 warn_padded = owp;
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record, build_index_type (size_zero_node));
3987 /* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3990 The following global variables are used to initialize
3991 the va_list structure:
3993 crtl->args.info;
3994 the CUMULATIVE_ARGS for this function
3996 crtl->args.arg_offset_rtx:
3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
4000 static void
4001 spu_va_start (tree valist, rtx nextarg)
4003 tree f_args, f_skip;
4004 tree args, skip, t;
4006 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4007 f_skip = TREE_CHAIN (f_args);
4009 valist = build_va_arg_indirect_ref (valist);
4010 args =
4011 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4012 skip =
4013 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4015 /* Find the __args area. */
4016 t = make_tree (TREE_TYPE (args), nextarg);
4017 if (crtl->args.pretend_args_size > 0)
4018 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4019 size_int (-STACK_POINTER_OFFSET));
4020 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4021 TREE_SIDE_EFFECTS (t) = 1;
4022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4024 /* Find the __skip area. */
4025 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4026 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4027 size_int (crtl->args.pretend_args_size
4028 - STACK_POINTER_OFFSET));
4029 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4030 TREE_SIDE_EFFECTS (t) = 1;
4031 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4034 /* Gimplify va_arg by updating the va_list structure
4035 VALIST as required to retrieve an argument of type
4036 TYPE, and returning that argument.
4038 ret = va_arg(VALIST, TYPE);
4040 generates code equivalent to:
4042 paddedsize = (sizeof(TYPE) + 15) & -16;
4043 if (VALIST.__args + paddedsize > VALIST.__skip
4044 && VALIST.__args <= VALIST.__skip)
4045 addr = VALIST.__skip + 32;
4046 else
4047 addr = VALIST.__args;
4048 VALIST.__args = addr + paddedsize;
4049 ret = *(TYPE *)addr;
4051 static tree
4052 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4053 gimple_seq * post_p ATTRIBUTE_UNUSED)
4055 tree f_args, f_skip;
4056 tree args, skip;
4057 HOST_WIDE_INT size, rsize;
4058 tree paddedsize, addr, tmp;
4059 bool pass_by_reference_p;
4061 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4062 f_skip = TREE_CHAIN (f_args);
4064 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4065 args =
4066 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4067 skip =
4068 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4070 addr = create_tmp_var (ptr_type_node, "va_arg");
4071 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4073 /* if an object is dynamically sized, a pointer to it is passed
4074 instead of the object itself. */
4075 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4076 false);
4077 if (pass_by_reference_p)
4078 type = build_pointer_type (type);
4079 size = int_size_in_bytes (type);
4080 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4082 /* build conditional expression to calculate addr. The expression
4083 will be gimplified later. */
4084 paddedsize = size_int (rsize);
4085 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4086 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4087 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4088 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4089 unshare_expr (skip)));
4091 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4092 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4093 size_int (32)), unshare_expr (args));
4095 gimplify_assign (addr, tmp, pre_p);
4097 /* update VALIST.__args */
4098 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4099 gimplify_assign (unshare_expr (args), tmp, pre_p);
4101 addr = fold_convert (build_pointer_type (type), addr);
4103 if (pass_by_reference_p)
4104 addr = build_va_arg_indirect_ref (addr);
4106 return build_va_arg_indirect_ref (addr);
4109 /* Save parameter registers starting with the register that corresponds
4110 to the first unnamed parameters. If the first unnamed parameter is
4111 in the stack then save no registers. Set pretend_args_size to the
4112 amount of space needed to save the registers. */
4113 void
4114 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4115 tree type, int *pretend_size, int no_rtl)
4117 if (!no_rtl)
4119 rtx tmp;
4120 int regno;
4121 int offset;
4122 int ncum = *cum;
4124 /* cum currently points to the last named argument; we want to
4125 start at the next argument. */
4126 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4128 offset = -STACK_POINTER_OFFSET;
4129 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4131 tmp = gen_frame_mem (V4SImode,
4132 plus_constant (virtual_incoming_args_rtx,
4133 offset));
4134 emit_move_insn (tmp,
4135 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4136 offset += 16;
4138 *pretend_size = offset + STACK_POINTER_OFFSET;
4142 void
4143 spu_conditional_register_usage (void)
4145 if (flag_pic)
4147 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4148 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4152 /* This is called any time we inspect the alignment of a register used
4153 for addresses. */
4154 static int
4155 reg_aligned_for_addr (rtx x)
4157 int regno =
4158 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4159 return REGNO_POINTER_ALIGN (regno) >= 128;
4162 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4163 into its SYMBOL_REF_FLAGS. */
4164 static void
4165 spu_encode_section_info (tree decl, rtx rtl, int first)
4167 default_encode_section_info (decl, rtl, first);
4169 /* If a variable has a forced alignment to < 16 bytes, mark it with
4170 SYMBOL_FLAG_ALIGN1. */
4171 if (TREE_CODE (decl) == VAR_DECL
4172 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4173 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4176 /* Return TRUE if we are certain the mem refers to a complete object
4177 which is both 16-byte aligned and padded to a 16-byte boundary. This
4178 would make it safe to store with a single instruction.
4179 We guarantee the alignment and padding for static objects by aligning
4180 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4181 FIXME: We currently cannot guarantee this for objects on the stack
4182 because assign_parm_setup_stack calls assign_stack_local with the
4183 alignment of the parameter mode and in that case the alignment never
4184 gets adjusted by LOCAL_ALIGNMENT. */
4185 static int
4186 store_with_one_insn_p (rtx mem)
4188 enum machine_mode mode = GET_MODE (mem);
4189 rtx addr = XEXP (mem, 0);
4190 if (mode == BLKmode)
4191 return 0;
4192 if (GET_MODE_SIZE (mode) >= 16)
4193 return 1;
4194 /* Only static objects. */
4195 if (GET_CODE (addr) == SYMBOL_REF)
4197 /* We use the associated declaration to make sure the access is
4198 referring to the whole object.
4199 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4200 if it is necessary. Will there be cases where one exists, and
4201 the other does not? Will there be cases where both exist, but
4202 have different types? */
4203 tree decl = MEM_EXPR (mem);
4204 if (decl
4205 && TREE_CODE (decl) == VAR_DECL
4206 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4207 return 1;
4208 decl = SYMBOL_REF_DECL (addr);
4209 if (decl
4210 && TREE_CODE (decl) == VAR_DECL
4211 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4212 return 1;
4214 return 0;
4217 /* Return 1 when the address is not valid for a simple load and store as
4218 required by the '_mov*' patterns. We could make this less strict
4219 for loads, but we prefer MEMs to look the same so they are more
4220 likely to be merged. */
4221 static int
4222 address_needs_split (rtx mem)
4224 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4225 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4226 || !(store_with_one_insn_p (mem)
4227 || mem_is_padded_component_ref (mem))))
4228 return 1;
4230 return 0;
4234 spu_expand_mov (rtx * ops, enum machine_mode mode)
4236 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4237 abort ();
4239 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4241 rtx from = SUBREG_REG (ops[1]);
4242 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4244 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4245 && GET_MODE_CLASS (imode) == MODE_INT
4246 && subreg_lowpart_p (ops[1]));
4248 if (GET_MODE_SIZE (imode) < 4)
4249 imode = SImode;
4250 if (imode != GET_MODE (from))
4251 from = gen_rtx_SUBREG (imode, from, 0);
4253 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4255 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4256 emit_insn (GEN_FCN (icode) (ops[0], from));
4258 else
4259 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4260 return 1;
4263 /* At least one of the operands needs to be a register. */
4264 if ((reload_in_progress | reload_completed) == 0
4265 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4267 rtx temp = force_reg (mode, ops[1]);
4268 emit_move_insn (ops[0], temp);
4269 return 1;
4271 if (reload_in_progress || reload_completed)
4273 if (CONSTANT_P (ops[1]))
4274 return spu_split_immediate (ops);
4275 return 0;
4278 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4279 extend them. */
4280 if (GET_CODE (ops[1]) == CONST_INT)
4282 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4283 if (val != INTVAL (ops[1]))
4285 emit_move_insn (ops[0], GEN_INT (val));
4286 return 1;
4289 if (MEM_P (ops[0]))
4290 return spu_split_store (ops);
4291 if (MEM_P (ops[1]))
4292 return spu_split_load (ops);
4294 return 0;
4297 static void
4298 spu_convert_move (rtx dst, rtx src)
4300 enum machine_mode mode = GET_MODE (dst);
4301 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4302 rtx reg;
4303 gcc_assert (GET_MODE (src) == TImode);
4304 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4305 emit_insn (gen_rtx_SET (VOIDmode, reg,
4306 gen_rtx_TRUNCATE (int_mode,
4307 gen_rtx_LSHIFTRT (TImode, src,
4308 GEN_INT (int_mode == DImode ? 64 : 96)))));
4309 if (int_mode != mode)
4311 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4312 emit_move_insn (dst, reg);
4316 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4317 the address from SRC and SRC+16. Return a REG or CONST_INT that
4318 specifies how many bytes to rotate the loaded registers, plus any
4319 extra from EXTRA_ROTQBY. The address and rotate amounts are
4320 normalized to improve merging of loads and rotate computations. */
4321 static rtx
4322 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4324 rtx addr = XEXP (src, 0);
4325 rtx p0, p1, rot, addr0, addr1;
4326 int rot_amt;
4328 rot = 0;
4329 rot_amt = 0;
4331 if (MEM_ALIGN (src) >= 128)
4332 /* Address is already aligned; simply perform a TImode load. */ ;
4333 else if (GET_CODE (addr) == PLUS)
4335 /* 8 cases:
4336 aligned reg + aligned reg => lqx
4337 aligned reg + unaligned reg => lqx, rotqby
4338 aligned reg + aligned const => lqd
4339 aligned reg + unaligned const => lqd, rotqbyi
4340 unaligned reg + aligned reg => lqx, rotqby
4341 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4342 unaligned reg + aligned const => lqd, rotqby
4343 unaligned reg + unaligned const => not allowed by legitimate address
4345 p0 = XEXP (addr, 0);
4346 p1 = XEXP (addr, 1);
4347 if (!reg_aligned_for_addr (p0))
4349 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4351 rot = gen_reg_rtx (SImode);
4352 emit_insn (gen_addsi3 (rot, p0, p1));
4354 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4356 if (INTVAL (p1) > 0
4357 && REG_POINTER (p0)
4358 && INTVAL (p1) * BITS_PER_UNIT
4359 < REGNO_POINTER_ALIGN (REGNO (p0)))
4361 rot = gen_reg_rtx (SImode);
4362 emit_insn (gen_addsi3 (rot, p0, p1));
4363 addr = p0;
4365 else
4367 rtx x = gen_reg_rtx (SImode);
4368 emit_move_insn (x, p1);
4369 if (!spu_arith_operand (p1, SImode))
4370 p1 = x;
4371 rot = gen_reg_rtx (SImode);
4372 emit_insn (gen_addsi3 (rot, p0, p1));
4373 addr = gen_rtx_PLUS (Pmode, p0, x);
4376 else
4377 rot = p0;
4379 else
4381 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4383 rot_amt = INTVAL (p1) & 15;
4384 if (INTVAL (p1) & -16)
4386 p1 = GEN_INT (INTVAL (p1) & -16);
4387 addr = gen_rtx_PLUS (SImode, p0, p1);
4389 else
4390 addr = p0;
4392 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4393 rot = p1;
4396 else if (REG_P (addr))
4398 if (!reg_aligned_for_addr (addr))
4399 rot = addr;
4401 else if (GET_CODE (addr) == CONST)
4403 if (GET_CODE (XEXP (addr, 0)) == PLUS
4404 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4405 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4407 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4408 if (rot_amt & -16)
4409 addr = gen_rtx_CONST (Pmode,
4410 gen_rtx_PLUS (Pmode,
4411 XEXP (XEXP (addr, 0), 0),
4412 GEN_INT (rot_amt & -16)));
4413 else
4414 addr = XEXP (XEXP (addr, 0), 0);
4416 else
4418 rot = gen_reg_rtx (Pmode);
4419 emit_move_insn (rot, addr);
4422 else if (GET_CODE (addr) == CONST_INT)
4424 rot_amt = INTVAL (addr);
4425 addr = GEN_INT (rot_amt & -16);
4427 else if (!ALIGNED_SYMBOL_REF_P (addr))
4429 rot = gen_reg_rtx (Pmode);
4430 emit_move_insn (rot, addr);
4433 rot_amt += extra_rotby;
4435 rot_amt &= 15;
4437 if (rot && rot_amt)
4439 rtx x = gen_reg_rtx (SImode);
4440 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4441 rot = x;
4442 rot_amt = 0;
4444 if (!rot && rot_amt)
4445 rot = GEN_INT (rot_amt);
4447 addr0 = copy_rtx (addr);
4448 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4449 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4451 if (dst1)
4453 addr1 = plus_constant (copy_rtx (addr), 16);
4454 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4455 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4458 return rot;
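/* Illustrative sketch (not part of the original port): the split-load
   strategy above can be modelled on plain bytes.  The quadword holding
   the datum is fetched from ADDR & -16 and then rotated left so the
   datum lands in its preferred slot (byte 0 for 4-byte and larger
   scalars, byte 4 - SIZE for smaller ones, which is what the
   extra_rotby adjustment in spu_split_load supplies).  The helper
   below is a hypothetical host-side model; MEM is the start of local
   store and ADDR an offset into it.  */
#if 0
static void
model_unaligned_load (const unsigned char *mem, unsigned int addr,
		      int size, unsigned char out[16])
{
  const unsigned char *line = mem + (addr & ~15u); /* lqd from ADDR & -16 */
  int rot = (addr & 15) + (size < 4 ? size - 4 : 0); /* rotqby amount */
  int i;
  rot &= 15;
  for (i = 0; i < 16; i++)	/* rotate the quadword left by ROT bytes */
    out[i] = line[(i + rot) & 15];
  /* Assuming the datum does not cross a quadword boundary: for
     SIZE == 4 it now occupies out[0..3]; for SIZE == 1 it sits in
     out[3], the preferred QImode slot.  */
}
#endif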
4462 spu_split_load (rtx * ops)
4464 enum machine_mode mode = GET_MODE (ops[0]);
4465 rtx addr, load, rot;
4466 int rot_amt;
4468 if (GET_MODE_SIZE (mode) >= 16)
4469 return 0;
4471 addr = XEXP (ops[1], 0);
4472 gcc_assert (GET_CODE (addr) != AND);
4474 if (!address_needs_split (ops[1]))
4476 ops[1] = change_address (ops[1], TImode, addr);
4477 load = gen_reg_rtx (TImode);
4478 emit_insn (gen__movti (load, ops[1]));
4479 spu_convert_move (ops[0], load);
4480 return 1;
4483 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4485 load = gen_reg_rtx (TImode);
4486 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4488 if (rot)
4489 emit_insn (gen_rotqby_ti (load, load, rot));
4491 spu_convert_move (ops[0], load);
4492 return 1;
4496 spu_split_store (rtx * ops)
4498 enum machine_mode mode = GET_MODE (ops[0]);
4499 rtx reg;
4500 rtx addr, p0, p1, p1_lo, smem;
4501 int aform;
4502 int scalar;
4504 if (GET_MODE_SIZE (mode) >= 16)
4505 return 0;
4507 addr = XEXP (ops[0], 0);
4508 gcc_assert (GET_CODE (addr) != AND);
4510 if (!address_needs_split (ops[0]))
4512 reg = gen_reg_rtx (TImode);
4513 emit_insn (gen_spu_convert (reg, ops[1]));
4514 ops[0] = change_address (ops[0], TImode, addr);
4515 emit_move_insn (ops[0], reg);
4516 return 1;
4519 if (GET_CODE (addr) == PLUS)
4521 /* 8 cases:
4522 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4523 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4524 aligned reg + aligned const => lqd, c?d, shuf, stqx
4525 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4526 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4527 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4528 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4529 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4531 aform = 0;
4532 p0 = XEXP (addr, 0);
4533 p1 = p1_lo = XEXP (addr, 1);
4534 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4536 p1_lo = GEN_INT (INTVAL (p1) & 15);
4537 if (reg_aligned_for_addr (p0))
4539 p1 = GEN_INT (INTVAL (p1) & -16);
4540 if (p1 == const0_rtx)
4541 addr = p0;
4542 else
4543 addr = gen_rtx_PLUS (SImode, p0, p1);
4545 else
4547 rtx x = gen_reg_rtx (SImode);
4548 emit_move_insn (x, p1);
4549 addr = gen_rtx_PLUS (SImode, p0, x);
4553 else if (REG_P (addr))
4555 aform = 0;
4556 p0 = addr;
4557 p1 = p1_lo = const0_rtx;
4559 else
4561 aform = 1;
4562 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4563 p1 = 0; /* aform doesn't use p1 */
4564 p1_lo = addr;
4565 if (ALIGNED_SYMBOL_REF_P (addr))
4566 p1_lo = const0_rtx;
4567 else if (GET_CODE (addr) == CONST
4568 && GET_CODE (XEXP (addr, 0)) == PLUS
4569 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4570 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4572 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4573 if ((v & -16) != 0)
4574 addr = gen_rtx_CONST (Pmode,
4575 gen_rtx_PLUS (Pmode,
4576 XEXP (XEXP (addr, 0), 0),
4577 GEN_INT (v & -16)));
4578 else
4579 addr = XEXP (XEXP (addr, 0), 0);
4580 p1_lo = GEN_INT (v & 15);
4582 else if (GET_CODE (addr) == CONST_INT)
4584 p1_lo = GEN_INT (INTVAL (addr) & 15);
4585 addr = GEN_INT (INTVAL (addr) & -16);
4587 else
4589 p1_lo = gen_reg_rtx (SImode);
4590 emit_move_insn (p1_lo, addr);
4594 reg = gen_reg_rtx (TImode);
4596 scalar = store_with_one_insn_p (ops[0]);
4597 if (!scalar)
4599 /* We could copy the flags from the ops[0] MEM to mem here,
4600 but we don't because we want this load to be optimized away if
4601 possible, and copying the flags would prevent that in certain
4602 cases, e.g. consider the volatile flag. */
4604 rtx pat = gen_reg_rtx (TImode);
4605 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4606 set_mem_alias_set (lmem, 0);
4607 emit_insn (gen_movti (reg, lmem));
4609 if (!p0 || reg_aligned_for_addr (p0))
4610 p0 = stack_pointer_rtx;
4611 if (!p1_lo)
4612 p1_lo = const0_rtx;
4614 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4615 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4617 else
4619 if (GET_CODE (ops[1]) == REG)
4620 emit_insn (gen_spu_convert (reg, ops[1]));
4621 else if (GET_CODE (ops[1]) == SUBREG)
4622 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4623 else
4624 abort ();
4627 if (GET_MODE_SIZE (mode) < 4 && scalar)
4628 emit_insn (gen_ashlti3
4629 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4631 smem = change_address (ops[0], TImode, copy_rtx (addr));
4632 /* We can't use the previous alias set because the memory has changed
4633 size and can potentially overlap objects of other types. */
4634 set_mem_alias_set (smem, 0);
4636 emit_insn (gen_movti (smem, reg));
4637 return 1;
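/* Illustrative sketch (not part of the original port): when a
   sub-quadword store cannot be done with a single instruction it
   becomes a read-modify-write of the containing quadword: load it,
   build a cwd/cdd-style insertion control word, shufb the new value
   into place and store the quadword back.  Hypothetical host-side
   byte model; LINE is the 16-byte quadword, VAL the 16-byte register
   image of the value being stored (scalar in its preferred slot),
   OFFSET the low four bits of the store address.  */
#if 0
static void
model_rmw_store (unsigned char line[16], const unsigned char val[16],
		 int offset, int size)
{
  unsigned char pat[16], merged[16];
  int i, shift = size == 1 ? 3 : size == 2 ? 2 : 0;
  for (i = 0; i < 16; i++)
    pat[i] = i + 16;			/* by default keep the old bytes */
  for (i = 0; i < size; i++)
    pat[(offset + i) & 15] = i + shift;	/* splice in the new value */
  for (i = 0; i < 16; i++)		/* shufb: 0-15 = VAL, 16-31 = LINE */
    merged[i] = pat[i] < 16 ? val[pat[i]] : line[pat[i] - 16];
  for (i = 0; i < 16; i++)
    line[i] = merged[i];
}
#endif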
4640 /* Return TRUE if X is MEM which is a struct member reference
4641 and the member can safely be loaded and stored with a single
4642 instruction because it is padded. */
4643 static int
4644 mem_is_padded_component_ref (rtx x)
4646 tree t = MEM_EXPR (x);
4647 tree r;
4648 if (!t || TREE_CODE (t) != COMPONENT_REF)
4649 return 0;
4650 t = TREE_OPERAND (t, 1);
4651 if (!t || TREE_CODE (t) != FIELD_DECL
4652 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4653 return 0;
4654 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4655 r = DECL_FIELD_CONTEXT (t);
4656 if (!r || TREE_CODE (r) != RECORD_TYPE)
4657 return 0;
4658 /* Make sure they are the same mode */
4659 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4660 return 0;
4661 /* If there are no following fields, then the field alignment assures
4662 the structure is padded to that alignment, which means this field is
4663 padded too. */
4664 if (TREE_CHAIN (t) == 0)
4665 return 1;
4666 /* If the following field is also aligned then this field will be
4667 padded. */
4668 t = TREE_CHAIN (t);
4669 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4670 return 1;
4671 return 0;
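/* Illustrative sketch (not part of the original port): an example of a
   member the predicate above accepts.  In the hypothetical struct
   below, F has a 16-byte (128-bit) DECL_ALIGN and the following field
   is also 16-byte aligned, so F is followed by padding up to the next
   quadword and a full quadword store of F cannot clobber G.  */
#if 0
struct padded_example
{
  int f __attribute__ ((aligned (16)));	/* bytes 0-3, padded to byte 15 */
  int g __attribute__ ((aligned (16)));	/* starts at byte 16 */
};
#endif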
4674 /* Parse the -mfixed-range= option string. */
4675 static void
4676 fix_range (const char *const_str)
4678 int i, first, last;
4679 char *str, *dash, *comma;
4681 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4682 REG2 are either register names or register numbers. The effect
4683 of this option is to mark the registers in the range from REG1 to
4684 REG2 as ``fixed'' so they won't be used by the compiler. */
4686 i = strlen (const_str);
4687 str = (char *) alloca (i + 1);
4688 memcpy (str, const_str, i + 1);
4690 while (1)
4692 dash = strchr (str, '-');
4693 if (!dash)
4695 warning (0, "value of -mfixed-range must have form REG1-REG2");
4696 return;
4698 *dash = '\0';
4699 comma = strchr (dash + 1, ',');
4700 if (comma)
4701 *comma = '\0';
4703 first = decode_reg_name (str);
4704 if (first < 0)
4706 warning (0, "unknown register name: %s", str);
4707 return;
4710 last = decode_reg_name (dash + 1);
4711 if (last < 0)
4713 warning (0, "unknown register name: %s", dash + 1);
4714 return;
4717 *dash = '-';
4719 if (first > last)
4721 warning (0, "%s-%s is an empty range", str, dash + 1);
4722 return;
4725 for (i = first; i <= last; ++i)
4726 fixed_regs[i] = call_used_regs[i] = 1;
4728 if (!comma)
4729 break;
4731 *comma = ',';
4732 str = comma + 1;
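/* Usage sketch (illustrative): following the comment above, a command
   line option such as

     -mfixed-range=80-85,90-92

   marks registers 80-85 and 90-92 as both fixed and call-used, keeping
   the register allocator away from them.  */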
4736 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4737 can be generated using the fsmbi instruction. */
4739 fsmbi_const_p (rtx x)
4741 if (CONSTANT_P (x))
4743 /* We can always choose TImode for CONST_INT because the high bits
4744 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4745 enum immediate_class c = classify_immediate (x, TImode);
4746 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4748 return 0;
4751 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4752 can be generated using the cbd, chd, cwd or cdd instruction. */
4754 cpat_const_p (rtx x, enum machine_mode mode)
4756 if (CONSTANT_P (x))
4758 enum immediate_class c = classify_immediate (x, mode);
4759 return c == IC_CPAT;
4761 return 0;
4765 gen_cpat_const (rtx * ops)
4767 unsigned char dst[16];
4768 int i, offset, shift, isize;
4769 if (GET_CODE (ops[3]) != CONST_INT
4770 || GET_CODE (ops[2]) != CONST_INT
4771 || (GET_CODE (ops[1]) != CONST_INT
4772 && GET_CODE (ops[1]) != REG))
4773 return 0;
4774 if (GET_CODE (ops[1]) == REG
4775 && (!REG_POINTER (ops[1])
4776 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4777 return 0;
4779 for (i = 0; i < 16; i++)
4780 dst[i] = i + 16;
4781 isize = INTVAL (ops[3]);
4782 if (isize == 1)
4783 shift = 3;
4784 else if (isize == 2)
4785 shift = 2;
4786 else
4787 shift = 0;
4788 offset = (INTVAL (ops[2]) +
4789 (GET_CODE (ops[1]) ==
4790 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4791 for (i = 0; i < isize; i++)
4792 dst[offset + i] = i + shift;
4793 return array_to_constant (TImode, dst);
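/* Worked example (illustrative): for a word-sized insert (ops[3] == 4)
   at offset 4, the loops above produce the shuffle control

     { 16,17,18,19,  0, 1, 2, 3, 24,25,26,27, 28,29,30,31 }

   i.e. every byte selects the old quadword (values 16..31) except
   bytes 4-7, which take bytes 0-3 of the new value -- its preferred
   SImode slot.  For a one-byte insert the selected byte is 3, the
   preferred QImode slot, which is what the SHIFT adjustment gives.  */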
4796 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16-byte
4797 array. Use MODE for CONST_INTs. When the constant's mode is smaller
4798 than 16 bytes, the value is repeated across the rest of the array. */
4799 void
4800 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4802 HOST_WIDE_INT val;
4803 int i, j, first;
4805 memset (arr, 0, 16);
4806 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4807 if (GET_CODE (x) == CONST_INT
4808 || (GET_CODE (x) == CONST_DOUBLE
4809 && (mode == SFmode || mode == DFmode)))
4811 gcc_assert (mode != VOIDmode && mode != BLKmode);
4813 if (GET_CODE (x) == CONST_DOUBLE)
4814 val = const_double_to_hwint (x);
4815 else
4816 val = INTVAL (x);
4817 first = GET_MODE_SIZE (mode) - 1;
4818 for (i = first; i >= 0; i--)
4820 arr[i] = val & 0xff;
4821 val >>= 8;
4823 /* Splat the constant across the whole array. */
4824 for (j = 0, i = first + 1; i < 16; i++)
4826 arr[i] = arr[j];
4827 j = (j == first) ? 0 : j + 1;
4830 else if (GET_CODE (x) == CONST_DOUBLE)
4832 val = CONST_DOUBLE_LOW (x);
4833 for (i = 15; i >= 8; i--)
4835 arr[i] = val & 0xff;
4836 val >>= 8;
4838 val = CONST_DOUBLE_HIGH (x);
4839 for (i = 7; i >= 0; i--)
4841 arr[i] = val & 0xff;
4842 val >>= 8;
4845 else if (GET_CODE (x) == CONST_VECTOR)
4847 int units;
4848 rtx elt;
4849 mode = GET_MODE_INNER (mode);
4850 units = CONST_VECTOR_NUNITS (x);
4851 for (i = 0; i < units; i++)
4853 elt = CONST_VECTOR_ELT (x, i);
4854 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4856 if (GET_CODE (elt) == CONST_DOUBLE)
4857 val = const_double_to_hwint (elt);
4858 else
4859 val = INTVAL (elt);
4860 first = GET_MODE_SIZE (mode) - 1;
4861 if (first + i * GET_MODE_SIZE (mode) > 16)
4862 abort ();
4863 for (j = first; j >= 0; j--)
4865 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4866 val >>= 8;
4871 else
4872 gcc_unreachable();
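/* Worked example (illustrative): constant_to_array (HImode,
   GEN_INT (0x1234), arr) stores the big-endian bytes 0x12 0x34 into
   arr[0..1], and the splat loop then repeats them across the rest of
   the quadword:

     arr == { 0x12,0x34, 0x12,0x34, ..., 0x12,0x34 }

   so the array always describes a full 16-byte register image.  */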
4875 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4876 smaller than 16 bytes, use the bytes that would represent that value
4877 in a register, e.g., for QImode return the value of arr[3]. */
4879 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4881 enum machine_mode inner_mode;
4882 rtvec v;
4883 int units, size, i, j, k;
4884 HOST_WIDE_INT val;
4886 if (GET_MODE_CLASS (mode) == MODE_INT
4887 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4889 j = GET_MODE_SIZE (mode);
4890 i = j < 4 ? 4 - j : 0;
4891 for (val = 0; i < j; i++)
4892 val = (val << 8) | arr[i];
4893 val = trunc_int_for_mode (val, mode);
4894 return GEN_INT (val);
4897 if (mode == TImode)
4899 HOST_WIDE_INT high;
4900 for (i = high = 0; i < 8; i++)
4901 high = (high << 8) | arr[i];
4902 for (i = 8, val = 0; i < 16; i++)
4903 val = (val << 8) | arr[i];
4904 return immed_double_const (val, high, TImode);
4906 if (mode == SFmode)
4908 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4909 val = trunc_int_for_mode (val, SImode);
4910 return hwint_to_const_double (SFmode, val);
4912 if (mode == DFmode)
4914 for (i = 0, val = 0; i < 8; i++)
4915 val = (val << 8) | arr[i];
4916 return hwint_to_const_double (DFmode, val);
4919 if (!VECTOR_MODE_P (mode))
4920 abort ();
4922 units = GET_MODE_NUNITS (mode);
4923 size = GET_MODE_UNIT_SIZE (mode);
4924 inner_mode = GET_MODE_INNER (mode);
4925 v = rtvec_alloc (units);
4927 for (k = i = 0; i < units; ++i)
4929 val = 0;
4930 for (j = 0; j < size; j++, k++)
4931 val = (val << 8) | arr[k];
4933 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4934 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4935 else
4936 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4938 if (k > 16)
4939 abort ();
4941 return gen_rtx_CONST_VECTOR (mode, v);
4944 static void
4945 reloc_diagnostic (rtx x)
4947 tree loc_decl, decl = 0;
4948 const char *msg;
4949 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4950 return;
4952 if (GET_CODE (x) == SYMBOL_REF)
4953 decl = SYMBOL_REF_DECL (x);
4954 else if (GET_CODE (x) == CONST
4955 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4956 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4958 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4959 if (decl && !DECL_P (decl))
4960 decl = 0;
4962 /* We use last_assemble_variable_decl to get line information. It's
4963 not always going to be right and might not even be close, but will
4964 be right for the more common cases. */
4965 if (!last_assemble_variable_decl || in_section == ctors_section)
4966 loc_decl = decl;
4967 else
4968 loc_decl = last_assemble_variable_decl;
4970 /* The decl could be a string constant. */
4971 if (decl && DECL_P (decl))
4972 msg = "%Jcreating run-time relocation for %qD";
4973 else
4974 msg = "creating run-time relocation";
4976 if (TARGET_WARN_RELOC)
4977 warning (0, msg, loc_decl, decl);
4978 else
4979 error (msg, loc_decl, decl);
4982 /* Hook into assemble_integer so we can generate an error for run-time
4983 relocations. The SPU ABI disallows them. */
4984 static bool
4985 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4987 /* By default run-time relocations aren't supported, but we allow them
4988 in case users support them in their own run-time loader, and we provide
4989 a warning for those users that don't. */
4990 if ((GET_CODE (x) == SYMBOL_REF)
4991 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4992 reloc_diagnostic (x);
4994 return default_assemble_integer (x, size, aligned_p);
4997 static void
4998 spu_asm_globalize_label (FILE * file, const char *name)
5000 fputs ("\t.global\t", file);
5001 assemble_name (file, name);
5002 fputs ("\n", file);
5005 static bool
5006 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5007 bool speed ATTRIBUTE_UNUSED)
5009 enum machine_mode mode = GET_MODE (x);
5010 int cost = COSTS_N_INSNS (2);
5012 /* Folding to a CONST_VECTOR will use extra space but there might
5013 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5014 only if it allows us to fold away multiple insns. Changing the cost
5015 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5016 because this cost will only be compared against a single insn.
5017 if (code == CONST_VECTOR)
5018 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5021 /* Use defaults for float operations. Not accurate but good enough. */
5022 if (mode == DFmode)
5024 *total = COSTS_N_INSNS (13);
5025 return true;
5027 if (mode == SFmode)
5029 *total = COSTS_N_INSNS (6);
5030 return true;
5032 switch (code)
5034 case CONST_INT:
5035 if (satisfies_constraint_K (x))
5036 *total = 0;
5037 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5038 *total = COSTS_N_INSNS (1);
5039 else
5040 *total = COSTS_N_INSNS (3);
5041 return true;
5043 case CONST:
5044 *total = COSTS_N_INSNS (3);
5045 return true;
5047 case LABEL_REF:
5048 case SYMBOL_REF:
5049 *total = COSTS_N_INSNS (0);
5050 return true;
5052 case CONST_DOUBLE:
5053 *total = COSTS_N_INSNS (5);
5054 return true;
5056 case FLOAT_EXTEND:
5057 case FLOAT_TRUNCATE:
5058 case FLOAT:
5059 case UNSIGNED_FLOAT:
5060 case FIX:
5061 case UNSIGNED_FIX:
5062 *total = COSTS_N_INSNS (7);
5063 return true;
5065 case PLUS:
5066 if (mode == TImode)
5068 *total = COSTS_N_INSNS (9);
5069 return true;
5071 break;
5073 case MULT:
5074 cost =
5075 GET_CODE (XEXP (x, 0)) ==
5076 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5077 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5079 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5081 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5082 cost = COSTS_N_INSNS (14);
5083 if ((val & 0xffff) == 0)
5084 cost = COSTS_N_INSNS (9);
5085 else if (val > 0 && val < 0x10000)
5086 cost = COSTS_N_INSNS (11);
5089 *total = cost;
5090 return true;
5091 case DIV:
5092 case UDIV:
5093 case MOD:
5094 case UMOD:
5095 *total = COSTS_N_INSNS (20);
5096 return true;
5097 case ROTATE:
5098 case ROTATERT:
5099 case ASHIFT:
5100 case ASHIFTRT:
5101 case LSHIFTRT:
5102 *total = COSTS_N_INSNS (4);
5103 return true;
5104 case UNSPEC:
5105 if (XINT (x, 1) == UNSPEC_CONVERT)
5106 *total = COSTS_N_INSNS (0);
5107 else
5108 *total = COSTS_N_INSNS (4);
5109 return true;
5111 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5112 if (GET_MODE_CLASS (mode) == MODE_INT
5113 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5114 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5115 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5116 *total = cost;
5117 return true;
5120 static enum machine_mode
5121 spu_unwind_word_mode (void)
5123 return SImode;
5126 /* Decide whether we can make a sibling call to a function. DECL is the
5127 declaration of the function being targeted by the call and EXP is the
5128 CALL_EXPR representing the call. */
5129 static bool
5130 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5132 return decl && !TARGET_LARGE_MEM;
5135 /* We need to correctly update the back chain pointer and the Available
5136 Stack Size (which is in the second slot of the sp register). */
5137 void
5138 spu_allocate_stack (rtx op0, rtx op1)
5140 HOST_WIDE_INT v;
5141 rtx chain = gen_reg_rtx (V4SImode);
5142 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5143 rtx sp = gen_reg_rtx (V4SImode);
5144 rtx splatted = gen_reg_rtx (V4SImode);
5145 rtx pat = gen_reg_rtx (TImode);
5147 /* copy the back chain so we can save it back again. */
5148 emit_move_insn (chain, stack_bot);
5150 op1 = force_reg (SImode, op1);
5152 v = 0x1020300010203ll;
5153 emit_move_insn (pat, immed_double_const (v, v, TImode));
5154 emit_insn (gen_shufb (splatted, op1, op1, pat));
5156 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5157 emit_insn (gen_subv4si3 (sp, sp, splatted));
5159 if (flag_stack_check)
5161 rtx avail = gen_reg_rtx(SImode);
5162 rtx result = gen_reg_rtx(SImode);
5163 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5164 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5165 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5168 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5170 emit_move_insn (stack_bot, chain);
5172 emit_move_insn (op0, virtual_stack_dynamic_rtx);
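/* Illustrative note (not from the original sources): the shuffle
   constant built from 0x0001020300010203 in both halves replicates the
   low word of OP1 (the allocation size) into all four slots of
   SPLATTED, so the single subv4si subtracts the allocation size from
   both the stack pointer in slot 0 and the Available Stack Size in
   slot 1 in one instruction; the back chain saved in CHAIN is then
   stored back at the new bottom of the stack.  */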
5175 void
5176 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5178 static unsigned char arr[16] =
5179 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5180 rtx temp = gen_reg_rtx (SImode);
5181 rtx temp2 = gen_reg_rtx (SImode);
5182 rtx temp3 = gen_reg_rtx (V4SImode);
5183 rtx temp4 = gen_reg_rtx (V4SImode);
5184 rtx pat = gen_reg_rtx (TImode);
5185 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5187 /* Restore the backchain from the first word, sp from the second. */
5188 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5189 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5191 emit_move_insn (pat, array_to_constant (TImode, arr));
5193 /* Compute Available Stack Size for sp */
5194 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5195 emit_insn (gen_shufb (temp3, temp, temp, pat));
5197 /* Compute Available Stack Size for back chain */
5198 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5199 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5200 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5202 emit_insn (gen_addv4si3 (sp, sp, temp3));
5203 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5206 static void
5207 spu_init_libfuncs (void)
5209 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5210 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5211 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5212 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5213 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5214 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5215 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5216 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5217 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5218 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5219 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5222 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5224 set_optab_libfunc (smul_optab, TImode, "__multi3");
5225 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5226 set_optab_libfunc (smod_optab, TImode, "__modti3");
5227 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5228 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5229 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5232 /* Make a subreg, stripping any existing subreg. We could possibly just
5233 call simplify_subreg, but in this case we know what we want. */
5235 spu_gen_subreg (enum machine_mode mode, rtx x)
5237 if (GET_CODE (x) == SUBREG)
5238 x = SUBREG_REG (x);
5239 if (GET_MODE (x) == mode)
5240 return x;
5241 return gen_rtx_SUBREG (mode, x, 0);
5244 static bool
5245 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5247 return (TYPE_MODE (type) == BLKmode
5248 && ((type) == 0
5249 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5250 || int_size_in_bytes (type) >
5251 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5254 /* Create the built-in types and functions */
5256 enum spu_function_code
5258 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5259 #include "spu-builtins.def"
5260 #undef DEF_BUILTIN
5261 NUM_SPU_BUILTINS
5264 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5266 struct spu_builtin_description spu_builtins[] = {
5267 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5268 {fcode, icode, name, type, params, NULL_TREE},
5269 #include "spu-builtins.def"
5270 #undef DEF_BUILTIN
5273 static void
5274 spu_init_builtins (void)
5276 struct spu_builtin_description *d;
5277 unsigned int i;
5279 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5280 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5281 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5282 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5283 V4SF_type_node = build_vector_type (float_type_node, 4);
5284 V2DF_type_node = build_vector_type (double_type_node, 2);
5286 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5287 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5288 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5289 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5291 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5293 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5294 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5295 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5296 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5297 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5298 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5299 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5300 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5301 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5302 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5303 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5304 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5306 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5307 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5308 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5309 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5310 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5311 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5312 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5313 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5315 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5316 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5318 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5320 spu_builtin_types[SPU_BTI_PTR] =
5321 build_pointer_type (build_qualified_type
5322 (void_type_node,
5323 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5325 /* For each builtin we build a new prototype. The tree code will make
5326 sure nodes are shared. */
5327 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5329 tree p;
5330 char name[64]; /* build_function will make a copy. */
5331 int parm;
5333 if (d->name == 0)
5334 continue;
5336 /* Find last parm. */
5337 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5340 p = void_list_node;
5341 while (parm > 1)
5342 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5344 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5346 sprintf (name, "__builtin_%s", d->name);
5347 d->fndecl =
5348 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5349 NULL, NULL_TREE);
5350 if (d->fcode == SPU_MASK_FOR_LOAD)
5351 TREE_READONLY (d->fndecl) = 1;
5353 /* These builtins don't throw. */
5354 TREE_NOTHROW (d->fndecl) = 1;
5358 void
5359 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5361 static unsigned char arr[16] =
5362 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5364 rtx temp = gen_reg_rtx (Pmode);
5365 rtx temp2 = gen_reg_rtx (V4SImode);
5366 rtx temp3 = gen_reg_rtx (V4SImode);
5367 rtx pat = gen_reg_rtx (TImode);
5368 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5370 emit_move_insn (pat, array_to_constant (TImode, arr));
5372 /* Restore the sp. */
5373 emit_move_insn (temp, op1);
5374 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5376 /* Compute available stack size for sp. */
5377 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5378 emit_insn (gen_shufb (temp3, temp, temp, pat));
5380 emit_insn (gen_addv4si3 (sp, sp, temp3));
5381 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5385 spu_safe_dma (HOST_WIDE_INT channel)
5387 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5390 void
5391 spu_builtin_splats (rtx ops[])
5393 enum machine_mode mode = GET_MODE (ops[0]);
5394 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5396 unsigned char arr[16];
5397 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5398 emit_move_insn (ops[0], array_to_constant (mode, arr));
5400 else
5402 rtx reg = gen_reg_rtx (TImode);
5403 rtx shuf;
5404 if (GET_CODE (ops[1]) != REG
5405 && GET_CODE (ops[1]) != SUBREG)
5406 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5407 switch (mode)
5409 case V2DImode:
5410 case V2DFmode:
5411 shuf =
5412 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5413 TImode);
5414 break;
5415 case V4SImode:
5416 case V4SFmode:
5417 shuf =
5418 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5419 TImode);
5420 break;
5421 case V8HImode:
5422 shuf =
5423 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5424 TImode);
5425 break;
5426 case V16QImode:
5427 shuf =
5428 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5429 TImode);
5430 break;
5431 default:
5432 abort ();
5434 emit_move_insn (reg, shuf);
5435 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
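/* Illustrative note (not from the original sources): the shuffle
   constants above replicate the preferred scalar slot of OPS[1] into
   every element.  For V4SImode the pattern is the byte sequence
   00 01 02 03 repeated four times, so each word of the result is bytes
   0-3 of the source; for V16QImode every control byte is 0x03, the
   preferred QImode slot.  */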
5439 void
5440 spu_builtin_extract (rtx ops[])
5442 enum machine_mode mode;
5443 rtx rot, from, tmp;
5445 mode = GET_MODE (ops[1]);
5447 if (GET_CODE (ops[2]) == CONST_INT)
5449 switch (mode)
5451 case V16QImode:
5452 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5453 break;
5454 case V8HImode:
5455 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5456 break;
5457 case V4SFmode:
5458 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5459 break;
5460 case V4SImode:
5461 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5462 break;
5463 case V2DImode:
5464 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5465 break;
5466 case V2DFmode:
5467 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5468 break;
5469 default:
5470 abort ();
5472 return;
5475 from = spu_gen_subreg (TImode, ops[1]);
5476 rot = gen_reg_rtx (TImode);
5477 tmp = gen_reg_rtx (SImode);
5479 switch (mode)
5481 case V16QImode:
5482 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5483 break;
5484 case V8HImode:
5485 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5486 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5487 break;
5488 case V4SFmode:
5489 case V4SImode:
5490 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5491 break;
5492 case V2DImode:
5493 case V2DFmode:
5494 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5495 break;
5496 default:
5497 abort ();
5499 emit_insn (gen_rotqby_ti (rot, from, tmp));
5501 emit_insn (gen_spu_convert (ops[0], rot));
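/* Illustrative note (not from the original sources): for a variable
   index the element is brought into the preferred scalar slot with a
   quadword byte rotate.  The amount computed above is
   element_byte_offset - preferred_slot_offset: for V8HImode element I
   it is 2*I - 2, so the element's two bytes land in bytes 2-3; for
   V16QImode it is I - 3, landing the byte in the QImode slot at
   byte 3.  spu_convert then extracts the scalar from that slot.  */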
5504 void
5505 spu_builtin_insert (rtx ops[])
5507 enum machine_mode mode = GET_MODE (ops[0]);
5508 enum machine_mode imode = GET_MODE_INNER (mode);
5509 rtx mask = gen_reg_rtx (TImode);
5510 rtx offset;
5512 if (GET_CODE (ops[3]) == CONST_INT)
5513 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5514 else
5516 offset = gen_reg_rtx (SImode);
5517 emit_insn (gen_mulsi3
5518 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5520 emit_insn (gen_cpat
5521 (mask, stack_pointer_rtx, offset,
5522 GEN_INT (GET_MODE_SIZE (imode))));
5523 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5526 void
5527 spu_builtin_promote (rtx ops[])
5529 enum machine_mode mode, imode;
5530 rtx rot, from, offset;
5531 HOST_WIDE_INT pos;
5533 mode = GET_MODE (ops[0]);
5534 imode = GET_MODE_INNER (mode);
5536 from = gen_reg_rtx (TImode);
5537 rot = spu_gen_subreg (TImode, ops[0]);
5539 emit_insn (gen_spu_convert (from, ops[1]));
5541 if (GET_CODE (ops[2]) == CONST_INT)
5543 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5544 if (GET_MODE_SIZE (imode) < 4)
5545 pos += 4 - GET_MODE_SIZE (imode);
5546 offset = GEN_INT (pos & 15);
5548 else
5550 offset = gen_reg_rtx (SImode);
5551 switch (mode)
5553 case V16QImode:
5554 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5555 break;
5556 case V8HImode:
5557 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5558 emit_insn (gen_addsi3 (offset, offset, offset));
5559 break;
5560 case V4SFmode:
5561 case V4SImode:
5562 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5563 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5564 break;
5565 case V2DImode:
5566 case V2DFmode:
5567 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5568 break;
5569 default:
5570 abort ();
5573 emit_insn (gen_rotqby_ti (rot, from, offset));
5576 void
5577 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5579 rtx shuf = gen_reg_rtx (V4SImode);
5580 rtx insn = gen_reg_rtx (V4SImode);
5581 rtx shufc;
5582 rtx insnc;
5583 rtx mem;
5585 fnaddr = force_reg (SImode, fnaddr);
5586 cxt = force_reg (SImode, cxt);
5588 if (TARGET_LARGE_MEM)
5590 rtx rotl = gen_reg_rtx (V4SImode);
5591 rtx mask = gen_reg_rtx (V4SImode);
5592 rtx bi = gen_reg_rtx (SImode);
5593 unsigned char shufa[16] = {
5594 2, 3, 0, 1, 18, 19, 16, 17,
5595 0, 1, 2, 3, 16, 17, 18, 19
5597 unsigned char insna[16] = {
5598 0x41, 0, 0, 79,
5599 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5600 0x60, 0x80, 0, 79,
5601 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5604 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5605 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5607 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5608 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5609 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5610 emit_insn (gen_selb (insn, insnc, rotl, mask));
5612 mem = memory_address (Pmode, tramp);
5613 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5615 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5616 mem = memory_address (Pmode, plus_constant (tramp, 16));
5617 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5619 else
5621 rtx scxt = gen_reg_rtx (SImode);
5622 rtx sfnaddr = gen_reg_rtx (SImode);
5623 unsigned char insna[16] = {
5624 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5625 0x30, 0, 0, 0,
5626 0, 0, 0, 0,
5627 0, 0, 0, 0
5630 shufc = gen_reg_rtx (TImode);
5631 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5633 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5634 fits 18 bits and the last 4 are zeros. This will be true if
5635 the stack pointer is initialized to 0x3fff0 at program start;
5636 otherwise the ila instruction will be garbage. */
5638 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5639 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5640 emit_insn (gen_cpat
5641 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5642 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5643 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5645 mem = memory_address (Pmode, tramp);
5646 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5649 emit_insn (gen_sync ());
5652 void
5653 spu_expand_sign_extend (rtx ops[])
5655 unsigned char arr[16];
5656 rtx pat = gen_reg_rtx (TImode);
5657 rtx sign, c;
5658 int i, last;
5659 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5660 if (GET_MODE (ops[1]) == QImode)
5662 sign = gen_reg_rtx (HImode);
5663 emit_insn (gen_extendqihi2 (sign, ops[1]));
5664 for (i = 0; i < 16; i++)
5665 arr[i] = 0x12;
5666 arr[last] = 0x13;
5668 else
5670 for (i = 0; i < 16; i++)
5671 arr[i] = 0x10;
5672 switch (GET_MODE (ops[1]))
5674 case HImode:
5675 sign = gen_reg_rtx (SImode);
5676 emit_insn (gen_extendhisi2 (sign, ops[1]));
5677 arr[last] = 0x03;
5678 arr[last - 1] = 0x02;
5679 break;
5680 case SImode:
5681 sign = gen_reg_rtx (SImode);
5682 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5683 for (i = 0; i < 4; i++)
5684 arr[last - i] = 3 - i;
5685 break;
5686 case DImode:
5687 sign = gen_reg_rtx (SImode);
5688 c = gen_reg_rtx (SImode);
5689 emit_insn (gen_spu_convert (c, ops[1]));
5690 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5691 for (i = 0; i < 8; i++)
5692 arr[last - i] = 7 - i;
5693 break;
5694 default:
5695 abort ();
5698 emit_move_insn (pat, array_to_constant (TImode, arr));
5699 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
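/* Illustrative note (not from the original sources): the pattern built
   above pairs the source value with a register full of sign bytes.
   For an SImode to DImode extension ARR becomes

     { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, 0x10,... }

   so bytes 0-3 of the result select byte 0 of SIGN (all ones or all
   zeros from the arithmetic shift) and bytes 4-7 select the original
   SImode value, leaving a sign-extended DImode in the preferred
   slot.  */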
5702 /* Expand vector initialization. If there are any constant parts,
5703 load the constant parts first, then load any non-constant parts. */
5704 void
5705 spu_expand_vector_init (rtx target, rtx vals)
5707 enum machine_mode mode = GET_MODE (target);
5708 int n_elts = GET_MODE_NUNITS (mode);
5709 int n_var = 0;
5710 bool all_same = true;
5711 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5712 int i;
5714 first = XVECEXP (vals, 0, 0);
5715 for (i = 0; i < n_elts; ++i)
5717 x = XVECEXP (vals, 0, i);
5718 if (!(CONST_INT_P (x)
5719 || GET_CODE (x) == CONST_DOUBLE
5720 || GET_CODE (x) == CONST_FIXED))
5721 ++n_var;
5722 else
5724 if (first_constant == NULL_RTX)
5725 first_constant = x;
5727 if (i > 0 && !rtx_equal_p (x, first))
5728 all_same = false;
5731 /* if all elements are the same, use splats to repeat elements */
5732 if (all_same)
5734 if (!CONSTANT_P (first)
5735 && !register_operand (first, GET_MODE (x)))
5736 first = force_reg (GET_MODE (first), first);
5737 emit_insn (gen_spu_splats (target, first));
5738 return;
5741 /* load constant parts */
5742 if (n_var != n_elts)
5744 if (n_var == 0)
5746 emit_move_insn (target,
5747 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5749 else
5751 rtx constant_parts_rtx = copy_rtx (vals);
5753 gcc_assert (first_constant != NULL_RTX);
5754 /* Fill empty slots with the first constant; this increases
5755 our chance of using splats in the recursive call below. */
5756 for (i = 0; i < n_elts; ++i)
5758 x = XVECEXP (constant_parts_rtx, 0, i);
5759 if (!(CONST_INT_P (x)
5760 || GET_CODE (x) == CONST_DOUBLE
5761 || GET_CODE (x) == CONST_FIXED))
5762 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5765 spu_expand_vector_init (target, constant_parts_rtx);
5769 /* load variable parts */
5770 if (n_var != 0)
5772 rtx insert_operands[4];
5774 insert_operands[0] = target;
5775 insert_operands[2] = target;
5776 for (i = 0; i < n_elts; ++i)
5778 x = XVECEXP (vals, 0, i);
5779 if (!(CONST_INT_P (x)
5780 || GET_CODE (x) == CONST_DOUBLE
5781 || GET_CODE (x) == CONST_FIXED))
5783 if (!register_operand (x, GET_MODE (x)))
5784 x = force_reg (GET_MODE (x), x);
5785 insert_operands[1] = x;
5786 insert_operands[3] = GEN_INT (i);
5787 spu_builtin_insert (insert_operands);
5793 /* Return the insn index of the vector compare instruction for the given
5794 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
5796 static int
5797 get_vec_cmp_insn (enum rtx_code code,
5798 enum machine_mode dest_mode,
5799 enum machine_mode op_mode)
5802 switch (code)
5804 case EQ:
5805 if (dest_mode == V16QImode && op_mode == V16QImode)
5806 return CODE_FOR_ceq_v16qi;
5807 if (dest_mode == V8HImode && op_mode == V8HImode)
5808 return CODE_FOR_ceq_v8hi;
5809 if (dest_mode == V4SImode && op_mode == V4SImode)
5810 return CODE_FOR_ceq_v4si;
5811 if (dest_mode == V4SImode && op_mode == V4SFmode)
5812 return CODE_FOR_ceq_v4sf;
5813 if (dest_mode == V2DImode && op_mode == V2DFmode)
5814 return CODE_FOR_ceq_v2df;
5815 break;
5816 case GT:
5817 if (dest_mode == V16QImode && op_mode == V16QImode)
5818 return CODE_FOR_cgt_v16qi;
5819 if (dest_mode == V8HImode && op_mode == V8HImode)
5820 return CODE_FOR_cgt_v8hi;
5821 if (dest_mode == V4SImode && op_mode == V4SImode)
5822 return CODE_FOR_cgt_v4si;
5823 if (dest_mode == V4SImode && op_mode == V4SFmode)
5824 return CODE_FOR_cgt_v4sf;
5825 if (dest_mode == V2DImode && op_mode == V2DFmode)
5826 return CODE_FOR_cgt_v2df;
5827 break;
5828 case GTU:
5829 if (dest_mode == V16QImode && op_mode == V16QImode)
5830 return CODE_FOR_clgt_v16qi;
5831 if (dest_mode == V8HImode && op_mode == V8HImode)
5832 return CODE_FOR_clgt_v8hi;
5833 if (dest_mode == V4SImode && op_mode == V4SImode)
5834 return CODE_FOR_clgt_v4si;
5835 break;
5836 default:
5837 break;
5839 return -1;
5842 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5843 DMODE is expected destination mode. This is a recursive function. */
5845 static rtx
5846 spu_emit_vector_compare (enum rtx_code rcode,
5847 rtx op0, rtx op1,
5848 enum machine_mode dmode)
5850 int vec_cmp_insn;
5851 rtx mask;
5852 enum machine_mode dest_mode;
5853 enum machine_mode op_mode = GET_MODE (op1);
5855 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5857 /* Single-precision vector compare instructions use a V4SImode destination.
5858 Double-precision vector compare instructions use a V2DImode destination.
5859 Move the result to the appropriate mode later. */
5860 if (dmode == V4SFmode)
5861 dest_mode = V4SImode;
5862 else if (dmode == V2DFmode)
5863 dest_mode = V2DImode;
5864 else
5865 dest_mode = dmode;
5867 mask = gen_reg_rtx (dest_mode);
5868 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5870 if (vec_cmp_insn == -1)
5872 bool swap_operands = false;
5873 bool try_again = false;
5874 switch (rcode)
5876 case LT:
5877 rcode = GT;
5878 swap_operands = true;
5879 try_again = true;
5880 break;
5881 case LTU:
5882 rcode = GTU;
5883 swap_operands = true;
5884 try_again = true;
5885 break;
5886 case NE:
5887 /* Treat A != B as ~(A==B). */
5889 enum insn_code nor_code;
5890 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5891 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5892 gcc_assert (nor_code != CODE_FOR_nothing);
5893 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5894 if (dmode != dest_mode)
5896 rtx temp = gen_reg_rtx (dest_mode);
5897 convert_move (temp, mask, 0);
5898 return temp;
5900 return mask;
5902 break;
5903 case GE:
5904 case GEU:
5905 case LE:
5906 case LEU:
5907 /* Try GT/GTU/LT/LTU OR EQ */
5909 rtx c_rtx, eq_rtx;
5910 enum insn_code ior_code;
5911 enum rtx_code new_code;
5913 switch (rcode)
5915 case GE: new_code = GT; break;
5916 case GEU: new_code = GTU; break;
5917 case LE: new_code = LT; break;
5918 case LEU: new_code = LTU; break;
5919 default:
5920 gcc_unreachable ();
5923 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5924 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5926 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5927 gcc_assert (ior_code != CODE_FOR_nothing);
5928 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5929 if (dmode != dest_mode)
5931 rtx temp = gen_reg_rtx (dest_mode);
5932 convert_move (temp, mask, 0);
5933 return temp;
5935 return mask;
5937 break;
5938 default:
5939 gcc_unreachable ();
5942 /* You only get two chances. */
5943 if (try_again)
5944 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5946 gcc_assert (vec_cmp_insn != -1);
5948 if (swap_operands)
5950 rtx tmp;
5951 tmp = op0;
5952 op0 = op1;
5953 op1 = tmp;
5957 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5958 if (dmode != dest_mode)
5960 rtx temp = gen_reg_rtx (dest_mode);
5961 convert_move (temp, mask, 0);
5962 return temp;
5964 return mask;
5968 /* Emit vector conditional expression.
5969 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5970 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5973 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5974 rtx cond, rtx cc_op0, rtx cc_op1)
5976 enum machine_mode dest_mode = GET_MODE (dest);
5977 enum rtx_code rcode = GET_CODE (cond);
5978 rtx mask;
5980 /* Get the vector mask for the given relational operation. */
5981 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5983 emit_insn(gen_selb (dest, op2, op1, mask));
5985 return 1;
5988 static rtx
5989 spu_force_reg (enum machine_mode mode, rtx op)
5991 rtx x, r;
5992 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5994 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5995 || GET_MODE (op) == BLKmode)
5996 return force_reg (mode, convert_to_mode (mode, op, 0));
5997 abort ();
6000 r = force_reg (GET_MODE (op), op);
6001 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6003 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6004 if (x)
6005 return x;
6008 x = gen_reg_rtx (mode);
6009 emit_insn (gen_spu_convert (x, r));
6010 return x;
6013 static void
6014 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6016 HOST_WIDE_INT v = 0;
6017 int lsbits;
6018 /* Check the range of immediate operands. */
6019 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6021 int range = p - SPU_BTI_7;
6023 if (!CONSTANT_P (op))
6024 error ("%s expects an integer literal in the range [%d, %d].",
6025 d->name,
6026 spu_builtin_range[range].low, spu_builtin_range[range].high);
6028 if (GET_CODE (op) == CONST
6029 && (GET_CODE (XEXP (op, 0)) == PLUS
6030 || GET_CODE (XEXP (op, 0)) == MINUS))
6032 v = INTVAL (XEXP (XEXP (op, 0), 1));
6033 op = XEXP (XEXP (op, 0), 0);
6035 else if (GET_CODE (op) == CONST_INT)
6036 v = INTVAL (op);
6037 else if (GET_CODE (op) == CONST_VECTOR
6038 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6039 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6041 /* The default for v is 0 which is valid in every range. */
6042 if (v < spu_builtin_range[range].low
6043 || v > spu_builtin_range[range].high)
6044 error ("%s expects an integer literal in the range [%d, %d]. ("
6045 HOST_WIDE_INT_PRINT_DEC ")",
6046 d->name,
6047 spu_builtin_range[range].low, spu_builtin_range[range].high,
6048 v);
6050 switch (p)
6052 case SPU_BTI_S10_4:
6053 lsbits = 4;
6054 break;
6055 case SPU_BTI_U16_2:
6056 /* This is only used in lqa and stqa. Even though the insns
6057 encode 16 bits of the address (all but the 2 least
6058 significant), only 14 bits are used because the address is
6059 masked to be 16-byte aligned. */
6060 lsbits = 4;
6061 break;
6062 case SPU_BTI_S16_2:
6063 /* This is used for lqr and stqr. */
6064 lsbits = 2;
6065 break;
6066 default:
6067 lsbits = 0;
6070 if (GET_CODE (op) == LABEL_REF
6071 || (GET_CODE (op) == SYMBOL_REF
6072 && SYMBOL_REF_FUNCTION_P (op))
6073 || (v & ((1 << lsbits) - 1)) != 0)
6074 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6075 d->name);
6080 static int
6081 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6082 rtx target, rtx ops[])
6084 enum insn_code icode = (enum insn_code) d->icode;
6085 int i = 0, a;
6087 /* Expand the arguments into rtl. */
6089 if (d->parm[0] != SPU_BTI_VOID)
6090 ops[i++] = target;
6092 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6094 tree arg = CALL_EXPR_ARG (exp, a);
6095 if (arg == 0)
6096 abort ();
6097 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6100 /* The insn pattern may have additional operands (SCRATCH).
6101 Return the number of actual non-SCRATCH operands. */
6102 gcc_assert (i <= insn_data[icode].n_operands);
6103 return i;
6106 static rtx
6107 spu_expand_builtin_1 (struct spu_builtin_description *d,
6108 tree exp, rtx target)
6110 rtx pat;
6111 rtx ops[8];
6112 enum insn_code icode = (enum insn_code) d->icode;
6113 enum machine_mode mode, tmode;
6114 int i, p;
6115 int n_operands;
6116 tree return_type;
6118 /* Set up ops[] with values from arglist. */
6119 n_operands = expand_builtin_args (d, exp, target, ops);
6121 /* Handle the target operand which must be operand 0. */
6122 i = 0;
6123 if (d->parm[0] != SPU_BTI_VOID)
6126 /* We prefer the mode specified for the match_operand; otherwise
6127 we use the mode from the builtin function prototype. */
6128 tmode = insn_data[d->icode].operand[0].mode;
6129 if (tmode == VOIDmode)
6130 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6132 /* Try to use target, because not using it can lead to extra copies,
6133 and when all of the registers are in use, extra copies lead
6134 to extra spills. */
6135 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6136 ops[0] = target;
6137 else
6138 target = ops[0] = gen_reg_rtx (tmode);
6140 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6141 abort ();
6143 i++;
6146 if (d->fcode == SPU_MASK_FOR_LOAD)
6148 enum machine_mode mode = insn_data[icode].operand[1].mode;
6149 tree arg;
6150 rtx addr, op, pat;
6152 /* get addr */
6153 arg = CALL_EXPR_ARG (exp, 0);
6154 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6155 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6156 addr = memory_address (mode, op);
6158 /* negate addr */
6159 op = gen_reg_rtx (GET_MODE (addr));
6160 emit_insn (gen_rtx_SET (VOIDmode, op,
6161 gen_rtx_NEG (GET_MODE (addr), addr)));
6162 op = gen_rtx_MEM (mode, op);
6164 pat = GEN_FCN (icode) (target, op);
6165 if (!pat)
6166 return 0;
6167 emit_insn (pat);
6168 return target;
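/* Rough sketch of the intended use, an assumption rather than anything
   stated in this file: for a misaligned vector load the vectorizer is
   expected to emit two aligned loads straddling the data plus a
   realignment step, roughly

       lo  = aligned_load (addr & -16);
       hi  = aligned_load ((addr + 16) & -16);
       val = realign (lo, hi, mask_for_load (addr));

   (all of these names are placeholders).  Negating ADDR above is what
   produces the shuffle-control pattern the realignment step consumes.  */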
6171 /* Ignore align_hint, but still expand its args in case they have
6172 side effects. */
6173 if (icode == CODE_FOR_spu_align_hint)
6174 return 0;
6176 /* Handle the rest of the operands. */
6177 for (p = 1; i < n_operands; i++, p++)
6179 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6180 mode = insn_data[d->icode].operand[i].mode;
6181 else
6182 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6184 /* mode can be VOIDmode here for labels */
6186 /* For specific intrinsics with an immediate operand, e.g.,
6187 si_ai(), we sometimes need to convert the scalar argument to a
6188 vector argument by splatting the scalar. */
6189 if (VECTOR_MODE_P (mode)
6190 && (GET_CODE (ops[i]) == CONST_INT
6191 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6192 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6194 if (GET_CODE (ops[i]) == CONST_INT)
6195 ops[i] = spu_const (mode, INTVAL (ops[i]));
6196 else
6198 rtx reg = gen_reg_rtx (mode);
6199 enum machine_mode imode = GET_MODE_INNER (mode);
6200 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6201 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6202 if (imode != GET_MODE (ops[i]))
6203 ops[i] = convert_to_mode (imode, ops[i],
6204 TYPE_UNSIGNED (spu_builtin_types
6205 [d->parm[i]]));
6206 emit_insn (gen_spu_splats (reg, ops[i]));
6207 ops[i] = reg;
6211 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6213 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6214 ops[i] = spu_force_reg (mode, ops[i]);
6217 switch (n_operands)
6219 case 0:
6220 pat = GEN_FCN (icode) (0);
6221 break;
6222 case 1:
6223 pat = GEN_FCN (icode) (ops[0]);
6224 break;
6225 case 2:
6226 pat = GEN_FCN (icode) (ops[0], ops[1]);
6227 break;
6228 case 3:
6229 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6230 break;
6231 case 4:
6232 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6233 break;
6234 case 5:
6235 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6236 break;
6237 case 6:
6238 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6239 break;
6240 default:
6241 abort ();
6244 if (!pat)
6245 abort ();
6247 if (d->type == B_CALL || d->type == B_BISLED)
6248 emit_call_insn (pat);
6249 else if (d->type == B_JUMP)
6251 emit_jump_insn (pat);
6252 emit_barrier ();
6254 else
6255 emit_insn (pat);
6257 return_type = spu_builtin_types[d->parm[0]];
6258 if (d->parm[0] != SPU_BTI_VOID
6259 && GET_MODE (target) != TYPE_MODE (return_type))
6261 /* target is the return value. It should always have the mode of
6262 the builtin function prototype. */
6263 target = spu_force_reg (TYPE_MODE (return_type), target);
6266 return target;
6270 spu_expand_builtin (tree exp,
6271 rtx target,
6272 rtx subtarget ATTRIBUTE_UNUSED,
6273 enum machine_mode mode ATTRIBUTE_UNUSED,
6274 int ignore ATTRIBUTE_UNUSED)
6276 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6277 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6278 struct spu_builtin_description *d;
6280 if (fcode < NUM_SPU_BUILTINS)
6282 d = &spu_builtins[fcode];
6284 return spu_expand_builtin_1 (d, exp, target);
6286 abort ();
6289 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6290 static tree
6291 spu_builtin_mul_widen_even (tree type)
6293 switch (TYPE_MODE (type))
6295 case V8HImode:
6296 if (TYPE_UNSIGNED (type))
6297 return spu_builtins[SPU_MULE_0].fndecl;
6298 else
6299 return spu_builtins[SPU_MULE_1].fndecl;
6300 break;
6301 default:
6302 return NULL_TREE;
6306 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6307 static tree
6308 spu_builtin_mul_widen_odd (tree type)
6310 switch (TYPE_MODE (type))
6312 case V8HImode:
6313 if (TYPE_UNSIGNED (type))
6314 return spu_builtins[SPU_MULO_1].fndecl;
6315 else
6316 return spu_builtins[SPU_MULO_0].fndecl;
6317 break;
6318 default:
6319 return NULL_TREE;
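/* Usage note, a sketch rather than something stated here: the
   vectorizer queries these two hooks when widening a V8HI multiply to
   V4SI products; the "even" builtin multiplies elements 0, 2, 4, 6 and
   the "odd" builtin elements 1, 3, 5, 7, and the vectorizer itself
   interleaves the two partial results back into order.  */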
6323 /* Implement targetm.vectorize.builtin_mask_for_load. */
6324 static tree
6325 spu_builtin_mask_for_load (void)
6327 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6328 gcc_assert (d);
6329 return d->fndecl;
6332 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6333 static int
6334 spu_builtin_vectorization_cost (bool runtime_test)
6336 /* If the branch of the runtime test is taken, i.e. the vectorized
6337 version is skipped, this incurs a misprediction cost (because the
6338 vectorized version is expected to be the fall-through). So we subtract
6339 the latency of a mispredicted branch from the costs that are incurred
6340 when the vectorized version is executed. */
6341 if (runtime_test)
6342 return -19;
6343 else
6344 return 0;
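/* Worked illustration, not from the original source: if the vector
   path would otherwise be costed at C when guarded by a runtime test,
   returning -19 makes it be costed at roughly C - 19, crediting back
   the mispredicted-branch latency described above.  */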
6347 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6348 after applying N iterations. This routine does not determine
6349 how many iterations are required to reach the desired alignment. */
6351 static bool
6352 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6354 if (is_packed)
6355 return false;
6357 /* All other types are naturally aligned. */
6358 return true;
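/* Illustrative example under the assumption of typical inputs: a field
   of a struct declared with __attribute__ ((packed)) arrives here with
   IS_PACKED set and is reported unreachable, whereas a plain int array
   element is naturally aligned, so peeling iterations can always reach
   the 16-byte boundary.  */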
6361 /* Implement targetm.vectorize.builtin_vec_perm. */
6362 tree
6363 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6365 struct spu_builtin_description *d;
6367 *mask_element_type = unsigned_char_type_node;
6369 switch (TYPE_MODE (type))
6371 case V16QImode:
6372 if (TYPE_UNSIGNED (type))
6373 d = &spu_builtins[SPU_SHUFFLE_0];
6374 else
6375 d = &spu_builtins[SPU_SHUFFLE_1];
6376 break;
6378 case V8HImode:
6379 if (TYPE_UNSIGNED (type))
6380 d = &spu_builtins[SPU_SHUFFLE_2];
6381 else
6382 d = &spu_builtins[SPU_SHUFFLE_3];
6383 break;
6385 case V4SImode:
6386 if (TYPE_UNSIGNED (type))
6387 d = &spu_builtins[SPU_SHUFFLE_4];
6388 else
6389 d = &spu_builtins[SPU_SHUFFLE_5];
6390 break;
6392 case V2DImode:
6393 if (TYPE_UNSIGNED (type))
6394 d = &spu_builtins[SPU_SHUFFLE_6];
6395 else
6396 d = &spu_builtins[SPU_SHUFFLE_7];
6397 break;
6399 case V4SFmode:
6400 d = &spu_builtins[SPU_SHUFFLE_8];
6401 break;
6403 case V2DFmode:
6404 d = &spu_builtins[SPU_SHUFFLE_9];
6405 break;
6407 default:
6408 return NULL_TREE;
6411 gcc_assert (d);
6412 return d->fndecl;
6415 /* Count the total number of instructions in each pipe and return the
6416 maximum, which is used as the Minimum Iteration Interval (MII)
6417 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6418 instructions for which it returns -2 can go in either pipe0 or pipe1. */
6419 static int
6420 spu_sms_res_mii (struct ddg *g)
6422 int i;
6423 unsigned t[4] = {0, 0, 0, 0};
6425 for (i = 0; i < g->num_nodes; i++)
6427 rtx insn = g->nodes[i].insn;
6428 int p = get_pipe (insn) + 2;
6430 assert (p >= 0);
6431 assert (p < 4);
6433 t[p]++;
6434 if (dump_file && INSN_P (insn))
6435 fprintf (dump_file, "i%d %s %d %d\n",
6436 INSN_UID (insn),
6437 insn_data[INSN_CODE (insn)].name,
6438 p, t[p]);
6440 if (dump_file)
6441 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6443 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
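/* Worked example of the formula above, not part of the original
   source: with t = {10, 6, 2, 4}, i.e. 10 insns that can use either
   pipe, 6 others, 2 pipe0 insns and 4 pipe1 insns, the result is
   MAX ((10 + 2 + 4 + 1) / 2, MAX (2, 4)) = MAX (8, 4) = 8.  */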
6447 void
6448 spu_init_expanders (void)
6450 if (cfun)
6452 rtx r0, r1;
6453 /* The hard frame pointer register is only 128-bit aligned when
6454 frame_pointer_needed is true. We don't know that until we're
6455 expanding the prologue. */
6456 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6458 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6459 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6460 to be treated as aligned, so generate them here. */
6461 r0 = gen_reg_rtx (SImode);
6462 r1 = gen_reg_rtx (SImode);
6463 mark_reg_pointer (r0, 128);
6464 mark_reg_pointer (r1, 128);
6465 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6466 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6470 static enum machine_mode
6471 spu_libgcc_cmp_return_mode (void)
6474 /* For SPU, word_mode is TImode, so it is better to use SImode
6475 for compare returns. */
6476 return SImode;
6479 static enum machine_mode
6480 spu_libgcc_shift_count_mode (void)
6482 /* For SPU, word_mode is TImode, so it is better to use SImode
6483 for shift counts. */
6484 return SImode;
6487 /* An early place to adjust some flags after GCC has finished
6488 processing them. */
6489 static void
6490 asm_file_start (void)
6492 /* Variable tracking should be run after all optimizations which
6493 change the order of insns. It also needs a valid CFG. */
6494 spu_flag_var_tracking = flag_var_tracking;
6495 flag_var_tracking = 0;
6497 default_file_start ();
6500 /* Implement targetm.section_type_flags. */
6501 static unsigned int
6502 spu_section_type_flags (tree decl, const char *name, int reloc)
6504 /* .toe needs to have type @nobits. */
6505 if (strcmp (name, ".toe") == 0)
6506 return SECTION_BSS;
6507 return default_section_type_flags (decl, name, reloc);
6510 /* Generate a constant or register which contains 2^SCALE. We assume
6511 the result is valid for MODE. Currently, MODE must be V4SFmode and
6512 SCALE must be SImode. */
6514 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6516 gcc_assert (mode == V4SFmode);
6517 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6518 if (GET_CODE (scale) != CONST_INT)
6520 /* unsigned int exp = (127 + scale) << 23;
6521 __vector float m = (__vector float) spu_splats (exp); */
6522 rtx reg = force_reg (SImode, scale);
6523 rtx exp = gen_reg_rtx (SImode);
6524 rtx mul = gen_reg_rtx (mode);
6525 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6526 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6527 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6528 return mul;
6530 else
6532 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6533 unsigned char arr[16];
6534 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6535 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6536 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6537 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6538 return array_to_constant (mode, arr);
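/* Worked example of the constant path above, not part of the original
   source: SCALE = 1 gives exp = 128, so each word is built from the
   bytes { exp >> 1, exp << 7, 0, 0 } = { 0x40, 0x00, 0x00, 0x00 },
   the big-endian IEEE single-precision pattern for 2.0f; SCALE = 0
   gives { 0x3f, 0x80, 0x00, 0x00 }, i.e. 1.0f.  */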
6542 /* After reload, just change the convert into a move instruction
6543 or a dead instruction. */
6544 void
6545 spu_split_convert (rtx ops[])
6547 if (REGNO (ops[0]) == REGNO (ops[1]))
6548 emit_note (NOTE_INSN_DELETED);
6549 else
6551 /* Always use TImode, as this might help hard register copy propagation. */
6552 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6553 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6554 emit_insn (gen_move_insn (op0, op1));
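/* Usage sketch, an assumption rather than something stated here: a
   spu_convert between, say, an SImode and a V4SImode view of the same
   value needs no code once both operands are in registers, so after
   reload it either disappears (same hard register) or becomes the
   plain TImode register copy emitted above.  */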
6558 #include "gt-spu.h"