gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "machmode.h"
52 #include "gimple.h"
53 #include "tm-constrs.h"
54 #include "ddg.h"
55 #include "sbitmap.h"
56 #include "timevar.h"
57 #include "df.h"
59 /* Builtin types, data and prototypes. */
enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
  SPU_BTI_MAX
};
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};
132 static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18 */
};
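/* Note: the _2 and _4 suffixes appear to mark immediate fields that are
   scaled by 4 and 16 respectively before use, which is why those ranges
   are wider than the corresponding unscaled forms.  */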
148 /* Target specific attribute specifications. */
149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
151 /* Prototypes and external defs. */
152 static void spu_init_builtins (void);
153 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
155 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
156 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157 static rtx get_pic_reg (void);
158 static int need_to_save_reg (int regno, int saving);
159 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163 static void emit_nop_for_insn (rtx insn);
164 static bool insn_clobbers_hbr (rtx insn);
165 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
166 int distance, sbitmap blocks);
167 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
169 static rtx get_branch_target (rtx branch);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174 static int get_pipe (rtx insn);
175 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
179 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185 static int spu_naked_function_p (tree func);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
188 static tree spu_build_builtin_va_list (void);
189 static void spu_va_start (tree, rtx);
190 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
192 static int store_with_one_insn_p (rtx mem);
193 static int mem_is_padded_component_ref (rtx x);
194 static int reg_aligned_for_addr (rtx x);
195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196 static void spu_asm_globalize_label (FILE * file, const char *name);
197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
198 int *total, bool speed);
199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type, const_tree fntype);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree, rtx, int);
204 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
205 static tree spu_builtin_mul_widen_even (tree);
206 static tree spu_builtin_mul_widen_odd (tree);
207 static tree spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree, bool);
210 static tree spu_builtin_vec_perm (tree, tree *);
211 static int spu_sms_res_mii (struct ddg *g);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree, const char *, int);
214 static rtx spu_expand_load (rtx, rtx, rtx, int);
216 extern const char *reg_names[];
218 /* Which instruction set architecture to use. */
219 int spu_arch;
220 /* Which cpu are we tuning for. */
221 int spu_tune;
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
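/* i.e. 8 insns * 4 bytes = 32 bytes, minus room for the 2 default nops
   (8 bytes), giving 24 bytes or 6 insns of lead distance.  */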
231 /* Determines whether we run variable tracking in machine dependent
232 reorganization. */
233 static int spu_flag_var_tracking;
235 enum spu_immediate {
236 SPU_NONE,
237 SPU_IL,
238 SPU_ILA,
239 SPU_ILH,
240 SPU_ILHU,
241 SPU_ORI,
242 SPU_ORHI,
243 SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,			/* constant pool */
249 IC_IL1, /* one il* instruction */
250 IC_IL2, /* both ilhu and iohl instructions */
251 IC_IL1s, /* one il* instruction */
252 IC_IL2s, /* both ilhu and iohl instructions */
253 IC_FSMBI, /* the fsmbi instruction */
254 IC_CPAT, /* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
258 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
259 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
260 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
261 static enum immediate_class classify_immediate (rtx op,
262 enum machine_mode mode);
264 static enum machine_mode spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
272 /* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
276 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
277 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
281 /* TARGET overrides. */
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS spu_init_builtins
286 #undef TARGET_EXPAND_BUILTIN
287 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
289 #undef TARGET_UNWIND_WORD_MODE
290 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
292 #undef TARGET_LEGITIMIZE_ADDRESS
293 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
295 /* The .8byte directive doesn't seem to work well for a 32 bit
296 architecture. */
297 #undef TARGET_ASM_UNALIGNED_DI_OP
298 #define TARGET_ASM_UNALIGNED_DI_OP NULL
300 #undef TARGET_RTX_COSTS
301 #define TARGET_RTX_COSTS spu_rtx_costs
303 #undef TARGET_ADDRESS_COST
304 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
306 #undef TARGET_SCHED_ISSUE_RATE
307 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
309 #undef TARGET_SCHED_INIT_GLOBAL
310 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
312 #undef TARGET_SCHED_INIT
313 #define TARGET_SCHED_INIT spu_sched_init
315 #undef TARGET_SCHED_VARIABLE_ISSUE
316 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
318 #undef TARGET_SCHED_REORDER
319 #define TARGET_SCHED_REORDER spu_sched_reorder
321 #undef TARGET_SCHED_REORDER2
322 #define TARGET_SCHED_REORDER2 spu_sched_reorder
324 #undef TARGET_SCHED_ADJUST_COST
325 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
327 #undef TARGET_ATTRIBUTE_TABLE
328 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
330 #undef TARGET_ASM_INTEGER
331 #define TARGET_ASM_INTEGER spu_assemble_integer
333 #undef TARGET_SCALAR_MODE_SUPPORTED_P
334 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
336 #undef TARGET_VECTOR_MODE_SUPPORTED_P
337 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
339 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
340 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
342 #undef TARGET_ASM_GLOBALIZE_LABEL
343 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
345 #undef TARGET_PASS_BY_REFERENCE
346 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
348 #undef TARGET_MUST_PASS_IN_STACK
349 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
351 #undef TARGET_BUILD_BUILTIN_VA_LIST
352 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
354 #undef TARGET_EXPAND_BUILTIN_VA_START
355 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
357 #undef TARGET_SETUP_INCOMING_VARARGS
358 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
360 #undef TARGET_MACHINE_DEPENDENT_REORG
361 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
366 #undef TARGET_DEFAULT_TARGET_FLAGS
367 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
369 #undef TARGET_INIT_LIBFUNCS
370 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
372 #undef TARGET_RETURN_IN_MEMORY
373 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
375 #undef TARGET_ENCODE_SECTION_INFO
376 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
378 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
379 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
381 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
382 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
384 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
385 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
387 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
388 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
390 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
391 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
393 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
394 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
396 #undef TARGET_LIBGCC_CMP_RETURN_MODE
397 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
399 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
400 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
402 #undef TARGET_SCHED_SMS_RES_MII
403 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
405 #undef TARGET_ASM_FILE_START
406 #define TARGET_ASM_FILE_START asm_file_start
408 #undef TARGET_SECTION_TYPE_FLAGS
409 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
411 #undef TARGET_LEGITIMATE_ADDRESS_P
412 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
414 struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default.  */
  flag_rename_registers = 1;
}
427 /* Sometimes certain combinations of command options do not make sense
428 on a particular target machine. You can define a macro
429 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
430 executed once just after all the command options have been parsed. */
431 void
432 spu_override_options (void)
434 /* Small loops will be unpeeled at -O3. For SPU it is more important
435 to keep code small by default. */
436 if (!flag_unroll_loops && !flag_peel_loops
437 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
438 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
440 flag_omit_frame_pointer = 1;
442 /* Functions must be 8 byte aligned so we correctly handle dual issue */
443 if (align_functions < 8)
444 align_functions = 8;
446 spu_hint_dist = 8*4 - spu_max_nops*4;
447 if (spu_hint_dist < 0)
448 spu_hint_dist = 0;
450 if (spu_fixed_range_string)
451 fix_range (spu_fixed_range_string);
453 /* Determine processor architectural level. */
454 if (spu_arch_string)
456 if (strcmp (&spu_arch_string[0], "cell") == 0)
457 spu_arch = PROCESSOR_CELL;
458 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
459 spu_arch = PROCESSOR_CELLEDP;
460 else
461 error ("Unknown architecture '%s'", &spu_arch_string[0]);
464 /* Determine processor to tune for. */
465 if (spu_tune_string)
467 if (strcmp (&spu_tune_string[0], "cell") == 0)
468 spu_tune = PROCESSOR_CELL;
469 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
470 spu_tune = PROCESSOR_CELLEDP;
471 else
472 error ("Unknown architecture '%s'", &spu_tune_string[0]);
475 /* Change defaults according to the processor architecture. */
476 if (spu_arch == PROCESSOR_CELLEDP)
478 /* If no command line option has been otherwise specified, change
479 the default to -mno-safe-hints on celledp -- only the original
480 Cell/B.E. processors require this workaround. */
481 if (!(target_flags_explicit & MASK_SAFE_HINTS))
482 target_flags &= ~MASK_SAFE_HINTS;
485 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
488 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
489 struct attribute_spec.handler. */
491 /* True if MODE is valid for the target. By "valid", we mean able to
492 be manipulated in non-trivial ways. In particular, this means all
493 the arithmetic is supported. */
494 static bool
495 spu_scalar_mode_supported_p (enum machine_mode mode)
497 switch (mode)
499 case QImode:
500 case HImode:
501 case SImode:
502 case SFmode:
503 case DImode:
504 case TImode:
505 case DFmode:
506 return true;
508 default:
509 return false;
513 /* Similarly for vector modes. "Supported" here is less strict. At
514 least some operations are supported; need to check optabs or builtins
515 for further details. */
516 static bool
517 spu_vector_mode_supported_p (enum machine_mode mode)
519 switch (mode)
521 case V16QImode:
522 case V8HImode:
523 case V4SImode:
524 case V2DImode:
525 case V4SFmode:
526 case V2DFmode:
527 return true;
529 default:
530 return false;
534 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
535 least significant bytes of the outer mode. This function returns
536 TRUE for the SUBREG's where this is correct. */
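/* For example, (subreg:SI (reg:QI)) is accepted below (both modes fit in
   4 bytes), while (subreg:DI (reg:SI)) is not.  */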
int
valid_subreg (rtx op)
{
540 enum machine_mode om = GET_MODE (op);
541 enum machine_mode im = GET_MODE (SUBREG_REG (op));
542 return om != VOIDmode && im != VOIDmode
543 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
544 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
550 static rtx
551 adjust_operand (rtx op, HOST_WIDE_INT * start)
553 enum machine_mode mode;
554 int op_size;
555 /* Strip any paradoxical SUBREG. */
556 if (GET_CODE (op) == SUBREG
557 && (GET_MODE_BITSIZE (GET_MODE (op))
558 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
560 if (start)
561 *start -=
562 GET_MODE_BITSIZE (GET_MODE (op)) -
563 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
564 op = SUBREG_REG (op);
  /* If it is smaller than SI, ensure a SUBREG.  */
567 op_size = GET_MODE_BITSIZE (GET_MODE (op));
568 if (op_size < 32)
570 if (start)
571 *start += 32 - op_size;
572 op_size = 32;
574 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
575 mode = mode_for_size (op_size, MODE_INT, 0);
576 if (mode != GET_MODE (op))
577 op = gen_rtx_SUBREG (mode, op, 0);
578 return op;
581 void
582 spu_expand_extv (rtx ops[], int unsignedp)
584 rtx dst = ops[0], src = ops[1];
585 HOST_WIDE_INT width = INTVAL (ops[2]);
586 HOST_WIDE_INT start = INTVAL (ops[3]);
587 HOST_WIDE_INT align_mask;
588 rtx s0, s1, mask, r0;
590 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
592 if (MEM_P (src))
594 /* First, determine if we need 1 TImode load or 2. We need only 1
595 if the bits being extracted do not cross the alignment boundary
596 as determined by the MEM and its address. */
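      /* e.g. with a 128-bit aligned MEM, start == 120 and width == 16
	 straddles a 128-bit boundary and requires the two-load path.  */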
598 align_mask = -MEM_ALIGN (src);
599 if ((start & align_mask) == ((start + width - 1) & align_mask))
601 /* Alignment is sufficient for 1 load. */
602 s0 = gen_reg_rtx (TImode);
603 r0 = spu_expand_load (s0, 0, src, start / 8);
604 start &= 7;
605 if (r0)
606 emit_insn (gen_rotqby_ti (s0, s0, r0));
608 else
610 /* Need 2 loads. */
611 s0 = gen_reg_rtx (TImode);
612 s1 = gen_reg_rtx (TImode);
613 r0 = spu_expand_load (s0, s1, src, start / 8);
614 start &= 7;
616 gcc_assert (start + width <= 128);
617 if (r0)
619 rtx r1 = gen_reg_rtx (SImode);
620 mask = gen_reg_rtx (TImode);
621 emit_move_insn (mask, GEN_INT (-1));
622 emit_insn (gen_rotqby_ti (s0, s0, r0));
623 emit_insn (gen_rotqby_ti (s1, s1, r0));
624 if (GET_CODE (r0) == CONST_INT)
625 r1 = GEN_INT (INTVAL (r0) & 15);
626 else
627 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
628 emit_insn (gen_shlqby_ti (mask, mask, r1));
629 emit_insn (gen_selb (s0, s1, s0, mask));
634 else if (GET_CODE (src) == SUBREG)
636 rtx r = SUBREG_REG (src);
637 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
638 s0 = gen_reg_rtx (TImode);
639 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
640 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
641 else
642 emit_move_insn (s0, src);
644 else
646 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
647 s0 = gen_reg_rtx (TImode);
648 emit_move_insn (s0, src);
651 /* Now s0 is TImode and contains the bits to extract at start. */
653 if (start)
654 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
656 if (128 - width)
658 tree c = build_int_cst (NULL_TREE, 128 - width);
659 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
662 emit_move_insn (dst, s0);
665 void
666 spu_expand_insv (rtx ops[])
668 HOST_WIDE_INT width = INTVAL (ops[1]);
669 HOST_WIDE_INT start = INTVAL (ops[2]);
670 HOST_WIDE_INT maskbits;
671 enum machine_mode dst_mode, src_mode;
672 rtx dst = ops[0], src = ops[3];
673 int dst_size, src_size;
674 rtx mask;
675 rtx shift_reg;
676 int shift;
679 if (GET_CODE (ops[0]) == MEM)
680 dst = gen_reg_rtx (TImode);
681 else
682 dst = adjust_operand (dst, &start);
683 dst_mode = GET_MODE (dst);
684 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
686 if (CONSTANT_P (src))
688 enum machine_mode m =
689 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
690 src = force_reg (m, convert_to_mode (m, src, 0));
692 src = adjust_operand (src, 0);
693 src_mode = GET_MODE (src);
694 src_size = GET_MODE_BITSIZE (GET_MODE (src));
696 mask = gen_reg_rtx (dst_mode);
697 shift_reg = gen_reg_rtx (dst_mode);
698 shift = dst_size - start - width;
700 /* It's not safe to use subreg here because the compiler assumes
701 that the SUBREG_REG is right justified in the SUBREG. */
702 convert_move (shift_reg, src, 1);
704 if (shift > 0)
706 switch (dst_mode)
708 case SImode:
709 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
710 break;
711 case DImode:
712 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
713 break;
714 case TImode:
715 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
716 break;
717 default:
718 abort ();
721 else if (shift < 0)
722 abort ();
724 switch (dst_size)
726 case 32:
727 maskbits = (-1ll << (32 - width - start));
728 if (start)
729 maskbits += (1ll << (32 - start));
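      /* e.g. start == 4, width == 8 gives maskbits == 0x0ff00000:
	 ones in the 8 bit positions starting 4 bits from the MSB.  */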
730 emit_move_insn (mask, GEN_INT (maskbits));
731 break;
732 case 64:
733 maskbits = (-1ll << (64 - width - start));
734 if (start)
735 maskbits += (1ll << (64 - start));
736 emit_move_insn (mask, GEN_INT (maskbits));
737 break;
738 case 128:
740 unsigned char arr[16];
741 int i = start / 8;
742 memset (arr, 0, sizeof (arr));
743 arr[i] = 0xff >> (start & 7);
744 for (i++; i <= (start + width - 1) / 8; i++)
745 arr[i] = 0xff;
746 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
747 emit_move_insn (mask, array_to_constant (TImode, arr));
749 break;
750 default:
751 abort ();
753 if (GET_CODE (ops[0]) == MEM)
755 rtx low = gen_reg_rtx (SImode);
756 rtx rotl = gen_reg_rtx (SImode);
757 rtx mask0 = gen_reg_rtx (TImode);
758 rtx addr;
759 rtx addr0;
760 rtx addr1;
761 rtx mem;
763 addr = force_reg (Pmode, XEXP (ops[0], 0));
764 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
765 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
766 emit_insn (gen_negsi2 (rotl, low));
767 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
768 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
769 mem = change_address (ops[0], TImode, addr0);
770 set_mem_alias_set (mem, 0);
771 emit_move_insn (dst, mem);
772 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
773 if (start + width > MEM_ALIGN (ops[0]))
775 rtx shl = gen_reg_rtx (SImode);
776 rtx mask1 = gen_reg_rtx (TImode);
777 rtx dst1 = gen_reg_rtx (TImode);
778 rtx mem1;
779 addr1 = plus_constant (addr, 16);
780 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
781 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
782 emit_insn (gen_shlqby_ti (mask1, mask, shl));
783 mem1 = change_address (ops[0], TImode, addr1);
784 set_mem_alias_set (mem1, 0);
785 emit_move_insn (dst1, mem1);
786 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
787 emit_move_insn (mem1, dst1);
789 emit_move_insn (mem, dst);
791 else
792 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
797 spu_expand_block_move (rtx ops[])
799 HOST_WIDE_INT bytes, align, offset;
800 rtx src, dst, sreg, dreg, target;
801 int i;
802 if (GET_CODE (ops[2]) != CONST_INT
803 || GET_CODE (ops[3]) != CONST_INT
804 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
805 return 0;
807 bytes = INTVAL (ops[2]);
808 align = INTVAL (ops[3]);
810 if (bytes <= 0)
811 return 1;
813 dst = ops[0];
814 src = ops[1];
816 if (align == 16)
818 for (offset = 0; offset + 16 <= bytes; offset += 16)
820 dst = adjust_address (ops[0], V16QImode, offset);
821 src = adjust_address (ops[1], V16QImode, offset);
822 emit_move_insn (dst, src);
824 if (offset < bytes)
826 rtx mask;
827 unsigned char arr[16] = { 0 };
828 for (i = 0; i < bytes - offset; i++)
829 arr[i] = 0xff;
830 dst = adjust_address (ops[0], V16QImode, offset);
831 src = adjust_address (ops[1], V16QImode, offset);
832 mask = gen_reg_rtx (V16QImode);
833 sreg = gen_reg_rtx (V16QImode);
834 dreg = gen_reg_rtx (V16QImode);
835 target = gen_reg_rtx (V16QImode);
836 emit_move_insn (mask, array_to_constant (V16QImode, arr));
837 emit_move_insn (dreg, dst);
838 emit_move_insn (sreg, src);
839 emit_insn (gen_selb (target, dreg, sreg, mask));
840 emit_move_insn (dst, target);
842 return 1;
844 return 0;
847 enum spu_comp_code
848 { SPU_EQ, SPU_GT, SPU_GTU };
850 int spu_comp_icode[12][3] = {
851 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
852 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
853 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
854 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
855 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
856 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
857 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
858 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
859 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
860 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
861 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE we can generate better code in most cases if we do it
   ourselves.  */
870 void
871 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
873 int reverse_compare = 0;
874 int reverse_test = 0;
875 rtx compare_result, eq_result;
876 rtx comp_rtx, eq_rtx;
877 enum machine_mode comp_mode;
878 enum machine_mode op_mode;
879 enum spu_comp_code scode, eq_code;
880 enum insn_code ior_code;
881 enum rtx_code code = GET_CODE (cmp);
882 rtx op0 = XEXP (cmp, 0);
883 rtx op1 = XEXP (cmp, 1);
884 int index;
885 int eq_test = 0;
887 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
888 and so on, to keep the constant in operand 1. */
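  /* e.g. (x >= 5) becomes (x > 4) and (x < 5) becomes (x <= 4),
     and likewise for the unsigned comparisons.  */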
889 if (GET_CODE (op1) == CONST_INT)
891 HOST_WIDE_INT val = INTVAL (op1) - 1;
892 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
893 switch (code)
895 case GE:
896 op1 = GEN_INT (val);
897 code = GT;
898 break;
899 case LT:
900 op1 = GEN_INT (val);
901 code = LE;
902 break;
903 case GEU:
904 op1 = GEN_INT (val);
905 code = GTU;
906 break;
907 case LTU:
908 op1 = GEN_INT (val);
909 code = LEU;
910 break;
911 default:
912 break;
916 comp_mode = SImode;
917 op_mode = GET_MODE (op0);
919 switch (code)
921 case GE:
922 scode = SPU_GT;
923 if (HONOR_NANS (op_mode))
925 reverse_compare = 0;
926 reverse_test = 0;
927 eq_test = 1;
928 eq_code = SPU_EQ;
930 else
932 reverse_compare = 1;
933 reverse_test = 1;
935 break;
936 case LE:
937 scode = SPU_GT;
938 if (HONOR_NANS (op_mode))
940 reverse_compare = 1;
941 reverse_test = 0;
942 eq_test = 1;
943 eq_code = SPU_EQ;
945 else
947 reverse_compare = 0;
948 reverse_test = 1;
950 break;
951 case LT:
952 reverse_compare = 1;
953 reverse_test = 0;
954 scode = SPU_GT;
955 break;
956 case GEU:
957 reverse_compare = 1;
958 reverse_test = 1;
959 scode = SPU_GTU;
960 break;
961 case LEU:
962 reverse_compare = 0;
963 reverse_test = 1;
964 scode = SPU_GTU;
965 break;
966 case LTU:
967 reverse_compare = 1;
968 reverse_test = 0;
969 scode = SPU_GTU;
970 break;
971 case NE:
972 reverse_compare = 0;
973 reverse_test = 1;
974 scode = SPU_EQ;
975 break;
977 case EQ:
978 scode = SPU_EQ;
979 break;
980 case GT:
981 scode = SPU_GT;
982 break;
983 case GTU:
984 scode = SPU_GTU;
985 break;
986 default:
987 scode = SPU_EQ;
988 break;
991 switch (op_mode)
993 case QImode:
994 index = 0;
995 comp_mode = QImode;
996 break;
997 case HImode:
998 index = 1;
999 comp_mode = HImode;
1000 break;
1001 case SImode:
1002 index = 2;
1003 break;
1004 case DImode:
1005 index = 3;
1006 break;
1007 case TImode:
1008 index = 4;
1009 break;
1010 case SFmode:
1011 index = 5;
1012 break;
1013 case DFmode:
1014 index = 6;
1015 break;
1016 case V16QImode:
1017 index = 7;
1018 comp_mode = op_mode;
1019 break;
1020 case V8HImode:
1021 index = 8;
1022 comp_mode = op_mode;
1023 break;
1024 case V4SImode:
1025 index = 9;
1026 comp_mode = op_mode;
1027 break;
1028 case V4SFmode:
1029 index = 10;
1030 comp_mode = V4SImode;
1031 break;
1032 case V2DFmode:
1033 index = 11;
1034 comp_mode = V2DImode;
1035 break;
1036 case V2DImode:
1037 default:
1038 abort ();
1041 if (GET_MODE (op1) == DFmode
1042 && (scode != SPU_GT && scode != SPU_EQ))
1043 abort ();
1045 if (is_set == 0 && op1 == const0_rtx
1046 && (GET_MODE (op0) == SImode
1047 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1049 /* Don't need to set a register with the result when we are
1050 comparing against zero and branching. */
1051 reverse_test = !reverse_test;
1052 compare_result = op0;
1054 else
1056 compare_result = gen_reg_rtx (comp_mode);
1058 if (reverse_compare)
1060 rtx t = op1;
1061 op1 = op0;
1062 op0 = t;
1065 if (spu_comp_icode[index][scode] == 0)
1066 abort ();
1068 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1069 (op0, op_mode))
1070 op0 = force_reg (op_mode, op0);
1071 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1072 (op1, op_mode))
1073 op1 = force_reg (op_mode, op1);
1074 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1075 op0, op1);
1076 if (comp_rtx == 0)
1077 abort ();
1078 emit_insn (comp_rtx);
1080 if (eq_test)
1082 eq_result = gen_reg_rtx (comp_mode);
1083 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1084 op0, op1);
1085 if (eq_rtx == 0)
1086 abort ();
1087 emit_insn (eq_rtx);
1088 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1089 gcc_assert (ior_code != CODE_FOR_nothing);
1090 emit_insn (GEN_FCN (ior_code)
1091 (compare_result, compare_result, eq_result));
1095 if (is_set == 0)
1097 rtx bcomp;
1098 rtx loc_ref;
1100 /* We don't have branch on QI compare insns, so we convert the
1101 QI compare result to a HI result. */
1102 if (comp_mode == QImode)
1104 rtx old_res = compare_result;
1105 compare_result = gen_reg_rtx (HImode);
1106 comp_mode = HImode;
1107 emit_insn (gen_extendqihi2 (compare_result, old_res));
1110 if (reverse_test)
1111 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1112 else
1113 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1115 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1116 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1117 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1118 loc_ref, pc_rtx)));
1120 else if (is_set == 2)
1122 rtx target = operands[0];
1123 int compare_size = GET_MODE_BITSIZE (comp_mode);
1124 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1125 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1126 rtx select_mask;
1127 rtx op_t = operands[2];
1128 rtx op_f = operands[3];
1130 /* The result of the comparison can be SI, HI or QI mode. Create a
1131 mask based on that result. */
1132 if (target_size > compare_size)
1134 select_mask = gen_reg_rtx (mode);
1135 emit_insn (gen_extend_compare (select_mask, compare_result));
1137 else if (target_size < compare_size)
1138 select_mask =
1139 gen_rtx_SUBREG (mode, compare_result,
1140 (compare_size - target_size) / BITS_PER_UNIT);
1141 else if (comp_mode != mode)
1142 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1143 else
1144 select_mask = compare_result;
1146 if (GET_MODE (target) != GET_MODE (op_t)
1147 || GET_MODE (target) != GET_MODE (op_f))
1148 abort ();
1150 if (reverse_test)
1151 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1152 else
1153 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1155 else
1157 rtx target = operands[0];
1158 if (reverse_test)
1159 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1160 gen_rtx_NOT (comp_mode, compare_result)));
1161 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1162 emit_insn (gen_extendhisi2 (target, compare_result));
1163 else if (GET_MODE (target) == SImode
1164 && GET_MODE (compare_result) == QImode)
1165 emit_insn (gen_extend_compare (target, compare_result));
1166 else
1167 emit_move_insn (target, compare_result);
1171 HOST_WIDE_INT
1172 const_double_to_hwint (rtx x)
1174 HOST_WIDE_INT val;
1175 REAL_VALUE_TYPE rv;
1176 if (GET_MODE (x) == SFmode)
1178 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1179 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1181 else if (GET_MODE (x) == DFmode)
1183 long l[2];
1184 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1185 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1186 val = l[0];
1187 val = (val << 32) | (l[1] & 0xffffffff);
1189 else
1190 abort ();
1191 return val;
1195 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1197 long tv[2];
1198 REAL_VALUE_TYPE rv;
1199 gcc_assert (mode == SFmode || mode == DFmode);
1201 if (mode == SFmode)
1202 tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
1208 real_from_target (&rv, tv, mode);
1209 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1212 void
1213 print_operand_address (FILE * file, register rtx addr)
1215 rtx reg;
1216 rtx offset;
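  /* Quadword loads and stores ignore the low 4 bits of the address, so
     an explicit (and ... -16) around an address adds no information;
     print the underlying address instead.  */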
1218 if (GET_CODE (addr) == AND
1219 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1220 && INTVAL (XEXP (addr, 1)) == -16)
1221 addr = XEXP (addr, 0);
1223 switch (GET_CODE (addr))
1225 case REG:
1226 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1227 break;
1229 case PLUS:
1230 reg = XEXP (addr, 0);
1231 offset = XEXP (addr, 1);
1232 if (GET_CODE (offset) == REG)
1234 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1235 reg_names[REGNO (offset)]);
1237 else if (GET_CODE (offset) == CONST_INT)
1239 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1240 INTVAL (offset), reg_names[REGNO (reg)]);
1242 else
1243 abort ();
1244 break;
1246 case CONST:
1247 case LABEL_REF:
1248 case SYMBOL_REF:
1249 case CONST_INT:
1250 output_addr_const (file, addr);
1251 break;
1253 default:
1254 debug_rtx (addr);
1255 abort ();
1259 void
1260 print_operand (FILE * file, rtx x, int code)
1262 enum machine_mode mode = GET_MODE (x);
1263 HOST_WIDE_INT val;
1264 unsigned char arr[16];
1265 int xcode = GET_CODE (x);
1266 int i, info;
1267 if (GET_MODE (x) == VOIDmode)
1268 switch (code)
1270 case 'L': /* 128 bits, signed */
1271 case 'm': /* 128 bits, signed */
1272 case 'T': /* 128 bits, signed */
1273 case 't': /* 128 bits, signed */
1274 mode = TImode;
1275 break;
1276 case 'K': /* 64 bits, signed */
1277 case 'k': /* 64 bits, signed */
1278 case 'D': /* 64 bits, signed */
1279 case 'd': /* 64 bits, signed */
1280 mode = DImode;
1281 break;
1282 case 'J': /* 32 bits, signed */
1283 case 'j': /* 32 bits, signed */
1284 case 's': /* 32 bits, signed */
1285 case 'S': /* 32 bits, signed */
1286 mode = SImode;
1287 break;
1289 switch (code)
1292 case 'j': /* 32 bits, signed */
1293 case 'k': /* 64 bits, signed */
1294 case 'm': /* 128 bits, signed */
1295 if (xcode == CONST_INT
1296 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1298 gcc_assert (logical_immediate_p (x, mode));
1299 constant_to_array (mode, x, arr);
1300 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1301 val = trunc_int_for_mode (val, SImode);
1302 switch (which_logical_immediate (val))
1304 case SPU_ORI:
1305 break;
1306 case SPU_ORHI:
1307 fprintf (file, "h");
1308 break;
1309 case SPU_ORBI:
1310 fprintf (file, "b");
1311 break;
1312 default:
1313 gcc_unreachable();
1316 else
1317 gcc_unreachable();
1318 return;
1320 case 'J': /* 32 bits, signed */
1321 case 'K': /* 64 bits, signed */
1322 case 'L': /* 128 bits, signed */
1323 if (xcode == CONST_INT
1324 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1326 gcc_assert (logical_immediate_p (x, mode)
1327 || iohl_immediate_p (x, mode));
1328 constant_to_array (mode, x, arr);
1329 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1330 val = trunc_int_for_mode (val, SImode);
1331 switch (which_logical_immediate (val))
1333 case SPU_ORI:
1334 case SPU_IOHL:
1335 break;
1336 case SPU_ORHI:
1337 val = trunc_int_for_mode (val, HImode);
1338 break;
1339 case SPU_ORBI:
1340 val = trunc_int_for_mode (val, QImode);
1341 break;
1342 default:
1343 gcc_unreachable();
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1347 else
1348 gcc_unreachable();
1349 return;
1351 case 't': /* 128 bits, signed */
1352 case 'd': /* 64 bits, signed */
1353 case 's': /* 32 bits, signed */
1354 if (CONSTANT_P (x))
1356 enum immediate_class c = classify_immediate (x, mode);
1357 switch (c)
1359 case IC_IL1:
1360 constant_to_array (mode, x, arr);
1361 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1362 val = trunc_int_for_mode (val, SImode);
1363 switch (which_immediate_load (val))
1365 case SPU_IL:
1366 break;
1367 case SPU_ILA:
1368 fprintf (file, "a");
1369 break;
1370 case SPU_ILH:
1371 fprintf (file, "h");
1372 break;
1373 case SPU_ILHU:
1374 fprintf (file, "hu");
1375 break;
1376 default:
1377 gcc_unreachable ();
1379 break;
1380 case IC_CPAT:
1381 constant_to_array (mode, x, arr);
1382 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1383 if (info == 1)
1384 fprintf (file, "b");
1385 else if (info == 2)
1386 fprintf (file, "h");
1387 else if (info == 4)
1388 fprintf (file, "w");
1389 else if (info == 8)
1390 fprintf (file, "d");
1391 break;
1392 case IC_IL1s:
1393 if (xcode == CONST_VECTOR)
1395 x = CONST_VECTOR_ELT (x, 0);
1396 xcode = GET_CODE (x);
1398 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1399 fprintf (file, "a");
1400 else if (xcode == HIGH)
1401 fprintf (file, "hu");
1402 break;
1403 case IC_FSMBI:
1404 case IC_FSMBI2:
1405 case IC_IL2:
1406 case IC_IL2s:
1407 case IC_POOL:
1408 abort ();
1411 else
1412 gcc_unreachable ();
1413 return;
1415 case 'T': /* 128 bits, signed */
1416 case 'D': /* 64 bits, signed */
1417 case 'S': /* 32 bits, signed */
1418 if (CONSTANT_P (x))
1420 enum immediate_class c = classify_immediate (x, mode);
1421 switch (c)
1423 case IC_IL1:
1424 constant_to_array (mode, x, arr);
1425 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1426 val = trunc_int_for_mode (val, SImode);
1427 switch (which_immediate_load (val))
1429 case SPU_IL:
1430 case SPU_ILA:
1431 break;
1432 case SPU_ILH:
1433 case SPU_ILHU:
1434 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1435 break;
1436 default:
1437 gcc_unreachable ();
1439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1440 break;
1441 case IC_FSMBI:
1442 constant_to_array (mode, x, arr);
1443 val = 0;
1444 for (i = 0; i < 16; i++)
1446 val <<= 1;
1447 val |= arr[i] & 1;
1449 print_operand (file, GEN_INT (val), 0);
1450 break;
1451 case IC_CPAT:
1452 constant_to_array (mode, x, arr);
1453 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1454 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1455 break;
1456 case IC_IL1s:
1457 if (xcode == HIGH)
1458 x = XEXP (x, 0);
1459 if (GET_CODE (x) == CONST_VECTOR)
1460 x = CONST_VECTOR_ELT (x, 0);
1461 output_addr_const (file, x);
1462 if (xcode == HIGH)
1463 fprintf (file, "@h");
1464 break;
1465 case IC_IL2:
1466 case IC_IL2s:
1467 case IC_FSMBI2:
1468 case IC_POOL:
1469 abort ();
1472 else
1473 gcc_unreachable ();
1474 return;
1476 case 'C':
1477 if (xcode == CONST_INT)
	/* Only 4 least significant bits are relevant for generating
	   control word instructions.  */
1481 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1482 return;
1484 break;
1486 case 'M': /* print code for c*d */
1487 if (GET_CODE (x) == CONST_INT)
1488 switch (INTVAL (x))
1490 case 1:
1491 fprintf (file, "b");
1492 break;
1493 case 2:
1494 fprintf (file, "h");
1495 break;
1496 case 4:
1497 fprintf (file, "w");
1498 break;
1499 case 8:
1500 fprintf (file, "d");
1501 break;
1502 default:
1503 gcc_unreachable();
1505 else
1506 gcc_unreachable();
1507 return;
1509 case 'N': /* Negate the operand */
1510 if (xcode == CONST_INT)
1511 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1512 else if (xcode == CONST_VECTOR)
1513 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1514 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1515 return;
1517 case 'I': /* enable/disable interrupts */
1518 if (xcode == CONST_INT)
1519 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1520 return;
1522 case 'b': /* branch modifiers */
1523 if (xcode == REG)
1524 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1525 else if (COMPARISON_P (x))
1526 fprintf (file, "%s", xcode == NE ? "n" : "");
1527 return;
1529 case 'i': /* indirect call */
1530 if (xcode == MEM)
1532 if (GET_CODE (XEXP (x, 0)) == REG)
1533 /* Used in indirect function calls. */
1534 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1535 else
1536 output_address (XEXP (x, 0));
1538 return;
1540 case 'p': /* load/store */
1541 if (xcode == MEM)
1543 x = XEXP (x, 0);
1544 xcode = GET_CODE (x);
1546 if (xcode == AND)
1548 x = XEXP (x, 0);
1549 xcode = GET_CODE (x);
1551 if (xcode == REG)
1552 fprintf (file, "d");
1553 else if (xcode == CONST_INT)
1554 fprintf (file, "a");
1555 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1556 fprintf (file, "r");
1557 else if (xcode == PLUS || xcode == LO_SUM)
1559 if (GET_CODE (XEXP (x, 1)) == REG)
1560 fprintf (file, "x");
1561 else
1562 fprintf (file, "d");
1564 return;
1566 case 'e':
1567 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1568 val &= 0x7;
1569 output_addr_const (file, GEN_INT (val));
1570 return;
1572 case 'f':
1573 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1574 val &= 0x1f;
1575 output_addr_const (file, GEN_INT (val));
1576 return;
1578 case 'g':
1579 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1580 val &= 0x3f;
1581 output_addr_const (file, GEN_INT (val));
1582 return;
1584 case 'h':
1585 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1586 val = (val >> 3) & 0x1f;
1587 output_addr_const (file, GEN_INT (val));
1588 return;
1590 case 'E':
1591 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1592 val = -val;
1593 val &= 0x7;
1594 output_addr_const (file, GEN_INT (val));
1595 return;
1597 case 'F':
1598 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1599 val = -val;
1600 val &= 0x1f;
1601 output_addr_const (file, GEN_INT (val));
1602 return;
1604 case 'G':
1605 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1606 val = -val;
1607 val &= 0x3f;
1608 output_addr_const (file, GEN_INT (val));
1609 return;
1611 case 'H':
1612 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1613 val = -(val & -8ll);
1614 val = (val >> 3) & 0x1f;
1615 output_addr_const (file, GEN_INT (val));
1616 return;
1618 case 'v':
1619 case 'w':
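      /* Print the unbiased single-precision exponent of the constant
	 ('w' prints its negation); bytes 0-1 hold the sign and biased
	 exponent, and 127 is the IEEE bias.  */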
1620 constant_to_array (mode, x, arr);
1621 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1622 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1623 return;
1625 case 0:
1626 if (xcode == REG)
1627 fprintf (file, "%s", reg_names[REGNO (x)]);
1628 else if (xcode == MEM)
1629 output_address (XEXP (x, 0));
1630 else if (xcode == CONST_VECTOR)
1631 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1632 else
1633 output_addr_const (file, x);
1634 return;
1636 /* unused letters
1637 o qr u yz
1638 AB OPQR UVWXYZ */
1639 default:
1640 output_operand_lossage ("invalid %%xn code");
1642 gcc_unreachable ();
1645 extern char call_used_regs[];
1647 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1648 caller saved register. For leaf functions it is more efficient to
1649 use a volatile register because we won't need to save and restore the
1650 pic register. This routine is only valid after register allocation
1651 is completed, so we can pick an unused register. */
1652 static rtx
1653 get_pic_reg (void)
1655 rtx pic_reg = pic_offset_table_rtx;
1656 if (!reload_completed && !reload_in_progress)
1657 abort ();
1658 return pic_reg;
1661 /* Split constant addresses to handle cases that are too large.
1662 Add in the pic register when in PIC mode.
1663 Split immediates that require more than 1 instruction. */
1665 spu_split_immediate (rtx * ops)
1667 enum machine_mode mode = GET_MODE (ops[0]);
1668 enum immediate_class c = classify_immediate (ops[1], mode);
1670 switch (c)
1672 case IC_IL2:
1674 unsigned char arrhi[16];
1675 unsigned char arrlo[16];
1676 rtx to, temp, hi, lo;
1677 int i;
1678 enum machine_mode imode = mode;
1679 /* We need to do reals as ints because the constant used in the
1680 IOR might not be a legitimate real constant. */
1681 imode = int_mode_for_mode (mode);
1682 constant_to_array (mode, ops[1], arrhi);
1683 if (imode != mode)
1684 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1685 else
1686 to = ops[0];
1687 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
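	/* Split each 32-bit word of the constant: arrhi keeps the high
	   halfwords (loaded with ilhu below), arrlo the low halfwords
	   (merged in with the IOR, i.e. iohl).  */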
1688 for (i = 0; i < 16; i += 4)
1690 arrlo[i + 2] = arrhi[i + 2];
1691 arrlo[i + 3] = arrhi[i + 3];
1692 arrlo[i + 0] = arrlo[i + 1] = 0;
1693 arrhi[i + 2] = arrhi[i + 3] = 0;
1695 hi = array_to_constant (imode, arrhi);
1696 lo = array_to_constant (imode, arrlo);
1697 emit_move_insn (temp, hi);
1698 emit_insn (gen_rtx_SET
1699 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1700 return 1;
1702 case IC_FSMBI2:
1704 unsigned char arr_fsmbi[16];
1705 unsigned char arr_andbi[16];
1706 rtx to, reg_fsmbi, reg_and;
1707 int i;
1708 enum machine_mode imode = mode;
1709 /* We need to do reals as ints because the constant used in the
1710 * AND might not be a legitimate real constant. */
1711 imode = int_mode_for_mode (mode);
1712 constant_to_array (mode, ops[1], arr_fsmbi);
1713 if (imode != mode)
1714 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1715 else
1716 to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
1723 for (i = 1; i < 16; i++)
1724 arr_andbi[i] = arr_andbi[0];
1725 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1726 reg_and = array_to_constant (imode, arr_andbi);
1727 emit_move_insn (to, reg_fsmbi);
1728 emit_insn (gen_rtx_SET
1729 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1730 return 1;
1732 case IC_POOL:
1733 if (reload_in_progress || reload_completed)
1735 rtx mem = force_const_mem (mode, ops[1]);
1736 if (TARGET_LARGE_MEM)
1738 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1739 emit_move_insn (addr, XEXP (mem, 0));
1740 mem = replace_equiv_address (mem, addr);
1742 emit_move_insn (ops[0], mem);
1743 return 1;
1745 break;
1746 case IC_IL1s:
1747 case IC_IL2s:
1748 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1750 if (c == IC_IL2s)
1752 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1753 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1755 else if (flag_pic)
1756 emit_insn (gen_pic (ops[0], ops[1]));
1757 if (flag_pic)
1759 rtx pic_reg = get_pic_reg ();
1760 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1761 crtl->uses_pic_offset_table = 1;
1763 return flag_pic || c == IC_IL2s;
1765 break;
1766 case IC_IL1:
1767 case IC_FSMBI:
1768 case IC_CPAT:
1769 break;
1771 return 0;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
1780 static int
1781 need_to_save_reg (int regno, int saving)
1783 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1784 return 1;
1785 if (flag_pic
1786 && regno == PIC_OFFSET_TABLE_REGNUM
1787 && (!saving || crtl->uses_pic_offset_table)
1788 && (!saving
1789 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1790 return 1;
1791 return 0;
1794 /* This function is only correct starting with local register
1795 allocation */
1797 spu_saved_regs_size (void)
1799 int reg_save_size = 0;
1800 int regno;
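  /* Registers are 128 bits wide; each saved register occupies a full
     16-byte quadword slot.  */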
1802 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1803 if (need_to_save_reg (regno, 0))
1804 reg_save_size += 0x10;
1805 return reg_save_size;
1808 static rtx
1809 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1811 rtx reg = gen_rtx_REG (V4SImode, regno);
1812 rtx mem =
1813 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1814 return emit_insn (gen_movv4si (mem, reg));
1817 static rtx
1818 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1820 rtx reg = gen_rtx_REG (V4SImode, regno);
1821 rtx mem =
1822 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1823 return emit_insn (gen_movv4si (reg, mem));
1826 /* This happens after reload, so we need to expand it. */
1827 static rtx
1828 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1830 rtx insn;
1831 if (satisfies_constraint_K (GEN_INT (imm)))
1833 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1835 else
1837 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1838 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1839 if (REGNO (src) == REGNO (scratch))
1840 abort ();
1842 return insn;
1845 /* Return nonzero if this function is known to have a null epilogue. */
1848 direct_return (void)
1850 if (reload_completed)
1852 if (cfun->static_chain_decl == 0
1853 && (spu_saved_regs_size ()
1854 + get_frame_size ()
1855 + crtl->outgoing_args_size
1856 + crtl->args.pretend_args_size == 0)
1857 && current_function_is_leaf)
1858 return 1;
1860 return 0;
/*
   The stack frame looks like this:

         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |    frame    |
         +-------------+
         |  back chain |
   SP -> +-------------+

*/
1893 void
1894 spu_expand_prologue (void)
1896 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1897 HOST_WIDE_INT total_size;
1898 HOST_WIDE_INT saved_regs_size;
1899 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1900 rtx scratch_reg_0, scratch_reg_1;
1901 rtx insn, real;
1903 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1904 the "toplevel" insn chain. */
1905 emit_note (NOTE_INSN_DELETED);
1907 if (flag_pic && optimize == 0)
1908 crtl->uses_pic_offset_table = 1;
1910 if (spu_naked_function_p (current_function_decl))
1911 return;
1913 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1914 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1916 saved_regs_size = spu_saved_regs_size ();
1917 total_size = size + saved_regs_size
1918 + crtl->outgoing_args_size
1919 + crtl->args.pretend_args_size;
1921 if (!current_function_is_leaf
1922 || cfun->calls_alloca || total_size > 0)
1923 total_size += STACK_POINTER_OFFSET;
1925 /* Save this first because code after this might use the link
1926 register as a scratch register. */
1927 if (!current_function_is_leaf)
1929 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1930 RTX_FRAME_RELATED_P (insn) = 1;
1933 if (total_size > 0)
1935 offset = -crtl->args.pretend_args_size;
1936 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1937 if (need_to_save_reg (regno, 1))
1939 offset -= 16;
1940 insn = frame_emit_store (regno, sp_reg, offset);
1941 RTX_FRAME_RELATED_P (insn) = 1;
1945 if (flag_pic && crtl->uses_pic_offset_table)
1947 rtx pic_reg = get_pic_reg ();
1948 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1949 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1952 if (total_size > 0)
1954 if (flag_stack_check)
1956 /* We compare against total_size-1 because
1957 ($sp >= total_size) <=> ($sp > total_size-1) */
1958 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1959 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1960 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1961 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1963 emit_move_insn (scratch_v4si, size_v4si);
1964 size_v4si = scratch_v4si;
1966 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1967 emit_insn (gen_vec_extractv4si
1968 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
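	  /* Element 1 of $sp is assumed here to hold the number of bytes
	     of available stack space (SPU ABI convention); heq halts if
	     it does not exceed total_size - 1.  */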
1969 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1972 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1973 the value of the previous $sp because we save it as the back
1974 chain. */
1975 if (total_size <= 2000)
1977 /* In this case we save the back chain first. */
1978 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1979 insn =
1980 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1982 else
1984 insn = emit_move_insn (scratch_reg_0, sp_reg);
1985 insn =
1986 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1988 RTX_FRAME_RELATED_P (insn) = 1;
1989 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1990 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1992 if (total_size > 2000)
1994 /* Save the back chain ptr */
1995 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1998 if (frame_pointer_needed)
2000 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2001 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2002 + crtl->outgoing_args_size;
2003 /* Set the new frame_pointer */
2004 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2005 RTX_FRAME_RELATED_P (insn) = 1;
2006 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2007 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2008 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2012 emit_note (NOTE_INSN_DELETED);
2015 void
2016 spu_expand_epilogue (bool sibcall_p)
2018 int size = get_frame_size (), offset, regno;
2019 HOST_WIDE_INT saved_regs_size, total_size;
2020 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2021 rtx jump, scratch_reg_0;
2023 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2024 the "toplevel" insn chain. */
2025 emit_note (NOTE_INSN_DELETED);
2027 if (spu_naked_function_p (current_function_decl))
2028 return;
2030 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2032 saved_regs_size = spu_saved_regs_size ();
2033 total_size = size + saved_regs_size
2034 + crtl->outgoing_args_size
2035 + crtl->args.pretend_args_size;
2037 if (!current_function_is_leaf
2038 || cfun->calls_alloca || total_size > 0)
2039 total_size += STACK_POINTER_OFFSET;
2041 if (total_size > 0)
2043 if (cfun->calls_alloca)
2044 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2045 else
2046 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2049 if (saved_regs_size > 0)
2051 offset = -crtl->args.pretend_args_size;
2052 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2053 if (need_to_save_reg (regno, 1))
2055 offset -= 0x10;
2056 frame_emit_load (regno, sp_reg, offset);
2061 if (!current_function_is_leaf)
2062 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2064 if (!sibcall_p)
2066 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2067 jump = emit_jump_insn (gen__return ());
2068 emit_barrier_after (jump);
2071 emit_note (NOTE_INSN_DELETED);
2075 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2077 if (count != 0)
2078 return 0;
2079 /* This is inefficient because it ends up copying to a save-register
2080 which then gets saved even though $lr has already been saved. But
2081 it does generate better code for leaf functions and we don't need
2082 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2083 used for __builtin_return_address anyway, so maybe we don't care if
2084 it's inefficient. */
2085 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2089 /* Given VAL, generate a constant appropriate for MODE.
2090 If MODE is a vector mode, every element will be VAL.
2091 For TImode, VAL will be zero extended to 128 bits. */
2093 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2095 rtx inner;
2096 rtvec v;
2097 int units, i;
2099 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2100 || GET_MODE_CLASS (mode) == MODE_FLOAT
2101 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2102 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2104 if (GET_MODE_CLASS (mode) == MODE_INT)
2105 return immed_double_const (val, 0, mode);
2107 /* val is the bit representation of the float */
2108 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2109 return hwint_to_const_double (mode, val);
2111 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2112 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2113 else
2114 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2116 units = GET_MODE_NUNITS (mode);
2118 v = rtvec_alloc (units);
2120 for (i = 0; i < units; ++i)
2121 RTVEC_ELT (v, i) = inner;
2123 return gen_rtx_CONST_VECTOR (mode, v);
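/* For example, spu_const (V4SImode, 5) yields the CONST_VECTOR
   {5, 5, 5, 5}, while spu_const (SImode, 5) is simply (const_int 5).  */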
2126 /* Create a MODE vector constant from 4 ints. */
2128 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2130 unsigned char arr[16];
2131 arr[0] = (a >> 24) & 0xff;
2132 arr[1] = (a >> 16) & 0xff;
2133 arr[2] = (a >> 8) & 0xff;
2134 arr[3] = (a >> 0) & 0xff;
2135 arr[4] = (b >> 24) & 0xff;
2136 arr[5] = (b >> 16) & 0xff;
2137 arr[6] = (b >> 8) & 0xff;
2138 arr[7] = (b >> 0) & 0xff;
2139 arr[8] = (c >> 24) & 0xff;
2140 arr[9] = (c >> 16) & 0xff;
2141 arr[10] = (c >> 8) & 0xff;
2142 arr[11] = (c >> 0) & 0xff;
2143 arr[12] = (d >> 24) & 0xff;
2144 arr[13] = (d >> 16) & 0xff;
2145 arr[14] = (d >> 8) & 0xff;
2146 arr[15] = (d >> 0) & 0xff;
2147 return array_to_constant(mode, arr);
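/* The four ints above are laid out most-significant byte first, so
   for a V4SImode result A, B, C and D supply the four word elements
   in that order.  */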
2150 /* branch hint stuff */
2152 /* An array of these is used to propagate hints to predecessor blocks. */
2153 struct spu_bb_info
2155 rtx prop_jump; /* propagated from another block */
2156 int bb_index; /* the original block. */
2158 static struct spu_bb_info *spu_bb_info;
2160 #define STOP_HINT_P(INSN) \
2161 (GET_CODE(INSN) == CALL_INSN \
2162 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2163 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2165 /* 1 when RTX is a hinted branch or its target. We keep track of
2166 what has been hinted so the safe-hint code can test it easily. */
2167 #define HINTED_P(RTX) \
2168 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2170 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2171 #define SCHED_ON_EVEN_P(RTX) \
2172 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2174 /* Emit a nop for INSN such that the two will dual issue. This assumes
2175 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2176 We check for TImode to handle a MULTI1 insn which has dual issued its
2177 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2178 ADDR_VEC insns. */
2179 static void
2180 emit_nop_for_insn (rtx insn)
2182 int p;
2183 rtx new_insn;
2184 p = get_pipe (insn);
2185 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2186 new_insn = emit_insn_after (gen_lnop (), insn);
2187 else if (p == 1 && GET_MODE (insn) == TImode)
2189 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2190 PUT_MODE (new_insn, TImode);
2191 PUT_MODE (insn, VOIDmode);
2193 else
2194 new_insn = emit_insn_after (gen_lnop (), insn);
2195 recog_memoized (new_insn);
2198 /* Insert nops in basic blocks to meet dual issue alignment
2199 requirements. Also make sure hbrp and hint instructions are at least
2200 one cycle apart, possibly inserting a nop. */
2201 static void
2202 pad_bb(void)
2204 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2205 int length;
2206 int addr;
2208 /* This sets up INSN_ADDRESSES. */
2209 shorten_branches (get_insns ());
2211 /* Keep track of length added by nops. */
2212 length = 0;
2214 prev_insn = 0;
2215 insn = get_insns ();
2216 if (!active_insn_p (insn))
2217 insn = next_active_insn (insn);
2218 for (; insn; insn = next_insn)
2220 next_insn = next_active_insn (insn);
2221 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2222 || INSN_CODE (insn) == CODE_FOR_hbr)
2224 if (hbr_insn)
2226 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2227 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2228 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2229 || (a1 - a0 == 4))
2231 prev_insn = emit_insn_before (gen_lnop (), insn);
2232 PUT_MODE (prev_insn, GET_MODE (insn));
2233 PUT_MODE (insn, TImode);
2234 length += 4;
2237 hbr_insn = insn;
2239 if (INSN_CODE (insn) == CODE_FOR_blockage)
2241 if (GET_MODE (insn) == TImode)
2242 PUT_MODE (next_insn, TImode);
2243 insn = next_insn;
2244 next_insn = next_active_insn (insn);
2246 addr = INSN_ADDRESSES (INSN_UID (insn));
2247 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2249 if (((addr + length) & 7) != 0)
2251 emit_nop_for_insn (prev_insn);
2252 length += 4;
2255 else if (GET_MODE (insn) == TImode
2256 && ((next_insn && GET_MODE (next_insn) != TImode)
2257 || get_attr_type (insn) == TYPE_MULTI0)
2258 && ((addr + length) & 7) != 0)
2260 /* prev_insn will always be set because the first insn is
2261 always 8-byte aligned. */
2262 emit_nop_for_insn (prev_insn);
2263 length += 4;
2265 prev_insn = insn;
2270 /* Routines for branch hints. */
2272 static void
2273 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2274 int distance, sbitmap blocks)
2276 rtx branch_label = 0;
2277 rtx hint;
2278 rtx insn;
2279 rtx table;
2281 if (before == 0 || branch == 0 || target == 0)
2282 return;
2284 /* While scheduling we require hints to be no further than 600, so
2285 we need to enforce that here too */
2286 if (distance > 600)
2287 return;
2290 /* If BEFORE is a basic block note, emit the hint after the note. */
2290 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2291 before = NEXT_INSN (before);
2293 branch_label = gen_label_rtx ();
2294 LABEL_NUSES (branch_label)++;
2295 LABEL_PRESERVE_P (branch_label) = 1;
2296 insn = emit_label_before (branch_label, branch);
2297 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2298 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2300 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2301 recog_memoized (hint);
2302 HINTED_P (branch) = 1;
2304 if (GET_CODE (target) == LABEL_REF)
2305 HINTED_P (XEXP (target, 0)) = 1;
2306 else if (tablejump_p (branch, 0, &table))
2308 rtvec vec;
2309 int j;
2310 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2311 vec = XVEC (PATTERN (table), 0);
2312 else
2313 vec = XVEC (PATTERN (table), 1);
2314 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2315 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2318 if (distance >= 588)
2320 /* Make sure the hint isn't scheduled any earlier than this point,
2321 which could make it too far for the branch offset to fit */
2322 recog_memoized (emit_insn_before (gen_blockage (), hint));
2324 else if (distance <= 8 * 4)
2326 /* To guarantee at least 8 insns between the hint and branch we
2327 insert nops. */
2328 int d;
2329 for (d = distance; d < 8 * 4; d += 4)
2331 insn =
2332 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2333 recog_memoized (insn);
2336 /* Make sure any nops inserted aren't scheduled before the hint. */
2337 recog_memoized (emit_insn_after (gen_blockage (), hint));
2339 /* Make sure any nops inserted aren't scheduled after the call. */
2340 if (CALL_P (branch) && distance < 8 * 4)
2341 recog_memoized (emit_insn_before (gen_blockage (), branch));
2345 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2346 the rtx for the branch target. */
2347 static rtx
2348 get_branch_target (rtx branch)
2350 if (GET_CODE (branch) == JUMP_INSN)
2352 rtx set, src;
2354 /* Return statements */
2355 if (GET_CODE (PATTERN (branch)) == RETURN)
2356 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2358 /* jump table */
2359 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2360 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2361 return 0;
2363 set = single_set (branch);
2364 src = SET_SRC (set);
2365 if (GET_CODE (SET_DEST (set)) != PC)
2366 abort ();
2368 if (GET_CODE (src) == IF_THEN_ELSE)
2370 rtx lab = 0;
2371 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2372 if (note)
2374 /* If the more probable case is not a fall through, then
2375 try a branch hint. */
2376 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2377 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2378 && GET_CODE (XEXP (src, 1)) != PC)
2379 lab = XEXP (src, 1);
2380 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2381 && GET_CODE (XEXP (src, 2)) != PC)
2382 lab = XEXP (src, 2);
2384 if (lab)
2386 if (GET_CODE (lab) == RETURN)
2387 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2388 return lab;
2390 return 0;
2393 return src;
2395 else if (GET_CODE (branch) == CALL_INSN)
2397 rtx call;
2398 /* All of our call patterns are in a PARALLEL and the CALL is
2399 the first pattern in the PARALLEL. */
2400 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2401 abort ();
2402 call = XVECEXP (PATTERN (branch), 0, 0);
2403 if (GET_CODE (call) == SET)
2404 call = SET_SRC (call);
2405 if (GET_CODE (call) != CALL)
2406 abort ();
2407 return XEXP (XEXP (call, 0), 0);
2409 return 0;
2412 /* The special $hbr register is used to prevent the insn scheduler from
2413 moving hbr insns across instructions which invalidate them. It
2414 should only be used in a clobber, and this function searches for
2415 insns which clobber it. */
2416 static bool
2417 insn_clobbers_hbr (rtx insn)
2419 if (INSN_P (insn)
2420 && GET_CODE (PATTERN (insn)) == PARALLEL)
2422 rtx parallel = PATTERN (insn);
2423 rtx clobber;
2424 int j;
2425 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2427 clobber = XVECEXP (parallel, 0, j);
2428 if (GET_CODE (clobber) == CLOBBER
2429 && GET_CODE (XEXP (clobber, 0)) == REG
2430 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2431 return 1;
2434 return 0;
2437 /* Search up to 32 insns starting at FIRST:
2438 - at any kind of hinted branch, just return
2439 - at any unconditional branch in the first 15 insns, just return
2440 - at a call or indirect branch, after the first 15 insns, force it to
2441 an even address and return
2442 - at any unconditional branch, after the first 15 insns, force it to
2443 an even address.
2444 At the end of the search, insert an hbrp within 4 insns of FIRST,
2445 and an hbrp within 16 instructions of FIRST.
2447 static void
2448 insert_hbrp_for_ilb_runout (rtx first)
2450 rtx insn, before_4 = 0, before_16 = 0;
2451 int addr = 0, length, first_addr = -1;
2452 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2453 int insert_lnop_after = 0;
2454 for (insn = first; insn; insn = NEXT_INSN (insn))
2455 if (INSN_P (insn))
2457 if (first_addr == -1)
2458 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2459 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2460 length = get_attr_length (insn);
2462 if (before_4 == 0 && addr + length >= 4 * 4)
2463 before_4 = insn;
2464 /* We test for 14 instructions because the first hbrp will add
2465 up to 2 instructions. */
2466 if (before_16 == 0 && addr + length >= 14 * 4)
2467 before_16 = insn;
2469 if (INSN_CODE (insn) == CODE_FOR_hbr)
2471 /* Make sure an hbrp is at least 2 cycles away from a hint.
2472 Insert an lnop after the hbrp when necessary. */
2473 if (before_4 == 0 && addr > 0)
2475 before_4 = insn;
2476 insert_lnop_after |= 1;
2478 else if (before_4 && addr <= 4 * 4)
2479 insert_lnop_after |= 1;
2480 if (before_16 == 0 && addr > 10 * 4)
2482 before_16 = insn;
2483 insert_lnop_after |= 2;
2485 else if (before_16 && addr <= 14 * 4)
2486 insert_lnop_after |= 2;
2489 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2491 if (addr < hbrp_addr0)
2492 hbrp_addr0 = addr;
2493 else if (addr < hbrp_addr1)
2494 hbrp_addr1 = addr;
2497 if (CALL_P (insn) || JUMP_P (insn))
2499 if (HINTED_P (insn))
2500 return;
2502 /* Any branch after the first 15 insns should be on an even
2503 address to avoid a special case branch. There might be
2504 some nops and/or hbrps inserted, so we test after 10
2505 insns. */
2506 if (addr > 10 * 4)
2507 SCHED_ON_EVEN_P (insn) = 1;
2510 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2511 return;
2514 if (addr + length >= 32 * 4)
2516 gcc_assert (before_4 && before_16);
2517 if (hbrp_addr0 > 4 * 4)
2519 insn =
2520 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2521 recog_memoized (insn);
2522 INSN_ADDRESSES_NEW (insn,
2523 INSN_ADDRESSES (INSN_UID (before_4)));
2524 PUT_MODE (insn, GET_MODE (before_4));
2525 PUT_MODE (before_4, TImode);
2526 if (insert_lnop_after & 1)
2528 insn = emit_insn_before (gen_lnop (), before_4);
2529 recog_memoized (insn);
2530 INSN_ADDRESSES_NEW (insn,
2531 INSN_ADDRESSES (INSN_UID (before_4)));
2532 PUT_MODE (insn, TImode);
2535 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2536 && hbrp_addr1 > 16 * 4)
2538 insn =
2539 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2540 recog_memoized (insn);
2541 INSN_ADDRESSES_NEW (insn,
2542 INSN_ADDRESSES (INSN_UID (before_16)));
2543 PUT_MODE (insn, GET_MODE (before_16));
2544 PUT_MODE (before_16, TImode);
2545 if (insert_lnop_after & 2)
2547 insn = emit_insn_before (gen_lnop (), before_16);
2548 recog_memoized (insn);
2549 INSN_ADDRESSES_NEW (insn,
2550 INSN_ADDRESSES (INSN_UID
2551 (before_16)));
2552 PUT_MODE (insn, TImode);
2555 return;
2558 else if (BARRIER_P (insn))
2559 return;
2563 /* The SPU might hang when it executes 48 inline instructions after a
2564 hinted branch jumps to its hinted target. The beginning of a
2565 function and the return from a call might have been hinted, and must
2566 be handled as well. To prevent a hang we insert 2 hbrps. The first
2567 should be within 6 insns of the branch target. The second should be
2568 within 22 insns of the branch target. When determining if hbrps are
2569 necessary, we look for only 32 inline instructions, because up to
2570 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2571 new hbrps, we insert them within 4 and 16 insns of the target. */
2572 static void
2573 insert_hbrp (void)
2575 rtx insn;
2576 if (TARGET_SAFE_HINTS)
2578 shorten_branches (get_insns ());
2579 /* Insert hbrp at beginning of function */
2580 insn = next_active_insn (get_insns ());
2581 if (insn)
2582 insert_hbrp_for_ilb_runout (insn);
2583 /* Insert hbrp after hinted targets. */
2584 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2585 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2586 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2590 static int in_spu_reorg;
2592 /* Insert branch hints. There are no branch optimizations after this
2593 pass, so it's safe to set our branch hints now. */
2594 static void
2595 spu_machine_dependent_reorg (void)
2597 sbitmap blocks;
2598 basic_block bb;
2599 rtx branch, insn;
2600 rtx branch_target = 0;
2601 int branch_addr = 0, insn_addr, required_dist = 0;
2602 int i;
2603 unsigned int j;
2605 if (!TARGET_BRANCH_HINTS || optimize == 0)
2607 /* We still do it for unoptimized code because an external
2608 function might have hinted a call or return. */
2609 insert_hbrp ();
2610 pad_bb ();
2611 return;
2614 blocks = sbitmap_alloc (last_basic_block);
2615 sbitmap_zero (blocks);
2617 in_spu_reorg = 1;
2618 compute_bb_for_insn ();
2620 compact_blocks ();
2622 spu_bb_info =
2623 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2624 sizeof (struct spu_bb_info));
2626 /* We need exact insn addresses and lengths. */
2627 shorten_branches (get_insns ());
2629 for (i = n_basic_blocks - 1; i >= 0; i--)
2631 bb = BASIC_BLOCK (i);
2632 branch = 0;
2633 if (spu_bb_info[i].prop_jump)
2635 branch = spu_bb_info[i].prop_jump;
2636 branch_target = get_branch_target (branch);
2637 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2638 required_dist = spu_hint_dist;
2640 /* Search from end of a block to beginning. In this loop, find
2641 jumps which need a branch hint and emit the hint only when:
2642 - it's an indirect branch and we're at the insn which sets
2643 the register
2644 - we're at an insn that will invalidate the hint. e.g., a
2645 call, another hint insn, inline asm that clobbers $hbr, and
2646 some inlined operations (divmodsi4). Don't consider jumps
2647 because they are only at the end of a block and are
2648 considered when we are deciding whether to propagate
2649 - we're getting too far away from the branch. The hbr insns
2650 only have a signed 10 bit offset
2651 We go back as far as possible so the branch will be considered
2652 for propagation when we get to the beginning of the block. */
2653 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2655 if (INSN_P (insn))
2657 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2658 if (branch
2659 && ((GET_CODE (branch_target) == REG
2660 && set_of (branch_target, insn) != NULL_RTX)
2661 || insn_clobbers_hbr (insn)
2662 || branch_addr - insn_addr > 600))
2664 rtx next = NEXT_INSN (insn);
2665 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2666 if (insn != BB_END (bb)
2667 && branch_addr - next_addr >= required_dist)
2669 if (dump_file)
2670 fprintf (dump_file,
2671 "hint for %i in block %i before %i\n",
2672 INSN_UID (branch), bb->index,
2673 INSN_UID (next));
2674 spu_emit_branch_hint (next, branch, branch_target,
2675 branch_addr - next_addr, blocks);
2677 branch = 0;
2680 /* JUMP_P will only be true at the end of a block. When
2681 branch is already set it means we've previously decided
2682 to propagate a hint for that branch into this block. */
2683 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2685 branch = 0;
2686 if ((branch_target = get_branch_target (insn)))
2688 branch = insn;
2689 branch_addr = insn_addr;
2690 required_dist = spu_hint_dist;
2694 if (insn == BB_HEAD (bb))
2695 break;
2698 if (branch)
2700 /* If we haven't emitted a hint for this branch yet, it might
2701 be profitable to emit it in one of the predecessor blocks,
2702 especially for loops. */
2703 rtx bbend;
2704 basic_block prev = 0, prop = 0, prev2 = 0;
2705 int loop_exit = 0, simple_loop = 0;
2706 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2708 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2709 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2710 prev = EDGE_PRED (bb, j)->src;
2711 else
2712 prev2 = EDGE_PRED (bb, j)->src;
2714 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2715 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2716 loop_exit = 1;
2717 else if (EDGE_SUCC (bb, j)->dest == bb)
2718 simple_loop = 1;
2720 /* If this branch is a loop exit then propagate to previous
2721 fallthru block. This catches the cases when it is a simple
2722 loop or when there is an initial branch into the loop. */
2723 if (prev && (loop_exit || simple_loop)
2724 && prev->loop_depth <= bb->loop_depth)
2725 prop = prev;
2727 /* If there is only one adjacent predecessor, don't propagate
2728 outside this loop. This loop_depth test isn't perfect, but
2729 I'm not sure the loop_father member is valid at this point. */
2730 else if (prev && single_pred_p (bb)
2731 && prev->loop_depth == bb->loop_depth)
2732 prop = prev;
2734 /* If this is the JOIN block of a simple IF-THEN then
2735 propagate the hint to the HEADER block. */
2736 else if (prev && prev2
2737 && EDGE_COUNT (bb->preds) == 2
2738 && EDGE_COUNT (prev->preds) == 1
2739 && EDGE_PRED (prev, 0)->src == prev2
2740 && prev2->loop_depth == bb->loop_depth
2741 && GET_CODE (branch_target) != REG)
2742 prop = prev;
2744 /* Don't propagate when:
2745 - this is a simple loop and the hint would be too far
2746 - this is not a simple loop and there are 16 insns in
2747 this block already
2748 - the predecessor block ends in a branch that will be
2749 hinted
2750 - the predecessor block ends in an insn that invalidates
2751 the hint */
2752 if (prop
2753 && prop->index >= 0
2754 && (bbend = BB_END (prop))
2755 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2756 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2757 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2759 if (dump_file)
2760 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2761 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2762 bb->index, prop->index, bb->loop_depth,
2763 INSN_UID (branch), loop_exit, simple_loop,
2764 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2766 spu_bb_info[prop->index].prop_jump = branch;
2767 spu_bb_info[prop->index].bb_index = i;
2769 else if (branch_addr - next_addr >= required_dist)
2771 if (dump_file)
2772 fprintf (dump_file, "hint for %i in block %i before %i\n",
2773 INSN_UID (branch), bb->index,
2774 INSN_UID (NEXT_INSN (insn)));
2775 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2776 branch_addr - next_addr, blocks);
2778 branch = 0;
2781 free (spu_bb_info);
2783 if (!sbitmap_empty_p (blocks))
2784 find_many_sub_basic_blocks (blocks);
2786 /* We have to schedule to make sure alignment is ok. */
2787 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2789 /* The hints need to be scheduled, so call it again. */
2790 schedule_insns ();
2792 insert_hbrp ();
2794 pad_bb ();
2796 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2797 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2799 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2800 between its branch label and the branch.  We don't move the
2801 label because GCC expects it at the beginning of the block. */
2802 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2803 rtx label_ref = XVECEXP (unspec, 0, 0);
2804 rtx label = XEXP (label_ref, 0);
2805 rtx branch;
2806 int offset = 0;
2807 for (branch = NEXT_INSN (label);
2808 !JUMP_P (branch) && !CALL_P (branch);
2809 branch = NEXT_INSN (branch))
2810 if (NONJUMP_INSN_P (branch))
2811 offset += get_attr_length (branch);
2812 if (offset > 0)
2813 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2816 if (spu_flag_var_tracking)
2818 df_analyze ();
2819 timevar_push (TV_VAR_TRACKING);
2820 variable_tracking_main ();
2821 timevar_pop (TV_VAR_TRACKING);
2822 df_finish_pass (false);
2825 free_bb_for_insn ();
2827 in_spu_reorg = 0;
2831 /* Insn scheduling routines, primarily for dual issue. */
2832 static int
2833 spu_sched_issue_rate (void)
2835 return 2;
2838 static int
2839 uses_ls_unit(rtx insn)
2841 rtx set = single_set (insn);
2842 if (set != 0
2843 && (GET_CODE (SET_DEST (set)) == MEM
2844 || GET_CODE (SET_SRC (set)) == MEM))
2845 return 1;
2846 return 0;
2849 static int
2850 get_pipe (rtx insn)
2852 enum attr_type t;
2853 /* Handle inline asm */
2854 if (INSN_CODE (insn) == -1)
2855 return -1;
2856 t = get_attr_type (insn);
2857 switch (t)
2859 case TYPE_CONVERT:
2860 return -2;
2861 case TYPE_MULTI0:
2862 return -1;
2864 case TYPE_FX2:
2865 case TYPE_FX3:
2866 case TYPE_SPR:
2867 case TYPE_NOP:
2868 case TYPE_FXB:
2869 case TYPE_FPD:
2870 case TYPE_FP6:
2871 case TYPE_FP7:
2872 return 0;
2874 case TYPE_LNOP:
2875 case TYPE_SHUF:
2876 case TYPE_LOAD:
2877 case TYPE_STORE:
2878 case TYPE_BR:
2879 case TYPE_MULTI1:
2880 case TYPE_HBR:
2881 case TYPE_IPREFETCH:
2882 return 1;
2883 default:
2884 abort ();
2889 /* haifa-sched.c has a static variable that keeps track of the current
2890 cycle. It is passed to spu_sched_reorder, and we record it here for
2891 use by spu_sched_variable_issue. It won't be accurate if the
2892 scheduler updates its clock_var between the two calls. */
2893 static int clock_var;
2895 /* This is used to keep track of insn alignment. Set to 0 at the
2896 beginning of each block and increased by the "length" attr of each
2897 insn scheduled. */
2898 static int spu_sched_length;
2900 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2901 ready list appropriately in spu_sched_reorder(). */
2902 static int pipe0_clock;
2903 static int pipe1_clock;
2905 static int prev_clock_var;
2907 static int prev_priority;
2909 /* The SPU needs to load the next ilb sometime during the execution of
2910 the previous ilb. There is a potential conflict if every cycle has a
2911 load or store. To avoid the conflict we make sure the load/store
2912 unit is free for at least one cycle during the execution of insns in
2913 the previous ilb. */
2914 static int spu_ls_first;
2915 static int prev_ls_clock;
2917 static void
2918 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2919 int max_ready ATTRIBUTE_UNUSED)
2921 spu_sched_length = 0;
2924 static void
2925 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2926 int max_ready ATTRIBUTE_UNUSED)
2928 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2930 /* When any block might be at least 8-byte aligned, assume they
2931 will all be at least 8-byte aligned to make sure dual issue
2932 works out correctly. */
2933 spu_sched_length = 0;
2935 spu_ls_first = INT_MAX;
2936 clock_var = -1;
2937 prev_ls_clock = -1;
2938 pipe0_clock = -1;
2939 pipe1_clock = -1;
2940 prev_clock_var = -1;
2941 prev_priority = -1;
2944 static int
2945 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2946 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2948 int len;
2949 int p;
2950 if (GET_CODE (PATTERN (insn)) == USE
2951 || GET_CODE (PATTERN (insn)) == CLOBBER
2952 || (len = get_attr_length (insn)) == 0)
2953 return more;
2955 spu_sched_length += len;
2957 /* Reset on inline asm */
2958 if (INSN_CODE (insn) == -1)
2960 spu_ls_first = INT_MAX;
2961 pipe0_clock = -1;
2962 pipe1_clock = -1;
2963 return 0;
2965 p = get_pipe (insn);
2966 if (p == 0)
2967 pipe0_clock = clock_var;
2968 else
2969 pipe1_clock = clock_var;
2971 if (in_spu_reorg)
2973 if (clock_var - prev_ls_clock > 1
2974 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2975 spu_ls_first = INT_MAX;
2976 if (uses_ls_unit (insn))
2978 if (spu_ls_first == INT_MAX)
2979 spu_ls_first = spu_sched_length;
2980 prev_ls_clock = clock_var;
2983 /* The scheduler hasn't inserted the nop, but we will later on.
2984 Include those nops in spu_sched_length. */
2985 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2986 spu_sched_length += 4;
2987 prev_clock_var = clock_var;
2989 /* more is -1 when called from spu_sched_reorder for new insns
2990 that don't have INSN_PRIORITY */
2991 if (more >= 0)
2992 prev_priority = INSN_PRIORITY (insn);
2995 /* Always try issuing more insns. spu_sched_reorder will decide
2996 when the cycle should be advanced. */
2997 return 1;
3000 /* This function is called for both TARGET_SCHED_REORDER and
3001 TARGET_SCHED_REORDER2. */
3002 static int
3003 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3004 rtx *ready, int *nreadyp, int clock)
3006 int i, nready = *nreadyp;
3007 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3008 rtx insn;
3010 clock_var = clock;
3012 if (nready <= 0 || pipe1_clock >= clock)
3013 return 0;
3015 /* Find any rtl insns that don't generate assembly insns and schedule
3016 them first. */
3017 for (i = nready - 1; i >= 0; i--)
3019 insn = ready[i];
3020 if (INSN_CODE (insn) == -1
3021 || INSN_CODE (insn) == CODE_FOR_blockage
3022 || (INSN_P (insn) && get_attr_length (insn) == 0))
3024 ready[i] = ready[nready - 1];
3025 ready[nready - 1] = insn;
3026 return 1;
3030 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3031 for (i = 0; i < nready; i++)
3032 if (INSN_CODE (ready[i]) != -1)
3034 insn = ready[i];
3035 switch (get_attr_type (insn))
3037 default:
3038 case TYPE_MULTI0:
3039 case TYPE_CONVERT:
3040 case TYPE_FX2:
3041 case TYPE_FX3:
3042 case TYPE_SPR:
3043 case TYPE_NOP:
3044 case TYPE_FXB:
3045 case TYPE_FPD:
3046 case TYPE_FP6:
3047 case TYPE_FP7:
3048 pipe_0 = i;
3049 break;
3050 case TYPE_LOAD:
3051 case TYPE_STORE:
3052 pipe_ls = i;
3053 case TYPE_LNOP:
3054 case TYPE_SHUF:
3055 case TYPE_BR:
3056 case TYPE_MULTI1:
3057 case TYPE_HBR:
3058 pipe_1 = i;
3059 break;
3060 case TYPE_IPREFETCH:
3061 pipe_hbrp = i;
3062 break;
3066 /* In the first scheduling phase, schedule loads and stores together
3067 to increase the chance they will get merged during postreload CSE. */
3068 if (!reload_completed && pipe_ls >= 0)
3070 insn = ready[pipe_ls];
3071 ready[pipe_ls] = ready[nready - 1];
3072 ready[nready - 1] = insn;
3073 return 1;
3076 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3077 if (pipe_hbrp >= 0)
3078 pipe_1 = pipe_hbrp;
3080 /* When we have loads/stores in every cycle of the last 15 insns and
3081 we are about to schedule another load/store, emit an hbrp insn
3082 instead. */
3083 if (in_spu_reorg
3084 && spu_sched_length - spu_ls_first >= 4 * 15
3085 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3087 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3088 recog_memoized (insn);
3089 if (pipe0_clock < clock)
3090 PUT_MODE (insn, TImode);
3091 spu_sched_variable_issue (file, verbose, insn, -1);
3092 return 0;
3095 /* In general, we want to emit nops to increase dual issue, but dual
3096 issue isn't faster when one of the insns could be scheduled later
3097 without affecting the critical path. We look at INSN_PRIORITY to
3098 make a good guess, but it isn't perfect so -mdual-nops=n can be
3099 used to affect it. */
3100 if (in_spu_reorg && spu_dual_nops < 10)
3102 /* When we are at an even address and we are not issuing nops to
3103 improve scheduling then we need to advance the cycle. */
3104 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3105 && (spu_dual_nops == 0
3106 || (pipe_1 != -1
3107 && prev_priority >
3108 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3109 return 0;
3111 /* When at an odd address, schedule the highest priority insn
3112 without considering pipeline. */
3113 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3114 && (spu_dual_nops == 0
3115 || (prev_priority >
3116 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3117 return 1;
3121 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3122 pipe0 insn in the ready list, schedule it. */
3123 if (pipe0_clock < clock && pipe_0 >= 0)
3124 schedule_i = pipe_0;
3126 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3127 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3128 else
3129 schedule_i = pipe_1;
3131 if (schedule_i > -1)
3133 insn = ready[schedule_i];
3134 ready[schedule_i] = ready[nready - 1];
3135 ready[nready - 1] = insn;
3136 return 1;
3138 return 0;
3141 /* INSN is dependent on DEP_INSN. */
3142 static int
3143 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3145 rtx set;
3147 /* The blockage pattern is used to prevent instructions from being
3148 moved across it and has no cost. */
3149 if (INSN_CODE (insn) == CODE_FOR_blockage
3150 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3151 return 0;
3153 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3154 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3155 return 0;
3157 /* Make sure hbrps are spread out. */
3158 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3159 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3160 return 8;
3162 /* Make sure hints and hbrps are 2 cycles apart. */
3163 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3164 || INSN_CODE (insn) == CODE_FOR_hbr)
3165 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3166 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3167 return 2;
3169 /* An hbrp has no real dependency on other insns. */
3170 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3171 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3172 return 0;
3174 /* Assuming that it is unlikely an argument register will be used in
3175 the first cycle of the called function, we reduce the cost for
3176 slightly better scheduling of dep_insn. When not hinted, the
3177 mispredicted branch would hide the cost as well. */
3178 if (CALL_P (insn))
3180 rtx target = get_branch_target (insn);
3181 if (GET_CODE (target) != REG || !set_of (target, insn))
3182 return cost - 2;
3183 return cost;
3186 /* And when returning from a function, let's assume the return values
3187 are completed sooner too. */
3188 if (CALL_P (dep_insn))
3189 return cost - 2;
3191 /* Make sure an instruction that loads from the back chain is scheduled
3192 away from the return instruction so a hint is more likely to get
3193 issued. */
3194 if (INSN_CODE (insn) == CODE_FOR__return
3195 && (set = single_set (dep_insn))
3196 && GET_CODE (SET_DEST (set)) == REG
3197 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3198 return 20;
3200 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3201 scheduler makes every insn in a block anti-dependent on the final
3202 jump_insn. We adjust here so higher cost insns will get scheduled
3203 earlier. */
3204 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3205 return insn_cost (dep_insn) - 3;
3207 return cost;
3210 /* Create a CONST_DOUBLE from a string. */
3211 struct rtx_def *
3212 spu_float_const (const char *string, enum machine_mode mode)
3214 REAL_VALUE_TYPE value;
3215 value = REAL_VALUE_ATOF (string, mode);
3216 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3220 spu_constant_address_p (rtx x)
3222 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3223 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3224 || GET_CODE (x) == HIGH);
3227 static enum spu_immediate
3228 which_immediate_load (HOST_WIDE_INT val)
3230 gcc_assert (val == trunc_int_for_mode (val, SImode));
3232 if (val >= -0x8000 && val <= 0x7fff)
3233 return SPU_IL;
3234 if (val >= 0 && val <= 0x3ffff)
3235 return SPU_ILA;
3236 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3237 return SPU_ILH;
3238 if ((val & 0xffff) == 0)
3239 return SPU_ILHU;
3241 return SPU_NONE;
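/* For instance, which_immediate_load classifies 0x1234 as SPU_IL,
   0x12345 as SPU_ILA, 0x00050005 as SPU_ILH (both halfwords match),
   and 0x50000 as SPU_ILHU (the low halfword is zero).  */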
3244 /* Return true when OP can be loaded by one of the il instructions, or
3245 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3247 immediate_load_p (rtx op, enum machine_mode mode)
3249 if (CONSTANT_P (op))
3251 enum immediate_class c = classify_immediate (op, mode);
3252 return c == IC_IL1 || c == IC_IL1s
3253 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3255 return 0;
3258 /* Return true if the first SIZE bytes of ARR form a constant that can be
3259 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3260 represent the size and offset of the instruction to use. */
3261 static int
3262 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3264 int cpat, run, i, start;
3265 cpat = 1;
3266 run = 0;
3267 start = -1;
3268 for (i = 0; i < size && cpat; i++)
3269 if (arr[i] != i+16)
3271 if (!run)
3273 start = i;
3274 if (arr[i] == 3)
3275 run = 1;
3276 else if (arr[i] == 2 && arr[i+1] == 3)
3277 run = 2;
3278 else if (arr[i] == 0)
3280 while (arr[i+run] == run && i+run < 16)
3281 run++;
3282 if (run != 4 && run != 8)
3283 cpat = 0;
3285 else
3286 cpat = 0;
3287 if ((i & (run-1)) != 0)
3288 cpat = 0;
3289 i += run;
3291 else
3292 cpat = 0;
3294 if (cpat && (run || size < 16))
3296 if (run == 0)
3297 run = 1;
3298 if (prun)
3299 *prun = run;
3300 if (pstart)
3301 *pstart = start == -1 ? 16-run : start;
3302 return 1;
3304 return 0;
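/* As an example for cpat_info above: the 16-byte pattern
   {0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,...,0x1f} differs
   from the identity bytes i+16 only in the 4-byte run 0,1,2,3 at
   offset 4, which matches the shuffle control that cwd generates for
   a word insert at that offset, so it is accepted with *prun == 4
   and *pstart == 4.  */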
3307 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3308 it into a register. MODE is only valid when OP is a CONST_INT. */
3309 static enum immediate_class
3310 classify_immediate (rtx op, enum machine_mode mode)
3312 HOST_WIDE_INT val;
3313 unsigned char arr[16];
3314 int i, j, repeated, fsmbi, repeat;
3316 gcc_assert (CONSTANT_P (op));
3318 if (GET_MODE (op) != VOIDmode)
3319 mode = GET_MODE (op);
3321 /* A V4SI const_vector with all identical symbols is ok. */
3322 if (!flag_pic
3323 && mode == V4SImode
3324 && GET_CODE (op) == CONST_VECTOR
3325 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3326 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3327 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3328 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3329 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3330 op = CONST_VECTOR_ELT (op, 0);
3332 switch (GET_CODE (op))
3334 case SYMBOL_REF:
3335 case LABEL_REF:
3336 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3338 case CONST:
3339 /* We can never know if the resulting address fits in 18 bits and can be
3340 loaded with ila. For now, assume the address will not overflow if
3341 the displacement is "small" (fits 'K' constraint). */
3342 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3344 rtx sym = XEXP (XEXP (op, 0), 0);
3345 rtx cst = XEXP (XEXP (op, 0), 1);
3347 if (GET_CODE (sym) == SYMBOL_REF
3348 && GET_CODE (cst) == CONST_INT
3349 && satisfies_constraint_K (cst))
3350 return IC_IL1s;
3352 return IC_IL2s;
3354 case HIGH:
3355 return IC_IL1s;
3357 case CONST_VECTOR:
3358 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3359 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3360 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3361 return IC_POOL;
3362 /* Fall through. */
3364 case CONST_INT:
3365 case CONST_DOUBLE:
3366 constant_to_array (mode, op, arr);
3368 /* Check that each 4-byte slot is identical. */
3369 repeated = 1;
3370 for (i = 4; i < 16; i += 4)
3371 for (j = 0; j < 4; j++)
3372 if (arr[j] != arr[i + j])
3373 repeated = 0;
3375 if (repeated)
3377 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3378 val = trunc_int_for_mode (val, SImode);
3380 if (which_immediate_load (val) != SPU_NONE)
3381 return IC_IL1;
3384 /* Any mode of 2 bytes or smaller can be loaded with an il
3385 instruction. */
3386 gcc_assert (GET_MODE_SIZE (mode) > 2);
3388 fsmbi = 1;
3389 repeat = 0;
3390 for (i = 0; i < 16 && fsmbi; i++)
3391 if (arr[i] != 0 && repeat == 0)
3392 repeat = arr[i];
3393 else if (arr[i] != 0 && arr[i] != repeat)
3394 fsmbi = 0;
3395 if (fsmbi)
3396 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3398 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3399 return IC_CPAT;
3401 if (repeated)
3402 return IC_IL2;
3404 return IC_POOL;
3405 default:
3406 break;
3408 gcc_unreachable ();
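/* A few illustrative classifications from classify_immediate above:
   a V4SImode vector of all 5s is IC_IL1 (a single il), all
   0xff0000ff is IC_FSMBI (every byte is 0 or 0xff), and all
   0x12345678 is IC_IL2 (needs ilhu followed by iohl).  */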
3411 static enum spu_immediate
3412 which_logical_immediate (HOST_WIDE_INT val)
3414 gcc_assert (val == trunc_int_for_mode (val, SImode));
3416 if (val >= -0x200 && val <= 0x1ff)
3417 return SPU_ORI;
3418 if (val >= 0 && val <= 0xffff)
3419 return SPU_IOHL;
3420 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3422 val = trunc_int_for_mode (val, HImode);
3423 if (val >= -0x200 && val <= 0x1ff)
3424 return SPU_ORHI;
3425 if ((val & 0xff) == ((val >> 8) & 0xff))
3427 val = trunc_int_for_mode (val, QImode);
3428 if (val >= -0x200 && val <= 0x1ff)
3429 return SPU_ORBI;
3432 return SPU_NONE;
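/* For which_logical_immediate above: 0x00030003 can be formed with
   orhi (both halfwords are the small value 3), 0x05050505 needs orbi
   (every byte is 5), and 0x12345678 matches nothing and yields
   SPU_NONE.  */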
3435 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3436 CONST_DOUBLEs. */
3437 static int
3438 const_vector_immediate_p (rtx x)
3440 int i;
3441 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3442 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3443 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3445 return 0;
3446 return 1;
3450 logical_immediate_p (rtx op, enum machine_mode mode)
3452 HOST_WIDE_INT val;
3453 unsigned char arr[16];
3454 int i, j;
3456 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3457 || GET_CODE (op) == CONST_VECTOR);
3459 if (GET_CODE (op) == CONST_VECTOR
3460 && !const_vector_immediate_p (op))
3461 return 0;
3463 if (GET_MODE (op) != VOIDmode)
3464 mode = GET_MODE (op);
3466 constant_to_array (mode, op, arr);
3468 /* Check that bytes are repeated. */
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 return 0;
3474 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3475 val = trunc_int_for_mode (val, SImode);
3477 i = which_logical_immediate (val);
3478 return i != SPU_NONE && i != SPU_IOHL;
3482 iohl_immediate_p (rtx op, enum machine_mode mode)
3484 HOST_WIDE_INT val;
3485 unsigned char arr[16];
3486 int i, j;
3488 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3489 || GET_CODE (op) == CONST_VECTOR);
3491 if (GET_CODE (op) == CONST_VECTOR
3492 && !const_vector_immediate_p (op))
3493 return 0;
3495 if (GET_MODE (op) != VOIDmode)
3496 mode = GET_MODE (op);
3498 constant_to_array (mode, op, arr);
3500 /* Check that bytes are repeated. */
3501 for (i = 4; i < 16; i += 4)
3502 for (j = 0; j < 4; j++)
3503 if (arr[j] != arr[i + j])
3504 return 0;
3506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3507 val = trunc_int_for_mode (val, SImode);
3509 return val >= 0 && val <= 0xffff;
3513 arith_immediate_p (rtx op, enum machine_mode mode,
3514 HOST_WIDE_INT low, HOST_WIDE_INT high)
3516 HOST_WIDE_INT val;
3517 unsigned char arr[16];
3518 int bytes, i, j;
3520 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3521 || GET_CODE (op) == CONST_VECTOR);
3523 if (GET_CODE (op) == CONST_VECTOR
3524 && !const_vector_immediate_p (op))
3525 return 0;
3527 if (GET_MODE (op) != VOIDmode)
3528 mode = GET_MODE (op);
3530 constant_to_array (mode, op, arr);
3532 if (VECTOR_MODE_P (mode))
3533 mode = GET_MODE_INNER (mode);
3535 bytes = GET_MODE_SIZE (mode);
3536 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3538 /* Check that bytes are repeated. */
3539 for (i = bytes; i < 16; i += bytes)
3540 for (j = 0; j < bytes; j++)
3541 if (arr[j] != arr[i + j])
3542 return 0;
3544 val = arr[0];
3545 for (j = 1; j < bytes; j++)
3546 val = (val << 8) | arr[j];
3548 val = trunc_int_for_mode (val, mode);
3550 return val >= low && val <= high;
3553 /* TRUE when op is an immediate and an exact power of 2, and given that
3554 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3555 all entries must be the same. */
3556 bool
3557 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3559 enum machine_mode int_mode;
3560 HOST_WIDE_INT val;
3561 unsigned char arr[16];
3562 int bytes, i, j;
3564 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3565 || GET_CODE (op) == CONST_VECTOR);
3567 if (GET_CODE (op) == CONST_VECTOR
3568 && !const_vector_immediate_p (op))
3569 return 0;
3571 if (GET_MODE (op) != VOIDmode)
3572 mode = GET_MODE (op);
3574 constant_to_array (mode, op, arr);
3576 if (VECTOR_MODE_P (mode))
3577 mode = GET_MODE_INNER (mode);
3579 bytes = GET_MODE_SIZE (mode);
3580 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3582 /* Check that bytes are repeated. */
3583 for (i = bytes; i < 16; i += bytes)
3584 for (j = 0; j < bytes; j++)
3585 if (arr[j] != arr[i + j])
3586 return 0;
3588 val = arr[0];
3589 for (j = 1; j < bytes; j++)
3590 val = (val << 8) | arr[j];
3592 val = trunc_int_for_mode (val, int_mode);
3594 /* Currently, we only handle SFmode */
3595 gcc_assert (mode == SFmode);
3596 if (mode == SFmode)
3598 int exp = (val >> 23) - 127;
3599 return val > 0 && (val & 0x007fffff) == 0
3600 && exp >= low && exp <= high;
3602 return FALSE;
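/* As a concrete case for exp2_immediate_p: the SFmode constant 2.0f
   has the bit pattern 0x40000000, so the mantissa bits are all zero
   and the unbiased exponent is 1; it is accepted whenever
   low <= 1 <= high.  */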
3605 /* We accept:
3606 - any 32-bit constant (SImode, SFmode)
3607 - any constant that can be generated with fsmbi (any mode)
3608 - a 64-bit constant where the high and low bits are identical
3609 (DImode, DFmode)
3610 - a 128-bit constant where the four 32-bit words match. */
3612 spu_legitimate_constant_p (rtx x)
3614 if (GET_CODE (x) == HIGH)
3615 x = XEXP (x, 0);
3616 /* V4SI with all identical symbols is valid. */
3617 if (!flag_pic
3618 && GET_MODE (x) == V4SImode
3619 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3620 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3621 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3622 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3623 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3624 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3626 if (GET_CODE (x) == CONST_VECTOR
3627 && !const_vector_immediate_p (x))
3628 return 0;
3629 return 1;
3632 /* Valid addresses are:
3633 - symbol_ref, label_ref, const
3634 - reg
3635 - reg + const_int, where const_int is 16 byte aligned
3636 - reg + reg, alignment doesn't matter
3637 The alignment matters in the reg+const case because lqd and stqd
3638 ignore the 4 least significant bits of the const. We only care about
3639 16 byte modes because the expand phase will change all smaller MEM
3640 references to TImode. */
3641 static bool
3642 spu_legitimate_address_p (enum machine_mode mode,
3643 rtx x, bool reg_ok_strict)
3645 int aligned = GET_MODE_SIZE (mode) >= 16;
3646 if (aligned
3647 && GET_CODE (x) == AND
3648 && GET_CODE (XEXP (x, 1)) == CONST_INT
3649 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3650 x = XEXP (x, 0);
3651 switch (GET_CODE (x))
3653 case LABEL_REF:
3654 case SYMBOL_REF:
3655 case CONST:
3656 return !TARGET_LARGE_MEM;
3658 case CONST_INT:
3659 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3661 case SUBREG:
3662 x = XEXP (x, 0);
3663 if (REG_P (x))
3664 return 0;
3666 case REG:
3667 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3669 case PLUS:
3670 case LO_SUM:
3672 rtx op0 = XEXP (x, 0);
3673 rtx op1 = XEXP (x, 1);
3674 if (GET_CODE (op0) == SUBREG)
3675 op0 = XEXP (op0, 0);
3676 if (GET_CODE (op1) == SUBREG)
3677 op1 = XEXP (op1, 0);
3678 if (GET_CODE (op0) == REG
3679 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3680 && GET_CODE (op1) == CONST_INT
3681 && INTVAL (op1) >= -0x2000
3682 && INTVAL (op1) <= 0x1fff
3683 && (!aligned || (INTVAL (op1) & 15) == 0))
3684 return TRUE;
3685 if (GET_CODE (op0) == REG
3686 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3687 && GET_CODE (op1) == REG
3688 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3689 return TRUE;
3691 break;
3693 default:
3694 break;
3696 return FALSE;
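/* For spu_legitimate_address_p above, (plus (reg) (const_int 12)) is
   a legitimate address for SImode (the 16-byte alignment requirement
   only applies to 16-byte modes) but not for TImode, where the low 4
   bits of the offset must be zero.  */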
3699 /* When the address is reg + const_int, force the const_int into a
3700 register. */
3702 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3703 enum machine_mode mode ATTRIBUTE_UNUSED)
3705 rtx op0, op1;
3706 /* Make sure both operands are registers. */
3707 if (GET_CODE (x) == PLUS)
3709 op0 = XEXP (x, 0);
3710 op1 = XEXP (x, 1);
3711 if (ALIGNED_SYMBOL_REF_P (op0))
3713 op0 = force_reg (Pmode, op0);
3714 mark_reg_pointer (op0, 128);
3716 else if (GET_CODE (op0) != REG)
3717 op0 = force_reg (Pmode, op0);
3718 if (ALIGNED_SYMBOL_REF_P (op1))
3720 op1 = force_reg (Pmode, op1);
3721 mark_reg_pointer (op1, 128);
3723 else if (GET_CODE (op1) != REG)
3724 op1 = force_reg (Pmode, op1);
3725 x = gen_rtx_PLUS (Pmode, op0, op1);
3727 return x;
3730 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3731 struct attribute_spec.handler. */
3732 static tree
3733 spu_handle_fndecl_attribute (tree * node,
3734 tree name,
3735 tree args ATTRIBUTE_UNUSED,
3736 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3738 if (TREE_CODE (*node) != FUNCTION_DECL)
3740 warning (0, "%qE attribute only applies to functions",
3741 name);
3742 *no_add_attrs = true;
3745 return NULL_TREE;
3748 /* Handle the "vector" attribute. */
3749 static tree
3750 spu_handle_vector_attribute (tree * node, tree name,
3751 tree args ATTRIBUTE_UNUSED,
3752 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3754 tree type = *node, result = NULL_TREE;
3755 enum machine_mode mode;
3756 int unsigned_p;
3758 while (POINTER_TYPE_P (type)
3759 || TREE_CODE (type) == FUNCTION_TYPE
3760 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3761 type = TREE_TYPE (type);
3763 mode = TYPE_MODE (type);
3765 unsigned_p = TYPE_UNSIGNED (type);
3766 switch (mode)
3768 case DImode:
3769 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3770 break;
3771 case SImode:
3772 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3773 break;
3774 case HImode:
3775 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3776 break;
3777 case QImode:
3778 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3779 break;
3780 case SFmode:
3781 result = V4SF_type_node;
3782 break;
3783 case DFmode:
3784 result = V2DF_type_node;
3785 break;
3786 default:
3787 break;
3790 /* Propagate qualifiers attached to the element type
3791 onto the vector type. */
3792 if (result && result != type && TYPE_QUALS (type))
3793 result = build_qualified_type (result, TYPE_QUALS (type));
3795 *no_add_attrs = true; /* No need to hang on to the attribute. */
3797 if (!result)
3798 warning (0, "%qE attribute ignored", name);
3799 else
3800 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3802 return NULL_TREE;
3805 /* Return nonzero if FUNC is a naked function. */
3806 static int
3807 spu_naked_function_p (tree func)
3809 tree a;
3811 if (TREE_CODE (func) != FUNCTION_DECL)
3812 abort ();
3814 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3815 return a != NULL_TREE;
3819 spu_initial_elimination_offset (int from, int to)
3821 int saved_regs_size = spu_saved_regs_size ();
3822 int sp_offset = 0;
3823 if (!current_function_is_leaf || crtl->outgoing_args_size
3824 || get_frame_size () || saved_regs_size)
3825 sp_offset = STACK_POINTER_OFFSET;
3826 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3827 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3828 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3829 return get_frame_size ();
3830 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3831 return sp_offset + crtl->outgoing_args_size
3832 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3833 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3834 return get_frame_size () + saved_regs_size + sp_offset;
3835 else
3836 gcc_unreachable ();
3840 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3842 enum machine_mode mode = TYPE_MODE (type);
3843 int byte_size = ((mode == BLKmode)
3844 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3848 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3850 enum machine_mode smode;
3851 rtvec v;
3852 int i;
3853 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3854 int n = byte_size / UNITS_PER_WORD;
3855 v = rtvec_alloc (nregs);
3856 for (i = 0; i < n; i++)
3858 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3859 gen_rtx_REG (TImode,
3860 FIRST_RETURN_REGNUM
3861 + i),
3862 GEN_INT (UNITS_PER_WORD * i));
3863 byte_size -= UNITS_PER_WORD;
3866 if (n < nregs)
3868 if (byte_size < 4)
3869 byte_size = 4;
3870 smode =
3871 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3872 RTVEC_ELT (v, n) =
3873 gen_rtx_EXPR_LIST (VOIDmode,
3874 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3875 GEN_INT (UNITS_PER_WORD * n));
3877 return gen_rtx_PARALLEL (mode, v);
3879 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3883 spu_function_arg (CUMULATIVE_ARGS cum,
3884 enum machine_mode mode,
3885 tree type, int named ATTRIBUTE_UNUSED)
3887 int byte_size;
3889 if (cum >= MAX_REGISTER_ARGS)
3890 return 0;
3892 byte_size = ((mode == BLKmode)
3893 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3895 /* The ABI does not allow parameters to be passed partially in
3896 reg and partially in stack. */
3897 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3898 return 0;
3900 /* Make sure small structs are left justified in a register. */
3901 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3902 && byte_size < UNITS_PER_WORD && byte_size > 0)
3904 enum machine_mode smode;
3905 rtx gr_reg;
3906 if (byte_size < 4)
3907 byte_size = 4;
3908 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3909 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3910 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3911 const0_rtx);
3912 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3914 else
3915 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3918 /* Variable sized types are passed by reference. */
3919 static bool
3920 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3921 enum machine_mode mode ATTRIBUTE_UNUSED,
3922 const_tree type, bool named ATTRIBUTE_UNUSED)
3924 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3928 /* Var args. */
3930 /* Create and return the va_list datatype.
3932 On SPU, va_list is an array type equivalent to
3934 typedef struct __va_list_tag
3936 void *__args __attribute__((__aligned(16)));
3937 void *__skip __attribute__((__aligned(16)));
3939 } va_list[1];
3941 where __args points to the arg that will be returned by the next
3942 va_arg(), and __skip points to the previous stack frame such that
3943 when __args == __skip we should advance __args by 32 bytes. */
3944 static tree
3945 spu_build_builtin_va_list (void)
3947 tree f_args, f_skip, record, type_decl;
3948 bool owp;
3950 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3952 type_decl =
3953 build_decl (BUILTINS_LOCATION,
3954 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3956 f_args = build_decl (BUILTINS_LOCATION,
3957 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3958 f_skip = build_decl (BUILTINS_LOCATION,
3959 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3961 DECL_FIELD_CONTEXT (f_args) = record;
3962 DECL_ALIGN (f_args) = 128;
3963 DECL_USER_ALIGN (f_args) = 1;
3965 DECL_FIELD_CONTEXT (f_skip) = record;
3966 DECL_ALIGN (f_skip) = 128;
3967 DECL_USER_ALIGN (f_skip) = 1;
3969 TREE_CHAIN (record) = type_decl;
3970 TYPE_NAME (record) = type_decl;
3971 TYPE_FIELDS (record) = f_args;
3972 TREE_CHAIN (f_args) = f_skip;
3974 /* We know this is being padded and that is what we want.  It is an internal
3975 type so hide the warnings from the user. */
3976 owp = warn_padded;
3977 warn_padded = false;
3979 layout_type (record);
3981 warn_padded = owp;
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record, build_index_type (size_zero_node));
3987 /* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3990 The following global variables are used to initialize
3991 the va_list structure:
3993 crtl->args.info;
3994 the CUMULATIVE_ARGS for this function
3996 crtl->args.arg_offset_rtx:
3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
4000 static void
4001 spu_va_start (tree valist, rtx nextarg)
4003 tree f_args, f_skip;
4004 tree args, skip, t;
4006 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4007 f_skip = TREE_CHAIN (f_args);
4009 valist = build_va_arg_indirect_ref (valist);
4010 args =
4011 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4012 skip =
4013 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4015 /* Find the __args area. */
4016 t = make_tree (TREE_TYPE (args), nextarg);
4017 if (crtl->args.pretend_args_size > 0)
4018 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4019 size_int (-STACK_POINTER_OFFSET));
4020 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4021 TREE_SIDE_EFFECTS (t) = 1;
4022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4024 /* Find the __skip area. */
4025 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4026 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4027 size_int (crtl->args.pretend_args_size
4028 - STACK_POINTER_OFFSET));
4029 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4030 TREE_SIDE_EFFECTS (t) = 1;
4031 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4034 /* Gimplify va_arg by updating the va_list structure
4035 VALIST as required to retrieve an argument of type
4036 TYPE, and returning that argument.
4038 ret = va_arg(VALIST, TYPE);
4040 generates code equivalent to:
4042 paddedsize = (sizeof(TYPE) + 15) & -16;
4043 if (VALIST.__args + paddedsize > VALIST.__skip
4044 && VALIST.__args <= VALIST.__skip)
4045 addr = VALIST.__skip + 32;
4046 else
4047 addr = VALIST.__args;
4048 VALIST.__args = addr + paddedsize;
4049 ret = *(TYPE *)addr;
4051 static tree
4052 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4053 gimple_seq * post_p ATTRIBUTE_UNUSED)
4055 tree f_args, f_skip;
4056 tree args, skip;
4057 HOST_WIDE_INT size, rsize;
4058 tree paddedsize, addr, tmp;
4059 bool pass_by_reference_p;
4061 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4062 f_skip = TREE_CHAIN (f_args);
4064 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4065 args =
4066 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4067 skip =
4068 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4070 addr = create_tmp_var (ptr_type_node, "va_arg");
4072 /* if an object is dynamically sized, a pointer to it is passed
4073 instead of the object itself. */
4074 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4075 false);
4076 if (pass_by_reference_p)
4077 type = build_pointer_type (type);
4078 size = int_size_in_bytes (type);
4079 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4081 /* build conditional expression to calculate addr. The expression
4082 will be gimplified later. */
4083 paddedsize = size_int (rsize);
4084 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4085 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4086 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4087 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4088 unshare_expr (skip)));
4090 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4091 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4092 size_int (32)), unshare_expr (args));
4094 gimplify_assign (addr, tmp, pre_p);
4096 /* update VALIST.__args */
4097 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4098 gimplify_assign (unshare_expr (args), tmp, pre_p);
4100 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4101 addr);
4103 if (pass_by_reference_p)
4104 addr = build_va_arg_indirect_ref (addr);
4106 return build_va_arg_indirect_ref (addr);
4109 /* Save parameter registers starting with the register that corresponds
4110 to the first unnamed parameters. If the first unnamed parameter is
4111 in the stack then save no registers. Set pretend_args_size to the
4112 amount of space needed to save the registers. */
4113 void
4114 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4115 tree type, int *pretend_size, int no_rtl)
4117 if (!no_rtl)
4119 rtx tmp;
4120 int regno;
4121 int offset;
4122 int ncum = *cum;
4124 /* cum currently points to the last named argument; we want to
4125 start at the next argument. */
4126 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4128 offset = -STACK_POINTER_OFFSET;
4129 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4131 tmp = gen_frame_mem (V4SImode,
4132 plus_constant (virtual_incoming_args_rtx,
4133 offset));
4134 emit_move_insn (tmp,
4135 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4136 offset += 16;
4138 *pretend_size = offset + STACK_POINTER_OFFSET;
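/* When generating PIC code, mark the PIC offset table register as fixed
   and call-used.  */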
4142 void
4143 spu_conditional_register_usage (void)
4145 if (flag_pic)
4147 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4148 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4152 /* This is called any time we inspect the alignment of a register for
4153 addresses. */
4154 static int
4155 reg_aligned_for_addr (rtx x)
4157 int regno =
4158 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4159 return REGNO_POINTER_ALIGN (regno) >= 128;
4162 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4163 into its SYMBOL_REF_FLAGS. */
4164 static void
4165 spu_encode_section_info (tree decl, rtx rtl, int first)
4167 default_encode_section_info (decl, rtl, first);
4169 /* If a variable has a forced alignment to < 16 bytes, mark it with
4170 SYMBOL_FLAG_ALIGN1. */
4171 if (TREE_CODE (decl) == VAR_DECL
4172 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4173 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4176 /* Return TRUE if we are certain the mem refers to a complete object
4177 which is both 16-byte aligned and padded to a 16-byte boundary. This
4178 would make it safe to store with a single instruction.
4179 We guarantee the alignment and padding for static objects by aligning
4180 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4181 FIXME: We currently cannot guarantee this for objects on the stack
4182 because assign_parm_setup_stack calls assign_stack_local with the
4183 alignment of the parameter mode and in that case the alignment never
4184 gets adjusted by LOCAL_ALIGNMENT. */
4185 static int
4186 store_with_one_insn_p (rtx mem)
4188 enum machine_mode mode = GET_MODE (mem);
4189 rtx addr = XEXP (mem, 0);
4190 if (mode == BLKmode)
4191 return 0;
4192 if (GET_MODE_SIZE (mode) >= 16)
4193 return 1;
4194 /* Only static objects. */
4195 if (GET_CODE (addr) == SYMBOL_REF)
4197 /* We use the associated declaration to make sure the access is
4198 referring to the whole object.
4199 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4200 if it is necessary. Will there be cases where one exists, and
4201 the other does not? Will there be cases where both exist, but
4202 have different types? */
4203 tree decl = MEM_EXPR (mem);
4204 if (decl
4205 && TREE_CODE (decl) == VAR_DECL
4206 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4207 return 1;
4208 decl = SYMBOL_REF_DECL (addr);
4209 if (decl
4210 && TREE_CODE (decl) == VAR_DECL
4211 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4212 return 1;
4214 return 0;
4217 /* Return 1 when the address is not valid for a simple load and store as
4218 required by the '_mov*' patterns. We could make this less strict
4219 for loads, but we prefer MEMs to look the same so they are more
4220 likely to be merged. */
4221 static int
4222 address_needs_split (rtx mem)
4224 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4225 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4226 || !(store_with_one_insn_p (mem)
4227 || mem_is_padded_component_ref (mem))))
4228 return 1;
4230 return 0;
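/* Expand a move of mode MODE between OPS[0] and OPS[1].  Return nonzero
   when the move has been fully expanded here, zero when the caller should
   fall back to the standard move pattern.  */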
4234 spu_expand_mov (rtx * ops, enum machine_mode mode)
4236 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4237 abort ();
4239 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4241 rtx from = SUBREG_REG (ops[1]);
4242 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4244 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4245 && GET_MODE_CLASS (imode) == MODE_INT
4246 && subreg_lowpart_p (ops[1]));
4248 if (GET_MODE_SIZE (imode) < 4)
4249 imode = SImode;
4250 if (imode != GET_MODE (from))
4251 from = gen_rtx_SUBREG (imode, from, 0);
4253 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4255 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4256 emit_insn (GEN_FCN (icode) (ops[0], from));
4258 else
4259 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4260 return 1;
4263 /* At least one of the operands needs to be a register. */
4264 if ((reload_in_progress | reload_completed) == 0
4265 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4267 rtx temp = force_reg (mode, ops[1]);
4268 emit_move_insn (ops[0], temp);
4269 return 1;
4271 if (reload_in_progress || reload_completed)
4273 if (CONSTANT_P (ops[1]))
4274 return spu_split_immediate (ops);
4275 return 0;
4278 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4279 extend them. */
4280 if (GET_CODE (ops[1]) == CONST_INT)
4282 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4283 if (val != INTVAL (ops[1]))
4285 emit_move_insn (ops[0], GEN_INT (val));
4286 return 1;
4289 if (MEM_P (ops[0]))
4290 return spu_split_store (ops);
4291 if (MEM_P (ops[1]))
4292 return spu_split_load (ops);
4294 return 0;
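/* Copy the value held in the preferred slot of the TImode register SRC
   into DST, which has a narrower mode.  */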
4297 static void
4298 spu_convert_move (rtx dst, rtx src)
4300 enum machine_mode mode = GET_MODE (dst);
4301 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4302 rtx reg;
4303 gcc_assert (GET_MODE (src) == TImode);
4304 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4305 emit_insn (gen_rtx_SET (VOIDmode, reg,
4306 gen_rtx_TRUNCATE (int_mode,
4307 gen_rtx_LSHIFTRT (TImode, src,
4308 GEN_INT (int_mode == DImode ? 64 : 96)))));
4309 if (int_mode != mode)
4311 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4312 emit_move_insn (dst, reg);
4316 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4317 the address from SRC and SRC+16. Return a REG or CONST_INT that
4318 specifies how many bytes to rotate the loaded registers, plus any
4319 extra from EXTRA_ROTQBY. The address and rotate amounts are
4320 normalized to improve merging of loads and rotate computations. */
4321 static rtx
4322 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4324 rtx addr = XEXP (src, 0);
4325 rtx p0, p1, rot, addr0, addr1;
4326 int rot_amt;
4328 rot = 0;
4329 rot_amt = 0;
4331 if (MEM_ALIGN (src) >= 128)
4332 /* Address is already aligned; simply perform a TImode load. */ ;
4333 else if (GET_CODE (addr) == PLUS)
4335 /* 8 cases:
4336 aligned reg + aligned reg => lqx
4337 aligned reg + unaligned reg => lqx, rotqby
4338 aligned reg + aligned const => lqd
4339 aligned reg + unaligned const => lqd, rotqbyi
4340 unaligned reg + aligned reg => lqx, rotqby
4341 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4342 unaligned reg + aligned const => lqd, rotqby
4343 unaligned reg + unaligned const -> not allowed by legitimate address
4345 p0 = XEXP (addr, 0);
4346 p1 = XEXP (addr, 1);
4347 if (!reg_aligned_for_addr (p0))
4349 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4351 rot = gen_reg_rtx (SImode);
4352 emit_insn (gen_addsi3 (rot, p0, p1));
4354 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4356 if (INTVAL (p1) > 0
4357 && REG_POINTER (p0)
4358 && INTVAL (p1) * BITS_PER_UNIT
4359 < REGNO_POINTER_ALIGN (REGNO (p0)))
4361 rot = gen_reg_rtx (SImode);
4362 emit_insn (gen_addsi3 (rot, p0, p1));
4363 addr = p0;
4365 else
4367 rtx x = gen_reg_rtx (SImode);
4368 emit_move_insn (x, p1);
4369 if (!spu_arith_operand (p1, SImode))
4370 p1 = x;
4371 rot = gen_reg_rtx (SImode);
4372 emit_insn (gen_addsi3 (rot, p0, p1));
4373 addr = gen_rtx_PLUS (Pmode, p0, x);
4376 else
4377 rot = p0;
4379 else
4381 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4383 rot_amt = INTVAL (p1) & 15;
4384 if (INTVAL (p1) & -16)
4386 p1 = GEN_INT (INTVAL (p1) & -16);
4387 addr = gen_rtx_PLUS (SImode, p0, p1);
4389 else
4390 addr = p0;
4392 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4393 rot = p1;
4396 else if (REG_P (addr))
4398 if (!reg_aligned_for_addr (addr))
4399 rot = addr;
4401 else if (GET_CODE (addr) == CONST)
4403 if (GET_CODE (XEXP (addr, 0)) == PLUS
4404 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4405 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4407 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4408 if (rot_amt & -16)
4409 addr = gen_rtx_CONST (Pmode,
4410 gen_rtx_PLUS (Pmode,
4411 XEXP (XEXP (addr, 0), 0),
4412 GEN_INT (rot_amt & -16)));
4413 else
4414 addr = XEXP (XEXP (addr, 0), 0);
4416 else
4418 rot = gen_reg_rtx (Pmode);
4419 emit_move_insn (rot, addr);
4422 else if (GET_CODE (addr) == CONST_INT)
4424 rot_amt = INTVAL (addr);
4425 addr = GEN_INT (rot_amt & -16);
4427 else if (!ALIGNED_SYMBOL_REF_P (addr))
4429 rot = gen_reg_rtx (Pmode);
4430 emit_move_insn (rot, addr);
4433 rot_amt += extra_rotby;
4435 rot_amt &= 15;
4437 if (rot && rot_amt)
4439 rtx x = gen_reg_rtx (SImode);
4440 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4441 rot = x;
4442 rot_amt = 0;
4444 if (!rot && rot_amt)
4445 rot = GEN_INT (rot_amt);
4447 addr0 = copy_rtx (addr);
4448 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4449 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4451 if (dst1)
4453 addr1 = plus_constant (copy_rtx (addr), 16);
4454 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4455 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4458 return rot;
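/* Split a load of a value narrower than a quadword: load the containing
   quadword, rotate the data into the preferred slot and convert it into
   OPS[0].  Return 1 when code was emitted, 0 when the mode is at least a
   quadword and no split is needed.  */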
4462 spu_split_load (rtx * ops)
4464 enum machine_mode mode = GET_MODE (ops[0]);
4465 rtx addr, load, rot;
4466 int rot_amt;
4468 if (GET_MODE_SIZE (mode) >= 16)
4469 return 0;
4471 addr = XEXP (ops[1], 0);
4472 gcc_assert (GET_CODE (addr) != AND);
4474 if (!address_needs_split (ops[1]))
4476 ops[1] = change_address (ops[1], TImode, addr);
4477 load = gen_reg_rtx (TImode);
4478 emit_insn (gen__movti (load, ops[1]));
4479 spu_convert_move (ops[0], load);
4480 return 1;
4483 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4485 load = gen_reg_rtx (TImode);
4486 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4488 if (rot)
4489 emit_insn (gen_rotqby_ti (load, load, rot));
4491 spu_convert_move (ops[0], load);
4492 return 1;
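/* Split a store of a value narrower than a quadword.  Unless the store
   can be done with a single instruction, load the containing quadword,
   merge in OPS[1] using a generated insertion mask and store the whole
   quadword back.  Return 1 when code was emitted, 0 otherwise.  */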
4496 spu_split_store (rtx * ops)
4498 enum machine_mode mode = GET_MODE (ops[0]);
4499 rtx reg;
4500 rtx addr, p0, p1, p1_lo, smem;
4501 int aform;
4502 int scalar;
4504 if (GET_MODE_SIZE (mode) >= 16)
4505 return 0;
4507 addr = XEXP (ops[0], 0);
4508 gcc_assert (GET_CODE (addr) != AND);
4510 if (!address_needs_split (ops[0]))
4512 reg = gen_reg_rtx (TImode);
4513 emit_insn (gen_spu_convert (reg, ops[1]));
4514 ops[0] = change_address (ops[0], TImode, addr);
4515 emit_move_insn (ops[0], reg);
4516 return 1;
4519 if (GET_CODE (addr) == PLUS)
4521 /* 8 cases:
4522 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4523 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4524 aligned reg + aligned const => lqd, c?d, shuf, stqx
4525 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4526 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4527 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4528 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4529 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4531 aform = 0;
4532 p0 = XEXP (addr, 0);
4533 p1 = p1_lo = XEXP (addr, 1);
4534 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4536 p1_lo = GEN_INT (INTVAL (p1) & 15);
4537 if (reg_aligned_for_addr (p0))
4539 p1 = GEN_INT (INTVAL (p1) & -16);
4540 if (p1 == const0_rtx)
4541 addr = p0;
4542 else
4543 addr = gen_rtx_PLUS (SImode, p0, p1);
4545 else
4547 rtx x = gen_reg_rtx (SImode);
4548 emit_move_insn (x, p1);
4549 addr = gen_rtx_PLUS (SImode, p0, x);
4553 else if (REG_P (addr))
4555 aform = 0;
4556 p0 = addr;
4557 p1 = p1_lo = const0_rtx;
4559 else
4561 aform = 1;
4562 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4563 p1 = 0; /* aform doesn't use p1 */
4564 p1_lo = addr;
4565 if (ALIGNED_SYMBOL_REF_P (addr))
4566 p1_lo = const0_rtx;
4567 else if (GET_CODE (addr) == CONST
4568 && GET_CODE (XEXP (addr, 0)) == PLUS
4569 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4570 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4572 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4573 if ((v & -16) != 0)
4574 addr = gen_rtx_CONST (Pmode,
4575 gen_rtx_PLUS (Pmode,
4576 XEXP (XEXP (addr, 0), 0),
4577 GEN_INT (v & -16)));
4578 else
4579 addr = XEXP (XEXP (addr, 0), 0);
4580 p1_lo = GEN_INT (v & 15);
4582 else if (GET_CODE (addr) == CONST_INT)
4584 p1_lo = GEN_INT (INTVAL (addr) & 15);
4585 addr = GEN_INT (INTVAL (addr) & -16);
4587 else
4589 p1_lo = gen_reg_rtx (SImode);
4590 emit_move_insn (p1_lo, addr);
4594 reg = gen_reg_rtx (TImode);
4596 scalar = store_with_one_insn_p (ops[0]);
4597 if (!scalar)
4599 /* We could copy the flags from the ops[0] MEM to mem here, but
4600 we don't because we want this load to be optimized away if
4601 possible, and copying the flags will prevent that in certain
4602 cases, e.g. consider the volatile flag. */
4604 rtx pat = gen_reg_rtx (TImode);
4605 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4606 set_mem_alias_set (lmem, 0);
4607 emit_insn (gen_movti (reg, lmem));
4609 if (!p0 || reg_aligned_for_addr (p0))
4610 p0 = stack_pointer_rtx;
4611 if (!p1_lo)
4612 p1_lo = const0_rtx;
4614 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4615 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4617 else
4619 if (GET_CODE (ops[1]) == REG)
4620 emit_insn (gen_spu_convert (reg, ops[1]));
4621 else if (GET_CODE (ops[1]) == SUBREG)
4622 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4623 else
4624 abort ();
4627 if (GET_MODE_SIZE (mode) < 4 && scalar)
4628 emit_insn (gen_ashlti3
4629 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4631 smem = change_address (ops[0], TImode, copy_rtx (addr));
4632 /* We can't use the previous alias set because the memory has changed
4633 size and can potentially overlap objects of other types. */
4634 set_mem_alias_set (smem, 0);
4636 emit_insn (gen_movti (smem, reg));
4637 return 1;
4640 /* Return TRUE if X is MEM which is a struct member reference
4641 and the member can safely be loaded and stored with a single
4642 instruction because it is padded. */
4643 static int
4644 mem_is_padded_component_ref (rtx x)
4646 tree t = MEM_EXPR (x);
4647 tree r;
4648 if (!t || TREE_CODE (t) != COMPONENT_REF)
4649 return 0;
4650 t = TREE_OPERAND (t, 1);
4651 if (!t || TREE_CODE (t) != FIELD_DECL
4652 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4653 return 0;
4654 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4655 r = DECL_FIELD_CONTEXT (t);
4656 if (!r || TREE_CODE (r) != RECORD_TYPE)
4657 return 0;
4658 /* Make sure they are the same mode */
4659 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4660 return 0;
4661 /* If there are no following fields then the field alignment assures
4662 the structure is padded to the alignment which means this field is
4663 padded too. */
4664 if (TREE_CHAIN (t) == 0)
4665 return 1;
4666 /* If the following field is also aligned then this field will be
4667 padded. */
4668 t = TREE_CHAIN (t);
4669 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4670 return 1;
4671 return 0;
4674 /* Parse the -mfixed-range= option string. */
4675 static void
4676 fix_range (const char *const_str)
4678 int i, first, last;
4679 char *str, *dash, *comma;
4681 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4682 REG2 are either register names or register numbers. The effect
4683 of this option is to mark the registers in the range from REG1 to
4684 REG2 as ``fixed'' so they won't be used by the compiler. */
4686 i = strlen (const_str);
4687 str = (char *) alloca (i + 1);
4688 memcpy (str, const_str, i + 1);
4690 while (1)
4692 dash = strchr (str, '-');
4693 if (!dash)
4695 warning (0, "value of -mfixed-range must have form REG1-REG2");
4696 return;
4698 *dash = '\0';
4699 comma = strchr (dash + 1, ',');
4700 if (comma)
4701 *comma = '\0';
4703 first = decode_reg_name (str);
4704 if (first < 0)
4706 warning (0, "unknown register name: %s", str);
4707 return;
4710 last = decode_reg_name (dash + 1);
4711 if (last < 0)
4713 warning (0, "unknown register name: %s", dash + 1);
4714 return;
4717 *dash = '-';
4719 if (first > last)
4721 warning (0, "%s-%s is an empty range", str, dash + 1);
4722 return;
4725 for (i = first; i <= last; ++i)
4726 fixed_regs[i] = call_used_regs[i] = 1;
4728 if (!comma)
4729 break;
4731 *comma = ',';
4732 str = comma + 1;
4736 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4737 can be generated using the fsmbi instruction. */
4739 fsmbi_const_p (rtx x)
4741 if (CONSTANT_P (x))
4743 /* We can always choose TImode for CONST_INT because the high bits
4744 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4745 enum immediate_class c = classify_immediate (x, TImode);
4746 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4748 return 0;
4751 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4752 can be generated using the cbd, chd, cwd or cdd instruction. */
4754 cpat_const_p (rtx x, enum machine_mode mode)
4756 if (CONSTANT_P (x))
4758 enum immediate_class c = classify_immediate (x, mode);
4759 return c == IC_CPAT;
4761 return 0;
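/* Try to build the TImode shuffle-pattern constant that a cbd, chd, cwd
   or cdd instruction would produce for the operands in OPS.  Return 0 when
   the operands do not allow a constant pattern.  */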
4765 gen_cpat_const (rtx * ops)
4767 unsigned char dst[16];
4768 int i, offset, shift, isize;
4769 if (GET_CODE (ops[3]) != CONST_INT
4770 || GET_CODE (ops[2]) != CONST_INT
4771 || (GET_CODE (ops[1]) != CONST_INT
4772 && GET_CODE (ops[1]) != REG))
4773 return 0;
4774 if (GET_CODE (ops[1]) == REG
4775 && (!REG_POINTER (ops[1])
4776 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4777 return 0;
4779 for (i = 0; i < 16; i++)
4780 dst[i] = i + 16;
4781 isize = INTVAL (ops[3]);
4782 if (isize == 1)
4783 shift = 3;
4784 else if (isize == 2)
4785 shift = 2;
4786 else
4787 shift = 0;
4788 offset = (INTVAL (ops[2]) +
4789 (GET_CODE (ops[1]) ==
4790 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4791 for (i = 0; i < isize; i++)
4792 dst[offset + i] = i + shift;
4793 return array_to_constant (TImode, dst);
4796 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4797 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4798 than 16 bytes, the value is repeated across the rest of the array. */
4799 void
4800 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4802 HOST_WIDE_INT val;
4803 int i, j, first;
4805 memset (arr, 0, 16);
4806 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4807 if (GET_CODE (x) == CONST_INT
4808 || (GET_CODE (x) == CONST_DOUBLE
4809 && (mode == SFmode || mode == DFmode)))
4811 gcc_assert (mode != VOIDmode && mode != BLKmode);
4813 if (GET_CODE (x) == CONST_DOUBLE)
4814 val = const_double_to_hwint (x);
4815 else
4816 val = INTVAL (x);
4817 first = GET_MODE_SIZE (mode) - 1;
4818 for (i = first; i >= 0; i--)
4820 arr[i] = val & 0xff;
4821 val >>= 8;
4823 /* Splat the constant across the whole array. */
4824 for (j = 0, i = first + 1; i < 16; i++)
4826 arr[i] = arr[j];
4827 j = (j == first) ? 0 : j + 1;
4830 else if (GET_CODE (x) == CONST_DOUBLE)
4832 val = CONST_DOUBLE_LOW (x);
4833 for (i = 15; i >= 8; i--)
4835 arr[i] = val & 0xff;
4836 val >>= 8;
4838 val = CONST_DOUBLE_HIGH (x);
4839 for (i = 7; i >= 0; i--)
4841 arr[i] = val & 0xff;
4842 val >>= 8;
4845 else if (GET_CODE (x) == CONST_VECTOR)
4847 int units;
4848 rtx elt;
4849 mode = GET_MODE_INNER (mode);
4850 units = CONST_VECTOR_NUNITS (x);
4851 for (i = 0; i < units; i++)
4853 elt = CONST_VECTOR_ELT (x, i);
4854 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4856 if (GET_CODE (elt) == CONST_DOUBLE)
4857 val = const_double_to_hwint (elt);
4858 else
4859 val = INTVAL (elt);
4860 first = GET_MODE_SIZE (mode) - 1;
4861 if (first + i * GET_MODE_SIZE (mode) > 16)
4862 abort ();
4863 for (j = first; j >= 0; j--)
4865 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4866 val >>= 8;
4871 else
4872 gcc_unreachable();
4875 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4876 smaller than 16 bytes, use the bytes that would represent that value
4877 in a register, e.g., for QImode return the value of arr[3]. */
4879 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4881 enum machine_mode inner_mode;
4882 rtvec v;
4883 int units, size, i, j, k;
4884 HOST_WIDE_INT val;
4886 if (GET_MODE_CLASS (mode) == MODE_INT
4887 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4889 j = GET_MODE_SIZE (mode);
4890 i = j < 4 ? 4 - j : 0;
4891 for (val = 0; i < j; i++)
4892 val = (val << 8) | arr[i];
4893 val = trunc_int_for_mode (val, mode);
4894 return GEN_INT (val);
4897 if (mode == TImode)
4899 HOST_WIDE_INT high;
4900 for (i = high = 0; i < 8; i++)
4901 high = (high << 8) | arr[i];
4902 for (i = 8, val = 0; i < 16; i++)
4903 val = (val << 8) | arr[i];
4904 return immed_double_const (val, high, TImode);
4906 if (mode == SFmode)
4908 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4909 val = trunc_int_for_mode (val, SImode);
4910 return hwint_to_const_double (SFmode, val);
4912 if (mode == DFmode)
4914 for (i = 0, val = 0; i < 8; i++)
4915 val = (val << 8) | arr[i];
4916 return hwint_to_const_double (DFmode, val);
4919 if (!VECTOR_MODE_P (mode))
4920 abort ();
4922 units = GET_MODE_NUNITS (mode);
4923 size = GET_MODE_UNIT_SIZE (mode);
4924 inner_mode = GET_MODE_INNER (mode);
4925 v = rtvec_alloc (units);
4927 for (k = i = 0; i < units; ++i)
4929 val = 0;
4930 for (j = 0; j < size; j++, k++)
4931 val = (val << 8) | arr[k];
4933 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4934 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4935 else
4936 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4938 if (k > 16)
4939 abort ();
4941 return gen_rtx_CONST_VECTOR (mode, v);
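/* Diagnose a run-time relocation against X, as a warning or an error
   depending on whether TARGET_WARN_RELOC or TARGET_ERROR_RELOC is set.  */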
4944 static void
4945 reloc_diagnostic (rtx x)
4947 tree decl = 0;
4948 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4949 return;
4951 if (GET_CODE (x) == SYMBOL_REF)
4952 decl = SYMBOL_REF_DECL (x);
4953 else if (GET_CODE (x) == CONST
4954 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4955 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4957 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4958 if (decl && !DECL_P (decl))
4959 decl = 0;
4961 /* The decl could be a string constant. */
4962 if (decl && DECL_P (decl))
4964 location_t loc;
4965 /* We use last_assemble_variable_decl to get line information. It's
4966 not always going to be right and might not even be close, but will
4967 be right for the more common cases. */
4968 if (!last_assemble_variable_decl || in_section == ctors_section)
4969 loc = DECL_SOURCE_LOCATION (decl);
4970 else
4971 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
4973 if (TARGET_WARN_RELOC)
4974 warning_at (loc, 0,
4975 "creating run-time relocation for %qD", decl);
4976 else
4977 error_at (loc,
4978 "creating run-time relocation for %qD", decl);
4980 else
4982 if (TARGET_WARN_RELOC)
4983 warning_at (input_location, 0, "creating run-time relocation");
4984 else
4985 error_at (input_location, "creating run-time relocation");
4989 /* Hook into assemble_integer so we can generate an error for run-time
4990 relocations. The SPU ABI disallows them. */
4991 static bool
4992 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4994 /* By default run-time relocations aren't supported, but we allow them
4995 in case users support them in their own run-time loader. And we provide
4996 a warning for those users that don't. */
4997 if ((GET_CODE (x) == SYMBOL_REF)
4998 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4999 reloc_diagnostic (x);
5001 return default_assemble_integer (x, size, aligned_p);
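/* Output an assembler directive making NAME global.  */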
5004 static void
5005 spu_asm_globalize_label (FILE * file, const char *name)
5007 fputs ("\t.global\t", file);
5008 assemble_name (file, name);
5009 fputs ("\n", file);
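/* Estimate the cost of rtx X in units of COSTS_N_INSNS.  */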
5012 static bool
5013 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5014 bool speed ATTRIBUTE_UNUSED)
5016 enum machine_mode mode = GET_MODE (x);
5017 int cost = COSTS_N_INSNS (2);
5019 /* Folding to a CONST_VECTOR will use extra space but there might
5020 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5021 only if it allows us to fold away multiple insns. Changing the cost
5022 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5023 because this cost will only be compared against a single insn.
5024 if (code == CONST_VECTOR)
5025 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5028 /* Use defaults for float operations. Not accurate but good enough. */
5029 if (mode == DFmode)
5031 *total = COSTS_N_INSNS (13);
5032 return true;
5034 if (mode == SFmode)
5036 *total = COSTS_N_INSNS (6);
5037 return true;
5039 switch (code)
5041 case CONST_INT:
5042 if (satisfies_constraint_K (x))
5043 *total = 0;
5044 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5045 *total = COSTS_N_INSNS (1);
5046 else
5047 *total = COSTS_N_INSNS (3);
5048 return true;
5050 case CONST:
5051 *total = COSTS_N_INSNS (3);
5052 return true;
5054 case LABEL_REF:
5055 case SYMBOL_REF:
5056 *total = COSTS_N_INSNS (0);
5057 return true;
5059 case CONST_DOUBLE:
5060 *total = COSTS_N_INSNS (5);
5061 return true;
5063 case FLOAT_EXTEND:
5064 case FLOAT_TRUNCATE:
5065 case FLOAT:
5066 case UNSIGNED_FLOAT:
5067 case FIX:
5068 case UNSIGNED_FIX:
5069 *total = COSTS_N_INSNS (7);
5070 return true;
5072 case PLUS:
5073 if (mode == TImode)
5075 *total = COSTS_N_INSNS (9);
5076 return true;
5078 break;
5080 case MULT:
5081 cost =
5082 GET_CODE (XEXP (x, 0)) ==
5083 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5084 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5086 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5088 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5089 cost = COSTS_N_INSNS (14);
5090 if ((val & 0xffff) == 0)
5091 cost = COSTS_N_INSNS (9);
5092 else if (val > 0 && val < 0x10000)
5093 cost = COSTS_N_INSNS (11);
5096 *total = cost;
5097 return true;
5098 case DIV:
5099 case UDIV:
5100 case MOD:
5101 case UMOD:
5102 *total = COSTS_N_INSNS (20);
5103 return true;
5104 case ROTATE:
5105 case ROTATERT:
5106 case ASHIFT:
5107 case ASHIFTRT:
5108 case LSHIFTRT:
5109 *total = COSTS_N_INSNS (4);
5110 return true;
5111 case UNSPEC:
5112 if (XINT (x, 1) == UNSPEC_CONVERT)
5113 *total = COSTS_N_INSNS (0);
5114 else
5115 *total = COSTS_N_INSNS (4);
5116 return true;
5118 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5119 if (GET_MODE_CLASS (mode) == MODE_INT
5120 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5121 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5122 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5123 *total = cost;
5124 return true;
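/* Return the mode to be used for unwinding; SImode on SPU.  */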
5127 static enum machine_mode
5128 spu_unwind_word_mode (void)
5130 return SImode;
5133 /* Decide whether we can make a sibling call to a function. DECL is the
5134 declaration of the function being targeted by the call and EXP is the
5135 CALL_EXPR representing the call. */
5136 static bool
5137 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5139 return decl && !TARGET_LARGE_MEM;
5142 /* We need to correctly update the back chain pointer and the Available
5143 Stack Size (which is in the second slot of the sp register.) */
5144 void
5145 spu_allocate_stack (rtx op0, rtx op1)
5147 HOST_WIDE_INT v;
5148 rtx chain = gen_reg_rtx (V4SImode);
5149 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5150 rtx sp = gen_reg_rtx (V4SImode);
5151 rtx splatted = gen_reg_rtx (V4SImode);
5152 rtx pat = gen_reg_rtx (TImode);
5154 /* copy the back chain so we can save it back again. */
5155 emit_move_insn (chain, stack_bot);
5157 op1 = force_reg (SImode, op1);
5159 v = 0x1020300010203ll;
5160 emit_move_insn (pat, immed_double_const (v, v, TImode));
5161 emit_insn (gen_shufb (splatted, op1, op1, pat));
5163 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5164 emit_insn (gen_subv4si3 (sp, sp, splatted));
5166 if (flag_stack_check)
5168 rtx avail = gen_reg_rtx(SImode);
5169 rtx result = gen_reg_rtx(SImode);
5170 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5171 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5172 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5175 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5177 emit_move_insn (stack_bot, chain);
5179 emit_move_insn (op0, virtual_stack_dynamic_rtx);
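/* Restore the stack pointer and back chain from the save area OP1 for a
   nonlocal goto, recomputing the Available Stack Size slots of both.  */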
5182 void
5183 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5185 static unsigned char arr[16] =
5186 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5187 rtx temp = gen_reg_rtx (SImode);
5188 rtx temp2 = gen_reg_rtx (SImode);
5189 rtx temp3 = gen_reg_rtx (V4SImode);
5190 rtx temp4 = gen_reg_rtx (V4SImode);
5191 rtx pat = gen_reg_rtx (TImode);
5192 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5194 /* Restore the backchain from the first word, sp from the second. */
5195 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5196 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5198 emit_move_insn (pat, array_to_constant (TImode, arr));
5200 /* Compute Available Stack Size for sp */
5201 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5202 emit_insn (gen_shufb (temp3, temp, temp, pat));
5204 /* Compute Available Stack Size for back chain */
5205 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5206 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5207 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5209 emit_insn (gen_addv4si3 (sp, sp, temp3));
5210 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
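/* Register the libcall names used for DImode and TImode arithmetic and
   for unsigned integer to double-float conversions.  */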
5213 static void
5214 spu_init_libfuncs (void)
5216 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5217 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5218 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5219 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5220 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5221 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5222 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5223 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5224 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5225 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5226 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5228 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5229 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5231 set_optab_libfunc (smul_optab, TImode, "__multi3");
5232 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5233 set_optab_libfunc (smod_optab, TImode, "__modti3");
5234 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5235 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5236 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5239 /* Make a subreg, stripping any existing subreg. We could possibly just
5240 call simplify_subreg, but in this case we know what we want. */
5242 spu_gen_subreg (enum machine_mode mode, rtx x)
5244 if (GET_CODE (x) == SUBREG)
5245 x = SUBREG_REG (x);
5246 if (GET_MODE (x) == mode)
5247 return x;
5248 return gen_rtx_SUBREG (mode, x, 0);
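/* Return true when a value of TYPE must be returned in memory; only
   BLKmode values that do not fit in MAX_REGISTER_RETURN registers are.  */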
5251 static bool
5252 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5254 return (TYPE_MODE (type) == BLKmode
5255 && ((type) == 0
5256 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5257 || int_size_in_bytes (type) >
5258 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5261 /* Create the built-in types and functions */
5263 enum spu_function_code
5265 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5266 #include "spu-builtins.def"
5267 #undef DEF_BUILTIN
5268 NUM_SPU_BUILTINS
5271 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5273 struct spu_builtin_description spu_builtins[] = {
5274 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5275 {fcode, icode, name, type, params, NULL_TREE},
5276 #include "spu-builtins.def"
5277 #undef DEF_BUILTIN
5280 static void
5281 spu_init_builtins (void)
5283 struct spu_builtin_description *d;
5284 unsigned int i;
5286 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5287 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5288 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5289 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5290 V4SF_type_node = build_vector_type (float_type_node, 4);
5291 V2DF_type_node = build_vector_type (double_type_node, 2);
5293 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5294 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5295 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5296 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5298 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5300 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5301 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5302 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5303 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5304 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5305 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5306 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5307 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5308 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5309 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5310 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5311 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5313 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5314 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5315 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5316 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5317 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5318 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5319 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5320 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5322 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5323 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5325 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5327 spu_builtin_types[SPU_BTI_PTR] =
5328 build_pointer_type (build_qualified_type
5329 (void_type_node,
5330 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5332 /* For each builtin we build a new prototype. The tree code will make
5333 sure nodes are shared. */
5334 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5336 tree p;
5337 char name[64]; /* build_function will make a copy. */
5338 int parm;
5340 if (d->name == 0)
5341 continue;
5343 /* Find last parm. */
5344 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5347 p = void_list_node;
5348 while (parm > 1)
5349 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5351 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5353 sprintf (name, "__builtin_%s", d->name);
5354 d->fndecl =
5355 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5356 NULL, NULL_TREE);
5357 if (d->fcode == SPU_MASK_FOR_LOAD)
5358 TREE_READONLY (d->fndecl) = 1;
5360 /* These builtins don't throw. */
5361 TREE_NOTHROW (d->fndecl) = 1;
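/* Restore the stack pointer from OP1, adjusting every slot of $sp by the
   same amount so the Available Stack Size stays consistent, and copy the
   back chain quadword from the old stack bottom to the new one.  */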
5365 void
5366 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5368 static unsigned char arr[16] =
5369 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5371 rtx temp = gen_reg_rtx (Pmode);
5372 rtx temp2 = gen_reg_rtx (V4SImode);
5373 rtx temp3 = gen_reg_rtx (V4SImode);
5374 rtx pat = gen_reg_rtx (TImode);
5375 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5377 emit_move_insn (pat, array_to_constant (TImode, arr));
5379 /* Restore the sp. */
5380 emit_move_insn (temp, op1);
5381 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5383 /* Compute available stack size for sp. */
5384 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5385 emit_insn (gen_shufb (temp3, temp, temp, pat));
5387 emit_insn (gen_addv4si3 (sp, sp, temp3));
5388 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
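/* Return nonzero when TARGET_SAFE_DMA is in effect and CHANNEL is in the
   range 21 through 27.  */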
5392 spu_safe_dma (HOST_WIDE_INT channel)
5394 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
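/* Replicate the scalar OPS[1] across every element of the vector OPS[0],
   either as a constant or with a shufb.  */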
5397 void
5398 spu_builtin_splats (rtx ops[])
5400 enum machine_mode mode = GET_MODE (ops[0]);
5401 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5403 unsigned char arr[16];
5404 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5405 emit_move_insn (ops[0], array_to_constant (mode, arr));
5407 else
5409 rtx reg = gen_reg_rtx (TImode);
5410 rtx shuf;
5411 if (GET_CODE (ops[1]) != REG
5412 && GET_CODE (ops[1]) != SUBREG)
5413 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5414 switch (mode)
5416 case V2DImode:
5417 case V2DFmode:
5418 shuf =
5419 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5420 TImode);
5421 break;
5422 case V4SImode:
5423 case V4SFmode:
5424 shuf =
5425 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5426 TImode);
5427 break;
5428 case V8HImode:
5429 shuf =
5430 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5431 TImode);
5432 break;
5433 case V16QImode:
5434 shuf =
5435 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5436 TImode);
5437 break;
5438 default:
5439 abort ();
5441 emit_move_insn (reg, shuf);
5442 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
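/* Extract element OPS[2] of the vector OPS[1] into the scalar OPS[0].
   A variable index is handled by rotating the element into the preferred
   slot.  */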
5446 void
5447 spu_builtin_extract (rtx ops[])
5449 enum machine_mode mode;
5450 rtx rot, from, tmp;
5452 mode = GET_MODE (ops[1]);
5454 if (GET_CODE (ops[2]) == CONST_INT)
5456 switch (mode)
5458 case V16QImode:
5459 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5460 break;
5461 case V8HImode:
5462 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5463 break;
5464 case V4SFmode:
5465 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5466 break;
5467 case V4SImode:
5468 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5469 break;
5470 case V2DImode:
5471 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5472 break;
5473 case V2DFmode:
5474 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5475 break;
5476 default:
5477 abort ();
5479 return;
5482 from = spu_gen_subreg (TImode, ops[1]);
5483 rot = gen_reg_rtx (TImode);
5484 tmp = gen_reg_rtx (SImode);
5486 switch (mode)
5488 case V16QImode:
5489 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5490 break;
5491 case V8HImode:
5492 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5493 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5494 break;
5495 case V4SFmode:
5496 case V4SImode:
5497 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5498 break;
5499 case V2DImode:
5500 case V2DFmode:
5501 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5502 break;
5503 default:
5504 abort ();
5506 emit_insn (gen_rotqby_ti (rot, from, tmp));
5508 emit_insn (gen_spu_convert (ops[0], rot));
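/* Insert the scalar OPS[2] into element OPS[3] of the vector OPS[1],
   placing the result in OPS[0], by way of a cpat mask and a shufb.  */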
5511 void
5512 spu_builtin_insert (rtx ops[])
5514 enum machine_mode mode = GET_MODE (ops[0]);
5515 enum machine_mode imode = GET_MODE_INNER (mode);
5516 rtx mask = gen_reg_rtx (TImode);
5517 rtx offset;
5519 if (GET_CODE (ops[3]) == CONST_INT)
5520 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5521 else
5523 offset = gen_reg_rtx (SImode);
5524 emit_insn (gen_mulsi3
5525 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5527 emit_insn (gen_cpat
5528 (mask, stack_pointer_rtx, offset,
5529 GEN_INT (GET_MODE_SIZE (imode))));
5530 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
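/* Place the scalar OPS[1] into element OPS[2] of the vector OPS[0] by
   rotating it to the required byte position.  */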
5533 void
5534 spu_builtin_promote (rtx ops[])
5536 enum machine_mode mode, imode;
5537 rtx rot, from, offset;
5538 HOST_WIDE_INT pos;
5540 mode = GET_MODE (ops[0]);
5541 imode = GET_MODE_INNER (mode);
5543 from = gen_reg_rtx (TImode);
5544 rot = spu_gen_subreg (TImode, ops[0]);
5546 emit_insn (gen_spu_convert (from, ops[1]));
5548 if (GET_CODE (ops[2]) == CONST_INT)
5550 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5551 if (GET_MODE_SIZE (imode) < 4)
5552 pos += 4 - GET_MODE_SIZE (imode);
5553 offset = GEN_INT (pos & 15);
5555 else
5557 offset = gen_reg_rtx (SImode);
5558 switch (mode)
5560 case V16QImode:
5561 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5562 break;
5563 case V8HImode:
5564 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5565 emit_insn (gen_addsi3 (offset, offset, offset));
5566 break;
5567 case V4SFmode:
5568 case V4SImode:
5569 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5570 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5571 break;
5572 case V2DImode:
5573 case V2DFmode:
5574 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5575 break;
5576 default:
5577 abort ();
5580 emit_insn (gen_rotqby_ti (rot, from, offset));
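/* Fill in the trampoline at TRAMP with code that loads the static chain
   register with CXT and branches to FNADDR, then emit a sync.  */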
5583 void
5584 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5586 rtx shuf = gen_reg_rtx (V4SImode);
5587 rtx insn = gen_reg_rtx (V4SImode);
5588 rtx shufc;
5589 rtx insnc;
5590 rtx mem;
5592 fnaddr = force_reg (SImode, fnaddr);
5593 cxt = force_reg (SImode, cxt);
5595 if (TARGET_LARGE_MEM)
5597 rtx rotl = gen_reg_rtx (V4SImode);
5598 rtx mask = gen_reg_rtx (V4SImode);
5599 rtx bi = gen_reg_rtx (SImode);
5600 unsigned char shufa[16] = {
5601 2, 3, 0, 1, 18, 19, 16, 17,
5602 0, 1, 2, 3, 16, 17, 18, 19
5604 unsigned char insna[16] = {
5605 0x41, 0, 0, 79,
5606 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5607 0x60, 0x80, 0, 79,
5608 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5611 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5612 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5614 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5615 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5616 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5617 emit_insn (gen_selb (insn, insnc, rotl, mask));
5619 mem = memory_address (Pmode, tramp);
5620 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5622 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5623 mem = memory_address (Pmode, plus_constant (tramp, 16));
5624 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5626 else
5628 rtx scxt = gen_reg_rtx (SImode);
5629 rtx sfnaddr = gen_reg_rtx (SImode);
5630 unsigned char insna[16] = {
5631 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5632 0x30, 0, 0, 0,
5633 0, 0, 0, 0,
5634 0, 0, 0, 0
5637 shufc = gen_reg_rtx (TImode);
5638 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5640 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5641 fits 18 bits and the last 4 are zeros. This will be true if
5642 the stack pointer is initialized to 0x3fff0 at program start,
5643 otherwise the ila instruction will be garbage. */
5645 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5646 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5647 emit_insn (gen_cpat
5648 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5649 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5650 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5652 mem = memory_address (Pmode, tramp);
5653 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5656 emit_insn (gen_sync ());
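/* Expand a sign extension of OPS[1] into the wider OPS[0] using a shufb
   whose pattern copies the source bytes and fills the upper bytes from the
   sign.  */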
5659 void
5660 spu_expand_sign_extend (rtx ops[])
5662 unsigned char arr[16];
5663 rtx pat = gen_reg_rtx (TImode);
5664 rtx sign, c;
5665 int i, last;
5666 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5667 if (GET_MODE (ops[1]) == QImode)
5669 sign = gen_reg_rtx (HImode);
5670 emit_insn (gen_extendqihi2 (sign, ops[1]));
5671 for (i = 0; i < 16; i++)
5672 arr[i] = 0x12;
5673 arr[last] = 0x13;
5675 else
5677 for (i = 0; i < 16; i++)
5678 arr[i] = 0x10;
5679 switch (GET_MODE (ops[1]))
5681 case HImode:
5682 sign = gen_reg_rtx (SImode);
5683 emit_insn (gen_extendhisi2 (sign, ops[1]));
5684 arr[last] = 0x03;
5685 arr[last - 1] = 0x02;
5686 break;
5687 case SImode:
5688 sign = gen_reg_rtx (SImode);
5689 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5690 for (i = 0; i < 4; i++)
5691 arr[last - i] = 3 - i;
5692 break;
5693 case DImode:
5694 sign = gen_reg_rtx (SImode);
5695 c = gen_reg_rtx (SImode);
5696 emit_insn (gen_spu_convert (c, ops[1]));
5697 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5698 for (i = 0; i < 8; i++)
5699 arr[last - i] = 7 - i;
5700 break;
5701 default:
5702 abort ();
5705 emit_move_insn (pat, array_to_constant (TImode, arr));
5706 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5709 /* Expand vector initialization. If there are any constant parts,
5710 load constant parts first. Then load any non-constant parts. */
5711 void
5712 spu_expand_vector_init (rtx target, rtx vals)
5714 enum machine_mode mode = GET_MODE (target);
5715 int n_elts = GET_MODE_NUNITS (mode);
5716 int n_var = 0;
5717 bool all_same = true;
5718 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5719 int i;
5721 first = XVECEXP (vals, 0, 0);
5722 for (i = 0; i < n_elts; ++i)
5724 x = XVECEXP (vals, 0, i);
5725 if (!(CONST_INT_P (x)
5726 || GET_CODE (x) == CONST_DOUBLE
5727 || GET_CODE (x) == CONST_FIXED))
5728 ++n_var;
5729 else
5731 if (first_constant == NULL_RTX)
5732 first_constant = x;
5734 if (i > 0 && !rtx_equal_p (x, first))
5735 all_same = false;
5738 /* if all elements are the same, use splats to repeat elements */
5739 if (all_same)
5741 if (!CONSTANT_P (first)
5742 && !register_operand (first, GET_MODE (x)))
5743 first = force_reg (GET_MODE (first), first);
5744 emit_insn (gen_spu_splats (target, first));
5745 return;
5748 /* load constant parts */
5749 if (n_var != n_elts)
5751 if (n_var == 0)
5753 emit_move_insn (target,
5754 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5756 else
5758 rtx constant_parts_rtx = copy_rtx (vals);
5760 gcc_assert (first_constant != NULL_RTX);
5761 /* Fill empty slots with the first constant; this increases
5762 our chance of using splats in the recursive call below. */
5763 for (i = 0; i < n_elts; ++i)
5765 x = XVECEXP (constant_parts_rtx, 0, i);
5766 if (!(CONST_INT_P (x)
5767 || GET_CODE (x) == CONST_DOUBLE
5768 || GET_CODE (x) == CONST_FIXED))
5769 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5772 spu_expand_vector_init (target, constant_parts_rtx);
5776 /* load variable parts */
5777 if (n_var != 0)
5779 rtx insert_operands[4];
5781 insert_operands[0] = target;
5782 insert_operands[2] = target;
5783 for (i = 0; i < n_elts; ++i)
5785 x = XVECEXP (vals, 0, i);
5786 if (!(CONST_INT_P (x)
5787 || GET_CODE (x) == CONST_DOUBLE
5788 || GET_CODE (x) == CONST_FIXED))
5790 if (!register_operand (x, GET_MODE (x)))
5791 x = force_reg (GET_MODE (x), x);
5792 insert_operands[1] = x;
5793 insert_operands[3] = GEN_INT (i);
5794 spu_builtin_insert (insert_operands);
5800 /* Return insn index for the vector compare instruction for given CODE,
5801 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5803 static int
5804 get_vec_cmp_insn (enum rtx_code code,
5805 enum machine_mode dest_mode,
5806 enum machine_mode op_mode)
5809 switch (code)
5811 case EQ:
5812 if (dest_mode == V16QImode && op_mode == V16QImode)
5813 return CODE_FOR_ceq_v16qi;
5814 if (dest_mode == V8HImode && op_mode == V8HImode)
5815 return CODE_FOR_ceq_v8hi;
5816 if (dest_mode == V4SImode && op_mode == V4SImode)
5817 return CODE_FOR_ceq_v4si;
5818 if (dest_mode == V4SImode && op_mode == V4SFmode)
5819 return CODE_FOR_ceq_v4sf;
5820 if (dest_mode == V2DImode && op_mode == V2DFmode)
5821 return CODE_FOR_ceq_v2df;
5822 break;
5823 case GT:
5824 if (dest_mode == V16QImode && op_mode == V16QImode)
5825 return CODE_FOR_cgt_v16qi;
5826 if (dest_mode == V8HImode && op_mode == V8HImode)
5827 return CODE_FOR_cgt_v8hi;
5828 if (dest_mode == V4SImode && op_mode == V4SImode)
5829 return CODE_FOR_cgt_v4si;
5830 if (dest_mode == V4SImode && op_mode == V4SFmode)
5831 return CODE_FOR_cgt_v4sf;
5832 if (dest_mode == V2DImode && op_mode == V2DFmode)
5833 return CODE_FOR_cgt_v2df;
5834 break;
5835 case GTU:
5836 if (dest_mode == V16QImode && op_mode == V16QImode)
5837 return CODE_FOR_clgt_v16qi;
5838 if (dest_mode == V8HImode && op_mode == V8HImode)
5839 return CODE_FOR_clgt_v8hi;
5840 if (dest_mode == V4SImode && op_mode == V4SImode)
5841 return CODE_FOR_clgt_v4si;
5842 break;
5843 default:
5844 break;
5846 return -1;
5849 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5850 DMODE is expected destination mode. This is a recursive function. */
5852 static rtx
5853 spu_emit_vector_compare (enum rtx_code rcode,
5854 rtx op0, rtx op1,
5855 enum machine_mode dmode)
5857 int vec_cmp_insn;
5858 rtx mask;
5859 enum machine_mode dest_mode;
5860 enum machine_mode op_mode = GET_MODE (op1);
5862 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5864 /* Floating point vector compare instructions uses destination V4SImode.
5865 Double floating point vector compare instructions uses destination V2DImode.
5866 Move destination to appropriate mode later. */
5867 if (dmode == V4SFmode)
5868 dest_mode = V4SImode;
5869 else if (dmode == V2DFmode)
5870 dest_mode = V2DImode;
5871 else
5872 dest_mode = dmode;
5874 mask = gen_reg_rtx (dest_mode);
5875 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5877 if (vec_cmp_insn == -1)
5879 bool swap_operands = false;
5880 bool try_again = false;
5881 switch (rcode)
5883 case LT:
5884 rcode = GT;
5885 swap_operands = true;
5886 try_again = true;
5887 break;
5888 case LTU:
5889 rcode = GTU;
5890 swap_operands = true;
5891 try_again = true;
5892 break;
5893 case NE:
5894 /* Treat A != B as ~(A==B). */
5896 enum insn_code nor_code;
5897 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5898 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5899 gcc_assert (nor_code != CODE_FOR_nothing);
5900 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5901 if (dmode != dest_mode)
5903 rtx temp = gen_reg_rtx (dest_mode);
5904 convert_move (temp, mask, 0);
5905 return temp;
5907 return mask;
5909 break;
5910 case GE:
5911 case GEU:
5912 case LE:
5913 case LEU:
5914 /* Try GT/GTU/LT/LTU OR EQ */
5916 rtx c_rtx, eq_rtx;
5917 enum insn_code ior_code;
5918 enum rtx_code new_code;
5920 switch (rcode)
5922 case GE: new_code = GT; break;
5923 case GEU: new_code = GTU; break;
5924 case LE: new_code = LT; break;
5925 case LEU: new_code = LTU; break;
5926 default:
5927 gcc_unreachable ();
5930 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5931 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5933 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5934 gcc_assert (ior_code != CODE_FOR_nothing);
5935 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5936 if (dmode != dest_mode)
5938 rtx temp = gen_reg_rtx (dest_mode);
5939 convert_move (temp, mask, 0);
5940 return temp;
5942 return mask;
5944 break;
5945 default:
5946 gcc_unreachable ();
5949 /* You only get two chances. */
5950 if (try_again)
5951 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5953 gcc_assert (vec_cmp_insn != -1);
5955 if (swap_operands)
5957 rtx tmp;
5958 tmp = op0;
5959 op0 = op1;
5960 op1 = tmp;
5964 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5965 if (dmode != dest_mode)
5967 rtx temp = gen_reg_rtx (dest_mode);
5968 convert_move (temp, mask, 0);
5969 return temp;
5971 return mask;
5975 /* Emit vector conditional expression.
5976 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5977 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5980 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5981 rtx cond, rtx cc_op0, rtx cc_op1)
5983 enum machine_mode dest_mode = GET_MODE (dest);
5984 enum rtx_code rcode = GET_CODE (cond);
5985 rtx mask;
5987 /* Get the vector mask for the given relational operations. */
5988 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5990 emit_insn(gen_selb (dest, op2, op1, mask));
5992 return 1;
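/* Force OP into a register of mode MODE, using a subreg when the sizes
   match and a spu_convert otherwise.  */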
5995 static rtx
5996 spu_force_reg (enum machine_mode mode, rtx op)
5998 rtx x, r;
5999 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6001 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6002 || GET_MODE (op) == BLKmode)
6003 return force_reg (mode, convert_to_mode (mode, op, 0));
6004 abort ();
6007 r = force_reg (GET_MODE (op), op);
6008 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6010 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6011 if (x)
6012 return x;
6015 x = gen_reg_rtx (mode);
6016 emit_insn (gen_spu_convert (x, r));
6017 return x;
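/* Check that OP is a valid operand for parameter type P of builtin D,
   diagnosing out-of-range immediates and operands whose low bits would be
   ignored.  */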
6020 static void
6021 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6023 HOST_WIDE_INT v = 0;
6024 int lsbits;
6025 /* Check the range of immediate operands. */
6026 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6028 int range = p - SPU_BTI_7;
6030 if (!CONSTANT_P (op))
6031 error ("%s expects an integer literal in the range [%d, %d].",
6032 d->name,
6033 spu_builtin_range[range].low, spu_builtin_range[range].high);
6035 if (GET_CODE (op) == CONST
6036 && (GET_CODE (XEXP (op, 0)) == PLUS
6037 || GET_CODE (XEXP (op, 0)) == MINUS))
6039 v = INTVAL (XEXP (XEXP (op, 0), 1));
6040 op = XEXP (XEXP (op, 0), 0);
6042 else if (GET_CODE (op) == CONST_INT)
6043 v = INTVAL (op);
6044 else if (GET_CODE (op) == CONST_VECTOR
6045 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6046 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6048 /* The default for v is 0 which is valid in every range. */
6049 if (v < spu_builtin_range[range].low
6050 || v > spu_builtin_range[range].high)
6051 error ("%s expects an integer literal in the range [%d, %d]. ("
6052 HOST_WIDE_INT_PRINT_DEC ")",
6053 d->name,
6054 spu_builtin_range[range].low, spu_builtin_range[range].high,
6057 switch (p)
6059 case SPU_BTI_S10_4:
6060 lsbits = 4;
6061 break;
6062 case SPU_BTI_U16_2:
6063 /* This is only used in lqa and stqa. Even though the insns
6064 encode 16 bits of the address (all but the 2 least
6065 significant), only 14 bits are used because it is masked to
6066 be 16 byte aligned. */
6067 lsbits = 4;
6068 break;
6069 case SPU_BTI_S16_2:
6070 /* This is used for lqr and stqr. */
6071 lsbits = 2;
6072 break;
6073 default:
6074 lsbits = 0;
6077 if (GET_CODE (op) == LABEL_REF
6078 || (GET_CODE (op) == SYMBOL_REF
6079 && SYMBOL_REF_FUNCTION_P (op))
6080 || (v & ((1 << lsbits) - 1)) != 0)
6081 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6082 d->name);
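/* Expand the arguments of the call EXP to builtin D into OPS, placing
   TARGET first when the builtin returns a value.  Return the number of
   operands filled in.  */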
6087 static int
6088 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6089 rtx target, rtx ops[])
6091 enum insn_code icode = (enum insn_code) d->icode;
6092 int i = 0, a;
6094 /* Expand the arguments into rtl. */
6096 if (d->parm[0] != SPU_BTI_VOID)
6097 ops[i++] = target;
6099 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6101 tree arg = CALL_EXPR_ARG (exp, a);
6102 if (arg == 0)
6103 abort ();
6104 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6107 /* The insn pattern may have additional operands (SCRATCH).
6108 Return the number of actual non-SCRATCH operands. */
6109 gcc_assert (i <= insn_data[icode].n_operands);
6110 return i;
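/* Expand the builtin call EXP described by D: pick a target register,
   range-check immediate operands, splat scalar arguments into vector
   operands where the insn pattern expects vectors, and emit the
   pattern as an insn, call or jump as appropriate.  */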
6113 static rtx
6114 spu_expand_builtin_1 (struct spu_builtin_description *d,
6115 tree exp, rtx target)
6117 rtx pat;
6118 rtx ops[8];
6119 enum insn_code icode = (enum insn_code) d->icode;
6120 enum machine_mode mode, tmode;
6121 int i, p;
6122 int n_operands;
6123 tree return_type;
6125 /* Set up ops[] with values from arglist. */
6126 n_operands = expand_builtin_args (d, exp, target, ops);
6128 /* Handle the target operand which must be operand 0. */
6129 i = 0;
6130 if (d->parm[0] != SPU_BTI_VOID)
6133 /* We prefer the mode specified for the match_operand; otherwise
6134 use the mode from the builtin function prototype. */
6135 tmode = insn_data[d->icode].operand[0].mode;
6136 if (tmode == VOIDmode)
6137 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6139 /* Try to use TARGET because not using it can lead to extra copies,
6140 and when we are using all of the registers extra copies lead
6141 to extra spills. */
6142 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6143 ops[0] = target;
6144 else
6145 target = ops[0] = gen_reg_rtx (tmode);
6147 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6148 abort ();
6150 i++;
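  /* SPU_MASK_FOR_LOAD is expanded by hand: take the pointer argument,
     negate the address, and hand it to the insn pattern as a MEM.  The
     resulting mask is what the vectorizer requests through the
     builtin_mask_for_load hook for realignment (see
     spu_builtin_mask_for_load below).  */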
6153 if (d->fcode == SPU_MASK_FOR_LOAD)
6155 enum machine_mode mode = insn_data[icode].operand[1].mode;
6156 tree arg;
6157 rtx addr, op, pat;
6159 /* Get the address.  */
6160 arg = CALL_EXPR_ARG (exp, 0);
6161 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6162 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6163 addr = memory_address (mode, op);
6165 /* Negate the address.  */
6166 op = gen_reg_rtx (GET_MODE (addr));
6167 emit_insn (gen_rtx_SET (VOIDmode, op,
6168 gen_rtx_NEG (GET_MODE (addr), addr)));
6169 op = gen_rtx_MEM (mode, op);
6171 pat = GEN_FCN (icode) (target, op);
6172 if (!pat)
6173 return 0;
6174 emit_insn (pat);
6175 return target;
6178 /* Ignore align_hint, but still expand its args in case they have
6179 side effects. */
6180 if (icode == CODE_FOR_spu_align_hint)
6181 return 0;
6183 /* Handle the rest of the operands. */
6184 for (p = 1; i < n_operands; i++, p++)
6186 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6187 mode = insn_data[d->icode].operand[i].mode;
6188 else
6189 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6191 /* mode can be VOIDmode here for labels */
6193 /* For specific intrinsics with an immediate operand, e.g.,
6194 si_ai(), we sometimes need to convert the scalar argument to a
6195 vector argument by splatting the scalar. */
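      /* Illustrative example: a CONST_INT argument of 10 destined for a
	 V4SI operand becomes the constant vector { 10, 10, 10, 10 } via
	 spu_const below, while a scalar register value is broadcast with
	 a spu_splats insn.  */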
6196 if (VECTOR_MODE_P (mode)
6197 && (GET_CODE (ops[i]) == CONST_INT
6198 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6199 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6201 if (GET_CODE (ops[i]) == CONST_INT)
6202 ops[i] = spu_const (mode, INTVAL (ops[i]));
6203 else
6205 rtx reg = gen_reg_rtx (mode);
6206 enum machine_mode imode = GET_MODE_INNER (mode);
6207 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6208 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6209 if (imode != GET_MODE (ops[i]))
6210 ops[i] = convert_to_mode (imode, ops[i],
6211 TYPE_UNSIGNED (spu_builtin_types
6212 [d->parm[i]]));
6213 emit_insn (gen_spu_splats (reg, ops[i]));
6214 ops[i] = reg;
6218 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6220 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6221 ops[i] = spu_force_reg (mode, ops[i]);
6224 switch (n_operands)
6226 case 0:
6227 pat = GEN_FCN (icode) (0);
6228 break;
6229 case 1:
6230 pat = GEN_FCN (icode) (ops[0]);
6231 break;
6232 case 2:
6233 pat = GEN_FCN (icode) (ops[0], ops[1]);
6234 break;
6235 case 3:
6236 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6237 break;
6238 case 4:
6239 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6240 break;
6241 case 5:
6242 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6243 break;
6244 case 6:
6245 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6246 break;
6247 default:
6248 abort ();
6251 if (!pat)
6252 abort ();
6254 if (d->type == B_CALL || d->type == B_BISLED)
6255 emit_call_insn (pat);
6256 else if (d->type == B_JUMP)
6258 emit_jump_insn (pat);
6259 emit_barrier ();
6261 else
6262 emit_insn (pat);
6264 return_type = spu_builtin_types[d->parm[0]];
6265 if (d->parm[0] != SPU_BTI_VOID
6266 && GET_MODE (target) != TYPE_MODE (return_type))
6268 /* TARGET is the return value.  It should always have the mode of
6269 the builtin function prototype. */
6270 target = spu_force_reg (TYPE_MODE (return_type), target);
6273 return target;
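/* Expand a call EXP to an SPU builtin function, delivering the result
   in TARGET when the builtin produces a value.  This is the back end's
   builtin expansion entry point.  */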
6277 spu_expand_builtin (tree exp,
6278 rtx target,
6279 rtx subtarget ATTRIBUTE_UNUSED,
6280 enum machine_mode mode ATTRIBUTE_UNUSED,
6281 int ignore ATTRIBUTE_UNUSED)
6283 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6284 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6285 struct spu_builtin_description *d;
6287 if (fcode < NUM_SPU_BUILTINS)
6289 d = &spu_builtins[fcode];
6291 return spu_expand_builtin_1 (d, exp, target);
6293 abort ();
6296 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6297 static tree
6298 spu_builtin_mul_widen_even (tree type)
6300 switch (TYPE_MODE (type))
6302 case V8HImode:
6303 if (TYPE_UNSIGNED (type))
6304 return spu_builtins[SPU_MULE_0].fndecl;
6305 else
6306 return spu_builtins[SPU_MULE_1].fndecl;
6307 break;
6308 default:
6309 return NULL_TREE;
6313 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6314 static tree
6315 spu_builtin_mul_widen_odd (tree type)
6317 switch (TYPE_MODE (type))
6319 case V8HImode:
6320 if (TYPE_UNSIGNED (type))
6321 return spu_builtins[SPU_MULO_1].fndecl;
6322 else
6323 return spu_builtins[SPU_MULO_0].fndecl;
6324 break;
6325 default:
6326 return NULL_TREE;
6330 /* Implement targetm.vectorize.builtin_mask_for_load. */
6331 static tree
6332 spu_builtin_mask_for_load (void)
6334 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6335 gcc_assert (d);
6336 return d->fndecl;
6339 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6340 static int
6341 spu_builtin_vectorization_cost (bool runtime_test)
6343 /* If the branch of the runtime test is taken - i.e. - the vectorized
6344 version is skipped - this incurs a misprediction cost (because the
6345 vectorized version is expected to be the fall-through). So we subtract
6346 the latency of a mispredicted branch from the costs that are incurred
6347 when the vectorized version is executed. */
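   /* The constant 19 below presumably corresponds to that mispredicted
      branch latency in cycles; e.g. if the cost model would otherwise
      charge C for the runtime test, the effective charge becomes
      C - 19.  */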
6348 if (runtime_test)
6349 return -19;
6350 else
6351 return 0;
6354 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6355 after applying N iterations.  This routine does not determine
6356 how many iterations are required to reach the desired alignment. */
6358 static bool
6359 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6361 if (is_packed)
6362 return false;
6364 /* All other types are naturally aligned. */
6365 return true;
6368 /* Implement targetm.vectorize.builtin_vec_perm. */
6369 tree
6370 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6372 struct spu_builtin_description *d;
6374 *mask_element_type = unsigned_char_type_node;
6376 switch (TYPE_MODE (type))
6378 case V16QImode:
6379 if (TYPE_UNSIGNED (type))
6380 d = &spu_builtins[SPU_SHUFFLE_0];
6381 else
6382 d = &spu_builtins[SPU_SHUFFLE_1];
6383 break;
6385 case V8HImode:
6386 if (TYPE_UNSIGNED (type))
6387 d = &spu_builtins[SPU_SHUFFLE_2];
6388 else
6389 d = &spu_builtins[SPU_SHUFFLE_3];
6390 break;
6392 case V4SImode:
6393 if (TYPE_UNSIGNED (type))
6394 d = &spu_builtins[SPU_SHUFFLE_4];
6395 else
6396 d = &spu_builtins[SPU_SHUFFLE_5];
6397 break;
6399 case V2DImode:
6400 if (TYPE_UNSIGNED (type))
6401 d = &spu_builtins[SPU_SHUFFLE_6];
6402 else
6403 d = &spu_builtins[SPU_SHUFFLE_7];
6404 break;
6406 case V4SFmode:
6407 d = &spu_builtins[SPU_SHUFFLE_8];
6408 break;
6410 case V2DFmode:
6411 d = &spu_builtins[SPU_SHUFFLE_9];
6412 break;
6414 default:
6415 return NULL_TREE;
6418 gcc_assert (d);
6419 return d->fndecl;
6422 /* Count the total number of instructions in each pipe and return the
6423 maximum, which is used as the Minimum Iteration Interval (MII)
6424 in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
6425 -2 marks instructions that can go in either pipe0 or pipe1. */
6426 static int
6427 spu_sms_res_mii (struct ddg *g)
6429 int i;
6430 unsigned t[4] = {0, 0, 0, 0};
6432 for (i = 0; i < g->num_nodes; i++)
6434 rtx insn = g->nodes[i].insn;
6435 int p = get_pipe (insn) + 2;
6437 assert (p >= 0);
6438 assert (p < 4);
6440 t[p]++;
6441 if (dump_file && INSN_P (insn))
6442 fprintf (dump_file, "i%d %s %d %d\n",
6443 INSN_UID (insn),
6444 insn_data[INSN_CODE(insn)].name,
6445 p, t[p]);
6447 if (dump_file)
6448 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
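  /* t[0] counts the either-pipe instructions and t[2]/t[3] the pipe0-
     and pipe1-only ones, so the MII is the larger of
     ceil ((t[0] + t[2] + t[3]) / 2) and the busier dedicated pipe.
     Illustrative example: t = { 3, 0, 2, 1 } gives
     MAX ((3 + 2 + 1 + 1) / 2, MAX (2, 1)) = 3.  */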
6450 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
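/* Record pointer-alignment assumptions (and pre-generate the two
   aligned pseudos mentioned below) before rtl expansion of a function
   begins.  */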
6454 void
6455 spu_init_expanders (void)
6457 if (cfun)
6459 rtx r0, r1;
6460 /* The hard frame pointer is only 128 bit aligned when
6461 frame_pointer_needed is true. We don't know that until we're
6462 expanding the prologue. */
6463 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6465 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6466 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6467 to be treated as aligned, so generate them here. */
6468 r0 = gen_reg_rtx (SImode);
6469 r1 = gen_reg_rtx (SImode);
6470 mark_reg_pointer (r0, 128);
6471 mark_reg_pointer (r1, 128);
6472 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6473 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6477 static enum machine_mode
6478 spu_libgcc_cmp_return_mode (void)
6481 /* For the SPU, word mode is TImode, so it is better to use SImode
6482 for compare returns. */
6483 return SImode;
6486 static enum machine_mode
6487 spu_libgcc_shift_count_mode (void)
6489 /* For the SPU, word mode is TImode, so it is better to use SImode
6490 for shift counts. */
6491 return SImode;
6494 /* An early place to adjust some flags after GCC has finished processing
6495 them.  */
6496 static void
6497 asm_file_start (void)
6499 /* Variable tracking should be run after all optimizations which
6500 change order of insns. It also needs a valid CFG. */
6501 spu_flag_var_tracking = flag_var_tracking;
6502 flag_var_tracking = 0;
6504 default_file_start ();
6507 /* Implement targetm.section_type_flags. */
6508 static unsigned int
6509 spu_section_type_flags (tree decl, const char *name, int reloc)
6511 /* .toe needs to have type @nobits. */
6512 if (strcmp (name, ".toe") == 0)
6513 return SECTION_BSS;
6514 return default_section_type_flags (decl, name, reloc);
6517 /* Generate a constant or register which contains 2^SCALE. We assume
6518 the result is valid for MODE. Currently, MODE must be V4SFmode and
6519 SCALE must be SImode. */
6521 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6523 gcc_assert (mode == V4SFmode);
6524 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6525 if (GET_CODE (scale) != CONST_INT)
6527 /* unsigned int exp = (127 + scale) << 23;
6528 __vector float m = (__vector float) spu_splats (exp); */
6529 rtx reg = force_reg (SImode, scale);
6530 rtx exp = gen_reg_rtx (SImode);
6531 rtx mul = gen_reg_rtx (mode);
6532 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6533 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6534 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6535 return mul;
6537 else
6539 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6540 unsigned char arr[16];
6541 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6542 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6543 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6544 arr[3] = arr[7] = arr[11] = arr[15] = 0;
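      /* The bytes above pack the biased exponent EXP into the IEEE 754
	 single-precision exponent field of each 32-bit element, with the
	 sign and mantissa zero.  Illustrative example: SCALE = 3 gives
	 EXP = 130, so every element is 0x41000000, i.e. 8.0f == 2^3.  */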
6545 return array_to_constant (mode, arr);
6549 /* After reload, just change the convert into a move instruction
6550 or a dead instruction. */
6551 void
6552 spu_split_convert (rtx ops[])
6554 if (REGNO (ops[0]) == REGNO (ops[1]))
6555 emit_note (NOTE_INSN_DELETED);
6556 else
6558 /* Use TImode always as this might help hard reg copyprop. */
6559 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6560 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6561 emit_insn (gen_move_insn (op0, op1));
6565 #include "gt-spu.h"