/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with this file; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "output.h"
#include "basic-block.h"
#include "integrate.h"
#include "toplev.h"
#include "ggc.h"
#include "hashtab.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "params.h"
#include "assert.h"
#include "c-common.h"
#include "machmode.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"

/* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};

/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
					    tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (tree type, tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

/* Built in types. */
tree spu_builtin_types[SPU_BTI_MAX];

/* TARGET overrides. */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params. */
  if (MAX_UNROLLED_INSNS == 100)
    MAX_UNROLLED_INSNS = 250;
  if (MAX_PENDING_LIST_LENGTH == 32)
    MAX_PENDING_LIST_LENGTH = 128;

  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
}

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* Table of machine attributes. */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};

/*  True if MODE is valid for the target.  By "valid", we mean able to
    be manipulated in non-trivial ways.  In particular, this means all
    the arithmetic is supported. */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct. */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}

/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any SUBREG */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}

void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}

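/* Insert the field described by ops[1] (width in bits) and ops[2]
   (start bit) into ops[0], taking the new bits from ops[3].  */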
452 void
453 spu_expand_insv (rtx ops[])
455 HOST_WIDE_INT width = INTVAL (ops[1]);
456 HOST_WIDE_INT start = INTVAL (ops[2]);
457 HOST_WIDE_INT maskbits;
458 enum machine_mode dst_mode, src_mode;
459 rtx dst = ops[0], src = ops[3];
460 int dst_size, src_size;
461 rtx mask;
462 rtx shift_reg;
463 int shift;
466 if (GET_CODE (ops[0]) == MEM)
467 dst = gen_reg_rtx (TImode);
468 else
469 dst = adjust_operand (dst, &start);
470 dst_mode = GET_MODE (dst);
471 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
473 if (CONSTANT_P (src))
475 enum machine_mode m =
476 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
477 src = force_reg (m, convert_to_mode (m, src, 0));
479 src = adjust_operand (src, 0);
480 src_mode = GET_MODE (src);
481 src_size = GET_MODE_BITSIZE (GET_MODE (src));
483 mask = gen_reg_rtx (dst_mode);
484 shift_reg = gen_reg_rtx (dst_mode);
485 shift = dst_size - start - width;
487 /* It's not safe to use subreg here because the compiler assumes
488 that the SUBREG_REG is right justified in the SUBREG. */
489 convert_move (shift_reg, src, 1);
491 if (shift > 0)
493 switch (dst_mode)
495 case SImode:
496 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
497 break;
498 case DImode:
499 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
500 break;
501 case TImode:
502 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
503 break;
504 default:
505 abort ();
508 else if (shift < 0)
509 abort ();
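  /* Build a mask which is 1 in bit positions START .. START+WIDTH-1,
     counting from the most significant bit of the destination, and 0
     everywhere else; it marks the bits that will be replaced.  */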
511 switch (dst_size)
513 case 32:
514 maskbits = (-1ll << (32 - width - start));
515 if (start)
516 maskbits += (1ll << (32 - start));
517 emit_move_insn (mask, GEN_INT (maskbits));
518 break;
519 case 64:
520 maskbits = (-1ll << (64 - width - start));
521 if (start)
522 maskbits += (1ll << (64 - start));
523 emit_move_insn (mask, GEN_INT (maskbits));
524 break;
525 case 128:
527 unsigned char arr[16];
528 int i = start / 8;
529 memset (arr, 0, sizeof (arr));
530 arr[i] = 0xff >> (start & 7);
531 for (i++; i <= (start + width - 1) / 8; i++)
532 arr[i] = 0xff;
533 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
534 emit_move_insn (mask, array_to_constant (TImode, arr));
536 break;
537 default:
538 abort ();
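  /* For a memory destination, load the enclosing 16-byte quadword,
     rotate the source and the mask to the right byte offset, merge the
     new bits in with selb and store the quadword back.  When the field
     can extend past the first quadword, the following quadword is
     updated the same way.  */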
540 if (GET_CODE (ops[0]) == MEM)
542 rtx aligned = gen_reg_rtx (SImode);
543 rtx low = gen_reg_rtx (SImode);
544 rtx addr = gen_reg_rtx (SImode);
545 rtx rotl = gen_reg_rtx (SImode);
546 rtx mask0 = gen_reg_rtx (TImode);
547 rtx mem;
549 emit_move_insn (addr, XEXP (ops[0], 0));
550 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
551 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
552 emit_insn (gen_negsi2 (rotl, low));
553 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
554 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
555 mem = change_address (ops[0], TImode, aligned);
556 set_mem_alias_set (mem, 0);
557 emit_move_insn (dst, mem);
558 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
559 emit_move_insn (mem, dst);
560 if (start + width > MEM_ALIGN (ops[0]))
562 rtx shl = gen_reg_rtx (SImode);
563 rtx mask1 = gen_reg_rtx (TImode);
564 rtx dst1 = gen_reg_rtx (TImode);
565 rtx mem1;
566 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
567 emit_insn (gen_shlqby_ti (mask1, mask, shl));
568 mem1 = adjust_address (mem, TImode, 16);
569 set_mem_alias_set (mem1, 0);
570 emit_move_insn (dst1, mem1);
571 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
572 emit_move_insn (mem1, dst1);
575 else
576 emit_insn (gen_selb (dst, dst, shift_reg, mask));
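/* Expand a block move of ops[2] bytes with alignment ops[3].  Return 1
   when the move has been expanded here, 0 to let the caller fall back
   to the generic code.  */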
int
spu_expand_block_move (rtx ops[])
583 HOST_WIDE_INT bytes, align, offset;
584 rtx src, dst, sreg, dreg, target;
585 int i;
586 if (GET_CODE (ops[2]) != CONST_INT
587 || GET_CODE (ops[3]) != CONST_INT
588 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
589 return 0;
591 bytes = INTVAL (ops[2]);
592 align = INTVAL (ops[3]);
594 if (bytes <= 0)
595 return 1;
597 dst = ops[0];
598 src = ops[1];
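  /* With 16-byte alignment we can copy whole quadwords; any tail bytes
     are merged into the final quadword using a selb byte mask.  */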
600 if (align == 16)
602 for (offset = 0; offset + 16 <= bytes; offset += 16)
604 dst = adjust_address (ops[0], V16QImode, offset);
605 src = adjust_address (ops[1], V16QImode, offset);
606 emit_move_insn (dst, src);
608 if (offset < bytes)
610 rtx mask;
611 unsigned char arr[16] = { 0 };
612 for (i = 0; i < bytes - offset; i++)
613 arr[i] = 0xff;
614 dst = adjust_address (ops[0], V16QImode, offset);
615 src = adjust_address (ops[1], V16QImode, offset);
616 mask = gen_reg_rtx (V16QImode);
617 sreg = gen_reg_rtx (V16QImode);
618 dreg = gen_reg_rtx (V16QImode);
619 target = gen_reg_rtx (V16QImode);
620 emit_move_insn (mask, array_to_constant (V16QImode, arr));
621 emit_move_insn (dreg, dst);
622 emit_move_insn (sreg, src);
623 emit_insn (gen_selb (target, dreg, sreg, mask));
624 emit_move_insn (dst, target);
626 return 1;
628 return 0;
631 enum spu_comp_code
632 { SPU_EQ, SPU_GT, SPU_GTU };
635 int spu_comp_icode[8][3] = {
636 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
637 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
638 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
639 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
640 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
641 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
642 {0, 0, 0},
643 {CODE_FOR_ceq_vec, 0, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
651 void
652 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
654 int reverse_compare = 0;
655 int reverse_test = 0;
656 rtx compare_result;
657 rtx comp_rtx;
658 rtx target = operands[0];
659 enum machine_mode comp_mode;
660 enum machine_mode op_mode;
661 enum spu_comp_code scode;
662 int index;
664 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
665 and so on, to keep the constant in operand 1. */
666 if (GET_CODE (spu_compare_op1) == CONST_INT)
668 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
669 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
670 switch (code)
672 case GE:
673 spu_compare_op1 = GEN_INT (val);
674 code = GT;
675 break;
676 case LT:
677 spu_compare_op1 = GEN_INT (val);
678 code = LE;
679 break;
680 case GEU:
681 spu_compare_op1 = GEN_INT (val);
682 code = GTU;
683 break;
684 case LTU:
685 spu_compare_op1 = GEN_INT (val);
686 code = LEU;
687 break;
688 default:
689 break;
693 switch (code)
695 case GE:
696 reverse_compare = 1;
697 reverse_test = 1;
698 scode = SPU_GT;
699 break;
700 case LE:
701 reverse_compare = 0;
702 reverse_test = 1;
703 scode = SPU_GT;
704 break;
705 case LT:
706 reverse_compare = 1;
707 reverse_test = 0;
708 scode = SPU_GT;
709 break;
710 case GEU:
711 reverse_compare = 1;
712 reverse_test = 1;
713 scode = SPU_GTU;
714 break;
715 case LEU:
716 reverse_compare = 0;
717 reverse_test = 1;
718 scode = SPU_GTU;
719 break;
720 case LTU:
721 reverse_compare = 1;
722 reverse_test = 0;
723 scode = SPU_GTU;
724 break;
725 case NE:
726 reverse_compare = 0;
727 reverse_test = 1;
728 scode = SPU_EQ;
729 break;
731 case EQ:
732 scode = SPU_EQ;
733 break;
734 case GT:
735 scode = SPU_GT;
736 break;
737 case GTU:
738 scode = SPU_GTU;
739 break;
740 default:
741 scode = SPU_EQ;
742 break;
745 comp_mode = SImode;
746 op_mode = GET_MODE (spu_compare_op0);
748 switch (op_mode)
750 case QImode:
751 index = 0;
752 comp_mode = QImode;
753 break;
754 case HImode:
755 index = 1;
756 comp_mode = HImode;
757 break;
758 case SImode:
759 index = 2;
760 break;
761 case DImode:
762 index = 3;
763 break;
764 case TImode:
765 index = 4;
766 break;
767 case SFmode:
768 index = 5;
769 break;
770 case DFmode:
771 index = 6;
772 break;
773 case V16QImode:
774 case V8HImode:
775 case V4SImode:
776 case V2DImode:
777 case V4SFmode:
778 case V2DFmode:
779 index = 7;
780 break;
781 default:
782 abort ();
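  /* There is no direct DFmode compare instruction; when unsafe math
     optimizations allow it, subtract the operands and compare the
     result against zero instead.  */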
785 if (GET_MODE (spu_compare_op1) == DFmode)
787 rtx reg = gen_reg_rtx (DFmode);
788 if (!flag_unsafe_math_optimizations
789 || (scode != SPU_GT && scode != SPU_EQ))
790 abort ();
791 if (reverse_compare)
792 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
793 else
794 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
795 reverse_compare = 0;
796 spu_compare_op0 = reg;
797 spu_compare_op1 = CONST0_RTX (DFmode);
800 if (is_set == 0 && spu_compare_op1 == const0_rtx
801 && (GET_MODE (spu_compare_op0) == SImode
802 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
804 /* Don't need to set a register with the result when we are
805 comparing against zero and branching. */
806 reverse_test = !reverse_test;
807 compare_result = spu_compare_op0;
809 else
811 compare_result = gen_reg_rtx (comp_mode);
813 if (reverse_compare)
815 rtx t = spu_compare_op1;
816 spu_compare_op1 = spu_compare_op0;
817 spu_compare_op0 = t;
820 if (spu_comp_icode[index][scode] == 0)
821 abort ();
823 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
824 (spu_compare_op0, op_mode))
825 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
826 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
827 (spu_compare_op1, op_mode))
828 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
829 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
830 spu_compare_op0,
831 spu_compare_op1);
832 if (comp_rtx == 0)
833 abort ();
834 emit_insn (comp_rtx);
838 if (is_set == 0)
840 rtx bcomp;
841 rtx loc_ref;
843 /* We don't have branch on QI compare insns, so we convert the
844 QI compare result to a HI result. */
845 if (comp_mode == QImode)
847 rtx old_res = compare_result;
848 compare_result = gen_reg_rtx (HImode);
849 comp_mode = HImode;
850 emit_insn (gen_extendqihi2 (compare_result, old_res));
853 if (reverse_test)
854 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
855 else
856 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
858 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
859 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
860 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
861 loc_ref, pc_rtx)));
863 else if (is_set == 2)
865 int compare_size = GET_MODE_BITSIZE (comp_mode);
866 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
867 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
868 rtx select_mask;
869 rtx op_t = operands[2];
870 rtx op_f = operands[3];
872 /* The result of the comparison can be SI, HI or QI mode. Create a
873 mask based on that result. */
874 if (target_size > compare_size)
876 select_mask = gen_reg_rtx (mode);
877 emit_insn (gen_extend_compare (select_mask, compare_result));
879 else if (target_size < compare_size)
880 select_mask =
881 gen_rtx_SUBREG (mode, compare_result,
882 (compare_size - target_size) / BITS_PER_UNIT);
883 else if (comp_mode != mode)
884 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
885 else
886 select_mask = compare_result;
888 if (GET_MODE (target) != GET_MODE (op_t)
889 || GET_MODE (target) != GET_MODE (op_f))
890 abort ();
892 if (reverse_test)
893 emit_insn (gen_selb (target, op_t, op_f, select_mask));
894 else
895 emit_insn (gen_selb (target, op_f, op_t, select_mask));
897 else
899 if (reverse_test)
900 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
901 gen_rtx_NOT (comp_mode, compare_result)));
902 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
903 emit_insn (gen_extendhisi2 (target, compare_result));
904 else if (GET_MODE (target) == SImode
905 && GET_MODE (compare_result) == QImode)
906 emit_insn (gen_extend_compare (target, compare_result));
907 else
908 emit_move_insn (target, compare_result);
912 HOST_WIDE_INT
913 const_double_to_hwint (rtx x)
915 HOST_WIDE_INT val;
916 REAL_VALUE_TYPE rv;
917 if (GET_MODE (x) == SFmode)
919 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
920 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
922 else if (GET_MODE (x) == DFmode)
924 long l[2];
925 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
926 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
927 val = l[0];
928 val = (val << 32) | (l[1] & 0xffffffff);
930 else
931 abort ();
932 return val;
rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
938 long tv[2];
939 REAL_VALUE_TYPE rv;
940 gcc_assert (mode == SFmode || mode == DFmode);
942 if (mode == SFmode)
943 tv[0] = (v << 32) >> 32;
944 else if (mode == DFmode)
946 tv[1] = (v << 32) >> 32;
947 tv[0] = v >> 32;
949 real_from_target (&rv, tv, mode);
950 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
953 void
954 print_operand_address (FILE * file, register rtx addr)
956 rtx reg;
957 rtx offset;
959 if (GET_CODE (addr) == AND
960 && GET_CODE (XEXP (addr, 1)) == CONST_INT
961 && INTVAL (XEXP (addr, 1)) == -16)
962 addr = XEXP (addr, 0);
964 switch (GET_CODE (addr))
966 case REG:
967 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
968 break;
970 case PLUS:
971 reg = XEXP (addr, 0);
972 offset = XEXP (addr, 1);
973 if (GET_CODE (offset) == REG)
975 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
976 reg_names[REGNO (offset)]);
978 else if (GET_CODE (offset) == CONST_INT)
980 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
981 INTVAL (offset), reg_names[REGNO (reg)]);
983 else
984 abort ();
985 break;
987 case CONST:
988 case LABEL_REF:
989 case SYMBOL_REF:
990 case CONST_INT:
991 output_addr_const (file, addr);
992 break;
994 default:
995 debug_rtx (addr);
996 abort ();
1000 void
1001 print_operand (FILE * file, rtx x, int code)
1003 enum machine_mode mode = GET_MODE (x);
1004 HOST_WIDE_INT val;
1005 unsigned char arr[16];
1006 int xcode = GET_CODE (x);
1007 int i, info;
1008 if (GET_MODE (x) == VOIDmode)
1009 switch (code)
1011 case 'L': /* 128 bits, signed */
1012 case 'm': /* 128 bits, signed */
1013 case 'T': /* 128 bits, signed */
1014 case 't': /* 128 bits, signed */
1015 mode = TImode;
1016 break;
1017 case 'K': /* 64 bits, signed */
1018 case 'k': /* 64 bits, signed */
1019 case 'D': /* 64 bits, signed */
1020 case 'd': /* 64 bits, signed */
1021 mode = DImode;
1022 break;
1023 case 'J': /* 32 bits, signed */
1024 case 'j': /* 32 bits, signed */
1025 case 's': /* 32 bits, signed */
1026 case 'S': /* 32 bits, signed */
1027 mode = SImode;
1028 break;
1030 switch (code)
1033 case 'j': /* 32 bits, signed */
1034 case 'k': /* 64 bits, signed */
1035 case 'm': /* 128 bits, signed */
1036 if (xcode == CONST_INT
1037 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1039 gcc_assert (logical_immediate_p (x, mode));
1040 constant_to_array (mode, x, arr);
1041 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1042 val = trunc_int_for_mode (val, SImode);
1043 switch (which_logical_immediate (val))
1045 case SPU_ORI:
1046 break;
1047 case SPU_ORHI:
1048 fprintf (file, "h");
1049 break;
1050 case SPU_ORBI:
1051 fprintf (file, "b");
1052 break;
1053 default:
1054 gcc_unreachable();
1057 else
1058 gcc_unreachable();
1059 return;
1061 case 'J': /* 32 bits, signed */
1062 case 'K': /* 64 bits, signed */
1063 case 'L': /* 128 bits, signed */
1064 if (xcode == CONST_INT
1065 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1067 gcc_assert (logical_immediate_p (x, mode)
1068 || iohl_immediate_p (x, mode));
1069 constant_to_array (mode, x, arr);
1070 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1071 val = trunc_int_for_mode (val, SImode);
1072 switch (which_logical_immediate (val))
1074 case SPU_ORI:
1075 case SPU_IOHL:
1076 break;
1077 case SPU_ORHI:
1078 val = trunc_int_for_mode (val, HImode);
1079 break;
1080 case SPU_ORBI:
1081 val = trunc_int_for_mode (val, QImode);
1082 break;
1083 default:
1084 gcc_unreachable();
1086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1088 else
1089 gcc_unreachable();
1090 return;
1092 case 't': /* 128 bits, signed */
1093 case 'd': /* 64 bits, signed */
1094 case 's': /* 32 bits, signed */
1095 if (CONSTANT_P (x))
1097 enum immediate_class c = classify_immediate (x, mode);
1098 switch (c)
1100 case IC_IL1:
1101 constant_to_array (mode, x, arr);
1102 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1103 val = trunc_int_for_mode (val, SImode);
1104 switch (which_immediate_load (val))
1106 case SPU_IL:
1107 break;
1108 case SPU_ILA:
1109 fprintf (file, "a");
1110 break;
1111 case SPU_ILH:
1112 fprintf (file, "h");
1113 break;
1114 case SPU_ILHU:
1115 fprintf (file, "hu");
1116 break;
1117 default:
1118 gcc_unreachable ();
1120 break;
1121 case IC_CPAT:
1122 constant_to_array (mode, x, arr);
1123 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1124 if (info == 1)
1125 fprintf (file, "b");
1126 else if (info == 2)
1127 fprintf (file, "h");
1128 else if (info == 4)
1129 fprintf (file, "w");
1130 else if (info == 8)
1131 fprintf (file, "d");
1132 break;
1133 case IC_IL1s:
1134 if (xcode == CONST_VECTOR)
1136 x = CONST_VECTOR_ELT (x, 0);
1137 xcode = GET_CODE (x);
1139 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1140 fprintf (file, "a");
1141 else if (xcode == HIGH)
1142 fprintf (file, "hu");
1143 break;
1144 case IC_FSMBI:
1145 case IC_IL2:
1146 case IC_IL2s:
1147 case IC_POOL:
1148 abort ();
1151 else
1152 gcc_unreachable ();
1153 return;
1155 case 'T': /* 128 bits, signed */
1156 case 'D': /* 64 bits, signed */
1157 case 'S': /* 32 bits, signed */
1158 if (CONSTANT_P (x))
1160 enum immediate_class c = classify_immediate (x, mode);
1161 switch (c)
1163 case IC_IL1:
1164 constant_to_array (mode, x, arr);
1165 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1166 val = trunc_int_for_mode (val, SImode);
1167 switch (which_immediate_load (val))
1169 case SPU_IL:
1170 case SPU_ILA:
1171 break;
1172 case SPU_ILH:
1173 case SPU_ILHU:
1174 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1175 break;
1176 default:
1177 gcc_unreachable ();
1179 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1180 break;
1181 case IC_FSMBI:
1182 constant_to_array (mode, x, arr);
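	  /* Gather the low bit of each of the 16 bytes; fsmbi expands
	     each bit of its immediate back into a full byte.  */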
1183 val = 0;
1184 for (i = 0; i < 16; i++)
1186 val <<= 1;
1187 val |= arr[i] & 1;
1189 print_operand (file, GEN_INT (val), 0);
1190 break;
1191 case IC_CPAT:
1192 constant_to_array (mode, x, arr);
1193 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1195 break;
1196 case IC_IL1s:
1197 if (xcode == CONST_VECTOR)
1199 x = CONST_VECTOR_ELT (x, 0);
1200 xcode = GET_CODE (x);
1202 if (xcode == HIGH)
1204 output_addr_const (file, XEXP (x, 0));
1205 fprintf (file, "@h");
1207 else
1208 output_addr_const (file, x);
1209 break;
1210 case IC_IL2:
1211 case IC_IL2s:
1212 case IC_POOL:
1213 abort ();
1216 else
1217 gcc_unreachable ();
1218 return;
1220 case 'C':
1221 if (xcode == CONST_INT)
	  /* Only 4 least significant bits are relevant for generating
	     control word instructions.  */
1225 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1226 return;
1228 break;
1230 case 'M': /* print code for c*d */
1231 if (GET_CODE (x) == CONST_INT)
1232 switch (INTVAL (x))
1234 case 1:
1235 fprintf (file, "b");
1236 break;
1237 case 2:
1238 fprintf (file, "h");
1239 break;
1240 case 4:
1241 fprintf (file, "w");
1242 break;
1243 case 8:
1244 fprintf (file, "d");
1245 break;
1246 default:
1247 gcc_unreachable();
1249 else
1250 gcc_unreachable();
1251 return;
1253 case 'N': /* Negate the operand */
1254 if (xcode == CONST_INT)
1255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1256 else if (xcode == CONST_VECTOR)
1257 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1258 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1259 return;
1261 case 'I': /* enable/disable interrupts */
1262 if (xcode == CONST_INT)
1263 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1264 return;
1266 case 'b': /* branch modifiers */
1267 if (xcode == REG)
1268 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1269 else if (COMPARISON_P (x))
1270 fprintf (file, "%s", xcode == NE ? "n" : "");
1271 return;
1273 case 'i': /* indirect call */
1274 if (xcode == MEM)
1276 if (GET_CODE (XEXP (x, 0)) == REG)
1277 /* Used in indirect function calls. */
1278 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1279 else
1280 output_address (XEXP (x, 0));
1282 return;
1284 case 'p': /* load/store */
1285 if (xcode == MEM)
1287 x = XEXP (x, 0);
1288 xcode = GET_CODE (x);
1290 if (xcode == AND)
1292 x = XEXP (x, 0);
1293 xcode = GET_CODE (x);
1295 if (xcode == REG)
1296 fprintf (file, "d");
1297 else if (xcode == CONST_INT)
1298 fprintf (file, "a");
1299 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1300 fprintf (file, "r");
1301 else if (xcode == PLUS || xcode == LO_SUM)
1303 if (GET_CODE (XEXP (x, 1)) == REG)
1304 fprintf (file, "x");
1305 else
1306 fprintf (file, "d");
1308 return;
1310 case 0:
1311 if (xcode == REG)
1312 fprintf (file, "%s", reg_names[REGNO (x)]);
1313 else if (xcode == MEM)
1314 output_address (XEXP (x, 0));
1315 else if (xcode == CONST_VECTOR)
1316 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1317 else
1318 output_addr_const (file, x);
1319 return;
1321 default:
1322 output_operand_lossage ("invalid %%xn code");
1324 gcc_unreachable ();
1327 extern char call_used_regs[];
1328 extern char regs_ever_live[];
1330 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1331 caller saved register. For leaf functions it is more efficient to
1332 use a volatile register because we won't need to save and restore the
1333 pic register. This routine is only valid after register allocation
1334 is completed, so we can pick an unused register. */
1335 static rtx
1336 get_pic_reg (void)
1338 rtx pic_reg = pic_offset_table_rtx;
1339 if (!reload_completed && !reload_in_progress)
1340 abort ();
1341 return pic_reg;
1344 /* Split constant addresses to handle cases that are too large. Also, add in
1345 the pic register when in PIC mode. */
int
spu_split_immediate (rtx * ops)
1349 enum machine_mode mode = GET_MODE (ops[0]);
1350 enum immediate_class c = classify_immediate (ops[1], mode);
1352 switch (c)
1354 case IC_IL2:
1356 unsigned char arrhi[16];
1357 unsigned char arrlo[16];
1358 rtx to, hi, lo;
1359 int i;
1360 constant_to_array (mode, ops[1], arrhi);
1361 to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode);
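	/* Separate the high and low halfwords of each 32-bit word so
	   the constant can be built with an ilhu of the high parts
	   followed by an iohl of the low parts.  */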
1362 for (i = 0; i < 16; i += 4)
1364 arrlo[i + 2] = arrhi[i + 2];
1365 arrlo[i + 3] = arrhi[i + 3];
1366 arrlo[i + 0] = arrlo[i + 1] = 0;
1367 arrhi[i + 2] = arrhi[i + 3] = 0;
1369 hi = array_to_constant (mode, arrhi);
1370 lo = array_to_constant (mode, arrlo);
1371 emit_move_insn (to, hi);
1372 emit_insn (gen_rtx_SET
1373 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1374 return 1;
1376 case IC_POOL:
1377 if (reload_in_progress || reload_completed)
1379 rtx mem = force_const_mem (mode, ops[1]);
1380 if (TARGET_LARGE_MEM)
1382 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1383 emit_move_insn (addr, XEXP (mem, 0));
1384 mem = replace_equiv_address (mem, addr);
1386 emit_move_insn (ops[0], mem);
1387 return 1;
1389 break;
1390 case IC_IL1s:
1391 case IC_IL2s:
1392 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1394 if (c == IC_IL2s)
1396 emit_insn (gen_high (ops[0], ops[1]));
1397 emit_insn (gen_low (ops[0], ops[0], ops[1]));
1399 else if (flag_pic)
1400 emit_insn (gen_pic (ops[0], ops[1]));
1401 if (flag_pic)
1403 rtx pic_reg = get_pic_reg ();
1404 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1405 current_function_uses_pic_offset_table = 1;
1407 return flag_pic || c == IC_IL2s;
1409 break;
1410 case IC_IL1:
1411 case IC_FSMBI:
1412 case IC_CPAT:
1413 break;
1415 return 0;
1418 /* SAVING is TRUE when we are generating the actual load and store
1419 instructions for REGNO. When determining the size of the stack
1420 needed for saving register we must allocate enough space for the
1421 worst case, because we don't always have the information early enough
1422 to not allocate it. But we can at least eliminate the actual loads
1423 and stores during the prologue/epilogue. */
1424 static int
1425 need_to_save_reg (int regno, int saving)
1427 if (regs_ever_live[regno] && !call_used_regs[regno])
1428 return 1;
1429 if (flag_pic
1430 && regno == PIC_OFFSET_TABLE_REGNUM
1431 && (!saving || current_function_uses_pic_offset_table)
1432 && (!saving
1433 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1434 return 1;
1435 return 0;
1438 /* This function is only correct starting with local register
1439 allocation */
int
spu_saved_regs_size (void)
1443 int reg_save_size = 0;
1444 int regno;
1446 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1447 if (need_to_save_reg (regno, 0))
1448 reg_save_size += 0x10;
1449 return reg_save_size;
1452 static rtx
1453 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1455 rtx reg = gen_rtx_REG (V4SImode, regno);
1456 rtx mem =
1457 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1458 return emit_insn (gen_movv4si (mem, reg));
1461 static rtx
1462 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1464 rtx reg = gen_rtx_REG (V4SImode, regno);
1465 rtx mem =
1466 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1467 return emit_insn (gen_movv4si (reg, mem));
1470 /* This happens after reload, so we need to expand it. */
1471 static rtx
1472 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1474 rtx insn;
1475 if (satisfies_constraint_K (GEN_INT (imm)))
1477 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1479 else
1481 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1482 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1483 REG_NOTES (insn));
1484 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1485 if (REGNO (src) == REGNO (scratch))
1486 abort ();
1488 if (REGNO (dst) == REGNO (scratch))
1489 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1490 REG_NOTES (insn));
1491 return insn;
1494 /* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
1499 if (reload_completed)
1501 if (cfun->static_chain_decl == 0
1502 && (spu_saved_regs_size ()
1503 + get_frame_size ()
1504 + current_function_outgoing_args_size
1505 + current_function_pretend_args_size == 0)
1506 && current_function_is_leaf)
1507 return 1;
1509 return 0;
/* The stack frame looks like this:

            +-------------+
            |  incoming   |
         AP |    args     |
            +-------------+
            |  $lr save   |
            +-------------+
    prev SP | back chain  |
            +-------------+
            |  var args   |
            |  reg save   | current_function_pretend_args_size bytes
            +-------------+
            |    ...      |
            | saved regs  | spu_saved_regs_size() bytes
            +-------------+
            |    ...      |
         FP |    vars     | get_frame_size() bytes
            +-------------+
            |    ...      |
            |  outgoing   |
            |    args     | current_function_outgoing_args_size bytes
            +-------------+
            | $lr of next |
            |   frame     |
            +-------------+
         SP | back chain  |
            +-------------+
*/
1542 void
1543 spu_expand_prologue (void)
1545 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1546 HOST_WIDE_INT total_size;
1547 HOST_WIDE_INT saved_regs_size;
1548 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1549 rtx scratch_reg_0, scratch_reg_1;
1550 rtx insn, real;
1552 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1553 the "toplevel" insn chain. */
1554 emit_note (NOTE_INSN_DELETED);
1556 if (flag_pic && optimize == 0)
1557 current_function_uses_pic_offset_table = 1;
1559 if (spu_naked_function_p (current_function_decl))
1560 return;
1562 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1563 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1565 saved_regs_size = spu_saved_regs_size ();
1566 total_size = size + saved_regs_size
1567 + current_function_outgoing_args_size
1568 + current_function_pretend_args_size;
1570 if (!current_function_is_leaf
1571 || current_function_calls_alloca || total_size > 0)
1572 total_size += STACK_POINTER_OFFSET;
1574 /* Save this first because code after this might use the link
1575 register as a scratch register. */
1576 if (!current_function_is_leaf)
1578 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1579 RTX_FRAME_RELATED_P (insn) = 1;
1582 if (total_size > 0)
1584 offset = -current_function_pretend_args_size;
1585 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1586 if (need_to_save_reg (regno, 1))
1588 offset -= 16;
1589 insn = frame_emit_store (regno, sp_reg, offset);
1590 RTX_FRAME_RELATED_P (insn) = 1;
1594 if (flag_pic && current_function_uses_pic_offset_table)
1596 rtx pic_reg = get_pic_reg ();
1597 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1598 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1599 REG_NOTES (insn));
1600 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1601 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1602 REG_NOTES (insn));
1605 if (total_size > 0)
1607 if (flag_stack_check)
1609 /* We compare against total_size-1 because
1610 ($sp >= total_size) <=> ($sp > total_size-1) */
1611 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1612 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1613 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1614 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1616 emit_move_insn (scratch_v4si, size_v4si);
1617 size_v4si = scratch_v4si;
1619 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1620 emit_insn (gen_vec_extractv4si
1621 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1622 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1625 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1626 the value of the previous $sp because we save it as the back
1627 chain. */
1628 if (total_size <= 2000)
1630 /* In this case we save the back chain first. */
1631 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1632 insn =
1633 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1635 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1637 insn = emit_move_insn (scratch_reg_0, sp_reg);
1638 insn =
1639 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1641 else
1643 insn = emit_move_insn (scratch_reg_0, sp_reg);
1644 insn =
1645 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1647 RTX_FRAME_RELATED_P (insn) = 1;
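      /* Record the net stack adjustment with a REG_FRAME_RELATED_EXPR
	 note so the unwind information stays correct even when the
	 adjustment was made through a scratch register.  */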
1648 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1649 REG_NOTES (insn) =
1650 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1652 if (total_size > 2000)
1654 /* Save the back chain ptr */
1655 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1658 if (frame_pointer_needed)
1660 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1661 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1662 + current_function_outgoing_args_size;
1663 /* Set the new frame_pointer */
1664 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1665 RTX_FRAME_RELATED_P (insn) = 1;
1666 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1667 REG_NOTES (insn) =
1668 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1669 real, REG_NOTES (insn));
1673 emit_note (NOTE_INSN_DELETED);
1676 void
1677 spu_expand_epilogue (bool sibcall_p)
1679 int size = get_frame_size (), offset, regno;
1680 HOST_WIDE_INT saved_regs_size, total_size;
1681 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1682 rtx jump, scratch_reg_0;
1684 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1685 the "toplevel" insn chain. */
1686 emit_note (NOTE_INSN_DELETED);
1688 if (spu_naked_function_p (current_function_decl))
1689 return;
1691 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1693 saved_regs_size = spu_saved_regs_size ();
1694 total_size = size + saved_regs_size
1695 + current_function_outgoing_args_size
1696 + current_function_pretend_args_size;
1698 if (!current_function_is_leaf
1699 || current_function_calls_alloca || total_size > 0)
1700 total_size += STACK_POINTER_OFFSET;
1702 if (total_size > 0)
1704 if (current_function_calls_alloca)
1705 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1706 else
1707 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1710 if (saved_regs_size > 0)
1712 offset = -current_function_pretend_args_size;
1713 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1714 if (need_to_save_reg (regno, 1))
1716 offset -= 0x10;
1717 frame_emit_load (regno, sp_reg, offset);
1722 if (!current_function_is_leaf)
1723 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1725 if (!sibcall_p)
1727 emit_insn (gen_rtx_USE
1728 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1729 jump = emit_jump_insn (gen__return ());
1730 emit_barrier_after (jump);
1733 emit_note (NOTE_INSN_DELETED);
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1739 if (count != 0)
1740 return 0;
1741 /* This is inefficient because it ends up copying to a save-register
1742 which then gets saved even though $lr has already been saved. But
1743 it does generate better code for leaf functions and we don't need
1744 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1745 used for __builtin_return_address anyway, so maybe we don't care if
1746 it's inefficient. */
1747 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1751 /* Given VAL, generate a constant appropriate for MODE.
1752 If MODE is a vector mode, every element will be VAL.
1753 For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1757 rtx inner;
1758 rtvec v;
1759 int units, i;
1761 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1762 || GET_MODE_CLASS (mode) == MODE_FLOAT
1763 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1764 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1766 if (GET_MODE_CLASS (mode) == MODE_INT)
1767 return immed_double_const (val, 0, mode);
1769 /* val is the bit representation of the float */
1770 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1771 return hwint_to_const_double (mode, val);
1773 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1774 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1775 else
1776 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1778 units = GET_MODE_NUNITS (mode);
1780 v = rtvec_alloc (units);
1782 for (i = 0; i < units; ++i)
1783 RTVEC_ELT (v, i) = inner;
1785 return gen_rtx_CONST_VECTOR (mode, v);
1788 /* branch hint stuff */
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.)  */
1796 int spu_hint_dist = (8 * 4);
1798 /* An array of these is used to propagate hints to predecessor blocks. */
1799 struct spu_bb_info
1801 rtx prop_jump; /* propagated from another block */
1802 basic_block bb; /* the original block. */
1805 /* The special $hbr register is used to prevent the insn scheduler from
1806 moving hbr insns across instructions which invalidate them. It
1807 should only be used in a clobber, and this function searches for
1808 insns which clobber it. */
1809 static bool
1810 insn_clobbers_hbr (rtx insn)
1812 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1814 rtx parallel = PATTERN (insn);
1815 rtx clobber;
1816 int j;
1817 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1819 clobber = XVECEXP (parallel, 0, j);
1820 if (GET_CODE (clobber) == CLOBBER
1821 && GET_CODE (XEXP (clobber, 0)) == REG
1822 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1823 return 1;
1826 return 0;
1829 static void
1830 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1832 rtx branch_label;
1833 rtx hint, insn, prev, next;
1835 if (before == 0 || branch == 0 || target == 0)
1836 return;
1838 if (distance > 600)
1839 return;
1842 branch_label = gen_label_rtx ();
1843 LABEL_NUSES (branch_label)++;
1844 LABEL_PRESERVE_P (branch_label) = 1;
1845 insn = emit_label_before (branch_label, branch);
1846 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1848 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1849 the current insn is pipe0, dual issue with it. */
1850 prev = prev_active_insn (before);
1851 if (prev && get_pipe (prev) == 0)
1852 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1853 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1855 next = next_active_insn (before);
1856 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1857 if (next)
1858 PUT_MODE (next, TImode);
1860 else
1862 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1863 PUT_MODE (hint, TImode);
1865 recog_memoized (hint);
1868 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1869 the rtx for the branch target. */
1870 static rtx
1871 get_branch_target (rtx branch)
1873 if (GET_CODE (branch) == JUMP_INSN)
1875 rtx set, src;
1877 /* Return statements */
1878 if (GET_CODE (PATTERN (branch)) == RETURN)
1879 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1881 /* jump table */
1882 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1883 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1884 return 0;
1886 set = single_set (branch);
1887 src = SET_SRC (set);
1888 if (GET_CODE (SET_DEST (set)) != PC)
1889 abort ();
1891 if (GET_CODE (src) == IF_THEN_ELSE)
1893 rtx lab = 0;
1894 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1895 if (note)
1897 /* If the more probable case is not a fall through, then
1898 try a branch hint. */
1899 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1900 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1901 && GET_CODE (XEXP (src, 1)) != PC)
1902 lab = XEXP (src, 1);
1903 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1904 && GET_CODE (XEXP (src, 2)) != PC)
1905 lab = XEXP (src, 2);
1907 if (lab)
1909 if (GET_CODE (lab) == RETURN)
1910 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1911 return lab;
1913 return 0;
1916 return src;
1918 else if (GET_CODE (branch) == CALL_INSN)
1920 rtx call;
1921 /* All of our call patterns are in a PARALLEL and the CALL is
1922 the first pattern in the PARALLEL. */
1923 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1924 abort ();
1925 call = XVECEXP (PATTERN (branch), 0, 0);
1926 if (GET_CODE (call) == SET)
1927 call = SET_SRC (call);
1928 if (GET_CODE (call) != CALL)
1929 abort ();
1930 return XEXP (XEXP (call, 0), 0);
1932 return 0;
1935 static void
1936 insert_branch_hints (void)
1938 struct spu_bb_info *spu_bb_info;
1939 rtx branch, insn, next;
1940 rtx branch_target = 0;
1941 int branch_addr = 0, insn_addr, head_addr;
1942 basic_block bb;
1943 unsigned int j;
1945 spu_bb_info =
1946 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1947 sizeof (struct spu_bb_info));
1949 /* We need exact insn addresses and lengths. */
1950 shorten_branches (get_insns ());
1952 FOR_EACH_BB_REVERSE (bb)
1954 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
1955 branch = 0;
1956 if (spu_bb_info[bb->index].prop_jump)
1958 branch = spu_bb_info[bb->index].prop_jump;
1959 branch_target = get_branch_target (branch);
1960 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1962 /* Search from end of a block to beginning. In this loop, find
1963 jumps which need a branch and emit them only when:
1964 - it's an indirect branch and we're at the insn which sets
1965 the register
1966 - we're at an insn that will invalidate the hint. e.g., a
1967 call, another hint insn, inline asm that clobbers $hbr, and
1968 some inlined operations (divmodsi4). Don't consider jumps
1969 because they are only at the end of a block and are
1970 considered when we are deciding whether to propagate
1971 - we're getting too far away from the branch. The hbr insns
1972 only have a signed 10-bit offset
1973 We go back as far as possible so the branch will be considered
1974 for propagation when we get to the beginning of the block. */
1975 next = 0;
1976 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1978 if (INSN_P (insn))
1980 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1981 if (branch && next
1982 && ((GET_CODE (branch_target) == REG
1983 && set_of (branch_target, insn) != NULL_RTX)
1984 || insn_clobbers_hbr (insn)
1985 || branch_addr - insn_addr > 600))
1987 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1988 if (insn != BB_END (bb)
1989 && branch_addr - next_addr >= spu_hint_dist)
1991 if (dump_file)
1992 fprintf (dump_file,
1993 "hint for %i in block %i before %i\n",
1994 INSN_UID (branch), bb->index, INSN_UID (next));
1995 spu_emit_branch_hint (next, branch, branch_target,
1996 branch_addr - next_addr);
1998 branch = 0;
2001 /* JUMP_P will only be true at the end of a block. When
2002 branch is already set it means we've previously decided
2003 to propagate a hint for that branch into this block. */
2004 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2006 branch = 0;
2007 if ((branch_target = get_branch_target (insn)))
2009 branch = insn;
2010 branch_addr = insn_addr;
2014 /* When a branch hint is emitted it will be inserted
2015 before "next". Make sure next is the beginning of a
2016 cycle to minimize impact on the scheduled insns. */
2017 if (GET_MODE (insn) == TImode)
2018 next = insn;
2020 if (insn == BB_HEAD (bb))
2021 break;
2024 if (branch)
2026 /* If we haven't emitted a hint for this branch yet, it might
2027 be profitable to emit it in one of the predecessor blocks,
2028 especially for loops. */
2029 rtx bbend;
2030 basic_block prev = 0, prop = 0, prev2 = 0;
2031 int loop_exit = 0, simple_loop = 0;
2032 int next_addr = 0;
2033 if (next)
2034 next_addr = INSN_ADDRESSES (INSN_UID (next));
2036 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2037 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2038 prev = EDGE_PRED (bb, j)->src;
2039 else
2040 prev2 = EDGE_PRED (bb, j)->src;
2042 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2043 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2044 loop_exit = 1;
2045 else if (EDGE_SUCC (bb, j)->dest == bb)
2046 simple_loop = 1;
2048 /* If this branch is a loop exit then propagate to previous
2049 fallthru block. This catches the cases when it is a simple
2050 loop or when there is an initial branch into the loop. */
2051 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2052 prop = prev;
2054 /* If there is only one adjacent predecessor. Don't propagate
2055 outside this loop. This loop_depth test isn't perfect, but
2056 I'm not sure the loop_father member is valid at this point. */
2057 else if (prev && single_pred_p (bb)
2058 && prev->loop_depth == bb->loop_depth)
2059 prop = prev;
2061 /* If this is the JOIN block of a simple IF-THEN then
2062 propagate the hint to the HEADER block. */
2063 else if (prev && prev2
2064 && EDGE_COUNT (bb->preds) == 2
2065 && EDGE_COUNT (prev->preds) == 1
2066 && EDGE_PRED (prev, 0)->src == prev2
2067 && prev2->loop_depth == bb->loop_depth
2068 && GET_CODE (branch_target) != REG)
2069 prop = prev;
2071 /* Don't propagate when:
2072 - this is a simple loop and the hint would be too far
2073 - this is not a simple loop and there are 16 insns in
2074 this block already
2075 - the predecessor block ends in a branch that will be
2076 hinted
2077 - the predecessor block ends in an insn that invalidates
2078 the hint */
2079 if (prop
2080 && prop->index >= 0
2081 && (bbend = BB_END (prop))
2082 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2083 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2084 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2086 if (dump_file)
2087 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2088 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2089 bb->index, prop->index, bb->loop_depth,
2090 INSN_UID (branch), loop_exit, simple_loop,
2091 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2093 spu_bb_info[prop->index].prop_jump = branch;
2094 spu_bb_info[prop->index].bb = bb;
2096 else if (next && branch_addr - next_addr >= spu_hint_dist)
2098 if (dump_file)
2099 fprintf (dump_file, "hint for %i in block %i before %i\n",
2100 INSN_UID (branch), bb->index, INSN_UID (next));
2101 spu_emit_branch_hint (next, branch, branch_target,
2102 branch_addr - next_addr);
2104 branch = 0;
2107 free (spu_bb_info);
2110 /* Emit a nop for INSN such that the two will dual issue. This assumes
2111 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2112 We check for TImode to handle a MULTI1 insn which has dual issued its
2113 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2114 ADDR_VEC insns. */
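/* Rough illustration (the example insn is an assumption, not taken from
   a dump): if a pipe-1 insn such as a load begins a new fetch group
   (TImode), a pipe-0 "nop" is emitted in front of it and takes over the
   cycle-start marker so the pair can dual issue; for anything else an
   "lnop" (the pipe-1 nop) is simply appended after the insn.  */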
2115 static void
2116 emit_nop_for_insn (rtx insn)
2118 int p;
2119 rtx new_insn;
2120 p = get_pipe (insn);
2121 if (p == 1 && GET_MODE (insn) == TImode)
2123 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2124 PUT_MODE (new_insn, TImode);
2125 PUT_MODE (insn, VOIDmode);
2127 else
2128 new_insn = emit_insn_after (gen_lnop (), insn);
2131 /* Insert nops in basic blocks to meet dual issue alignment
2132 requirements. */
2133 static void
2134 insert_nops (void)
2136 rtx insn, next_insn, prev_insn;
2137 int length;
2138 int addr;
2140 /* This sets up INSN_ADDRESSES. */
2141 shorten_branches (get_insns ());
2143 /* Keep track of length added by nops. */
2144 length = 0;
2146 prev_insn = 0;
2147 for (insn = get_insns (); insn; insn = next_insn)
2149 next_insn = next_active_insn (insn);
2150 addr = INSN_ADDRESSES (INSN_UID (insn));
2151 if (GET_MODE (insn) == TImode
2152 && next_insn
2153 && GET_MODE (next_insn) != TImode
2154 && ((addr + length) & 7) != 0)
2156 /* prev_insn will always be set because the first insn is
2157 always 8-byte aligned. */
2158 emit_nop_for_insn (prev_insn);
2159 length += 4;
2161 prev_insn = insn;
2165 static void
2166 spu_machine_dependent_reorg (void)
2168 if (optimize > 0)
2170 if (TARGET_BRANCH_HINTS)
2171 insert_branch_hints ();
2172 insert_nops ();
2177 /* Insn scheduling routines, primarily for dual issue. */
2178 static int
2179 spu_sched_issue_rate (void)
2181 return 2;
2184 static int
2185 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2186 int verbose ATTRIBUTE_UNUSED, rtx insn,
2187 int can_issue_more)
2189 if (GET_CODE (PATTERN (insn)) != USE
2190 && GET_CODE (PATTERN (insn)) != CLOBBER
2191 && get_pipe (insn) != -2)
2192 can_issue_more--;
2193 return can_issue_more;
2196 static int
2197 get_pipe (rtx insn)
2199 enum attr_type t;
2200 /* Handle inline asm */
2201 if (INSN_CODE (insn) == -1)
2202 return -1;
2203 t = get_attr_type (insn);
2204 switch (t)
2206 case TYPE_CONVERT:
2207 return -2;
2208 case TYPE_MULTI0:
2209 return -1;
2211 case TYPE_FX2:
2212 case TYPE_FX3:
2213 case TYPE_SPR:
2214 case TYPE_NOP:
2215 case TYPE_FXB:
2216 case TYPE_FPD:
2217 case TYPE_FP6:
2218 case TYPE_FP7:
2219 case TYPE_IPREFETCH:
2220 return 0;
2222 case TYPE_LNOP:
2223 case TYPE_SHUF:
2224 case TYPE_LOAD:
2225 case TYPE_STORE:
2226 case TYPE_BR:
2227 case TYPE_MULTI1:
2228 case TYPE_HBR:
2229 return 1;
2230 default:
2231 abort ();
2235 static int
2236 spu_sched_adjust_priority (rtx insn, int pri)
2238 int p = get_pipe (insn);
2239 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2240 * scheduling. */
2241 if (GET_CODE (PATTERN (insn)) == USE
2242 || GET_CODE (PATTERN (insn)) == CLOBBER
2243 || p == -2)
2244 return pri + 100;
2245 /* Schedule pipe0 insns early for greedier dual issue. */
2246 if (p != 1)
2247 return pri + 50;
2248 return pri;
2251 /* INSN is dependent on DEP_INSN. */
2252 static int
2253 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2254 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2256 if (GET_CODE (insn) == CALL_INSN)
2257 return cost - 2;
2258 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2259 scheduler makes every insn in a block anti-dependent on the final
2260 jump_insn. We adjust here so higher cost insns will get scheduled
2261 earlier. */
2262 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2263 return insn_cost (dep_insn) - 3;
2264 return cost;
2267 /* Create a CONST_DOUBLE from a string. */
2268 struct rtx_def *
2269 spu_float_const (const char *string, enum machine_mode mode)
2271 REAL_VALUE_TYPE value;
2272 value = REAL_VALUE_ATOF (string, mode);
2273 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2276 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2277 CONST_INT fits constraint 'K', i.e., is small. */
2279 legitimate_const (rtx x, int aligned)
2281 /* We can never know if the resulting address fits in 18 bits and can be
2282 loaded with ila. Instead we should use the HI and LO relocations to
2283 load a 32-bit address. */
2284 rtx sym, cst;
2286 gcc_assert (GET_CODE (x) == CONST);
2288 if (GET_CODE (XEXP (x, 0)) != PLUS)
2289 return 0;
2290 sym = XEXP (XEXP (x, 0), 0);
2291 cst = XEXP (XEXP (x, 0), 1);
2292 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2293 return 0;
2294 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2295 return 0;
2296 return satisfies_constraint_K (cst);
2300 spu_constant_address_p (rtx x)
2302 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2303 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2304 || GET_CODE (x) == HIGH);
2307 static enum spu_immediate
2308 which_immediate_load (HOST_WIDE_INT val)
2310 gcc_assert (val == trunc_int_for_mode (val, SImode));
2312 if (val >= -0x8000 && val <= 0x7fff)
2313 return SPU_IL;
2314 if (val >= 0 && val <= 0x3ffff)
2315 return SPU_ILA;
2316 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2317 return SPU_ILH;
2318 if ((val & 0xffff) == 0)
2319 return SPU_ILHU;
2321 return SPU_NONE;
2324 /* Return true when OP can be loaded by one of the il instructions, or
2325 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2327 immediate_load_p (rtx op, enum machine_mode mode)
2329 if (CONSTANT_P (op))
2331 enum immediate_class c = classify_immediate (op, mode);
2332 return c == IC_IL1 || (!flow2_completed && c == IC_IL2);
2334 return 0;
2337 /* Return true if the first SIZE bytes of ARR form a constant that can be
2338 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2339 represent the size and offset of the instruction to use. */
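/* Worked example (bytes derived by hand, the offset is invented): a
   cwd-style pattern that inserts a word at byte offset 4 looks like
     { 16,17,18,19,  0, 1, 2, 3, 24,25,26,27, 28,29,30,31 }
   i.e. the identity bytes 16..31 with the run 0..3 spliced in at
   offset 4; cpat_info recognizes it with *prun == 4 and *pstart == 4.  */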
2340 static int
2341 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2343 int cpat, run, i, start;
2344 cpat = 1;
2345 run = 0;
2346 start = -1;
2347 for (i = 0; i < size && cpat; i++)
2348 if (arr[i] != i+16)
2350 if (!run)
2352 start = i;
2353 if (arr[i] == 3)
2354 run = 1;
2355 else if (arr[i] == 2 && arr[i+1] == 3)
2356 run = 2;
2357 else if (arr[i] == 0)
2359 while (arr[i+run] == run && i+run < 16)
2360 run++;
2361 if (run != 4 && run != 8)
2362 cpat = 0;
2364 else
2365 cpat = 0;
2366 if ((i & (run-1)) != 0)
2367 cpat = 0;
2368 i += run;
2370 else
2371 cpat = 0;
2373 if (cpat && (run || size < 16))
2375 if (run == 0)
2376 run = 1;
2377 if (prun)
2378 *prun = run;
2379 if (pstart)
2380 *pstart = start == -1 ? 16-run : start;
2381 return 1;
2383 return 0;
2386 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2387 it into a register. MODE is only valid when OP is a CONST_INT. */
2388 static enum immediate_class
2389 classify_immediate (rtx op, enum machine_mode mode)
2391 HOST_WIDE_INT val;
2392 unsigned char arr[16];
2393 int i, j, repeated, fsmbi;
2395 gcc_assert (CONSTANT_P (op));
2397 if (GET_MODE (op) != VOIDmode)
2398 mode = GET_MODE (op);
2400 /* A V4SI const_vector with all identical symbols is ok. */
2401 if (mode == V4SImode
2402 && GET_CODE (op) == CONST_VECTOR
2403 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2404 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2405 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2406 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2407 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2408 op = CONST_VECTOR_ELT (op, 0);
2410 switch (GET_CODE (op))
2412 case SYMBOL_REF:
2413 case LABEL_REF:
2414 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2416 case CONST:
2417 return TARGET_LARGE_MEM
2418 || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;
2420 case HIGH:
2421 return IC_IL1s;
2423 case CONST_VECTOR:
2424 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2425 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2426 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2427 return IC_POOL;
2428 /* Fall through. */
2430 case CONST_INT:
2431 case CONST_DOUBLE:
2432 constant_to_array (mode, op, arr);
2434 /* Check that each 4-byte slot is identical. */
2435 repeated = 1;
2436 for (i = 4; i < 16; i += 4)
2437 for (j = 0; j < 4; j++)
2438 if (arr[j] != arr[i + j])
2439 repeated = 0;
2441 if (repeated)
2443 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2444 val = trunc_int_for_mode (val, SImode);
2446 if (which_immediate_load (val) != SPU_NONE)
2447 return IC_IL1;
2450 /* Any mode of 2 bytes or smaller can be loaded with an il
2451 instruction. */
2452 gcc_assert (GET_MODE_SIZE (mode) > 2);
2454 fsmbi = 1;
2455 for (i = 0; i < 16 && fsmbi; i++)
2456 if (arr[i] != 0 && arr[i] != 0xff)
2457 fsmbi = 0;
2458 if (fsmbi)
2459 return IC_FSMBI;
2461 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2462 return IC_CPAT;
2464 if (repeated)
2465 return IC_IL2;
2467 return IC_POOL;
2468 default:
2469 break;
2471 gcc_unreachable ();
2474 static enum spu_immediate
2475 which_logical_immediate (HOST_WIDE_INT val)
2477 gcc_assert (val == trunc_int_for_mode (val, SImode));
2479 if (val >= -0x200 && val <= 0x1ff)
2480 return SPU_ORI;
2481 if (val >= 0 && val <= 0xffff)
2482 return SPU_IOHL;
2483 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2485 val = trunc_int_for_mode (val, HImode);
2486 if (val >= -0x200 && val <= 0x1ff)
2487 return SPU_ORHI;
2488 if ((val & 0xff) == ((val >> 8) & 0xff))
2490 val = trunc_int_for_mode (val, QImode);
2491 if (val >= -0x200 && val <= 0x1ff)
2492 return SPU_ORBI;
2495 return SPU_NONE;
2499 logical_immediate_p (rtx op, enum machine_mode mode)
2501 HOST_WIDE_INT val;
2502 unsigned char arr[16];
2503 int i, j;
2505 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2506 || GET_CODE (op) == CONST_VECTOR);
2508 if (GET_MODE (op) != VOIDmode)
2509 mode = GET_MODE (op);
2511 constant_to_array (mode, op, arr);
2513 /* Check that bytes are repeated. */
2514 for (i = 4; i < 16; i += 4)
2515 for (j = 0; j < 4; j++)
2516 if (arr[j] != arr[i + j])
2517 return 0;
2519 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2520 val = trunc_int_for_mode (val, SImode);
2522 i = which_logical_immediate (val);
2523 return i != SPU_NONE && i != SPU_IOHL;
2527 iohl_immediate_p (rtx op, enum machine_mode mode)
2529 HOST_WIDE_INT val;
2530 unsigned char arr[16];
2531 int i, j;
2533 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2534 || GET_CODE (op) == CONST_VECTOR);
2536 if (GET_MODE (op) != VOIDmode)
2537 mode = GET_MODE (op);
2539 constant_to_array (mode, op, arr);
2541 /* Check that bytes are repeated. */
2542 for (i = 4; i < 16; i += 4)
2543 for (j = 0; j < 4; j++)
2544 if (arr[j] != arr[i + j])
2545 return 0;
2547 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2548 val = trunc_int_for_mode (val, SImode);
2550 return val >= 0 && val <= 0xffff;
2554 arith_immediate_p (rtx op, enum machine_mode mode,
2555 HOST_WIDE_INT low, HOST_WIDE_INT high)
2557 HOST_WIDE_INT val;
2558 unsigned char arr[16];
2559 int bytes, i, j;
2561 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2562 || GET_CODE (op) == CONST_VECTOR);
2564 if (GET_MODE (op) != VOIDmode)
2565 mode = GET_MODE (op);
2567 constant_to_array (mode, op, arr);
2569 if (VECTOR_MODE_P (mode))
2570 mode = GET_MODE_INNER (mode);
2572 bytes = GET_MODE_SIZE (mode);
2573 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2575 /* Check that bytes are repeated. */
2576 for (i = bytes; i < 16; i += bytes)
2577 for (j = 0; j < bytes; j++)
2578 if (arr[j] != arr[i + j])
2579 return 0;
2581 val = arr[0];
2582 for (j = 1; j < bytes; j++)
2583 val = (val << 8) | arr[j];
2585 val = trunc_int_for_mode (val, mode);
2587 return val >= low && val <= high;
2590 /* We accept:
2591 - any 32-bit constant (SImode, SFmode)
2592 - any constant that can be generated with fsmbi (any mode)
2593 - a 64-bit constant where the high and low bits are identical
2594 (DImode, DFmode)
2595 - a 128-bit constant where the four 32-bit words match. */
2597 spu_legitimate_constant_p (rtx x)
2599 int i;
2600 /* V4SI with all identical symbols is valid. */
2601 if (GET_MODE (x) == V4SImode
2602 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2603 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2604 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2605 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2606 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2607 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2608 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2610 if (VECTOR_MODE_P (GET_MODE (x)))
2611 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2612 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2613 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2614 return 0;
2615 return 1;
2618 /* Valid addresses are:
2619 - symbol_ref, label_ref, const
2620 - reg
2621 - reg + const, where either reg or const is 16 byte aligned
2622 - reg + reg, alignment doesn't matter
2623 The alignment matters in the reg+const case because lqd and stqd
2624 ignore the 4 least significant bits of the const. (TODO: It might be
2625 preferable to allow any alignment and fix it up when splitting.) */
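/* A small illustration of the reg+const rule (register names and
   offsets are hypothetical): with a base register rb known to be
   16-byte aligned, "lqd rt, 16(rb)" and "lqd rt, 32(rb)" address the
   quadwords one expects, while an offset of 4 would silently load from
   0(rb) because the low 4 bits of the displacement are dropped.  */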
2627 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2628 rtx x, int reg_ok_strict)
2630 if (mode == TImode && GET_CODE (x) == AND
2631 && GET_CODE (XEXP (x, 1)) == CONST_INT
2632 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2633 x = XEXP (x, 0);
2634 switch (GET_CODE (x))
2636 case SYMBOL_REF:
2637 case LABEL_REF:
2638 return !TARGET_LARGE_MEM;
2640 case CONST:
2641 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
2643 case CONST_INT:
2644 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2646 case SUBREG:
2647 x = XEXP (x, 0);
2648 gcc_assert (GET_CODE (x) == REG);
2650 case REG:
2651 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2653 case PLUS:
2654 case LO_SUM:
2656 rtx op0 = XEXP (x, 0);
2657 rtx op1 = XEXP (x, 1);
2658 if (GET_CODE (op0) == SUBREG)
2659 op0 = XEXP (op0, 0);
2660 if (GET_CODE (op1) == SUBREG)
2661 op1 = XEXP (op1, 0);
2662 /* We can't just accept any aligned register because CSE can
2663 change it to a register that is not marked aligned and then
2664 recog will fail. So we only accept frame registers because
2665 they will only be changed to other frame registers. */
2666 if (GET_CODE (op0) == REG
2667 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2668 && GET_CODE (op1) == CONST_INT
2669 && INTVAL (op1) >= -0x2000
2670 && INTVAL (op1) <= 0x1fff
2671 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2672 return 1;
2673 if (GET_CODE (op0) == REG
2674 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2675 && GET_CODE (op1) == REG
2676 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2677 return 1;
2679 break;
2681 default:
2682 break;
2684 return 0;
2687 /* When the address is reg + const_int, force the const_int into a
2688 register. */
2690 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2691 enum machine_mode mode)
2693 rtx op0, op1;
2694 /* Make sure both operands are registers. */
2695 if (GET_CODE (x) == PLUS)
2697 op0 = XEXP (x, 0);
2698 op1 = XEXP (x, 1);
2699 if (ALIGNED_SYMBOL_REF_P (op0))
2701 op0 = force_reg (Pmode, op0);
2702 mark_reg_pointer (op0, 128);
2704 else if (GET_CODE (op0) != REG)
2705 op0 = force_reg (Pmode, op0);
2706 if (ALIGNED_SYMBOL_REF_P (op1))
2708 op1 = force_reg (Pmode, op1);
2709 mark_reg_pointer (op1, 128);
2711 else if (GET_CODE (op1) != REG)
2712 op1 = force_reg (Pmode, op1);
2713 x = gen_rtx_PLUS (Pmode, op0, op1);
2714 if (spu_legitimate_address (mode, x, 0))
2715 return x;
2717 return NULL_RTX;
2720 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2721 struct attribute_spec.handler. */
2722 static tree
2723 spu_handle_fndecl_attribute (tree * node,
2724 tree name,
2725 tree args ATTRIBUTE_UNUSED,
2726 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2728 if (TREE_CODE (*node) != FUNCTION_DECL)
2730 warning (0, "`%s' attribute only applies to functions",
2731 IDENTIFIER_POINTER (name));
2732 *no_add_attrs = true;
2735 return NULL_TREE;
2738 /* Handle the "vector" attribute. */
2739 static tree
2740 spu_handle_vector_attribute (tree * node, tree name,
2741 tree args ATTRIBUTE_UNUSED,
2742 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2744 tree type = *node, result = NULL_TREE;
2745 enum machine_mode mode;
2746 int unsigned_p;
2748 while (POINTER_TYPE_P (type)
2749 || TREE_CODE (type) == FUNCTION_TYPE
2750 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2751 type = TREE_TYPE (type);
2753 mode = TYPE_MODE (type);
2755 unsigned_p = TYPE_UNSIGNED (type);
2756 switch (mode)
2758 case DImode:
2759 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2760 break;
2761 case SImode:
2762 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2763 break;
2764 case HImode:
2765 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2766 break;
2767 case QImode:
2768 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2769 break;
2770 case SFmode:
2771 result = V4SF_type_node;
2772 break;
2773 case DFmode:
2774 result = V2DF_type_node;
2775 break;
2776 default:
2777 break;
2780 /* Propagate qualifiers attached to the element type
2781 onto the vector type. */
2782 if (result && result != type && TYPE_QUALS (type))
2783 result = build_qualified_type (result, TYPE_QUALS (type));
2785 *no_add_attrs = true; /* No need to hang on to the attribute. */
2787 if (!result)
2788 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2789 else
2790 *node = reconstruct_complex_type (*node, result);
2792 return NULL_TREE;
2795 /* Return nonzero if FUNC is a naked function. */
2796 static int
2797 spu_naked_function_p (tree func)
2799 tree a;
2801 if (TREE_CODE (func) != FUNCTION_DECL)
2802 abort ();
2804 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2805 return a != NULL_TREE;
2809 spu_initial_elimination_offset (int from, int to)
2811 int saved_regs_size = spu_saved_regs_size ();
2812 int sp_offset = 0;
2813 if (!current_function_is_leaf || current_function_outgoing_args_size
2814 || get_frame_size () || saved_regs_size)
2815 sp_offset = STACK_POINTER_OFFSET;
2816 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2817 return (sp_offset + current_function_outgoing_args_size);
2818 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2819 return 0;
2820 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2821 return sp_offset + current_function_outgoing_args_size
2822 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2823 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2824 return get_frame_size () + saved_regs_size + sp_offset;
2825 return 0;
2829 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2831 enum machine_mode mode = TYPE_MODE (type);
2832 int byte_size = ((mode == BLKmode)
2833 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2835 /* Make sure small structs are left justified in a register. */
2836 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2837 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2839 enum machine_mode smode;
2840 rtvec v;
2841 int i;
2842 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2843 int n = byte_size / UNITS_PER_WORD;
2844 v = rtvec_alloc (nregs);
2845 for (i = 0; i < n; i++)
2847 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2848 gen_rtx_REG (TImode,
2849 FIRST_RETURN_REGNUM
2850 + i),
2851 GEN_INT (UNITS_PER_WORD * i));
2852 byte_size -= UNITS_PER_WORD;
2855 if (n < nregs)
2857 if (byte_size < 4)
2858 byte_size = 4;
2859 smode =
2860 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2861 RTVEC_ELT (v, n) =
2862 gen_rtx_EXPR_LIST (VOIDmode,
2863 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2864 GEN_INT (UNITS_PER_WORD * n));
2866 return gen_rtx_PARALLEL (mode, v);
2868 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2872 spu_function_arg (CUMULATIVE_ARGS cum,
2873 enum machine_mode mode,
2874 tree type, int named ATTRIBUTE_UNUSED)
2876 int byte_size;
2878 if (cum >= MAX_REGISTER_ARGS)
2879 return 0;
2881 byte_size = ((mode == BLKmode)
2882 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2884 /* The ABI does not allow parameters to be passed partially in
2885 registers and partially on the stack. */
2886 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2887 return 0;
2889 /* Make sure small structs are left justified in a register. */
2890 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2891 && byte_size < UNITS_PER_WORD && byte_size > 0)
2893 enum machine_mode smode;
2894 rtx gr_reg;
2895 if (byte_size < 4)
2896 byte_size = 4;
2897 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2898 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2899 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2900 const0_rtx);
2901 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2903 else
2904 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2907 /* Variable sized types are passed by reference. */
2908 static bool
2909 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2910 enum machine_mode mode ATTRIBUTE_UNUSED,
2911 tree type, bool named ATTRIBUTE_UNUSED)
2913 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2917 /* Var args. */
2919 /* Create and return the va_list datatype.
2921 On SPU, va_list is an array type equivalent to
2923 typedef struct __va_list_tag
2925 void *__args __attribute__((__aligned(16)));
2926 void *__skip __attribute__((__aligned(16)));
2928 } va_list[1];
2930 where __args points to the arg that will be returned by the next
2931 va_arg(), and __skip points to the previous stack frame such that
2932 when __args == __skip we should advance __args by 32 bytes. */
2933 static tree
2934 spu_build_builtin_va_list (void)
2936 tree f_args, f_skip, record, type_decl;
2937 bool owp;
2939 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2941 type_decl =
2942 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2944 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2945 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2947 DECL_FIELD_CONTEXT (f_args) = record;
2948 DECL_ALIGN (f_args) = 128;
2949 DECL_USER_ALIGN (f_args) = 1;
2951 DECL_FIELD_CONTEXT (f_skip) = record;
2952 DECL_ALIGN (f_skip) = 128;
2953 DECL_USER_ALIGN (f_skip) = 1;
2955 TREE_CHAIN (record) = type_decl;
2956 TYPE_NAME (record) = type_decl;
2957 TYPE_FIELDS (record) = f_args;
2958 TREE_CHAIN (f_args) = f_skip;
2960 /* We know this is being padded and we want it to be. It is an internal
2961 type so hide the warnings from the user. */
2962 owp = warn_padded;
2963 warn_padded = false;
2965 layout_type (record);
2967 warn_padded = owp;
2969 /* The correct type is an array type of one element. */
2970 return build_array_type (record, build_index_type (size_zero_node));
2973 /* Implement va_start by filling the va_list structure VALIST.
2974 NEXTARG points to the first anonymous stack argument.
2976 The following global variables are used to initialize
2977 the va_list structure:
2979 current_function_args_info;
2980 the CUMULATIVE_ARGS for this function
2982 current_function_arg_offset_rtx:
2983 holds the offset of the first anonymous stack argument
2984 (relative to the virtual arg pointer). */
2986 void
2987 spu_va_start (tree valist, rtx nextarg)
2989 tree f_args, f_skip;
2990 tree args, skip, t;
2992 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2993 f_skip = TREE_CHAIN (f_args);
2995 valist = build_va_arg_indirect_ref (valist);
2996 args =
2997 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2998 skip =
2999 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3001 /* Find the __args area. */
3002 t = make_tree (TREE_TYPE (args), nextarg);
3003 if (current_function_pretend_args_size > 0)
3004 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
3005 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
3006 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3010 /* Find the __skip area. */
3011 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3012 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
3013 build_int_cst (integer_type_node,
3014 (current_function_pretend_args_size
3015 - STACK_POINTER_OFFSET)));
3016 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3017 TREE_SIDE_EFFECTS (t) = 1;
3018 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3021 /* Gimplify va_arg by updating the va_list structure
3022 VALIST as required to retrieve an argument of type
3023 TYPE, and returning that argument.
3025 ret = va_arg(VALIST, TYPE);
3027 generates code equivalent to:
3029 paddedsize = (sizeof(TYPE) + 15) & -16;
3030 if (VALIST.__args + paddedsize > VALIST.__skip
3031 && VALIST.__args <= VALIST.__skip)
3032 addr = VALIST.__skip + 32;
3033 else
3034 addr = VALIST.__args;
3035 VALIST.__args = addr + paddedsize;
3036 ret = *(TYPE *)addr;
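/* For instance, with TYPE == int the padded size is (4 + 15) & -16 == 16,
   so every va_arg of an int advances __args by one quadword; the 32-byte
   jump past __skip is taken only when __args would otherwise cross into
   the caller's frame.  */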
3038 static tree
3039 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3040 tree * post_p ATTRIBUTE_UNUSED)
3042 tree f_args, f_skip;
3043 tree args, skip;
3044 HOST_WIDE_INT size, rsize;
3045 tree paddedsize, addr, tmp;
3046 bool pass_by_reference_p;
3048 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3049 f_skip = TREE_CHAIN (f_args);
3051 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3052 args =
3053 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3054 skip =
3055 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3057 addr = create_tmp_var (ptr_type_node, "va_arg");
3058 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3060 /* if an object is dynamically sized, a pointer to it is passed
3061 instead of the object itself. */
3062 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3063 false);
3064 if (pass_by_reference_p)
3065 type = build_pointer_type (type);
3066 size = int_size_in_bytes (type);
3067 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3069 /* build conditional expression to calculate addr. The expression
3070 will be gimplified later. */
3071 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
3072 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
3073 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3074 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3075 build2 (LE_EXPR, boolean_type_node, args, skip));
3077 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3078 build2 (PLUS_EXPR, ptr_type_node, skip,
3079 fold_convert (ptr_type_node, size_int (32))), args);
3081 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3082 gimplify_and_add (tmp, pre_p);
3084 /* update VALIST.__args */
3085 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
3086 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3087 gimplify_and_add (tmp, pre_p);
3089 addr = fold_convert (build_pointer_type (type), addr);
3091 if (pass_by_reference_p)
3092 addr = build_va_arg_indirect_ref (addr);
3094 return build_va_arg_indirect_ref (addr);
3097 /* Save parameter registers starting with the register that corresponds
3098 to the first unnamed parameters. If the first unnamed parameter is
3099 in the stack then save no registers. Set pretend_args_size to the
3100 amount of space needed to save the registers. */
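/* As a sketch of the effect: for a function with one named argument
   followed by "...", the loop below starts at the register holding the
   first unnamed argument and stores each remaining argument register as
   a quadword into the pretend-args area (which begins
   STACK_POINTER_OFFSET bytes below the incoming-args pointer), growing
   *pretend_size by 16 bytes per register saved.  */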
3101 void
3102 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3103 tree type, int *pretend_size, int no_rtl)
3105 if (!no_rtl)
3107 rtx tmp;
3108 int regno;
3109 int offset;
3110 int ncum = *cum;
3112 /* cum currently points to the last named argument, we want to
3113 start at the next argument. */
3114 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3116 offset = -STACK_POINTER_OFFSET;
3117 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3119 tmp = gen_frame_mem (V4SImode,
3120 plus_constant (virtual_incoming_args_rtx,
3121 offset));
3122 emit_move_insn (tmp,
3123 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3124 offset += 16;
3126 *pretend_size = offset + STACK_POINTER_OFFSET;
3130 void
3131 spu_conditional_register_usage (void)
3133 if (flag_pic)
3135 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3136 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3138 global_regs[INTR_REGNUM] = 1;
3141 /* This is called to decide when we can simplify a load instruction. We
3142 must only return true for registers which we know will always be
3143 aligned, taking into account that CSE might replace this reg with
3144 another one that has not been marked aligned.
3145 So this is really only true for frame, stack and virtual registers,
3146 which we know are always aligned and should not be adversely affected
3147 by CSE. */
3148 static int
3149 regno_aligned_for_load (int regno)
3151 return regno == FRAME_POINTER_REGNUM
3152 || regno == HARD_FRAME_POINTER_REGNUM
3153 || regno == STACK_POINTER_REGNUM
3154 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
3157 /* Return TRUE when mem is known to be 16-byte aligned. */
3159 aligned_mem_p (rtx mem)
3161 if (MEM_ALIGN (mem) >= 128)
3162 return 1;
3163 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3164 return 1;
3165 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3167 rtx p0 = XEXP (XEXP (mem, 0), 0);
3168 rtx p1 = XEXP (XEXP (mem, 0), 1);
3169 if (regno_aligned_for_load (REGNO (p0)))
3171 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3172 return 1;
3173 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3174 return 1;
3177 else if (GET_CODE (XEXP (mem, 0)) == REG)
3179 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3180 return 1;
3182 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3183 return 1;
3184 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3186 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3187 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3188 if (GET_CODE (p0) == SYMBOL_REF
3189 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3190 return 1;
3192 return 0;
3195 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3196 into its SYMBOL_REF_FLAGS. */
3197 static void
3198 spu_encode_section_info (tree decl, rtx rtl, int first)
3200 default_encode_section_info (decl, rtl, first);
3202 /* If a variable has a forced alignment to < 16 bytes, mark it with
3203 SYMBOL_FLAG_ALIGN1. */
3204 if (TREE_CODE (decl) == VAR_DECL
3205 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3206 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3209 /* Return TRUE if we are certain the mem refers to a complete object
3210 which is both 16-byte aligned and padded to a 16-byte boundary. This
3211 would make it safe to store with a single instruction.
3212 We guarantee the alignment and padding for static objects by aligning
3213 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3214 FIXME: We currently cannot guarantee this for objects on the stack
3215 because assign_parm_setup_stack calls assign_stack_local with the
3216 alignment of the parameter mode and in that case the alignment never
3217 gets adjusted by LOCAL_ALIGNMENT. */
3218 static int
3219 store_with_one_insn_p (rtx mem)
3221 rtx addr = XEXP (mem, 0);
3222 if (GET_MODE (mem) == BLKmode)
3223 return 0;
3224 /* Only static objects. */
3225 if (GET_CODE (addr) == SYMBOL_REF)
3227 /* We use the associated declaration to make sure the access is
3228 referring to the whole object.
3229 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3230 if it is necessary. Will there be cases where one exists, and
3231 the other does not? Will there be cases where both exist, but
3232 have different types? */
3233 tree decl = MEM_EXPR (mem);
3234 if (decl
3235 && TREE_CODE (decl) == VAR_DECL
3236 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3237 return 1;
3238 decl = SYMBOL_REF_DECL (addr);
3239 if (decl
3240 && TREE_CODE (decl) == VAR_DECL
3241 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3242 return 1;
3244 return 0;
3248 spu_expand_mov (rtx * ops, enum machine_mode mode)
3250 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3251 abort ();
3253 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3255 rtx from = SUBREG_REG (ops[1]);
3256 enum machine_mode imode = GET_MODE (from);
3258 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3259 && GET_MODE_CLASS (imode) == MODE_INT
3260 && subreg_lowpart_p (ops[1]));
3262 if (GET_MODE_SIZE (imode) < 4)
3264 from = gen_rtx_SUBREG (SImode, from, 0);
3265 imode = SImode;
3268 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3270 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3271 emit_insn (GEN_FCN (icode) (ops[0], from));
3273 else
3274 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3275 return 1;
3278 /* At least one of the operands needs to be a register. */
3279 if ((reload_in_progress | reload_completed) == 0
3280 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3282 rtx temp = force_reg (mode, ops[1]);
3283 emit_move_insn (ops[0], temp);
3284 return 1;
3286 if (reload_in_progress || reload_completed)
3288 if (CONSTANT_P (ops[1]))
3289 return spu_split_immediate (ops);
3290 return 0;
3292 else
3294 if (GET_CODE (ops[0]) == MEM)
3296 if (!spu_valid_move (ops))
3298 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3299 gen_reg_rtx (TImode)));
3300 return 1;
3303 else if (GET_CODE (ops[1]) == MEM)
3305 if (!spu_valid_move (ops))
3307 emit_insn (gen_load
3308 (ops[0], ops[1], gen_reg_rtx (TImode),
3309 gen_reg_rtx (SImode)));
3310 return 1;
3313 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3314 extend them. */
3315 if (GET_CODE (ops[1]) == CONST_INT)
3317 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3318 if (val != INTVAL (ops[1]))
3320 emit_move_insn (ops[0], GEN_INT (val));
3321 return 1;
3325 return 0;
3328 static int
3329 reg_align (rtx reg)
3331 /* For now, only frame registers are known to be aligned at all times.
3332 We can't trust REGNO_POINTER_ALIGN because optimization will move
3333 registers around, potentially changing an "aligned" register in an
3334 address to an unaligned register, which would result in an invalid
3335 address. */
3336 int regno = REGNO (reg);
3337 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3340 void
3341 spu_split_load (rtx * ops)
3343 enum machine_mode mode = GET_MODE (ops[0]);
3344 rtx addr, load, rot, mem, p0, p1;
3345 int rot_amt;
3347 addr = XEXP (ops[1], 0);
3349 rot = 0;
3350 rot_amt = 0;
3351 if (GET_CODE (addr) == PLUS)
3353 /* 8 cases:
3354 aligned reg + aligned reg => lqx
3355 aligned reg + unaligned reg => lqx, rotqby
3356 aligned reg + aligned const => lqd
3357 aligned reg + unaligned const => lqd, rotqbyi
3358 unaligned reg + aligned reg => lqx, rotqby
3359 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3360 unaligned reg + aligned const => lqd, rotqby
3361 unaligned reg + unaligned const -> not allowed by legitimate address
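/* Sketch of one row above (register names and the offset are invented):
   loading a word from 4(rb) with rb 16-byte aligned becomes
       lqd     rt, 0(rb)
       rotqbyi rt, rt, 4
   i.e. the "aligned reg + unaligned const" case: the quadword is
   fetched and then rotated so the wanted word lands in the preferred
   slot.  */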
3363 p0 = XEXP (addr, 0);
3364 p1 = XEXP (addr, 1);
3365 if (reg_align (p0) < 128)
3367 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3369 emit_insn (gen_addsi3 (ops[3], p0, p1));
3370 rot = ops[3];
3372 else
3373 rot = p0;
3375 else
3377 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3379 rot_amt = INTVAL (p1) & 15;
3380 p1 = GEN_INT (INTVAL (p1) & -16);
3381 addr = gen_rtx_PLUS (SImode, p0, p1);
3383 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3384 rot = p1;
3387 else if (GET_CODE (addr) == REG)
3389 if (reg_align (addr) < 128)
3390 rot = addr;
3392 else if (GET_CODE (addr) == CONST)
3394 if (GET_CODE (XEXP (addr, 0)) == PLUS
3395 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3396 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3398 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3399 if (rot_amt & -16)
3400 addr = gen_rtx_CONST (Pmode,
3401 gen_rtx_PLUS (Pmode,
3402 XEXP (XEXP (addr, 0), 0),
3403 GEN_INT (rot_amt & -16)));
3404 else
3405 addr = XEXP (XEXP (addr, 0), 0);
3407 else
3408 rot = addr;
3410 else if (GET_CODE (addr) == CONST_INT)
3412 rot_amt = INTVAL (addr);
3413 addr = GEN_INT (rot_amt & -16);
3415 else if (!ALIGNED_SYMBOL_REF_P (addr))
3416 rot = addr;
3418 if (GET_MODE_SIZE (mode) < 4)
3419 rot_amt += GET_MODE_SIZE (mode) - 4;
3421 rot_amt &= 15;
3423 if (rot && rot_amt)
3425 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3426 rot = ops[3];
3427 rot_amt = 0;
3430 load = ops[2];
3432 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3433 mem = change_address (ops[1], TImode, addr);
3435 emit_insn (gen_movti (load, mem));
3437 if (rot)
3438 emit_insn (gen_rotqby_ti (load, load, rot));
3439 else if (rot_amt)
3440 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3442 if (reload_completed)
3443 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3444 else
3445 emit_insn (gen_spu_convert (ops[0], load));
3448 void
3449 spu_split_store (rtx * ops)
3451 enum machine_mode mode = GET_MODE (ops[0]);
3452 rtx pat = ops[2];
3453 rtx reg = ops[3];
3454 rtx addr, p0, p1, p1_lo, smem;
3455 int aform;
3456 int scalar;
3458 addr = XEXP (ops[0], 0);
3460 if (GET_CODE (addr) == PLUS)
3462 /* 8 cases:
3463 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3464 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3465 aligned reg + aligned const => lqd, c?d, shuf, stqx
3466 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3467 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3468 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3469 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3470 unaligned reg + unaligned const -> not allowed by legitimate address
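/* Sketch of one row above (register names and the offset are invented):
   storing a word rv to 4(rb) with rb 16-byte aligned expands roughly to
   a read-modify-write sequence:
       lqd   rq, 0(rb)        load the containing quadword
       cwd   rp, 4(rb)        build the insertion pattern for byte 4
       shufb rq, rv, rq, rp   splice the word into the quadword
   followed by a quadword store of rq back to 0(rb), matching the
   "aligned reg + unaligned const" row.  */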
3472 aform = 0;
3473 p0 = XEXP (addr, 0);
3474 p1 = p1_lo = XEXP (addr, 1);
3475 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3477 p1_lo = GEN_INT (INTVAL (p1) & 15);
3478 p1 = GEN_INT (INTVAL (p1) & -16);
3479 addr = gen_rtx_PLUS (SImode, p0, p1);
3482 else if (GET_CODE (addr) == REG)
3484 aform = 0;
3485 p0 = addr;
3486 p1 = p1_lo = const0_rtx;
3488 else
3490 aform = 1;
3491 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3492 p1 = 0; /* aform doesn't use p1 */
3493 p1_lo = addr;
3494 if (ALIGNED_SYMBOL_REF_P (addr))
3495 p1_lo = const0_rtx;
3496 else if (GET_CODE (addr) == CONST)
3498 if (GET_CODE (XEXP (addr, 0)) == PLUS
3499 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3500 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3502 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3503 if ((v & -16) != 0)
3504 addr = gen_rtx_CONST (Pmode,
3505 gen_rtx_PLUS (Pmode,
3506 XEXP (XEXP (addr, 0), 0),
3507 GEN_INT (v & -16)));
3508 else
3509 addr = XEXP (XEXP (addr, 0), 0);
3510 p1_lo = GEN_INT (v & 15);
3513 else if (GET_CODE (addr) == CONST_INT)
3515 p1_lo = GEN_INT (INTVAL (addr) & 15);
3516 addr = GEN_INT (INTVAL (addr) & -16);
3520 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3522 scalar = store_with_one_insn_p (ops[0]);
3523 if (!scalar)
3525 /* We could copy the flags from the ops[0] MEM to lmem here.
3526 We don't because we want this load to be optimized away if
3527 possible, and copying the flags will prevent that in certain
3528 cases, e.g. consider the volatile flag. */
3530 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3531 set_mem_alias_set (lmem, 0);
3532 emit_insn (gen_movti (reg, lmem));
3534 if (!p0 || reg_align (p0) >= 128)
3535 p0 = stack_pointer_rtx;
3536 if (!p1_lo)
3537 p1_lo = const0_rtx;
3539 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3540 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3542 else if (reload_completed)
3544 if (GET_CODE (ops[1]) == REG)
3545 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3546 else if (GET_CODE (ops[1]) == SUBREG)
3547 emit_move_insn (reg,
3548 gen_rtx_REG (GET_MODE (reg),
3549 REGNO (SUBREG_REG (ops[1]))));
3550 else
3551 abort ();
3553 else
3555 if (GET_CODE (ops[1]) == REG)
3556 emit_insn (gen_spu_convert (reg, ops[1]));
3557 else if (GET_CODE (ops[1]) == SUBREG)
3558 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3559 else
3560 abort ();
3563 if (GET_MODE_SIZE (mode) < 4 && scalar)
3564 emit_insn (gen_shlqby_ti
3565 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3567 smem = change_address (ops[0], TImode, addr);
3568 /* We can't use the previous alias set because the memory has changed
3569 size and can potentially overlap objects of other types. */
3570 set_mem_alias_set (smem, 0);
3572 emit_insn (gen_movti (smem, reg));
3575 /* Return TRUE if X is MEM which is a struct member reference
3576 and the member can safely be loaded and stored with a single
3577 instruction because it is padded. */
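/* Illustrative example (the types are invented): given
     struct s { int a __attribute__ ((aligned (16)));
                int b __attribute__ ((aligned (16))); } x;
   a MEM for x.a qualifies: the field is 16-byte aligned and the next
   field is too, so the bytes padding x.a out to a quadword belong to
   the object and may safely be rewritten by a single quadword store.  */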
3578 static int
3579 mem_is_padded_component_ref (rtx x)
3581 tree t = MEM_EXPR (x);
3582 tree r;
3583 if (!t || TREE_CODE (t) != COMPONENT_REF)
3584 return 0;
3585 t = TREE_OPERAND (t, 1);
3586 if (!t || TREE_CODE (t) != FIELD_DECL
3587 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3588 return 0;
3589 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3590 r = DECL_FIELD_CONTEXT (t);
3591 if (!r || TREE_CODE (r) != RECORD_TYPE)
3592 return 0;
3593 /* Make sure they are the same mode */
3594 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3595 return 0;
3596 /* If there are no following fields then the field alignment assures
3597 the structure is padded to the alignment which means this field is
3598 padded too. */
3599 if (TREE_CHAIN (t) == 0)
3600 return 1;
3601 /* If the following field is also aligned then this field will be
3602 padded. */
3603 t = TREE_CHAIN (t);
3604 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3605 return 1;
3606 return 0;
3609 /* Parse the -mfixed-range= option string. */
3610 static void
3611 fix_range (const char *const_str)
3613 int i, first, last;
3614 char *str, *dash, *comma;
3616 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3617 REG2 are either register names or register numbers. The effect
3618 of this option is to mark the registers in the range from REG1 to
3619 REG2 as ``fixed'' so they won't be used by the compiler. */
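/* For example (register numbers chosen purely for illustration):
     -mfixed-range=80-90
   marks registers 80 through 90 as fixed, and several ranges can be
   combined as -mfixed-range=80-90,100-110.  */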
3621 i = strlen (const_str);
3622 str = (char *) alloca (i + 1);
3623 memcpy (str, const_str, i + 1);
3625 while (1)
3627 dash = strchr (str, '-');
3628 if (!dash)
3630 warning (0, "value of -mfixed-range must have form REG1-REG2");
3631 return;
3633 *dash = '\0';
3634 comma = strchr (dash + 1, ',');
3635 if (comma)
3636 *comma = '\0';
3638 first = decode_reg_name (str);
3639 if (first < 0)
3641 warning (0, "unknown register name: %s", str);
3642 return;
3645 last = decode_reg_name (dash + 1);
3646 if (last < 0)
3648 warning (0, "unknown register name: %s", dash + 1);
3649 return;
3652 *dash = '-';
3654 if (first > last)
3656 warning (0, "%s-%s is an empty range", str, dash + 1);
3657 return;
3660 for (i = first; i <= last; ++i)
3661 fixed_regs[i] = call_used_regs[i] = 1;
3663 if (!comma)
3664 break;
3666 *comma = ',';
3667 str = comma + 1;
3672 spu_valid_move (rtx * ops)
3674 enum machine_mode mode = GET_MODE (ops[0]);
3675 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3676 return 0;
3678 /* init_expr_once tries to recog against load and store insns to set
3679 the direct_load[] and direct_store[] arrays. We always want to
3680 consider those loads and stores valid. init_expr_once is called in
3681 the context of a dummy function which does not have a decl. */
3682 if (cfun->decl == 0)
3683 return 1;
3685 /* Don't allow loads/stores which would require more than 1 insn.
3686 During and after reload we assume loads and stores only take 1
3687 insn. */
3688 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3690 if (GET_CODE (ops[0]) == MEM
3691 && (GET_MODE_SIZE (mode) < 4
3692 || !(store_with_one_insn_p (ops[0])
3693 || mem_is_padded_component_ref (ops[0]))))
3694 return 0;
3695 if (GET_CODE (ops[1]) == MEM
3696 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3697 return 0;
3699 return 1;
3702 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3703 can be generated using the fsmbi instruction. */
3705 fsmbi_const_p (rtx x)
3707 if (CONSTANT_P (x))
3709 /* We can always choose DImode for CONST_INT because the high bits
3710 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3711 enum immediate_class c = classify_immediate (x, DImode);
3712 return c == IC_FSMBI;
3714 return 0;
3717 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3718 can be generated using the cbd, chd, cwd or cdd instruction. */
3720 cpat_const_p (rtx x, enum machine_mode mode)
3722 if (CONSTANT_P (x))
3724 enum immediate_class c = classify_immediate (x, mode);
3725 return c == IC_CPAT;
3727 return 0;
3731 gen_cpat_const (rtx * ops)
3733 unsigned char dst[16];
3734 int i, offset, shift, isize;
3735 if (GET_CODE (ops[3]) != CONST_INT
3736 || GET_CODE (ops[2]) != CONST_INT
3737 || (GET_CODE (ops[1]) != CONST_INT
3738 && GET_CODE (ops[1]) != REG))
3739 return 0;
3740 if (GET_CODE (ops[1]) == REG
3741 && (!REG_POINTER (ops[1])
3742 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3743 return 0;
3745 for (i = 0; i < 16; i++)
3746 dst[i] = i + 16;
3747 isize = INTVAL (ops[3]);
3748 if (isize == 1)
3749 shift = 3;
3750 else if (isize == 2)
3751 shift = 2;
3752 else
3753 shift = 0;
3754 offset = (INTVAL (ops[2])
3755 + (GET_CODE (ops[1]) == CONST_INT
3756 ? INTVAL (ops[1]) : 0)) & 15;
3757 for (i = 0; i < isize; i++)
3758 dst[offset + i] = i + shift;
3759 return array_to_constant (TImode, dst);
3762 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3763 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3764 than 16 bytes, the value is repeated across the rest of the array. */
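/* Examples (values invented): a QImode CONST_INT of 0x5a yields sixteen
   copies of 0x5a; an SImode 0x01020304 yields the bytes
   01 02 03 04 repeated four times across the array.  */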
3765 void
3766 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3768 HOST_WIDE_INT val;
3769 int i, j, first;
3771 memset (arr, 0, 16);
3772 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3773 if (GET_CODE (x) == CONST_INT
3774 || (GET_CODE (x) == CONST_DOUBLE
3775 && (mode == SFmode || mode == DFmode)))
3777 gcc_assert (mode != VOIDmode && mode != BLKmode);
3779 if (GET_CODE (x) == CONST_DOUBLE)
3780 val = const_double_to_hwint (x);
3781 else
3782 val = INTVAL (x);
3783 first = GET_MODE_SIZE (mode) - 1;
3784 for (i = first; i >= 0; i--)
3786 arr[i] = val & 0xff;
3787 val >>= 8;
3789 /* Splat the constant across the whole array. */
3790 for (j = 0, i = first + 1; i < 16; i++)
3792 arr[i] = arr[j];
3793 j = (j == first) ? 0 : j + 1;
3796 else if (GET_CODE (x) == CONST_DOUBLE)
3798 val = CONST_DOUBLE_LOW (x);
3799 for (i = 15; i >= 8; i--)
3801 arr[i] = val & 0xff;
3802 val >>= 8;
3804 val = CONST_DOUBLE_HIGH (x);
3805 for (i = 7; i >= 0; i--)
3807 arr[i] = val & 0xff;
3808 val >>= 8;
3811 else if (GET_CODE (x) == CONST_VECTOR)
3813 int units;
3814 rtx elt;
3815 mode = GET_MODE_INNER (mode);
3816 units = CONST_VECTOR_NUNITS (x);
3817 for (i = 0; i < units; i++)
3819 elt = CONST_VECTOR_ELT (x, i);
3820 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3822 if (GET_CODE (elt) == CONST_DOUBLE)
3823 val = const_double_to_hwint (elt);
3824 else
3825 val = INTVAL (elt);
3826 first = GET_MODE_SIZE (mode) - 1;
3827 if (first + i * GET_MODE_SIZE (mode) > 16)
3828 abort ();
3829 for (j = first; j >= 0; j--)
3831 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3832 val >>= 8;
3837 else
3838 gcc_unreachable();
3841 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3842 smaller than 16 bytes, use the bytes that would represent that value
3843 in a register, e.g., for QImode return the value of arr[3]. */
3845 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3847 enum machine_mode inner_mode;
3848 rtvec v;
3849 int units, size, i, j, k;
3850 HOST_WIDE_INT val;
3852 if (GET_MODE_CLASS (mode) == MODE_INT
3853 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3855 j = GET_MODE_SIZE (mode);
3856 i = j < 4 ? 4 - j : 0;
3857 for (val = 0; i < j; i++)
3858 val = (val << 8) | arr[i];
3859 val = trunc_int_for_mode (val, mode);
3860 return GEN_INT (val);
3863 if (mode == TImode)
3865 HOST_WIDE_INT high;
3866 for (i = high = 0; i < 8; i++)
3867 high = (high << 8) | arr[i];
3868 for (i = 8, val = 0; i < 16; i++)
3869 val = (val << 8) | arr[i];
3870 return immed_double_const (val, high, TImode);
3872 if (mode == SFmode)
3874 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3875 val = trunc_int_for_mode (val, SImode);
3876 return hwint_to_const_double (SFmode, val);
3878 if (mode == DFmode)
3880 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3881 val <<= 32;
3882 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3883 return hwint_to_const_double (DFmode, val);
3886 if (!VECTOR_MODE_P (mode))
3887 abort ();
3889 units = GET_MODE_NUNITS (mode);
3890 size = GET_MODE_UNIT_SIZE (mode);
3891 inner_mode = GET_MODE_INNER (mode);
3892 v = rtvec_alloc (units);
3894 for (k = i = 0; i < units; ++i)
3896 val = 0;
3897 for (j = 0; j < size; j++, k++)
3898 val = (val << 8) | arr[k];
3900 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3901 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3902 else
3903 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3905 if (k > 16)
3906 abort ();
3908 return gen_rtx_CONST_VECTOR (mode, v);
3911 static void
3912 reloc_diagnostic (rtx x)
3914 tree loc_decl, decl = 0;
3915 const char *msg;
3916 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3917 return;
3919 if (GET_CODE (x) == SYMBOL_REF)
3920 decl = SYMBOL_REF_DECL (x);
3921 else if (GET_CODE (x) == CONST
3922 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3923 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3925 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3926 if (decl && !DECL_P (decl))
3927 decl = 0;
3929 /* We use last_assemble_variable_decl to get line information. It's
3930 not always going to be right and might not even be close, but will
3931 be right for the more common cases. */
3932 if (!last_assemble_variable_decl)
3933 loc_decl = decl;
3934 else
3935 loc_decl = last_assemble_variable_decl;
3937 /* The decl could be a string constant. */
3938 if (decl && DECL_P (decl))
3939 msg = "%Jcreating run-time relocation for %qD";
3940 else
3941 msg = "creating run-time relocation";
3943 if (TARGET_WARN_RELOC)
3944 warning (0, msg, loc_decl, decl);
3945 else
3946 error (msg, loc_decl, decl);
3949 /* Hook into assemble_integer so we can generate an error for run-time
3950 relocations. The SPU ABI disallows them. */
3951 static bool
3952 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3954 /* By default run-time relocations aren't supported, but we allow them
3955 in case users support them in their own run-time loader, and we provide
3956 a warning for those users that don't. */
3957 if ((GET_CODE (x) == SYMBOL_REF)
3958 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3959 reloc_diagnostic (x);
3961 return default_assemble_integer (x, size, aligned_p);
3964 static void
3965 spu_asm_globalize_label (FILE * file, const char *name)
3967 fputs ("\t.global\t", file);
3968 assemble_name (file, name);
3969 fputs ("\n", file);
3972 static bool
3973 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3975 enum machine_mode mode = GET_MODE (x);
3976 int cost = COSTS_N_INSNS (2);
3978 /* Folding to a CONST_VECTOR will use extra space but there might
3979 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3980 only if it allows us to fold away multiple insns. Changing the cost
3981 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3982 because this cost will only be compared against a single insn.
3983 if (code == CONST_VECTOR)
3984 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3987 /* Use defaults for float operations. Not accurate but good enough. */
3988 if (mode == DFmode)
3990 *total = COSTS_N_INSNS (13);
3991 return true;
3993 if (mode == SFmode)
3995 *total = COSTS_N_INSNS (6);
3996 return true;
3998 switch (code)
4000 case CONST_INT:
4001 if (satisfies_constraint_K (x))
4002 *total = 0;
4003 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4004 *total = COSTS_N_INSNS (1);
4005 else
4006 *total = COSTS_N_INSNS (3);
4007 return true;
4009 case CONST:
4010 *total = COSTS_N_INSNS (3);
4011 return true;
4013 case LABEL_REF:
4014 case SYMBOL_REF:
4015 *total = COSTS_N_INSNS (0);
4016 return true;
4018 case CONST_DOUBLE:
4019 *total = COSTS_N_INSNS (5);
4020 return true;
4022 case FLOAT_EXTEND:
4023 case FLOAT_TRUNCATE:
4024 case FLOAT:
4025 case UNSIGNED_FLOAT:
4026 case FIX:
4027 case UNSIGNED_FIX:
4028 *total = COSTS_N_INSNS (7);
4029 return true;
4031 case PLUS:
4032 if (mode == TImode)
4034 *total = COSTS_N_INSNS (9);
4035 return true;
4037 break;
4039 case MULT:
4040 cost = (GET_CODE (XEXP (x, 0)) == REG
4041 ? COSTS_N_INSNS (12)
4042 : COSTS_N_INSNS (7));
4043 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4045 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4047 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4048 cost = COSTS_N_INSNS (14);
4049 if ((val & 0xffff) == 0)
4050 cost = COSTS_N_INSNS (9);
4051 else if (val > 0 && val < 0x10000)
4052 cost = COSTS_N_INSNS (11);
4055 *total = cost;
4056 return true;
4057 case DIV:
4058 case UDIV:
4059 case MOD:
4060 case UMOD:
4061 *total = COSTS_N_INSNS (20);
4062 return true;
4063 case ROTATE:
4064 case ROTATERT:
4065 case ASHIFT:
4066 case ASHIFTRT:
4067 case LSHIFTRT:
4068 *total = COSTS_N_INSNS (4);
4069 return true;
4070 case UNSPEC:
4071 if (XINT (x, 1) == UNSPEC_CONVERT)
4072 *total = COSTS_N_INSNS (0);
4073 else
4074 *total = COSTS_N_INSNS (4);
4075 return true;
4077 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4078 if (GET_MODE_CLASS (mode) == MODE_INT
4079 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4080 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4081 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4082 *total = cost;
4083 return true;
4086 enum machine_mode
4087 spu_eh_return_filter_mode (void)
4089 /* We would like this to be SImode, but sjlj exceptions seem to work
4090 only with word_mode. */
4091 return TImode;
4094 /* Decide whether we can make a sibling call to a function. DECL is the
4095 declaration of the function being targeted by the call and EXP is the
4096 CALL_EXPR representing the call. */
4097 static bool
4098 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4100 return decl && !TARGET_LARGE_MEM;
4103 /* We need to correctly update the back chain pointer and the Available
4104 Stack Size (which is in the second slot of the sp register). */
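/* Outline of the trick used below: the requested size is splatted into
   all four words of a V4SI value with a shufb pattern of 00 01 02 03
   repeated, and that vector is subtracted from a V4SI view of $sp with
   a single subv4si3, so the stack pointer (word 0) and the Available
   Stack Size (word 1) are decremented together before being written
   back.  */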
4105 void
4106 spu_allocate_stack (rtx op0, rtx op1)
4108 HOST_WIDE_INT v;
4109 rtx chain = gen_reg_rtx (V4SImode);
4110 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4111 rtx sp = gen_reg_rtx (V4SImode);
4112 rtx splatted = gen_reg_rtx (V4SImode);
4113 rtx pat = gen_reg_rtx (TImode);
4115 /* Copy the back chain so we can store it back again below. */
4116 emit_move_insn (chain, stack_bot);
4118 op1 = force_reg (SImode, op1);
4120 v = 0x1020300010203ll;
4121 emit_move_insn (pat, immed_double_const (v, v, TImode));
4122 emit_insn (gen_shufb (splatted, op1, op1, pat));
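/* Assuming the usual shufb byte-selector semantics (control bytes
   0x00-0x03 pick bytes 0-3 of the first source), the repeated
   0x00010203 pattern replicates the low word of op1 into every word
   slot, so the vector subtract below decreases both the stack pointer
   word and the Available Stack Size word by the requested amount. */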
4124 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4125 emit_insn (gen_subv4si3 (sp, sp, splatted));
4127 if (flag_stack_check)
4129 rtx avail = gen_reg_rtx(SImode);
4130 rtx result = gen_reg_rtx(SImode);
4131 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4132 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4133 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4136 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4138 emit_move_insn (stack_bot, chain);
4140 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4143 void
4144 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4146 static unsigned char arr[16] =
4147 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4148 rtx temp = gen_reg_rtx (SImode);
4149 rtx temp2 = gen_reg_rtx (SImode);
4150 rtx temp3 = gen_reg_rtx (V4SImode);
4151 rtx temp4 = gen_reg_rtx (V4SImode);
4152 rtx pat = gen_reg_rtx (TImode);
4153 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4155 /* Restore the back chain from the first word, sp from the second. */
4156 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4157 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4159 emit_move_insn (pat, array_to_constant (TImode, arr));
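/* arr repeats bytes 0-3, so (again assuming standard shufb selector
   semantics) the shufb calls below replicate the 32-bit deltas in
   temp and temp2 into every word slot, letting a single vector add
   update the sp word and the Available Stack Size word together. */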
4161 /* Compute Available Stack Size for sp */
4162 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4163 emit_insn (gen_shufb (temp3, temp, temp, pat));
4165 /* Compute Available Stack Size for back chain */
4166 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4167 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4168 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4170 emit_insn (gen_addv4si3 (sp, sp, temp3));
4171 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4174 static void
4175 spu_init_libfuncs (void)
4177 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4178 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4179 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4180 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4181 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4182 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4183 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4184 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4185 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4186 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4187 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4189 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4190 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
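/* With these entries in place, DImode operations that have no inline
   expansion are emitted as libgcc calls; e.g. a 64-bit signed division
   becomes a call to __divdi3 (a sketch of the intended effect, going
   by the names registered above). */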
4193 /* Make a subreg, stripping any existing subreg. We could possibly just
4194 call simplify_subreg, but in this case we know what we want. */
4195 rtx
4196 spu_gen_subreg (enum machine_mode mode, rtx x)
4198 if (GET_CODE (x) == SUBREG)
4199 x = SUBREG_REG (x);
4200 if (GET_MODE (x) == mode)
4201 return x;
4202 return gen_rtx_SUBREG (mode, x, 0);
4205 static bool
4206 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
4208 return (TYPE_MODE (type) == BLKmode
4209 && ((type) == 0
4210 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4211 || int_size_in_bytes (type) >
4212 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4215 /* Create the built-in types and functions */
4217 struct spu_builtin_description spu_builtins[] = {
4218 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4219 {fcode, icode, name, type, params, NULL_TREE},
4220 #include "spu-builtins.def"
4221 #undef DEF_BUILTIN
4224 static void
4225 spu_init_builtins (void)
4227 struct spu_builtin_description *d;
4228 unsigned int i;
4230 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4231 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4232 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4233 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4234 V4SF_type_node = build_vector_type (float_type_node, 4);
4235 V2DF_type_node = build_vector_type (double_type_node, 2);
4237 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4238 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4239 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4240 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4242 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4244 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4245 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4246 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4247 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4248 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4249 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4250 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4251 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4252 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4253 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4254 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4255 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4257 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4258 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4259 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4260 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4261 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4262 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4263 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4264 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4266 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4267 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4269 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4271 spu_builtin_types[SPU_BTI_PTR] =
4272 build_pointer_type (build_qualified_type
4273 (void_type_node,
4274 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4276 /* For each builtin we build a new prototype. The tree code will make
4277 sure nodes are shared. */
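/* Illustrative example: a table entry whose name field is "si_ai"
   (the entries themselves come from spu-builtins.def) is registered
   below as "__builtin_si_ai", with a prototype assembled from its
   parm[] list. */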
4278 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4280 tree p;
4281 char name[64]; /* add_builtin_function will make a copy. */
4282 int parm;
4284 if (d->name == 0)
4285 continue;
4287 /* find last parm */
4288 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4292 p = void_list_node;
4293 while (parm > 1)
4294 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4296 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4298 sprintf (name, "__builtin_%s", d->name);
4299 d->fndecl =
4300 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4301 NULL, NULL_TREE);
4302 if (d->fcode == SPU_MASK_FOR_LOAD)
4303 TREE_READONLY (d->fndecl) = 1;
4307 void
4308 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4310 static unsigned char arr[16] =
4311 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4313 rtx temp = gen_reg_rtx (Pmode);
4314 rtx temp2 = gen_reg_rtx (V4SImode);
4315 rtx temp3 = gen_reg_rtx (V4SImode);
4316 rtx pat = gen_reg_rtx (TImode);
4317 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4319 emit_move_insn (pat, array_to_constant (TImode, arr));
4321 /* Restore the sp. */
4322 emit_move_insn (temp, op1);
4323 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4325 /* Compute available stack size for sp. */
4326 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4327 emit_insn (gen_shufb (temp3, temp, temp, pat));
4329 emit_insn (gen_addv4si3 (sp, sp, temp3));
4330 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4333 int
4334 spu_safe_dma (HOST_WIDE_INT channel)
4336 return (channel >= 21 && channel <= 27);
4339 void
4340 spu_builtin_splats (rtx ops[])
4342 enum machine_mode mode = GET_MODE (ops[0]);
4343 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4345 unsigned char arr[16];
4346 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4347 emit_move_insn (ops[0], array_to_constant (mode, arr));
4349 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4351 rtvec v = rtvec_alloc (4);
4352 RTVEC_ELT (v, 0) = ops[1];
4353 RTVEC_ELT (v, 1) = ops[1];
4354 RTVEC_ELT (v, 2) = ops[1];
4355 RTVEC_ELT (v, 3) = ops[1];
4356 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4358 else
4360 rtx reg = gen_reg_rtx (TImode);
4361 rtx shuf;
4362 if (GET_CODE (ops[1]) != REG
4363 && GET_CODE (ops[1]) != SUBREG)
4364 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
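/* The shuffle-control constants below repeat the byte indices of the
   preferred scalar slot of ops[1] (assuming standard shufb selectors):
   bytes 0-7 for doubleword elements, bytes 0-3 for word elements,
   bytes 2-3 for halfwords and byte 3 for byte elements, so the shufb
   at the end replicates that element into every slot. */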
4365 switch (mode)
4367 case V2DImode:
4368 case V2DFmode:
4369 shuf =
4370 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4371 TImode);
4372 break;
4373 case V4SImode:
4374 case V4SFmode:
4375 shuf =
4376 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4377 TImode);
4378 break;
4379 case V8HImode:
4380 shuf =
4381 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4382 TImode);
4383 break;
4384 case V16QImode:
4385 shuf =
4386 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4387 TImode);
4388 break;
4389 default:
4390 abort ();
4392 emit_move_insn (reg, shuf);
4393 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4397 void
4398 spu_builtin_extract (rtx ops[])
4400 enum machine_mode mode;
4401 rtx rot, from, tmp;
4403 mode = GET_MODE (ops[1]);
4405 if (GET_CODE (ops[2]) == CONST_INT)
4407 switch (mode)
4409 case V16QImode:
4410 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4411 break;
4412 case V8HImode:
4413 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4414 break;
4415 case V4SFmode:
4416 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4417 break;
4418 case V4SImode:
4419 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4420 break;
4421 case V2DImode:
4422 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4423 break;
4424 case V2DFmode:
4425 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4426 break;
4427 default:
4428 abort ();
4430 return;
4433 from = spu_gen_subreg (TImode, ops[1]);
4434 rot = gen_reg_rtx (TImode);
4435 tmp = gen_reg_rtx (SImode);
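/* Sketch of the variable-index case below: compute a left byte
   rotation of element_size * index, minus the byte offset of the
   preferred scalar slot (0 for words and doublewords, 2 for
   halfwords, 3 for bytes), so that element ops[2] lands in the
   preferred slot before spu_convert extracts it. */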
4437 switch (mode)
4439 case V16QImode:
4440 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4441 break;
4442 case V8HImode:
4443 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4444 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4445 break;
4446 case V4SFmode:
4447 case V4SImode:
4448 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4449 break;
4450 case V2DImode:
4451 case V2DFmode:
4452 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4453 break;
4454 default:
4455 abort ();
4457 emit_insn (gen_rotqby_ti (rot, from, tmp));
4459 emit_insn (gen_spu_convert (ops[0], rot));
4462 void
4463 spu_builtin_insert (rtx ops[])
4465 enum machine_mode mode = GET_MODE (ops[0]);
4466 enum machine_mode imode = GET_MODE_INNER (mode);
4467 rtx mask = gen_reg_rtx (TImode);
4468 rtx offset;
4470 if (GET_CODE (ops[3]) == CONST_INT)
4471 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4472 else
4474 offset = gen_reg_rtx (SImode);
4475 emit_insn (gen_mulsi3
4476 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
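/* The cpat pattern below builds a shufb control mask that merges
   GET_MODE_SIZE (imode) bytes of one shufb source into the other at
   the element's byte offset; this is the usual cwd/chd/cbd style
   insertion idiom (a hedged description; the precise operand roles
   are defined by the cpat and shufb patterns in spu.md). */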
4478 emit_insn (gen_cpat
4479 (mask, stack_pointer_rtx, offset,
4480 GEN_INT (GET_MODE_SIZE (imode))));
4481 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4484 void
4485 spu_builtin_promote (rtx ops[])
4487 enum machine_mode mode, imode;
4488 rtx rot, from, offset;
4489 HOST_WIDE_INT pos;
4491 mode = GET_MODE (ops[0]);
4492 imode = GET_MODE_INNER (mode);
4494 from = gen_reg_rtx (TImode);
4495 rot = spu_gen_subreg (TImode, ops[0]);
4497 emit_insn (gen_spu_convert (from, ops[1]));
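/* The spu_convert above leaves the scalar in the preferred slot of
   FROM; the code below computes a byte rotation that moves it to
   element ops[2]'s position (hence the pos & 15 masking in the
   constant case, since only a 0-15 byte rotate is meaningful). */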
4499 if (GET_CODE (ops[2]) == CONST_INT)
4501 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4502 if (GET_MODE_SIZE (imode) < 4)
4503 pos += 4 - GET_MODE_SIZE (imode);
4504 offset = GEN_INT (pos & 15);
4506 else
4508 offset = gen_reg_rtx (SImode);
4509 switch (mode)
4511 case V16QImode:
4512 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4513 break;
4514 case V8HImode:
4515 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4516 emit_insn (gen_addsi3 (offset, offset, offset));
4517 break;
4518 case V4SFmode:
4519 case V4SImode:
4520 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4521 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4522 break;
4523 case V2DImode:
4524 case V2DFmode:
4525 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4526 break;
4527 default:
4528 abort ();
4531 emit_insn (gen_rotqby_ti (rot, from, offset));
4534 void
4535 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4537 rtx shuf = gen_reg_rtx (V4SImode);
4538 rtx insn = gen_reg_rtx (V4SImode);
4539 rtx shufc;
4540 rtx insnc;
4541 rtx mem;
4543 fnaddr = force_reg (SImode, fnaddr);
4544 cxt = force_reg (SImode, cxt);
4546 if (TARGET_LARGE_MEM)
4548 rtx rotl = gen_reg_rtx (V4SImode);
4549 rtx mask = gen_reg_rtx (V4SImode);
4550 rtx bi = gen_reg_rtx (SImode);
4551 unsigned char shufa[16] = {
4552 2, 3, 0, 1, 18, 19, 16, 17,
4553 0, 1, 2, 3, 16, 17, 18, 19
4555 unsigned char insna[16] = {
4556 0x41, 0, 0, 79,
4557 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4558 0x60, 0x80, 0, 79,
4559 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4562 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4563 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4565 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4566 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4567 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4568 emit_insn (gen_selb (insn, insnc, rotl, mask));
4570 mem = memory_address (Pmode, tramp);
4571 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4573 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4574 mem = memory_address (Pmode, plus_constant (tramp, 16));
4575 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4577 else
4579 rtx scxt = gen_reg_rtx (SImode);
4580 rtx sfnaddr = gen_reg_rtx (SImode);
4581 unsigned char insna[16] = {
4582 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4583 0x30, 0, 0, 0,
4584 0, 0, 0, 0,
4585 0, 0, 0, 0
4588 shufc = gen_reg_rtx (TImode);
4589 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4591 /* By OR'ing all of cxt with the ila opcode we are assuming cxt
4592 fits in 18 bits and the last 4 are zeros. This will be true if
4593 the stack pointer is initialized to 0x3fff0 at program start;
4594 otherwise the ila instruction will be garbage. */
4596 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4597 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4598 emit_insn (gen_cpat
4599 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4600 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4601 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4603 mem = memory_address (Pmode, tramp);
4604 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4607 emit_insn (gen_sync ());
4610 void
4611 spu_expand_sign_extend (rtx ops[])
4613 unsigned char arr[16];
4614 rtx pat = gen_reg_rtx (TImode);
4615 rtx sign, c;
4616 int i, last;
4617 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
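/* Sketch of the shuffle pattern built below, assuming standard shufb
   selectors: control bytes 0x10 and up copy bytes of the sign
   register (the second shufb source), while values below 0x10 copy
   the low-order bytes of ops[1], so the result is the original value
   with every higher byte filled with sign bytes. */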
4618 if (GET_MODE (ops[1]) == QImode)
4620 sign = gen_reg_rtx (HImode);
4621 emit_insn (gen_extendqihi2 (sign, ops[1]));
4622 for (i = 0; i < 16; i++)
4623 arr[i] = 0x12;
4624 arr[last] = 0x13;
4626 else
4628 for (i = 0; i < 16; i++)
4629 arr[i] = 0x10;
4630 switch (GET_MODE (ops[1]))
4632 case HImode:
4633 sign = gen_reg_rtx (SImode);
4634 emit_insn (gen_extendhisi2 (sign, ops[1]));
4635 arr[last] = 0x03;
4636 arr[last - 1] = 0x02;
4637 break;
4638 case SImode:
4639 sign = gen_reg_rtx (SImode);
4640 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4641 for (i = 0; i < 4; i++)
4642 arr[last - i] = 3 - i;
4643 break;
4644 case DImode:
4645 sign = gen_reg_rtx (SImode);
4646 c = gen_reg_rtx (SImode);
4647 emit_insn (gen_spu_convert (c, ops[1]));
4648 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4649 for (i = 0; i < 8; i++)
4650 arr[last - i] = 7 - i;
4651 break;
4652 default:
4653 abort ();
4656 emit_move_insn (pat, array_to_constant (TImode, arr));
4657 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4660 /* Expand vector initialization. If there are any constant parts,
4661 load the constant parts first. Then load any non-constant parts. */
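/* Illustrative example: for a V4SImode initializer { 1, x, 2, 3 }
   with non-constant x, the constant part { 1, 1, 2, 3 } is loaded
   first (the empty slot filled with the first constant so the
   recursive call may still use a splat), and x is then inserted
   into element 1 via spu_builtin_insert. */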
4662 void
4663 spu_expand_vector_init (rtx target, rtx vals)
4665 enum machine_mode mode = GET_MODE (target);
4666 int n_elts = GET_MODE_NUNITS (mode);
4667 int n_var = 0;
4668 bool all_same = true;
4669 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4670 int i;
4672 first = XVECEXP (vals, 0, 0);
4673 for (i = 0; i < n_elts; ++i)
4675 x = XVECEXP (vals, 0, i);
4676 if (!CONSTANT_P (x))
4677 ++n_var;
4678 else
4680 if (first_constant == NULL_RTX)
4681 first_constant = x;
4683 if (i > 0 && !rtx_equal_p (x, first))
4684 all_same = false;
4687 /* If all elements are the same, use splats to replicate the element. */
4688 if (all_same)
4690 if (!CONSTANT_P (first)
4691 && !register_operand (first, GET_MODE (x)))
4692 first = force_reg (GET_MODE (first), first);
4693 emit_insn (gen_spu_splats (target, first));
4694 return;
4697 /* load constant parts */
4698 if (n_var != n_elts)
4700 if (n_var == 0)
4702 emit_move_insn (target,
4703 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4705 else
4707 rtx constant_parts_rtx = copy_rtx (vals);
4709 gcc_assert (first_constant != NULL_RTX);
4710 /* Fill empty slots with the first constant; this increases
4711 our chance of using splats in the recursive call below. */
4712 for (i = 0; i < n_elts; ++i)
4713 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4714 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4716 spu_expand_vector_init (target, constant_parts_rtx);
4720 /* load variable parts */
4721 if (n_var != 0)
4723 rtx insert_operands[4];
4725 insert_operands[0] = target;
4726 insert_operands[2] = target;
4727 for (i = 0; i < n_elts; ++i)
4729 x = XVECEXP (vals, 0, i);
4730 if (!CONSTANT_P (x))
4732 if (!register_operand (x, GET_MODE (x)))
4733 x = force_reg (GET_MODE (x), x);
4734 insert_operands[1] = x;
4735 insert_operands[3] = GEN_INT (i);
4736 spu_builtin_insert (insert_operands);
4742 static rtx
4743 spu_force_reg (enum machine_mode mode, rtx op)
4745 rtx x, r;
4746 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4748 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4749 || GET_MODE (op) == BLKmode)
4750 return force_reg (mode, convert_to_mode (mode, op, 0));
4751 abort ();
4754 r = force_reg (GET_MODE (op), op);
4755 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
4757 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
4758 if (x)
4759 return x;
4762 x = gen_reg_rtx (mode);
4763 emit_insn (gen_spu_convert (x, r));
4764 return x;
4767 static void
4768 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4770 HOST_WIDE_INT v = 0;
4771 int lsbits;
4772 /* Check the range of immediate operands. */
4773 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4775 int range = p - SPU_BTI_7;
4776 if (!CONSTANT_P (op)
4777 || (GET_CODE (op) == CONST_INT
4778 && (INTVAL (op) < spu_builtin_range[range].low
4779 || INTVAL (op) > spu_builtin_range[range].high)))
4780 error ("%s expects an integer literal in the range [%d, %d].",
4781 d->name,
4782 spu_builtin_range[range].low, spu_builtin_range[range].high);
4784 if (GET_CODE (op) == CONST
4785 && (GET_CODE (XEXP (op, 0)) == PLUS
4786 || GET_CODE (XEXP (op, 0)) == MINUS))
4788 v = INTVAL (XEXP (XEXP (op, 0), 1));
4789 op = XEXP (XEXP (op, 0), 0);
4791 else if (GET_CODE (op) == CONST_INT)
4792 v = INTVAL (op);
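/* lsbits below counts address bits that the instruction encoding
   silently drops; e.g. (illustrative) passing a literal whose low 4
   bits are not all zero for an SPU_BTI_S10_4 operand triggers the
   "least significant bits ignored" warning further down. */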
4794 switch (p)
4796 case SPU_BTI_S10_4:
4797 lsbits = 4;
4798 break;
4799 case SPU_BTI_U16_2:
4800 /* This is only used in lqa and stqa. Even though the insns
4801 encode 16 bits of the address (all but the 2 least
4802 significant), only 14 bits are used because the address is
4803 masked to be 16-byte aligned. */
4804 lsbits = 4;
4805 break;
4806 case SPU_BTI_S16_2:
4807 /* This is used for lqr and stqr. */
4808 lsbits = 2;
4809 break;
4810 default:
4811 lsbits = 0;
4814 if (GET_CODE (op) == LABEL_REF
4815 || (GET_CODE (op) == SYMBOL_REF
4816 && SYMBOL_REF_FUNCTION_P (op))
4817 || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
4818 warning (0, "%d least significant bits of %s are ignored.", lsbits,
4819 d->name);
4824 static void
4825 expand_builtin_args (struct spu_builtin_description *d, tree arglist,
4826 rtx target, rtx ops[])
4828 enum insn_code icode = d->icode;
4829 int i = 0;
4831 /* Expand the arguments into rtl. */
4833 if (d->parm[0] != SPU_BTI_VOID)
4834 ops[i++] = target;
4836 for (; i < insn_data[icode].n_operands; i++)
4838 tree arg = TREE_VALUE (arglist);
4839 if (arg == 0)
4840 abort ();
4841 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4842 arglist = TREE_CHAIN (arglist);
4846 static rtx
4847 spu_expand_builtin_1 (struct spu_builtin_description *d,
4848 tree arglist, rtx target)
4850 rtx pat;
4851 rtx ops[8];
4852 enum insn_code icode = d->icode;
4853 enum machine_mode mode, tmode;
4854 int i, p;
4855 tree return_type;
4857 /* Set up ops[] with values from arglist. */
4858 expand_builtin_args (d, arglist, target, ops);
4860 /* Handle the target operand which must be operand 0. */
4861 i = 0;
4862 if (d->parm[0] != SPU_BTI_VOID)
4865 /* We prefer the mode specified for the match_operand; otherwise we
4866 use the mode from the builtin function prototype. */
4867 tmode = insn_data[d->icode].operand[0].mode;
4868 if (tmode == VOIDmode)
4869 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4871 /* Try to use target, because not using it can lead to extra copies,
4872 and when all of the registers are in use those extra copies lead
4873 to extra spills. */
4874 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4875 ops[0] = target;
4876 else
4877 target = ops[0] = gen_reg_rtx (tmode);
4879 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
4880 abort ();
4882 i++;
4885 if (d->fcode == SPU_MASK_FOR_LOAD)
4887 enum machine_mode mode = insn_data[icode].operand[1].mode;
4888 tree arg;
4889 rtx addr, op, pat;
4891 /* get addr */
4892 arg = TREE_VALUE (arglist);
4893 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
4894 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
4895 addr = memory_address (mode, op);
4897 /* negate addr */
4898 op = gen_reg_rtx (GET_MODE (addr));
4899 emit_insn (gen_rtx_SET (VOIDmode, op,
4900 gen_rtx_NEG (GET_MODE (addr), addr)));
4901 op = gen_rtx_MEM (mode, op);
4903 pat = GEN_FCN (icode) (target, op);
4904 if (!pat)
4905 return 0;
4906 emit_insn (pat);
4907 return target;
4910 /* Ignore align_hint, but still expand its args in case they have
4911 side effects. */
4912 if (icode == CODE_FOR_spu_align_hint)
4913 return 0;
4915 /* Handle the rest of the operands. */
4916 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
4918 if (insn_data[d->icode].operand[i].mode != VOIDmode)
4919 mode = insn_data[d->icode].operand[i].mode;
4920 else
4921 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
4923 /* mode can be VOIDmode here for labels */
4925 /* For specific intrinsics with an immediate operand, e.g.,
4926 si_ai(), we sometimes need to convert the scalar argument to a
4927 vector argument by splatting the scalar. */
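/* E.g. (a sketch of the transformation done below): when the insn
   pattern wants a vector operand but the builtin argument is a
   CONST_INT, spu_const builds a vector with that value in every
   element; a non-constant scalar is instead converted to the element
   mode and replicated with spu_splats. */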
4928 if (VECTOR_MODE_P (mode)
4929 && (GET_CODE (ops[i]) == CONST_INT
4930 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
4931 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
4933 if (GET_CODE (ops[i]) == CONST_INT)
4934 ops[i] = spu_const (mode, INTVAL (ops[i]));
4935 else
4937 rtx reg = gen_reg_rtx (mode);
4938 enum machine_mode imode = GET_MODE_INNER (mode);
4939 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
4940 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
4941 if (imode != GET_MODE (ops[i]))
4942 ops[i] = convert_to_mode (imode, ops[i],
4943 TYPE_UNSIGNED (spu_builtin_types
4944 [d->parm[i]]));
4945 emit_insn (gen_spu_splats (reg, ops[i]));
4946 ops[i] = reg;
4950 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
4951 ops[i] = spu_force_reg (mode, ops[i]);
4953 spu_check_builtin_parm (d, ops[i], d->parm[p]);
4956 switch (insn_data[icode].n_operands)
4958 case 0:
4959 pat = GEN_FCN (icode) (0);
4960 break;
4961 case 1:
4962 pat = GEN_FCN (icode) (ops[0]);
4963 break;
4964 case 2:
4965 pat = GEN_FCN (icode) (ops[0], ops[1]);
4966 break;
4967 case 3:
4968 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
4969 break;
4970 case 4:
4971 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
4972 break;
4973 case 5:
4974 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
4975 break;
4976 case 6:
4977 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
4978 break;
4979 default:
4980 abort ();
4983 if (!pat)
4984 abort ();
4986 if (d->type == B_CALL || d->type == B_BISLED)
4987 emit_call_insn (pat);
4988 else if (d->type == B_JUMP)
4990 emit_jump_insn (pat);
4991 emit_barrier ();
4993 else
4994 emit_insn (pat);
4996 return_type = spu_builtin_types[d->parm[0]];
4997 if (d->parm[0] != SPU_BTI_VOID
4998 && GET_MODE (target) != TYPE_MODE (return_type))
5000 /* target is the return value. It should always have the mode of
5001 the builtin function prototype. */
5002 target = spu_force_reg (TYPE_MODE (return_type), target);
5005 return target;
5008 rtx
5009 spu_expand_builtin (tree exp,
5010 rtx target,
5011 rtx subtarget ATTRIBUTE_UNUSED,
5012 enum machine_mode mode ATTRIBUTE_UNUSED,
5013 int ignore ATTRIBUTE_UNUSED)
5015 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
5016 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5017 tree arglist = TREE_OPERAND (exp, 1);
5018 struct spu_builtin_description *d;
5020 if (fcode < NUM_SPU_BUILTINS)
5022 d = &spu_builtins[fcode];
5024 return spu_expand_builtin_1 (d, arglist, target);
5026 abort ();
5029 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5030 static tree
5031 spu_builtin_mul_widen_even (tree type)
5033 switch (TYPE_MODE (type))
5035 case V8HImode:
5036 if (TYPE_UNSIGNED (type))
5037 return spu_builtins[SPU_MULE_0].fndecl;
5038 else
5039 return spu_builtins[SPU_MULE_1].fndecl;
5040 break;
5041 default:
5042 return NULL_TREE;
5046 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5047 static tree
5048 spu_builtin_mul_widen_odd (tree type)
5050 switch (TYPE_MODE (type))
5052 case V8HImode:
5053 if (TYPE_UNSIGNED (type))
5054 return spu_builtins[SPU_MULO_1].fndecl;
5055 else
5056 return spu_builtins[SPU_MULO_0].fndecl;
5057 break;
5058 default:
5059 return NULL_TREE;
5063 /* Implement targetm.vectorize.builtin_mask_for_load. */
5064 static tree
5065 spu_builtin_mask_for_load (void)
5067 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5068 gcc_assert (d);
5069 return d->fndecl;