1 /* Copyright (C) 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
16 02110-1301, USA. */
18 #include "config.h"
19 #include "system.h"
20 #include "coretypes.h"
21 #include "tm.h"
22 #include "rtl.h"
23 #include "regs.h"
24 #include "hard-reg-set.h"
25 #include "real.h"
26 #include "insn-config.h"
27 #include "conditions.h"
28 #include "insn-attr.h"
29 #include "flags.h"
30 #include "recog.h"
31 #include "obstack.h"
32 #include "tree.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "except.h"
36 #include "function.h"
37 #include "output.h"
38 #include "basic-block.h"
39 #include "integrate.h"
40 #include "toplev.h"
41 #include "ggc.h"
42 #include "hashtab.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "langhooks.h"
47 #include "reload.h"
48 #include "cfglayout.h"
49 #include "sched-int.h"
50 #include "params.h"
51 #include "assert.h"
52 #include "c-common.h"
53 #include "machmode.h"
54 #include "tree-gimple.h"
55 #include "tm-constrs.h"
56 #include "spu-builtins.h"
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
61 int low, high;
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
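/* Illustrative sketch: a range table like spu_builtin_range is typically
   consulted when validating the immediate operand of a builtin.  Assuming
   the operand value is in VAL and the SPU_BTI_* index in RANGE (the helper
   name below is hypothetical), the check reduces to a bounds test:

     static int
     builtin_value_in_range_p (HOST_WIDE_INT val, int range)
     {
       return val >= spu_builtin_range[range].low
              && val <= spu_builtin_range[range].high;
     }

   The real checks live with the builtin expansion code.  */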
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98 static rtx get_branch_target (rtx branch);
99 static void insert_branch_hints (void);
100 static void insert_nops (void);
101 static void spu_machine_dependent_reorg (void);
102 static int spu_sched_issue_rate (void);
103 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
104 int can_issue_more);
105 static int get_pipe (rtx insn);
106 static int spu_sched_adjust_priority (rtx insn, int pri);
107 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
108 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
109 int flags,
110 unsigned char *no_add_attrs);
111 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
112 int flags,
113 unsigned char *no_add_attrs);
114 static int spu_naked_function_p (tree func);
115 static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
116 tree type, unsigned char named);
117 static tree spu_build_builtin_va_list (void);
118 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
119 tree * post_p);
120 static int regno_aligned_for_load (int regno);
121 static int store_with_one_insn_p (rtx mem);
122 static int reg_align (rtx reg);
123 static int mem_is_padded_component_ref (rtx x);
124 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
125 static void spu_asm_globalize_label (FILE * file, const char *name);
126 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
127 int *total);
128 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
129 static void spu_init_libfuncs (void);
130 static bool spu_return_in_memory (tree type, tree fntype);
132 extern const char *reg_names[];
133 rtx spu_compare_op0, spu_compare_op1;
135 enum spu_immediate {
136 SPU_NONE,
137 SPU_IL,
138 SPU_ILA,
139 SPU_ILH,
140 SPU_ILHU,
141 SPU_ORI,
142 SPU_ORHI,
143 SPU_ORBI,
144 SPU_IOHL
147 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
148 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
150 /* Built in types. */
151 tree spu_builtin_types[SPU_BTI_MAX];
153 /* TARGET overrides. */
155 #undef TARGET_INIT_BUILTINS
156 #define TARGET_INIT_BUILTINS spu_init_builtins
158 #undef TARGET_EXPAND_BUILTIN
159 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
161 #undef TARGET_EH_RETURN_FILTER_MODE
162 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
 164 /* The .8byte directive doesn't seem to work well for a 32-bit
165 architecture. */
166 #undef TARGET_ASM_UNALIGNED_DI_OP
167 #define TARGET_ASM_UNALIGNED_DI_OP NULL
169 #undef TARGET_RTX_COSTS
170 #define TARGET_RTX_COSTS spu_rtx_costs
172 #undef TARGET_ADDRESS_COST
173 #define TARGET_ADDRESS_COST hook_int_rtx_0
175 #undef TARGET_SCHED_ISSUE_RATE
176 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
178 #undef TARGET_SCHED_VARIABLE_ISSUE
179 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
181 #undef TARGET_SCHED_ADJUST_PRIORITY
182 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
184 #undef TARGET_SCHED_ADJUST_COST
185 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
187 const struct attribute_spec spu_attribute_table[];
188 #undef TARGET_ATTRIBUTE_TABLE
189 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
191 #undef TARGET_ASM_INTEGER
192 #define TARGET_ASM_INTEGER spu_assemble_integer
194 #undef TARGET_SCALAR_MODE_SUPPORTED_P
195 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
197 #undef TARGET_VECTOR_MODE_SUPPORTED_P
198 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
200 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
201 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
203 #undef TARGET_ASM_GLOBALIZE_LABEL
204 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
206 #undef TARGET_PASS_BY_REFERENCE
207 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
209 #undef TARGET_MUST_PASS_IN_STACK
210 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
212 #undef TARGET_BUILD_BUILTIN_VA_LIST
213 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
215 #undef TARGET_SETUP_INCOMING_VARARGS
216 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
218 #undef TARGET_MACHINE_DEPENDENT_REORG
219 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
221 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
222 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
224 #undef TARGET_DEFAULT_TARGET_FLAGS
225 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
227 #undef TARGET_INIT_LIBFUNCS
228 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
230 #undef TARGET_RETURN_IN_MEMORY
231 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
233 struct gcc_target targetm = TARGET_INITIALIZER;
235 /* Sometimes certain combinations of command options do not make sense
236 on a particular target machine. You can define a macro
237 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
238 executed once just after all the command options have been parsed. */
239 void
240 spu_override_options (void)
243 /* Override some of the default param values. With so many registers
244 larger values are better for these params. */
245 if (MAX_UNROLLED_INSNS == 100)
246 MAX_UNROLLED_INSNS = 250;
247 if (MAX_PENDING_LIST_LENGTH == 32)
248 MAX_PENDING_LIST_LENGTH = 128;
250 flag_omit_frame_pointer = 1;
252 if (align_functions < 8)
253 align_functions = 8;
256 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
257 struct attribute_spec.handler. */
259 /* Table of machine attributes. */
260 const struct attribute_spec spu_attribute_table[] =
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
263 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
264 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
265 { NULL, 0, 0, false, false, false, NULL }
268 /* True if MODE is valid for the target. By "valid", we mean able to
269 be manipulated in non-trivial ways. In particular, this means all
270 the arithmetic is supported. */
271 static bool
272 spu_scalar_mode_supported_p (enum machine_mode mode)
274 switch (mode)
276 case QImode:
277 case HImode:
278 case SImode:
279 case SFmode:
280 case DImode:
281 case TImode:
282 case DFmode:
283 return true;
285 default:
286 return false;
290 /* Similarly for vector modes. "Supported" here is less strict. At
 291    least some operations are supported; callers need to check optabs or builtins
292 for further details. */
293 static bool
294 spu_vector_mode_supported_p (enum machine_mode mode)
296 switch (mode)
298 case V16QImode:
299 case V8HImode:
300 case V4SImode:
301 case V2DImode:
302 case V4SFmode:
303 case V2DFmode:
304 return true;
306 default:
307 return false;
311 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
312 least significant bytes of the outer mode. This function returns
 313    TRUE for the SUBREGs where this is correct. */
315 valid_subreg (rtx op)
317 enum machine_mode om = GET_MODE (op);
318 enum machine_mode im = GET_MODE (SUBREG_REG (op));
319 return om != VOIDmode && im != VOIDmode
320 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
321 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
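/* For illustration, under this rule (subreg:SI (reg:HI)) and
   (subreg:QI (reg:SI)) are accepted because both modes fit in 4 bytes,
   (subreg:SI (reg:SF)) is accepted because the sizes match, and a
   paradoxical (subreg:TI (reg:SI)) is rejected because the SImode value
   would not occupy the least significant bytes of the TImode register.  */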
 324 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
325 and adjust the start offset. */
326 static rtx
327 adjust_operand (rtx op, HOST_WIDE_INT * start)
329 enum machine_mode mode;
330 int op_size;
331 /* Strip any SUBREG */
332 if (GET_CODE (op) == SUBREG)
334 if (start)
335 *start -=
336 GET_MODE_BITSIZE (GET_MODE (op)) -
337 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
338 op = SUBREG_REG (op);
 341   /* If it is smaller than SI, make sure we end up with a SUBREG */
341 op_size = GET_MODE_BITSIZE (GET_MODE (op));
342 if (op_size < 32)
344 if (start)
345 *start += 32 - op_size;
346 op_size = 32;
348 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
349 mode = mode_for_size (op_size, MODE_INT, 0);
350 if (mode != GET_MODE (op))
351 op = gen_rtx_SUBREG (mode, op, 0);
352 return op;
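/* Worked example: if the operand arrives as (subreg:TI (reg:SI)) with
   *START == 100, stripping the SUBREG subtracts 128 - 32, leaving
   *START == 4 relative to the SImode register.  If instead the operand
   is a bare (reg:HI) with *START == 3, the widening step adds 32 - 16,
   so the field starts at bit 19 of the resulting (subreg:SI (reg:HI) 0).
   Bit positions count from the most significant bit, as the mask
   computation in spu_expand_insv below assumes.  */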
355 void
356 spu_expand_extv (rtx ops[], int unsignedp)
358 HOST_WIDE_INT width = INTVAL (ops[2]);
359 HOST_WIDE_INT start = INTVAL (ops[3]);
360 HOST_WIDE_INT src_size, dst_size;
361 enum machine_mode src_mode, dst_mode;
362 rtx dst = ops[0], src = ops[1];
363 rtx s;
365 dst = adjust_operand (ops[0], 0);
366 dst_mode = GET_MODE (dst);
367 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
369 src = adjust_operand (src, &start);
370 src_mode = GET_MODE (src);
371 src_size = GET_MODE_BITSIZE (GET_MODE (src));
373 if (start > 0)
375 s = gen_reg_rtx (src_mode);
376 switch (src_mode)
378 case SImode:
379 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
380 break;
381 case DImode:
382 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
383 break;
384 case TImode:
385 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
386 break;
387 default:
388 abort ();
390 src = s;
393 if (width < src_size)
395 rtx pat;
396 int icode;
397 switch (src_mode)
399 case SImode:
400 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
401 break;
402 case DImode:
403 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
404 break;
405 case TImode:
406 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
407 break;
408 default:
409 abort ();
411 s = gen_reg_rtx (src_mode);
412 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
413 emit_insn (pat);
414 src = s;
417 convert_move (dst, src, unsignedp);
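/* Worked example: the extraction above is a left shift that discards the
   bits before the field followed by a right shift that discards the bits
   after it.  Extracting an 8-bit field starting at bit 4 of an SImode
   source therefore becomes roughly

     (ashift:SI src (const_int 4))        ; drop the 4 leading bits
     (lshiftrt:SI tmp (const_int 24))     ; 32 - 8, zero extending

   with ashiftrt used instead of lshiftrt for a signed extraction.  */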
420 void
421 spu_expand_insv (rtx ops[])
423 HOST_WIDE_INT width = INTVAL (ops[1]);
424 HOST_WIDE_INT start = INTVAL (ops[2]);
425 HOST_WIDE_INT maskbits;
426 enum machine_mode dst_mode, src_mode;
427 rtx dst = ops[0], src = ops[3];
428 int dst_size, src_size;
429 rtx mask;
430 rtx shift_reg;
431 int shift;
434 if (GET_CODE (ops[0]) == MEM)
435 dst = gen_reg_rtx (TImode);
436 else
437 dst = adjust_operand (dst, &start);
438 dst_mode = GET_MODE (dst);
439 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
441 if (CONSTANT_P (src))
443 enum machine_mode m =
444 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
445 src = force_reg (m, convert_to_mode (m, src, 0));
447 src = adjust_operand (src, 0);
448 src_mode = GET_MODE (src);
449 src_size = GET_MODE_BITSIZE (GET_MODE (src));
451 mask = gen_reg_rtx (dst_mode);
452 shift_reg = gen_reg_rtx (dst_mode);
453 shift = dst_size - start - width;
455 /* It's not safe to use subreg here because the compiler assumes
456 that the SUBREG_REG is right justified in the SUBREG. */
457 convert_move (shift_reg, src, 1);
459 if (shift > 0)
461 switch (dst_mode)
463 case SImode:
464 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
465 break;
466 case DImode:
467 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
468 break;
469 case TImode:
470 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
471 break;
472 default:
473 abort ();
476 else if (shift < 0)
477 abort ();
479 switch (dst_size)
481 case 32:
482 maskbits = (-1ll << (32 - width - start));
483 if (start)
484 maskbits += (1ll << (32 - start));
485 emit_move_insn (mask, GEN_INT (maskbits));
486 break;
487 case 64:
488 maskbits = (-1ll << (64 - width - start));
489 if (start)
490 maskbits += (1ll << (64 - start));
491 emit_move_insn (mask, GEN_INT (maskbits));
492 break;
493 case 128:
495 unsigned char arr[16];
496 int i = start / 8;
497 memset (arr, 0, sizeof (arr));
498 arr[i] = 0xff >> (start & 7);
499 for (i++; i <= (start + width - 1) / 8; i++)
500 arr[i] = 0xff;
501 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
502 emit_move_insn (mask, array_to_constant (TImode, arr));
504 break;
505 default:
506 abort ();
508 if (GET_CODE (ops[0]) == MEM)
510 rtx aligned = gen_reg_rtx (SImode);
511 rtx low = gen_reg_rtx (SImode);
512 rtx addr = gen_reg_rtx (SImode);
513 rtx rotl = gen_reg_rtx (SImode);
514 rtx mask0 = gen_reg_rtx (TImode);
515 rtx mem;
517 emit_move_insn (addr, XEXP (ops[0], 0));
518 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
519 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
520 emit_insn (gen_negsi2 (rotl, low));
521 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
522 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
523 mem = change_address (ops[0], TImode, aligned);
524 set_mem_alias_set (mem, 0);
525 emit_move_insn (dst, mem);
526 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
527 emit_move_insn (mem, dst);
528 if (start + width > MEM_ALIGN (ops[0]))
530 rtx shl = gen_reg_rtx (SImode);
531 rtx mask1 = gen_reg_rtx (TImode);
532 rtx dst1 = gen_reg_rtx (TImode);
533 rtx mem1;
534 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
535 emit_insn (gen_shlqby_ti (mask1, mask, shl));
536 mem1 = adjust_address (mem, TImode, 16);
537 set_mem_alias_set (mem1, 0);
538 emit_move_insn (dst1, mem1);
539 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
540 emit_move_insn (mem1, dst1);
543 else
544 emit_insn (gen_selb (dst, dst, shift_reg, mask));
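/* Worked example: for an SImode destination with START == 8 and
   WIDTH == 8, the mask built above is (-1ll << 16) + (1ll << 24), which
   as a 32-bit value is 0x00ff0000 and selects exactly bits 8..15 counted
   from the most significant bit.  The selb then merges the shifted
   source into the destination under that mask; the MEM case applies the
   same selb to one or two aligned quadwords after rotating the source
   and the mask into place.  */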
549 spu_expand_block_move (rtx ops[])
551 HOST_WIDE_INT bytes, align, offset;
552 rtx src, dst, sreg, dreg, target;
553 int i;
554 if (GET_CODE (ops[2]) != CONST_INT
555 || GET_CODE (ops[3]) != CONST_INT
556 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
557 return 0;
559 bytes = INTVAL (ops[2]);
560 align = INTVAL (ops[3]);
562 if (bytes <= 0)
563 return 1;
565 dst = ops[0];
566 src = ops[1];
568 if (align == 16)
570 for (offset = 0; offset + 16 <= bytes; offset += 16)
572 dst = adjust_address (ops[0], V16QImode, offset);
573 src = adjust_address (ops[1], V16QImode, offset);
574 emit_move_insn (dst, src);
576 if (offset < bytes)
578 rtx mask;
579 unsigned char arr[16] = { 0 };
580 for (i = 0; i < bytes - offset; i++)
581 arr[i] = 0xff;
582 dst = adjust_address (ops[0], V16QImode, offset);
583 src = adjust_address (ops[1], V16QImode, offset);
584 mask = gen_reg_rtx (V16QImode);
585 sreg = gen_reg_rtx (V16QImode);
586 dreg = gen_reg_rtx (V16QImode);
587 target = gen_reg_rtx (V16QImode);
588 emit_move_insn (mask, array_to_constant (V16QImode, arr));
589 emit_move_insn (dreg, dst);
590 emit_move_insn (sreg, src);
591 emit_insn (gen_selb (target, dreg, sreg, mask));
592 emit_move_insn (dst, target);
594 return 1;
596 return 0;
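/* For illustration, copying 20 bytes with 16-byte alignment moves one
   full V16QImode quadword and then handles the 4-byte tail with selb
   under the byte mask

     unsigned char arr[16] = { 0xff, 0xff, 0xff, 0xff, 0, 0, ... };

   so only the first four bytes of the final destination quadword are
   replaced.  */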
599 enum spu_comp_code
600 { SPU_EQ, SPU_GT, SPU_GTU };
603 int spu_comp_icode[8][3] = {
604 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
605 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
606 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
607 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
608 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
609 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
610 {0, 0, 0},
611 {CODE_FOR_ceq_vec, 0, 0},
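/* For illustration, the table is indexed by the operand mode (rows in
   the order QI, HI, SI, DI, TI, SF, DF, vector) and by enum
   spu_comp_code (columns).  An unsigned SImode greater-than compare
   selects spu_comp_icode[2][SPU_GTU], which is CODE_FOR_clgt_si; a zero
   entry means there is no direct compare insn for that combination.  */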
614 /* Generate a compare for CODE. Return a brand-new rtx that represents
615 the result of the compare. GCC can figure this out too if we don't
 616    provide all variations of compares, but since GCC always wants to use
617 WORD_MODE, we can generate better code in most cases if we do it
618 ourselves. */
619 void
620 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
622 int reverse_compare = 0;
623 int reverse_test = 0;
624 rtx compare_result;
625 rtx comp_rtx;
626 rtx target = operands[0];
627 enum machine_mode comp_mode;
628 enum machine_mode op_mode;
629 enum spu_comp_code scode;
630 int index;
632 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
633 and so on, to keep the constant in operand 1. */
634 if (GET_CODE (spu_compare_op1) == CONST_INT)
636 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
637 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
638 switch (code)
640 case GE:
641 spu_compare_op1 = GEN_INT (val);
642 code = GT;
643 break;
644 case LT:
645 spu_compare_op1 = GEN_INT (val);
646 code = LE;
647 break;
648 case GEU:
649 spu_compare_op1 = GEN_INT (val);
650 code = GTU;
651 break;
652 case LTU:
653 spu_compare_op1 = GEN_INT (val);
654 code = LEU;
655 break;
656 default:
657 break;
661 switch (code)
663 case GE:
664 reverse_compare = 1;
665 reverse_test = 1;
666 scode = SPU_GT;
667 break;
668 case LE:
669 reverse_compare = 0;
670 reverse_test = 1;
671 scode = SPU_GT;
672 break;
673 case LT:
674 reverse_compare = 1;
675 reverse_test = 0;
676 scode = SPU_GT;
677 break;
678 case GEU:
679 reverse_compare = 1;
680 reverse_test = 1;
681 scode = SPU_GTU;
682 break;
683 case LEU:
684 reverse_compare = 0;
685 reverse_test = 1;
686 scode = SPU_GTU;
687 break;
688 case LTU:
689 reverse_compare = 1;
690 reverse_test = 0;
691 scode = SPU_GTU;
692 break;
693 case NE:
694 reverse_compare = 0;
695 reverse_test = 1;
696 scode = SPU_EQ;
697 break;
699 case EQ:
700 scode = SPU_EQ;
701 break;
702 case GT:
703 scode = SPU_GT;
704 break;
705 case GTU:
706 scode = SPU_GTU;
707 break;
708 default:
709 scode = SPU_EQ;
710 break;
713 comp_mode = SImode;
714 op_mode = GET_MODE (spu_compare_op0);
716 switch (op_mode)
718 case QImode:
719 index = 0;
720 comp_mode = QImode;
721 break;
722 case HImode:
723 index = 1;
724 comp_mode = HImode;
725 break;
726 case SImode:
727 index = 2;
728 break;
729 case DImode:
730 index = 3;
731 break;
732 case TImode:
733 index = 4;
734 break;
735 case SFmode:
736 index = 5;
737 break;
738 case DFmode:
739 index = 6;
740 break;
741 case V16QImode:
742 case V8HImode:
743 case V4SImode:
744 case V2DImode:
745 case V4SFmode:
746 case V2DFmode:
747 index = 7;
748 break;
749 default:
750 abort ();
753 if (GET_MODE (spu_compare_op1) == DFmode)
755 rtx reg = gen_reg_rtx (DFmode);
756 if (!flag_unsafe_math_optimizations
757 || (scode != SPU_GT && scode != SPU_EQ))
758 abort ();
759 if (reverse_compare)
760 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
761 else
762 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
763 reverse_compare = 0;
764 spu_compare_op0 = reg;
765 spu_compare_op1 = CONST0_RTX (DFmode);
768 if (is_set == 0 && spu_compare_op1 == const0_rtx
769 && (GET_MODE (spu_compare_op0) == SImode
770 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
772 /* Don't need to set a register with the result when we are
773 comparing against zero and branching. */
774 reverse_test = !reverse_test;
775 compare_result = spu_compare_op0;
777 else
779 compare_result = gen_reg_rtx (comp_mode);
781 if (reverse_compare)
783 rtx t = spu_compare_op1;
784 spu_compare_op1 = spu_compare_op0;
785 spu_compare_op0 = t;
788 if (spu_comp_icode[index][scode] == 0)
789 abort ();
791 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
792 (spu_compare_op0, op_mode))
793 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
794 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
795 (spu_compare_op1, op_mode))
796 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
797 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
798 spu_compare_op0,
799 spu_compare_op1);
800 if (comp_rtx == 0)
801 abort ();
802 emit_insn (comp_rtx);
806 if (is_set == 0)
808 rtx bcomp;
809 rtx loc_ref;
811 /* We don't have branch on QI compare insns, so we convert the
812 QI compare result to a HI result. */
813 if (comp_mode == QImode)
815 rtx old_res = compare_result;
816 compare_result = gen_reg_rtx (HImode);
817 comp_mode = HImode;
818 emit_insn (gen_extendqihi2 (compare_result, old_res));
821 if (reverse_test)
822 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
823 else
824 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
826 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
827 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
828 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
829 loc_ref, pc_rtx)));
831 else if (is_set == 2)
833 int compare_size = GET_MODE_BITSIZE (comp_mode);
834 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
835 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
836 rtx select_mask;
837 rtx op_t = operands[2];
838 rtx op_f = operands[3];
840 /* The result of the comparison can be SI, HI or QI mode. Create a
841 mask based on that result. */
842 if (target_size > compare_size)
844 select_mask = gen_reg_rtx (mode);
845 emit_insn (gen_extend_compare (select_mask, compare_result));
847 else if (target_size < compare_size)
848 select_mask =
849 gen_rtx_SUBREG (mode, compare_result,
850 (compare_size - target_size) / BITS_PER_UNIT);
851 else if (comp_mode != mode)
852 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
853 else
854 select_mask = compare_result;
856 if (GET_MODE (target) != GET_MODE (op_t)
857 || GET_MODE (target) != GET_MODE (op_f))
858 abort ();
860 if (reverse_test)
861 emit_insn (gen_selb (target, op_t, op_f, select_mask));
862 else
863 emit_insn (gen_selb (target, op_f, op_t, select_mask));
865 else
867 if (reverse_test)
868 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
869 gen_rtx_NOT (comp_mode, compare_result)));
870 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
871 emit_insn (gen_extendhisi2 (target, compare_result));
872 else if (GET_MODE (target) == SImode
873 && GET_MODE (compare_result) == QImode)
874 emit_insn (gen_extend_compare (target, compare_result));
875 else
876 emit_move_insn (target, compare_result);
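/* Worked example: only eq, gt and gtu compares exist in the table above,
   so the remaining conditions are built by swapping operands and/or
   inverting the test.  A branch on (ge:SI a b) uses

     reverse_compare = 1, reverse_test = 1, scode = SPU_GT

   i.e. it computes (gt b a) and branches when that result is zero, which
   is equivalent to a >= b.  When operand 1 is a CONST_INT the earlier
   rewrite keeps the constant in place instead, turning (ge a 10) into
   (gt a 9) directly.  */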
880 HOST_WIDE_INT
881 const_double_to_hwint (rtx x)
883 HOST_WIDE_INT val;
884 REAL_VALUE_TYPE rv;
885 if (GET_MODE (x) == SFmode)
887 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
888 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
890 else if (GET_MODE (x) == DFmode)
892 long l[2];
893 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
894 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
895 val = l[0];
896 val = (val << 32) | (l[1] & 0xffffffff);
898 else
899 abort ();
900 return val;
904 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
906 long tv[2];
907 REAL_VALUE_TYPE rv;
908 gcc_assert (mode == SFmode || mode == DFmode);
910 if (mode == SFmode)
911 tv[0] = (v << 32) >> 32;
912 else if (mode == DFmode)
914 tv[1] = (v << 32) >> 32;
915 tv[0] = v >> 32;
917 real_from_target (&rv, tv, mode);
918 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
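/* For illustration, the HOST_WIDE_INT here is the target bit pattern of
   the float, not its numeric value.  Assuming IEEE single precision,

     hwint_to_const_double (SFmode, 0x3f800000)

   yields the CONST_DOUBLE for 1.0f, and const_double_to_hwint performs
   the inverse conversion.  */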
921 void
922 print_operand_address (FILE * file, register rtx addr)
924 rtx reg;
925 rtx offset;
927 if (GET_CODE (addr) == AND
928 && GET_CODE (XEXP (addr, 1)) == CONST_INT
929 && INTVAL (XEXP (addr, 1)) == -16)
930 addr = XEXP (addr, 0);
932 switch (GET_CODE (addr))
934 case REG:
935 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
936 break;
938 case PLUS:
939 reg = XEXP (addr, 0);
940 offset = XEXP (addr, 1);
941 if (GET_CODE (offset) == REG)
943 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
944 reg_names[REGNO (offset)]);
946 else if (GET_CODE (offset) == CONST_INT)
948 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
949 INTVAL (offset), reg_names[REGNO (reg)]);
951 else
952 abort ();
953 break;
955 case CONST:
956 case LABEL_REF:
957 case SYMBOL_REF:
958 case CONST_INT:
959 output_addr_const (file, addr);
960 break;
962 default:
963 debug_rtx (addr);
964 abort ();
968 void
969 print_operand (FILE * file, rtx x, int code)
971 enum machine_mode mode = GET_MODE (x);
972 HOST_WIDE_INT val;
973 unsigned char arr[16];
974 int xcode = GET_CODE (x);
975 if (GET_MODE (x) == VOIDmode)
976 switch (code)
978 case 'H': /* 128 bits, signed */
979 case 'L': /* 128 bits, signed */
980 case 'm': /* 128 bits, signed */
981 case 'T': /* 128 bits, signed */
982 case 't': /* 128 bits, signed */
983 mode = TImode;
984 break;
985 case 'G': /* 64 bits, signed */
986 case 'K': /* 64 bits, signed */
987 case 'k': /* 64 bits, signed */
988 case 'D': /* 64 bits, signed */
989 case 'd': /* 64 bits, signed */
990 mode = DImode;
991 break;
992 case 'F': /* 32 bits, signed */
993 case 'J': /* 32 bits, signed */
994 case 'j': /* 32 bits, signed */
995 case 's': /* 32 bits, signed */
996 case 'S': /* 32 bits, signed */
997 mode = SImode;
998 break;
1000 switch (code)
1003 case 'j': /* 32 bits, signed */
1004 case 'k': /* 64 bits, signed */
1005 case 'm': /* 128 bits, signed */
1006 if (xcode == CONST_INT
1007 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1009 gcc_assert (logical_immediate_p (x, mode));
1010 constant_to_array (mode, x, arr);
1011 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1012 val = trunc_int_for_mode (val, SImode);
1013 switch (which_logical_immediate (val))
1015 case SPU_ORI:
1016 break;
1017 case SPU_ORHI:
1018 fprintf (file, "h");
1019 break;
1020 case SPU_ORBI:
1021 fprintf (file, "b");
1022 break;
1023 default:
1024 gcc_unreachable();
1027 else
1028 gcc_unreachable();
1029 return;
1031 case 'J': /* 32 bits, signed */
1032 case 'K': /* 64 bits, signed */
1033 case 'L': /* 128 bits, signed */
1034 if (xcode == CONST_INT
1035 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1037 gcc_assert (logical_immediate_p (x, mode)
1038 || iohl_immediate_p (x, mode));
1039 constant_to_array (mode, x, arr);
1040 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1041 val = trunc_int_for_mode (val, SImode);
1042 switch (which_logical_immediate (val))
1044 case SPU_ORI:
1045 case SPU_IOHL:
1046 break;
1047 case SPU_ORHI:
1048 val = trunc_int_for_mode (val, HImode);
1049 break;
1050 case SPU_ORBI:
1051 val = trunc_int_for_mode (val, QImode);
1052 break;
1053 default:
1054 gcc_unreachable();
1056 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1058 else
1059 gcc_unreachable();
1060 return;
1062 case 't': /* 128 bits, signed */
1063 case 'd': /* 64 bits, signed */
1064 case 's': /* 32 bits, signed */
1065 if (xcode == CONST_INT
1066 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1068 gcc_assert (immediate_load_p (x, mode));
1069 constant_to_array (mode, x, arr);
1070 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1071 val = trunc_int_for_mode (val, SImode);
1072 switch (which_immediate_load (val))
1074 case SPU_IL:
1075 break;
1076 case SPU_ILA:
1077 fprintf (file, "a");
1078 break;
1079 case SPU_ILH:
1080 fprintf (file, "h");
1081 break;
1082 case SPU_ILHU:
1083 fprintf (file, "hu");
1084 break;
1085 default:
1086 gcc_unreachable();
1089 else if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1090 fprintf (file, "a");
1091 else if (xcode == HIGH)
1092 fprintf (file, "hu");
1093 else
1094 gcc_unreachable ();
1095 return;
1097 case 'T': /* 128 bits, signed */
1098 case 'D': /* 64 bits, signed */
1099 case 'S': /* 32 bits, signed */
1100 if (xcode == CONST_INT
1101 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1103 gcc_assert (immediate_load_p (x, mode));
1104 constant_to_array (mode, x, arr);
1105 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1106 val = trunc_int_for_mode (val, SImode);
1107 switch (which_immediate_load (val))
1109 case SPU_IL:
1110 case SPU_ILA:
1111 break;
1112 case SPU_ILH:
1113 case SPU_ILHU:
1114 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1115 break;
1116 default:
1117 gcc_unreachable();
1119 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1121 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1122 output_addr_const (file, x);
1123 else if (xcode == HIGH)
1125 output_addr_const (file, XEXP (x, 0));
1126 fprintf (file, "@h");
1128 else
1129 gcc_unreachable ();
1130 return;
1132 case 'F':
1133 case 'G':
1134 case 'H':
1135 if (xcode == CONST_INT
1136 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1137 { /* immediate operand for fsmbi */
1138 int i;
1139 HOST_WIDE_INT val = 0;
1140 unsigned char arr[16];
1141 constant_to_array (mode, x, arr);
1142 for (i = 0; i < 16; i++)
1144 val <<= 1;
1145 val |= arr[i] & 1;
1147 print_operand (file, GEN_INT (val), 0);
1149 else
1150 gcc_unreachable();
1151 return;
1153 case 'C':
1154 if (xcode == CONST_INT)
 1156        /* Only the 4 least significant bits are relevant for the generate
 1157           control word instructions. */
1158 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1159 return;
1161 break;
1163 case 'M': /* print code for c*d */
1164 if (GET_CODE (x) == CONST_INT)
1165 switch (INTVAL (x))
1167 case 1:
1168 fprintf (file, "b");
1169 break;
1170 case 2:
1171 fprintf (file, "h");
1172 break;
1173 case 4:
1174 fprintf (file, "w");
1175 break;
1176 case 8:
1177 fprintf (file, "d");
1178 break;
1179 default:
1180 gcc_unreachable();
1182 else
1183 gcc_unreachable();
1184 return;
1186 case 'N': /* Negate the operand */
1187 if (xcode == CONST_INT)
1188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1189 else if (xcode == CONST_VECTOR)
1190 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1191 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1192 return;
1194 case 'I': /* enable/disable interrupts */
1195 if (xcode == CONST_INT)
1196 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1197 return;
1199 case 'b': /* branch modifiers */
1200 if (xcode == REG)
1201 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1202 else if (COMPARISON_P (x))
1203 fprintf (file, "%s", xcode == NE ? "n" : "");
1204 return;
1206 case 'i': /* indirect call */
1207 if (xcode == MEM)
1209 if (GET_CODE (XEXP (x, 0)) == REG)
1210 /* Used in indirect function calls. */
1211 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1212 else
1213 output_address (XEXP (x, 0));
1215 return;
1217 case 'p': /* load/store */
1218 if (xcode == MEM)
1220 x = XEXP (x, 0);
1221 xcode = GET_CODE (x);
1223 if (xcode == AND)
1225 x = XEXP (x, 0);
1226 xcode = GET_CODE (x);
1228 if (xcode == REG)
1229 fprintf (file, "d");
1230 else if (xcode == CONST_INT)
1231 fprintf (file, "a");
1232 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1233 fprintf (file, "r");
1234 else if (xcode == PLUS || xcode == LO_SUM)
1236 if (GET_CODE (XEXP (x, 1)) == REG)
1237 fprintf (file, "x");
1238 else
1239 fprintf (file, "d");
1241 return;
1243 case 0:
1244 if (xcode == REG)
1245 fprintf (file, "%s", reg_names[REGNO (x)]);
1246 else if (xcode == MEM)
1247 output_address (XEXP (x, 0));
1248 else if (xcode == CONST_VECTOR)
1249 output_addr_const (file, CONST_VECTOR_ELT (x, 0));
1250 else
1251 output_addr_const (file, x);
1252 return;
1254 default:
1255 output_operand_lossage ("invalid %%xn code");
1257 gcc_unreachable ();
1260 extern char call_used_regs[];
1261 extern char regs_ever_live[];
1263 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1264 caller saved register. For leaf functions it is more efficient to
1265 use a volatile register because we won't need to save and restore the
1266 pic register. This routine is only valid after register allocation
1267 is completed, so we can pick an unused register. */
1268 static rtx
1269 get_pic_reg (void)
1271 rtx pic_reg = pic_offset_table_rtx;
1272 if (!reload_completed && !reload_in_progress)
1273 abort ();
1274 return pic_reg;
1277 /* Split constant addresses to handle cases that are too large. Also, add in
1278 the pic register when in PIC mode. */
1279 void
1280 spu_split_address (rtx * ops)
1282 if (TARGET_LARGE_MEM
1283 || (GET_CODE (ops[1]) == CONST && !legitimate_const (ops[1], 0)))
1285 emit_insn (gen_high (ops[0], ops[1]));
1286 emit_insn (gen_low (ops[0], ops[0], ops[1]));
1288 else if (flag_pic)
1289 emit_insn (gen_pic (ops[0], ops[1]));
1290 if (flag_pic)
1292 rtx pic_reg = get_pic_reg ();
1293 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1294 current_function_uses_pic_offset_table = 1;
1298 /* SAVING is TRUE when we are generating the actual load and store
1299 instructions for REGNO. When determining the size of the stack
 1300    needed for saving registers we must allocate enough space for the
1301 worst case, because we don't always have the information early enough
1302 to not allocate it. But we can at least eliminate the actual loads
1303 and stores during the prologue/epilogue. */
1304 static int
1305 need_to_save_reg (int regno, int saving)
1307 if (regs_ever_live[regno] && !call_used_regs[regno])
1308 return 1;
1309 if (flag_pic
1310 && regno == PIC_OFFSET_TABLE_REGNUM
1311 && (!saving || current_function_uses_pic_offset_table)
1312 && (!saving
1313 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1314 return 1;
1315 return 0;
1318 /* This function is only correct starting with local register
1319 allocation */
1321 spu_saved_regs_size (void)
1323 int reg_save_size = 0;
1324 int regno;
1326 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1327 if (need_to_save_reg (regno, 0))
1328 reg_save_size += 0x10;
1329 return reg_save_size;
1332 static rtx
1333 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1335 rtx reg = gen_rtx_REG (V4SImode, regno);
1336 rtx mem =
1337 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1338 return emit_insn (gen_movv4si (mem, reg));
1341 static rtx
1342 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1344 rtx reg = gen_rtx_REG (V4SImode, regno);
1345 rtx mem =
1346 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1347 return emit_insn (gen_movv4si (reg, mem));
1350 /* This happens after reload, so we need to expand it. */
1351 static rtx
1352 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1354 rtx insn;
1355 if (satisfies_constraint_K (GEN_INT (imm)))
1357 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1359 else
1361 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1362 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1363 REG_NOTES (insn));
1364 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1365 if (REGNO (src) == REGNO (scratch))
1366 abort ();
1368 if (REGNO (dst) == REGNO (scratch))
1369 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1370 REG_NOTES (insn));
1371 return insn;
1374 /* Return nonzero if this function is known to have a null epilogue. */
1377 direct_return (void)
1379 if (reload_completed)
1381 if (cfun->static_chain_decl == 0
1382 && (spu_saved_regs_size ()
1383 + get_frame_size ()
1384 + current_function_outgoing_args_size
1385 + current_function_pretend_args_size == 0)
1386 && current_function_is_leaf)
1387 return 1;
1389 return 0;
1393 The stack frame looks like this:
1394 +-------------+
1395 | incoming |
1396 AP | args |
1397 +-------------+
1398 | $lr save |
1399 +-------------+
1400 prev SP | back chain |
1401 +-------------+
1402 | var args |
1403 | reg save | current_function_pretend_args_size bytes
1404 +-------------+
1405 | ... |
1406 | saved regs | spu_saved_regs_size() bytes
1407 +-------------+
1408 | ... |
1409 FP | vars | get_frame_size() bytes
1410 +-------------+
1411 | ... |
1412 | outgoing |
1413 | args | current_function_outgoing_args_size bytes
1414 +-------------+
1415 | $lr of next |
1416 | frame |
1417 +-------------+
1418 SP | back chain |
1419 +-------------+
1422 void
1423 spu_expand_prologue (void)
1425 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1426 HOST_WIDE_INT total_size;
1427 HOST_WIDE_INT saved_regs_size;
1428 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1429 rtx scratch_reg_0, scratch_reg_1;
1430 rtx insn, real;
1432 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1433 the "toplevel" insn chain. */
1434 emit_note (NOTE_INSN_DELETED);
1436 if (flag_pic && optimize == 0)
1437 current_function_uses_pic_offset_table = 1;
1439 if (spu_naked_function_p (current_function_decl))
1440 return;
1442 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1443 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1445 saved_regs_size = spu_saved_regs_size ();
1446 total_size = size + saved_regs_size
1447 + current_function_outgoing_args_size
1448 + current_function_pretend_args_size;
1450 if (!current_function_is_leaf
1451 || current_function_calls_alloca || total_size > 0)
1452 total_size += STACK_POINTER_OFFSET;
1454 /* Save this first because code after this might use the link
1455 register as a scratch register. */
1456 if (!current_function_is_leaf)
1458 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1459 RTX_FRAME_RELATED_P (insn) = 1;
1462 if (total_size > 0)
1464 offset = -current_function_pretend_args_size;
1465 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1466 if (need_to_save_reg (regno, 1))
1468 offset -= 16;
1469 insn = frame_emit_store (regno, sp_reg, offset);
1470 RTX_FRAME_RELATED_P (insn) = 1;
1474 if (flag_pic && current_function_uses_pic_offset_table)
1476 rtx pic_reg = get_pic_reg ();
1477 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1478 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1479 REG_NOTES (insn));
1480 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1481 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1482 REG_NOTES (insn));
1485 if (total_size > 0)
1487 if (flag_stack_check)
 1489       /* We compare against total_size-1 because
1490 ($sp >= total_size) <=> ($sp > total_size-1) */
1491 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1492 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1493 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1494 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1496 emit_move_insn (scratch_v4si, size_v4si);
1497 size_v4si = scratch_v4si;
1499 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1500 emit_insn (gen_vec_extractv4si
1501 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1502 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1505 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1506 the value of the previous $sp because we save it as the back
1507 chain. */
1508 if (total_size <= 2000)
1510 /* In this case we save the back chain first. */
1511 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1512 RTX_FRAME_RELATED_P (insn) = 1;
1513 insn =
1514 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1516 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1518 insn = emit_move_insn (scratch_reg_0, sp_reg);
1519 RTX_FRAME_RELATED_P (insn) = 1;
1520 insn =
1521 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1523 else
1525 insn = emit_move_insn (scratch_reg_0, sp_reg);
1526 RTX_FRAME_RELATED_P (insn) = 1;
1527 insn =
1528 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1530 RTX_FRAME_RELATED_P (insn) = 1;
1531 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1532 REG_NOTES (insn) =
1533 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1535 if (total_size > 2000)
1537 /* Save the back chain ptr */
1538 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1539 RTX_FRAME_RELATED_P (insn) = 1;
1542 if (frame_pointer_needed)
1544 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1545 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1546 + current_function_outgoing_args_size;
1547 /* Set the new frame_pointer */
1548 frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1552 emit_note (NOTE_INSN_DELETED);
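/* Worked example: for a non-leaf function with 48 bytes of locals, two
   call-saved registers live, and no varargs or outgoing stack arguments,
   the prologue above computes

     total_size = 48 + 2 * 16 + 0 + 0 + STACK_POINTER_OFFSET

   saves $lr at 16($sp), stores each saved register in its own quadword
   below the pretend-args area, and, because total_size <= 2000, writes
   the old $sp at -total_size($sp) before adjusting $sp, so the back
   chain lands at offset 0 of the new frame as the diagram above shows.  */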
1555 void
1556 spu_expand_epilogue (bool sibcall_p)
1558 int size = get_frame_size (), offset, regno;
1559 HOST_WIDE_INT saved_regs_size, total_size;
1560 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1561 rtx jump, scratch_reg_0;
1563 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1564 the "toplevel" insn chain. */
1565 emit_note (NOTE_INSN_DELETED);
1567 if (spu_naked_function_p (current_function_decl))
1568 return;
1570 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1572 saved_regs_size = spu_saved_regs_size ();
1573 total_size = size + saved_regs_size
1574 + current_function_outgoing_args_size
1575 + current_function_pretend_args_size;
1577 if (!current_function_is_leaf
1578 || current_function_calls_alloca || total_size > 0)
1579 total_size += STACK_POINTER_OFFSET;
1581 if (total_size > 0)
1583 if (current_function_calls_alloca)
1584 /* Load it from the back chain because our save_stack_block and
1585 restore_stack_block do nothing. */
1586 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1587 else
1588 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1591 if (saved_regs_size > 0)
1593 offset = -current_function_pretend_args_size;
1594 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1595 if (need_to_save_reg (regno, 1))
1597 offset -= 0x10;
1598 frame_emit_load (regno, sp_reg, offset);
1603 if (!current_function_is_leaf)
1604 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1606 if (!sibcall_p)
1608 emit_insn (gen_rtx_USE
1609 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1610 jump = emit_jump_insn (gen__return ());
1611 emit_barrier_after (jump);
1614 emit_note (NOTE_INSN_DELETED);
1618 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1620 if (count != 0)
1621 return 0;
1622 /* This is inefficient because it ends up copying to a save-register
1623 which then gets saved even though $lr has already been saved. But
1624 it does generate better code for leaf functions and we don't need
1625 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1626 used for __builtin_return_address anyway, so maybe we don't care if
1627 it's inefficient. */
1628 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1632 /* Given VAL, generate a constant appropriate for MODE.
1633 If MODE is a vector mode, every element will be VAL.
1634 For TImode, VAL will be zero extended to 128 bits. */
1636 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1638 rtx inner;
1639 rtvec v;
1640 int units, i;
1642 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1643 || GET_MODE_CLASS (mode) == MODE_FLOAT
1644 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1645 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1647 if (GET_MODE_CLASS (mode) == MODE_INT)
1648 return immed_double_const (val, 0, mode);
1650 /* val is the bit representation of the float */
1651 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1652 return hwint_to_const_double (mode, val);
1654 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1655 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1656 else
1657 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1659 units = GET_MODE_NUNITS (mode);
1661 v = rtvec_alloc (units);
1663 for (i = 0; i < units; ++i)
1664 RTVEC_ELT (v, i) = inner;
1666 return gen_rtx_CONST_VECTOR (mode, v);
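/* For illustration, spu_const (V4SImode, total_size - 1) in the prologue
   builds a CONST_VECTOR whose four SImode elements all equal the scalar,
   and for float modes the HOST_WIDE_INT is taken as the bit pattern, so
   spu_const (SFmode, 0x3f800000) would stand for 1.0f assuming IEEE
   single precision.  */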
1669 /* branch hint stuff */
1671 /* The hardware requires 8 insns between a hint and the branch it
 1672    affects.  This variable describes how many rtl instructions the
1673 compiler needs to see before inserting a hint. (FIXME: We should
1674 accept less and insert nops to enforce it because hinting is always
1675 profitable for performance, but we do need to be careful of code
1676 size.) */
1677 int spu_hint_dist = (8 * 4);
1679 /* An array of these is used to propagate hints to predecessor blocks. */
1680 struct spu_bb_info
1682 rtx prop_jump; /* propagated from another block */
1683 basic_block bb; /* the original block. */
1686 /* The special $hbr register is used to prevent the insn scheduler from
1687 moving hbr insns across instructions which invalidate them. It
1688 should only be used in a clobber, and this function searches for
1689 insns which clobber it. */
1690 static bool
1691 insn_clobbers_hbr (rtx insn)
1693 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1695 rtx parallel = PATTERN (insn);
1696 rtx clobber;
1697 int j;
1698 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1700 clobber = XVECEXP (parallel, 0, j);
1701 if (GET_CODE (clobber) == CLOBBER
1702 && GET_CODE (XEXP (clobber, 0)) == REG
1703 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1704 return 1;
1707 return 0;
1710 static void
1711 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1713 rtx branch_label;
1714 rtx hint, insn, prev, next;
1716 if (before == 0 || branch == 0 || target == 0)
1717 return;
1719 if (distance > 600)
1720 return;
1723 branch_label = gen_label_rtx ();
1724 LABEL_NUSES (branch_label)++;
1725 LABEL_PRESERVE_P (branch_label) = 1;
1726 insn = emit_label_before (branch_label, branch);
1727 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1729 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1730 the current insn is pipe0, dual issue with it. */
1731 prev = prev_active_insn (before);
1732 if (prev && get_pipe (prev) == 0)
1733 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1734 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1736 next = next_active_insn (before);
1737 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1738 if (next)
1739 PUT_MODE (next, TImode);
1741 else
1743 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1744 PUT_MODE (hint, TImode);
1746 recog_memoized (hint);
1749 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1750 the rtx for the branch target. */
1751 static rtx
1752 get_branch_target (rtx branch)
1754 if (GET_CODE (branch) == JUMP_INSN)
1756 rtx set, src;
1758 /* Return statements */
1759 if (GET_CODE (PATTERN (branch)) == RETURN)
1760 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1762 /* jump table */
1763 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1764 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1765 return 0;
1767 set = single_set (branch);
1768 src = SET_SRC (set);
1769 if (GET_CODE (SET_DEST (set)) != PC)
1770 abort ();
1772 if (GET_CODE (src) == IF_THEN_ELSE)
1774 rtx lab = 0;
1775 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1776 if (note)
1778 /* If the more probable case is not a fall through, then
1779 try a branch hint. */
1780 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1781 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1782 && GET_CODE (XEXP (src, 1)) != PC)
1783 lab = XEXP (src, 1);
1784 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1785 && GET_CODE (XEXP (src, 2)) != PC)
1786 lab = XEXP (src, 2);
1788 if (lab)
1790 if (GET_CODE (lab) == RETURN)
1791 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1792 return lab;
1794 return 0;
1797 return src;
1799 else if (GET_CODE (branch) == CALL_INSN)
1801 rtx call;
1802 /* All of our call patterns are in a PARALLEL and the CALL is
1803 the first pattern in the PARALLEL. */
1804 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1805 abort ();
1806 call = XVECEXP (PATTERN (branch), 0, 0);
1807 if (GET_CODE (call) == SET)
1808 call = SET_SRC (call);
1809 if (GET_CODE (call) != CALL)
1810 abort ();
1811 return XEXP (XEXP (call, 0), 0);
1813 return 0;
1816 static void
1817 insert_branch_hints (void)
1819 struct spu_bb_info *spu_bb_info;
1820 rtx branch, insn, next;
1821 rtx branch_target = 0;
1822 int branch_addr = 0, insn_addr, head_addr;
1823 basic_block bb;
1824 unsigned int j;
1826 spu_bb_info =
1827 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1828 sizeof (struct spu_bb_info));
1830 /* We need exact insn addresses and lengths. */
1831 shorten_branches (get_insns ());
1833 FOR_EACH_BB_REVERSE (bb)
1835 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
1836 branch = 0;
1837 if (spu_bb_info[bb->index].prop_jump)
1839 branch = spu_bb_info[bb->index].prop_jump;
1840 branch_target = get_branch_target (branch);
1841 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1843 /* Search from end of a block to beginning. In this loop, find
 1844          jumps which need a branch hint and emit the hint only when:
1845 - it's an indirect branch and we're at the insn which sets
1846 the register
1847 - we're at an insn that will invalidate the hint. e.g., a
1848 call, another hint insn, inline asm that clobbers $hbr, and
1849 some inlined operations (divmodsi4). Don't consider jumps
1850 because they are only at the end of a block and are
1851 considered when we are deciding whether to propagate
1852 - we're getting too far away from the branch. The hbr insns
1853 only have a signed 10 bit offset
1854 We go back as far as possible so the branch will be considered
1855 for propagation when we get to the beginning of the block. */
1856 next = 0;
1857 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1859 if (INSN_P (insn))
1861 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1862 if (branch && next
1863 && ((GET_CODE (branch_target) == REG
1864 && set_of (branch_target, insn) != NULL_RTX)
1865 || insn_clobbers_hbr (insn)
1866 || branch_addr - insn_addr > 600))
1868 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1869 if (insn != BB_END (bb)
1870 && branch_addr - next_addr >= spu_hint_dist)
1872 if (dump_file)
1873 fprintf (dump_file,
1874 "hint for %i in block %i before %i\n",
1875 INSN_UID (branch), bb->index, INSN_UID (next));
1876 spu_emit_branch_hint (next, branch, branch_target,
1877 branch_addr - next_addr);
1879 branch = 0;
1882 /* JUMP_P will only be true at the end of a block. When
1883 branch is already set it means we've previously decided
1884 to propagate a hint for that branch into this block. */
1885 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
1887 branch = 0;
1888 if ((branch_target = get_branch_target (insn)))
1890 branch = insn;
1891 branch_addr = insn_addr;
1895 /* When a branch hint is emitted it will be inserted
1896 before "next". Make sure next is the beginning of a
1897 cycle to minimize impact on the scheduled insns. */
1898 if (GET_MODE (insn) == TImode)
1899 next = insn;
1901 if (insn == BB_HEAD (bb))
1902 break;
1905 if (branch)
1907 /* If we haven't emitted a hint for this branch yet, it might
1908 be profitable to emit it in one of the predecessor blocks,
1909 especially for loops. */
1910 rtx bbend;
1911 basic_block prev = 0, prop = 0, prev2 = 0;
1912 int loop_exit = 0, simple_loop = 0;
1913 int next_addr = 0;
1914 if (next)
1915 next_addr = INSN_ADDRESSES (INSN_UID (next));
1917 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
1918 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
1919 prev = EDGE_PRED (bb, j)->src;
1920 else
1921 prev2 = EDGE_PRED (bb, j)->src;
1923 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
1924 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
1925 loop_exit = 1;
1926 else if (EDGE_SUCC (bb, j)->dest == bb)
1927 simple_loop = 1;
1929 /* If this branch is a loop exit then propagate to previous
1930 fallthru block. This catches the cases when it is a simple
1931 loop or when there is an initial branch into the loop. */
1932 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
1933 prop = prev;
 1935          /* If there is only one adjacent predecessor, don't propagate
1936 outside this loop. This loop_depth test isn't perfect, but
1937 I'm not sure the loop_father member is valid at this point. */
1938 else if (prev && single_pred_p (bb)
1939 && prev->loop_depth == bb->loop_depth)
1940 prop = prev;
1942 /* If this is the JOIN block of a simple IF-THEN then
 1943             propagate the hint to the HEADER block. */
1944 else if (prev && prev2
1945 && EDGE_COUNT (bb->preds) == 2
1946 && EDGE_COUNT (prev->preds) == 1
1947 && EDGE_PRED (prev, 0)->src == prev2
1948 && prev2->loop_depth == bb->loop_depth
1949 && GET_CODE (branch_target) != REG)
1950 prop = prev;
1952 /* Don't propagate when:
1953 - this is a simple loop and the hint would be too far
1954 - this is not a simple loop and there are 16 insns in
1955 this block already
1956 - the predecessor block ends in a branch that will be
1957 hinted
1958 - the predecessor block ends in an insn that invalidates
1959 the hint */
1960 if (prop
1961 && prop->index >= 0
1962 && (bbend = BB_END (prop))
1963 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
1964 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
1965 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
1967 if (dump_file)
1968 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
1969 "for %i (loop_exit %i simple_loop %i dist %i)\n",
1970 bb->index, prop->index, bb->loop_depth,
1971 INSN_UID (branch), loop_exit, simple_loop,
1972 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
1974 spu_bb_info[prop->index].prop_jump = branch;
1975 spu_bb_info[prop->index].bb = bb;
1977 else if (next && branch_addr - next_addr >= spu_hint_dist)
1979 if (dump_file)
1980 fprintf (dump_file, "hint for %i in block %i before %i\n",
1981 INSN_UID (branch), bb->index, INSN_UID (next));
1982 spu_emit_branch_hint (next, branch, branch_target,
1983 branch_addr - next_addr);
1985 branch = 0;
1988 free (spu_bb_info);
1991 /* Emit a nop for INSN such that the two will dual issue. This assumes
1992 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1993 We check for TImode to handle a MULTI1 insn which has dual issued its
1994 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1995 ADDR_VEC insns. */
1996 static void
1997 emit_nop_for_insn (rtx insn)
1999 int p;
2000 rtx new_insn;
2001 p = get_pipe (insn);
2002 if (p == 1 && GET_MODE (insn) == TImode)
2004 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005 PUT_MODE (new_insn, TImode);
2006 PUT_MODE (insn, VOIDmode);
2008 else
2009 new_insn = emit_insn_after (gen_lnop (), insn);
2012 /* Insert nops in basic blocks to meet dual issue alignment
2013 requirements. */
2014 static void
2015 insert_nops (void)
2017 rtx insn, next_insn, prev_insn;
2018 int length;
2019 int addr;
2021 /* This sets up INSN_ADDRESSES. */
2022 shorten_branches (get_insns ());
2024 /* Keep track of length added by nops. */
2025 length = 0;
2027 prev_insn = 0;
2028 for (insn = get_insns (); insn; insn = next_insn)
2030 next_insn = next_active_insn (insn);
2031 addr = INSN_ADDRESSES (INSN_UID (insn));
2032 if (GET_MODE (insn) == TImode
2033 && next_insn
2034 && GET_MODE (next_insn) != TImode
2035 && ((addr + length) & 7) != 0)
2037 /* prev_insn will always be set because the first insn is
2038 always 8-byte aligned. */
2039 emit_nop_for_insn (prev_insn);
2040 length += 4;
2042 prev_insn = insn;
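/* Worked example: every SPU instruction is 4 bytes, and a TImode mode on
   an insn marks the start of an issue group.  When such an insn should
   pair with the one that follows it but (addr + length) is not a
   multiple of 8, the pair would not be 8-byte aligned, so the loop above
   pads the previous group with one 4-byte nop (emit_nop_for_insn picks a
   nop or an lnop to fill the unused pipeline slot) and adds 4 to LENGTH
   so the addresses of later insns stay consistent.  */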
2046 static void
2047 spu_machine_dependent_reorg (void)
2049 if (optimize > 0)
2051 if (TARGET_BRANCH_HINTS)
2052 insert_branch_hints ();
2053 insert_nops ();
2058 /* Insn scheduling routines, primarily for dual issue. */
2059 static int
2060 spu_sched_issue_rate (void)
2062 return 2;
2065 static int
2066 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2067 int verbose ATTRIBUTE_UNUSED, rtx insn,
2068 int can_issue_more)
2070 if (GET_CODE (PATTERN (insn)) != USE
2071 && GET_CODE (PATTERN (insn)) != CLOBBER
2072 && get_pipe (insn) != -2)
2073 can_issue_more--;
2074 return can_issue_more;
2077 static int
2078 get_pipe (rtx insn)
2080 enum attr_type t;
2081 /* Handle inline asm */
2082 if (INSN_CODE (insn) == -1)
2083 return -1;
2084 t = get_attr_type (insn);
2085 switch (t)
2087 case TYPE_CONVERT:
2088 return -2;
2089 case TYPE_MULTI0:
2090 return -1;
2092 case TYPE_FX2:
2093 case TYPE_FX3:
2094 case TYPE_SPR:
2095 case TYPE_NOP:
2096 case TYPE_FXB:
2097 case TYPE_FPD:
2098 case TYPE_FP6:
2099 case TYPE_FP7:
2100 case TYPE_IPREFETCH:
2101 return 0;
2103 case TYPE_LNOP:
2104 case TYPE_SHUF:
2105 case TYPE_LOAD:
2106 case TYPE_STORE:
2107 case TYPE_BR:
2108 case TYPE_MULTI1:
2109 case TYPE_HBR:
2110 return 1;
2111 default:
2112 abort ();
2116 static int
2117 spu_sched_adjust_priority (rtx insn, int pri)
2119 int p = get_pipe (insn);
2120 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2121 * scheduling. */
2122 if (GET_CODE (PATTERN (insn)) == USE
2123 || GET_CODE (PATTERN (insn)) == CLOBBER
2124 || p == -2)
2125 return pri + 100;
2126 /* Schedule pipe0 insns early for greedier dual issue. */
2127 if (p != 1)
2128 return pri + 50;
2129 return pri;
2132 /* INSN is dependent on DEP_INSN. */
2133 static int
2134 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2135 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2137 if (GET_CODE (insn) == CALL_INSN)
2138 return cost - 2;
2139 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2140 scheduler makes every insn in a block anti-dependent on the final
2141 jump_insn. We adjust here so higher cost insns will get scheduled
2142 earlier. */
2143 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2144 return INSN_COST (dep_insn) - 3;
2145 return cost;
2148 /* Create a CONST_DOUBLE from a string. */
2149 struct rtx_def *
2150 spu_float_const (const char *string, enum machine_mode mode)
2152 REAL_VALUE_TYPE value;
2153 value = REAL_VALUE_ATOF (string, mode);
2154 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2157 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2158 CONST_INT fits constraint 'K', i.e., is small. */
2160 legitimate_const (rtx x, int aligned)
2162 /* We can never know if the resulting address fits in 18 bits and can be
2163 loaded with ila. Instead we should use the HI and LO relocations to
2164 load a 32 bit address. */
2165 rtx sym, cst;
2167 gcc_assert (GET_CODE (x) == CONST);
2169 if (GET_CODE (XEXP (x, 0)) != PLUS)
2170 return 0;
2171 sym = XEXP (XEXP (x, 0), 0);
2172 cst = XEXP (XEXP (x, 0), 1);
2173 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2174 return 0;
2175 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2176 return 0;
2177 return satisfies_constraint_K (cst);
2181 spu_constant_address_p (rtx x)
2183 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2184 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2185 || GET_CODE (x) == HIGH);
2188 static enum spu_immediate
2189 which_immediate_load (HOST_WIDE_INT val)
2191 gcc_assert (val == trunc_int_for_mode (val, SImode));
2193 if (val >= -0x8000 && val <= 0x7fff)
2194 return SPU_IL;
2195 if (val >= 0 && val <= 0x3ffff)
2196 return SPU_ILA;
2197 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2198 return SPU_ILH;
2199 if ((val & 0xffff) == 0)
2200 return SPU_ILHU;
2202 return SPU_NONE;
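/* Some illustrative classifications for which_immediate_load, derived
   from the tests above (examples added for clarity, not part of the
   original file):

     val = 0x00001234 -> SPU_IL   (fits a signed 16-bit immediate)
     val = 0x00012345 -> SPU_ILA  (fits an 18-bit unsigned immediate)
     val = 0xabcdabcd -> SPU_ILH  (both halfwords identical)
     val = 0x7fff0000 -> SPU_ILHU (low halfword is zero)
     val = 0x12345678 -> SPU_NONE (needs an ilhu + iohl pair instead)  */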
2206 immediate_load_p (rtx op, enum machine_mode mode)
2208 HOST_WIDE_INT val;
2209 unsigned char arr[16];
2210 int i, j;
2211 if (GET_MODE (op) != VOIDmode)
2212 mode = GET_MODE (op);
2214 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2215 || GET_CODE (op) == CONST_VECTOR);
2217 /* V4SI with all identical symbols is valid. */
2218 if (mode == V4SImode
2219 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == SYMBOL_REF)
2220 return !TARGET_LARGE_MEM && !flag_pic
2221 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2222 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2223 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3);
2225 constant_to_array (mode, op, arr);
2227 /* Check that bytes are repeated. */
2228 for (i = 4; i < 16; i += 4)
2229 for (j = 0; j < 4; j++)
2230 if (arr[j] != arr[i + j])
2231 return 0;
2233 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2234 val = trunc_int_for_mode (val, SImode);
2236 return which_immediate_load (val) != SPU_NONE;
2239 static enum spu_immediate
2240 which_logical_immediate (HOST_WIDE_INT val)
2242 gcc_assert (val == trunc_int_for_mode (val, SImode));
2244 if (val >= -0x200 && val <= 0x1ff)
2245 return SPU_ORI;
2246 if (val >= 0 && val <= 0xffff)
2247 return SPU_IOHL;
2248 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2250 val = trunc_int_for_mode (val, HImode);
2251 if (val >= -0x200 && val <= 0x1ff)
2252 return SPU_ORHI;
2253 if ((val & 0xff) == ((val >> 8) & 0xff))
2255 val = trunc_int_for_mode (val, QImode);
2256 if (val >= -0x200 && val <= 0x1ff)
2257 return SPU_ORBI;
2260 return SPU_NONE;
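/* Some illustrative classifications for which_logical_immediate, derived
   from the tests above (examples added for clarity):

     val = 0x000001ff -> SPU_ORI  (fits the signed 10-bit immediate)
     val = 0x0000ffff -> SPU_IOHL (fits an unsigned 16-bit immediate)
     val = 0x01230123 -> SPU_ORHI (equal halfwords, each fits 10 bits)
     val = 0x45454545 -> SPU_ORBI (equal bytes, each fits 10 bits)
     val = 0x12345678 -> SPU_NONE (no single immediate form covers it)  */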
2264 logical_immediate_p (rtx op, enum machine_mode mode)
2266 HOST_WIDE_INT val;
2267 unsigned char arr[16];
2268 int i, j;
2270 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2271 || GET_CODE (op) == CONST_VECTOR);
2273 if (GET_MODE (op) != VOIDmode)
2274 mode = GET_MODE (op);
2276 constant_to_array (mode, op, arr);
2278 /* Check that bytes are repeated. */
2279 for (i = 4; i < 16; i += 4)
2280 for (j = 0; j < 4; j++)
2281 if (arr[j] != arr[i + j])
2282 return 0;
2284 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2285 val = trunc_int_for_mode (val, SImode);
2287 i = which_logical_immediate (val);
2288 return i != SPU_NONE && i != SPU_IOHL;
2292 iohl_immediate_p (rtx op, enum machine_mode mode)
2294 HOST_WIDE_INT val;
2295 unsigned char arr[16];
2296 int i, j;
2298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2299 || GET_CODE (op) == CONST_VECTOR);
2301 if (GET_MODE (op) != VOIDmode)
2302 mode = GET_MODE (op);
2304 constant_to_array (mode, op, arr);
2306 /* Check that bytes are repeated. */
2307 for (i = 4; i < 16; i += 4)
2308 for (j = 0; j < 4; j++)
2309 if (arr[j] != arr[i + j])
2310 return 0;
2312 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2313 val = trunc_int_for_mode (val, SImode);
2315 return val >= 0 && val <= 0xffff;
2319 arith_immediate_p (rtx op, enum machine_mode mode,
2320 HOST_WIDE_INT low, HOST_WIDE_INT high)
2322 HOST_WIDE_INT val;
2323 unsigned char arr[16];
2324 int bytes, i, j;
2326 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2327 || GET_CODE (op) == CONST_VECTOR);
2329 if (GET_MODE (op) != VOIDmode)
2330 mode = GET_MODE (op);
2332 constant_to_array (mode, op, arr);
2334 if (VECTOR_MODE_P (mode))
2335 mode = GET_MODE_INNER (mode);
2337 bytes = GET_MODE_SIZE (mode);
2338 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2340 /* Check that bytes are repeated. */
2341 for (i = bytes; i < 16; i += bytes)
2342 for (j = 0; j < bytes; j++)
2343 if (arr[j] != arr[i + j])
2344 return 0;
2346 val = arr[0];
2347 for (j = 1; j < bytes; j++)
2348 val = (val << 8) | arr[j];
2350 val = trunc_int_for_mode (val, mode);
2352 return val >= low && val <= high;
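/* For example (illustration only): a V8HImode constant with all eight
   halfwords equal to 0x0005 satisfies arith_immediate_p (op, V8HImode,
   -16, 15), because the byte pattern repeats every 2 bytes and the value
   5 lies in [low, high]; a vector mixing 0x0005 and 0x0006 fails the
   repetition check, and a value of 0x0020 would fail the range check.  */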
2355 /* We accept:
2356 - any 32 bit constant (SImode, SFmode)
2357 - any constant that can be generated with fsmbi (any mode)
2358 - a 64 bit constant where the high and low bits are identical
2359 (DImode, DFmode)
2360 - a 128 bit constant where the four 32 bit words match. */
2362 spu_legitimate_constant_p (rtx x)
2364 unsigned char arr[16];
2365 int i, j;
2367 if (GET_CODE (x) == HIGH
2368 || GET_CODE (x) == CONST
2369 || GET_CODE (x) == SYMBOL_REF
2370 || GET_CODE (x) == LABEL_REF)
2371 return 1;
2373 if (fsmbi_const_p (x))
2374 return 1;
2376 if (GET_CODE (x) == CONST_INT)
2377 return (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0x7fffffffll)
2378 || ((INTVAL (x) >> 32) & 0xffffffffll) == (INTVAL (x) & 0xffffffffll);
2380 if (GET_MODE (x) == SFmode)
2381 return 1;
2383 if (GET_MODE (x) == DFmode)
2385 HOST_WIDE_INT val = const_double_to_hwint (x);
2386 return ((val >> 32) & 0xffffffffll) == (val & 0xffffffffll);
2389 /* V4SI with all identical symbols is valid. */
2390 if (GET_MODE (x) == V4SImode
2391 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2392 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2393 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2394 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2395 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2396 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2397 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2399 if (VECTOR_MODE_P (GET_MODE (x)))
2400 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2401 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2402 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2403 return 0;
2405 constant_to_array (SImode, x, arr);
2407 /* Check that bytes are repeated. */
2408 for (i = 4; i < 16; i += 4)
2409 for (j = 0; j < 4; j++)
2410 if (arr[j] != arr[i + j])
2411 return 0;
2413 return 1;
2416 /* Valid addresses are:
2417 - symbol_ref, label_ref, const
2418 - reg
2419 - reg + const, where either reg or const is 16 byte aligned
2420 - reg + reg, alignment doesn't matter
2421 The alignment matters in the reg+const case because lqd and stqd
2422 ignore the 4 least significant bits of the const. (TODO: It might be
2423 preferable to allow any alignment and fix it up when splitting.) */
2425 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2426 rtx x, int reg_ok_strict)
2428 if (mode == TImode && GET_CODE (x) == AND
2429 && GET_CODE (XEXP (x, 1)) == CONST_INT
2430 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2431 x = XEXP (x, 0);
2432 switch (GET_CODE (x))
2434 case SYMBOL_REF:
2435 case LABEL_REF:
2436 return !TARGET_LARGE_MEM;
2438 case CONST:
2439 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
2441 case CONST_INT:
2442 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2444 case SUBREG:
2445 x = XEXP (x, 0);
2446 gcc_assert (GET_CODE (x) == REG);
2448 case REG:
2449 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2451 case PLUS:
2452 case LO_SUM:
2454 rtx op0 = XEXP (x, 0);
2455 rtx op1 = XEXP (x, 1);
2456 if (GET_CODE (op0) == SUBREG)
2457 op0 = XEXP (op0, 0);
2458 if (GET_CODE (op1) == SUBREG)
2459 op1 = XEXP (op1, 0);
2460 /* We can't just accept any aligned register because CSE can
2461 change it to a register that is not marked aligned and then
2462 recog will fail. So we only accept frame registers because
2463 they will only be changed to other frame registers. */
2464 if (GET_CODE (op0) == REG
2465 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2466 && GET_CODE (op1) == CONST_INT
2467 && INTVAL (op1) >= -0x2000
2468 && INTVAL (op1) <= 0x1fff
2469 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2470 return 1;
2471 if (GET_CODE (op0) == REG
2472 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2473 && GET_CODE (op1) == REG
2474 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2475 return 1;
2477 break;
2479 default:
2480 break;
2482 return 0;
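/* A few concrete address forms and how the checks above treat them
   (added for illustration; the register numbers are arbitrary):

     (reg 5)                        -> valid base register
     (plus (reg 5) (reg 6))         -> valid, alignment irrelevant
     (plus (reg 1) (const_int 32))  -> valid, offset is a multiple of 16
     (plus (reg 5) (const_int 3))   -> valid only when reg 5 is a
                                       frame/stack/virtual register
     (symbol_ref "x")               -> valid only if !TARGET_LARGE_MEM  */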
2485 /* When the address is reg + const_int, force the const_int into a
2486 register. */
2488 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2489 enum machine_mode mode)
2491 rtx op0, op1;
2492 /* Make sure both operands are registers. */
2493 if (GET_CODE (x) == PLUS)
2495 op0 = XEXP (x, 0);
2496 op1 = XEXP (x, 1);
2497 if (ALIGNED_SYMBOL_REF_P (op0))
2499 op0 = force_reg (Pmode, op0);
2500 mark_reg_pointer (op0, 128);
2502 else if (GET_CODE (op0) != REG)
2503 op0 = force_reg (Pmode, op0);
2504 if (ALIGNED_SYMBOL_REF_P (op1))
2506 op1 = force_reg (Pmode, op1);
2507 mark_reg_pointer (op1, 128);
2509 else if (GET_CODE (op1) != REG)
2510 op1 = force_reg (Pmode, op1);
2511 x = gen_rtx_PLUS (Pmode, op0, op1);
2512 if (spu_legitimate_address (mode, x, 0))
2513 return x;
2515 return NULL_RTX;
2518 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2519 struct attribute_spec.handler. */
2520 static tree
2521 spu_handle_fndecl_attribute (tree * node,
2522 tree name,
2523 tree args ATTRIBUTE_UNUSED,
2524 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2526 if (TREE_CODE (*node) != FUNCTION_DECL)
2528 warning (0, "`%s' attribute only applies to functions",
2529 IDENTIFIER_POINTER (name));
2530 *no_add_attrs = true;
2533 return NULL_TREE;
2536 /* Handle the "vector" attribute. */
2537 static tree
2538 spu_handle_vector_attribute (tree * node, tree name,
2539 tree args ATTRIBUTE_UNUSED,
2540 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2542 tree type = *node, result = NULL_TREE;
2543 enum machine_mode mode;
2544 int unsigned_p;
2546 while (POINTER_TYPE_P (type)
2547 || TREE_CODE (type) == FUNCTION_TYPE
2548 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2549 type = TREE_TYPE (type);
2551 mode = TYPE_MODE (type);
2553 unsigned_p = TYPE_UNSIGNED (type);
2554 switch (mode)
2556 case DImode:
2557 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2558 break;
2559 case SImode:
2560 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2561 break;
2562 case HImode:
2563 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2564 break;
2565 case QImode:
2566 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2567 break;
2568 case SFmode:
2569 result = V4SF_type_node;
2570 break;
2571 case DFmode:
2572 result = V2DF_type_node;
2573 break;
2574 default:
2575 break;
2578 /* Propagate qualifiers attached to the element type
2579 onto the vector type. */
2580 if (result && result != type && TYPE_QUALS (type))
2581 result = build_qualified_type (result, TYPE_QUALS (type));
2583 *no_add_attrs = true; /* No need to hang on to the attribute. */
2585 if (!result)
2586 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2587 else
2588 *node = reconstruct_complex_type (*node, result);
2590 return NULL_TREE;
2593 /* Return non-zero if FUNC is a naked function. */
2594 static int
2595 spu_naked_function_p (tree func)
2597 tree a;
2599 if (TREE_CODE (func) != FUNCTION_DECL)
2600 abort ();
2602 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2603 return a != NULL_TREE;
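/* A typical use of the attribute tested above (illustrative only):

     void handler (void) __attribute__ ((naked));

   marks a function for which the back end is expected to emit no
   prologue or epilogue code.  */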
2607 spu_initial_elimination_offset (int from, int to)
2609 int saved_regs_size = spu_saved_regs_size ();
2610 int sp_offset = 0;
2611 if (!current_function_is_leaf || current_function_outgoing_args_size
2612 || get_frame_size () || saved_regs_size)
2613 sp_offset = STACK_POINTER_OFFSET;
2614 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2615 return (sp_offset + current_function_outgoing_args_size);
2616 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2617 return 0;
2618 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2619 return sp_offset + current_function_outgoing_args_size
2620 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2621 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2622 return get_frame_size () + saved_regs_size + sp_offset;
2623 return 0;
2627 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2629 enum machine_mode mode = TYPE_MODE (type);
2630 int byte_size = ((mode == BLKmode)
2631 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2633 /* Make sure small structs are left justified in a register. */
2634 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2635 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2637 enum machine_mode smode;
2638 rtvec v;
2639 int i;
2640 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2641 int n = byte_size / UNITS_PER_WORD;
2642 v = rtvec_alloc (nregs);
2643 for (i = 0; i < n; i++)
2645 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2646 gen_rtx_REG (TImode,
2647 FIRST_RETURN_REGNUM
2648 + i),
2649 GEN_INT (UNITS_PER_WORD * i));
2650 byte_size -= UNITS_PER_WORD;
2653 if (n < nregs)
2655 if (byte_size < 4)
2656 byte_size = 4;
2657 smode =
2658 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2659 RTVEC_ELT (v, n) =
2660 gen_rtx_EXPR_LIST (VOIDmode,
2661 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2662 GEN_INT (UNITS_PER_WORD * n));
2664 return gen_rtx_PARALLEL (mode, v);
2666 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2670 spu_function_arg (CUMULATIVE_ARGS cum,
2671 enum machine_mode mode,
2672 tree type, int named ATTRIBUTE_UNUSED)
2674 int byte_size;
2676 if (cum >= MAX_REGISTER_ARGS)
2677 return 0;
2679 byte_size = ((mode == BLKmode)
2680 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2682 /* The ABI does not allow parameters to be passed partly in
2683 registers and partly on the stack. */
2684 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2685 return 0;
2687 /* Make sure small structs are left justified in a register. */
2688 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2689 && byte_size < UNITS_PER_WORD && byte_size > 0)
2691 enum machine_mode smode;
2692 rtx gr_reg;
2693 if (byte_size < 4)
2694 byte_size = 4;
2695 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2696 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2697 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2698 const0_rtx);
2699 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2701 else
2702 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2705 /* Variable-sized types are passed by reference. */
2706 static bool
2707 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2708 enum machine_mode mode ATTRIBUTE_UNUSED,
2709 tree type, bool named ATTRIBUTE_UNUSED)
2711 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2715 /* Var args. */
2717 /* Create and return the va_list datatype.
2719 On SPU, va_list is an array type equivalent to
2721 typedef struct __va_list_tag
2723 void *__args __attribute__((__aligned(16)));
2724 void *__skip __attribute__((__aligned(16)));
2726 } va_list[1];
2728 where __args points to the arg that will be returned by the next
2729 va_arg(), and __skip points to the previous stack frame such that
2730 when __args == __skip we should advance __args by 32 bytes. */
2731 static tree
2732 spu_build_builtin_va_list (void)
2734 tree f_args, f_skip, record, type_decl;
2735 bool owp;
2737 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2739 type_decl =
2740 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2742 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2743 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2745 DECL_FIELD_CONTEXT (f_args) = record;
2746 DECL_ALIGN (f_args) = 128;
2747 DECL_USER_ALIGN (f_args) = 1;
2749 DECL_FIELD_CONTEXT (f_skip) = record;
2750 DECL_ALIGN (f_skip) = 128;
2751 DECL_USER_ALIGN (f_skip) = 1;
2753 TREE_CHAIN (record) = type_decl;
2754 TYPE_NAME (record) = type_decl;
2755 TYPE_FIELDS (record) = f_args;
2756 TREE_CHAIN (f_args) = f_skip;
2758 /* We know this is being padded and we want it that way. It is an internal
2759 type so hide the warnings from the user. */
2760 owp = warn_padded;
2761 warn_padded = false;
2763 layout_type (record);
2765 warn_padded = owp;
2767 /* The correct type is an array type of one element. */
2768 return build_array_type (record, build_index_type (size_zero_node));
2771 /* Implement va_start by filling the va_list structure VALIST.
2772 NEXTARG points to the first anonymous stack argument.
2774 The following global variables are used to initialize
2775 the va_list structure:
2777 current_function_args_info:
2778 the CUMULATIVE_ARGS for this function
2780 current_function_arg_offset_rtx:
2781 holds the offset of the first anonymous stack argument
2782 (relative to the virtual arg pointer). */
2784 void
2785 spu_va_start (tree valist, rtx nextarg)
2787 tree f_args, f_skip;
2788 tree args, skip, t;
2790 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2791 f_skip = TREE_CHAIN (f_args);
2793 valist = build_va_arg_indirect_ref (valist);
2794 args =
2795 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2796 skip =
2797 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2799 /* Find the __args area. */
2800 t = make_tree (TREE_TYPE (args), nextarg);
2801 if (current_function_pretend_args_size > 0)
2802 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
2803 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
2804 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
2805 TREE_SIDE_EFFECTS (t) = 1;
2806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2808 /* Find the __skip area. */
2809 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2810 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
2811 build_int_cst (integer_type_node,
2812 (current_function_pretend_args_size
2813 - STACK_POINTER_OFFSET)));
2814 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
2815 TREE_SIDE_EFFECTS (t) = 1;
2816 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2819 /* Gimplify va_arg by updating the va_list structure
2820 VALIST as required to retrieve an argument of type
2821 TYPE, and returning that argument.
2823 ret = va_arg(VALIST, TYPE);
2825 generates code equivalent to:
2827 paddedsize = (sizeof(TYPE) + 15) & -16;
2828 if (VALIST.__args + paddedsize > VALIST.__skip
2829 && VALIST.__args <= VALIST.__skip)
2830 addr = VALIST.__skip + 32;
2831 else
2832 addr = VALIST.__args;
2833 VALIST.__args = addr + paddedsize;
2834 ret = *(TYPE *)addr;
2836 static tree
2837 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
2838 tree * post_p ATTRIBUTE_UNUSED)
2840 tree f_args, f_skip;
2841 tree args, skip;
2842 HOST_WIDE_INT size, rsize;
2843 tree paddedsize, addr, tmp;
2844 bool pass_by_reference_p;
2846 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2847 f_skip = TREE_CHAIN (f_args);
2849 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2850 args =
2851 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2852 skip =
2853 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2855 addr = create_tmp_var (ptr_type_node, "va_arg");
2856 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
2858 /* if an object is dynamically sized, a pointer to it is passed
2859 instead of the object itself. */
2860 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
2861 false);
2862 if (pass_by_reference_p)
2863 type = build_pointer_type (type);
2864 size = int_size_in_bytes (type);
2865 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
2867 /* build conditional expression to calculate addr. The expression
2868 will be gimplified later. */
2869 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
2870 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
2871 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
2872 build2 (GT_EXPR, boolean_type_node, tmp, skip),
2873 build2 (LE_EXPR, boolean_type_node, args, skip));
2875 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2876 build2 (PLUS_EXPR, ptr_type_node, skip,
2877 fold_convert (ptr_type_node, size_int (32))), args);
2879 tmp = build2 (MODIFY_EXPR, ptr_type_node, addr, tmp);
2880 gimplify_and_add (tmp, pre_p);
2882 /* update VALIST.__args */
2883 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
2884 tmp = build2 (MODIFY_EXPR, TREE_TYPE (args), args, tmp);
2885 gimplify_and_add (tmp, pre_p);
2887 addr = fold_convert (build_pointer_type (type), addr);
2889 if (pass_by_reference_p)
2890 addr = build_va_arg_indirect_ref (addr);
2892 return build_va_arg_indirect_ref (addr);
2895 /* Save parameter registers starting with the register that corresponds
2896 to the first unnamed parameter. If the first unnamed parameter is
2897 in the stack then save no registers. Set pretend_args_size to the
2898 amount of space needed to save the registers. */
2899 void
2900 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
2901 tree type, int *pretend_size, int no_rtl)
2903 if (!no_rtl)
2905 rtx tmp;
2906 int regno;
2907 int offset;
2908 int ncum = *cum;
2910 /* cum currently points to the last named argument; we want to
2911 start at the next argument. */
2912 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
2914 offset = -STACK_POINTER_OFFSET;
2915 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
2917 tmp = gen_frame_mem (V4SImode,
2918 plus_constant (virtual_incoming_args_rtx,
2919 offset));
2920 emit_move_insn (tmp,
2921 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
2922 offset += 16;
2924 *pretend_size = offset + STACK_POINTER_OFFSET;
2928 void
2929 spu_conditional_register_usage (void)
2931 if (flag_pic)
2933 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2934 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2936 global_regs[INTR_REGNUM] = 1;
2939 /* This is called to decide when we can simplify a load instruction. We
2940 must only return true for registers which we know will always be
2941 aligned, taking into account that CSE might replace this reg with
2942 another one that has not been marked aligned.
2943 So this is really only true for frame, stack and virtual registers,
2944 which we know are always aligned and should not be adversely affected
2945 by CSE. */
2946 static int
2947 regno_aligned_for_load (int regno)
2949 return regno == FRAME_POINTER_REGNUM
2950 || regno == HARD_FRAME_POINTER_REGNUM
2951 || regno == STACK_POINTER_REGNUM
2952 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
2955 /* Return TRUE when mem is known to be 16-byte aligned. */
2957 aligned_mem_p (rtx mem)
2959 if (MEM_ALIGN (mem) >= 128)
2960 return 1;
2961 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
2962 return 1;
2963 if (GET_CODE (XEXP (mem, 0)) == PLUS)
2965 rtx p0 = XEXP (XEXP (mem, 0), 0);
2966 rtx p1 = XEXP (XEXP (mem, 0), 1);
2967 if (regno_aligned_for_load (REGNO (p0)))
2969 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
2970 return 1;
2971 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2972 return 1;
2975 else if (GET_CODE (XEXP (mem, 0)) == REG)
2977 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
2978 return 1;
2980 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
2981 return 1;
2982 else if (GET_CODE (XEXP (mem, 0)) == CONST)
2984 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
2985 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
2986 if (GET_CODE (p0) == SYMBOL_REF
2987 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2988 return 1;
2990 return 0;
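/* Examples of MEMs the checks above accept as 16-byte aligned
   (illustration only):

     (mem:TI (reg 7))                                    -> mode is 16 bytes
     (mem:SI (reg frame_pointer))                        -> frame register
     (mem:SI (plus (reg stack_pointer) (const_int 32)))  -> aligned base plus
                                                            a multiple of 16
     (mem:SI (plus (reg 7) (const_int 4)))               -> not known aligned
                                                            (unless MEM_ALIGN
                                                            says otherwise)  */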
2993 /* Return TRUE if we are certain the mem refers to a complete object
2994 which is both 16-byte aligned and padded to a 16-byte boundary. This
2995 would make it safe to store with a single instruction.
2996 We guarantee the alignment and padding for static objects by aligning
2997 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
2998 FIXME: We currently cannot guarantee this for objects on the stack
2999 because assign_parm_setup_stack calls assign_stack_local with the
3000 alignment of the parameter mode and in that case the alignment never
3001 gets adjusted by LOCAL_ALIGNMENT. */
3002 static int
3003 store_with_one_insn_p (rtx mem)
3005 rtx addr = XEXP (mem, 0);
3006 if (GET_MODE (mem) == BLKmode)
3007 return 0;
3008 /* Only static objects. */
3009 if (GET_CODE (addr) == SYMBOL_REF)
3011 /* We use the associated declaration to make sure the access is
3012 referring to the whole object.
3013 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3014 if it is necessary. Will there be cases where one exists, and
3015 the other does not? Will there be cases where both exist, but
3016 have different types? */
3017 tree decl = MEM_EXPR (mem);
3018 if (decl
3019 && TREE_CODE (decl) == VAR_DECL
3020 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3021 return 1;
3022 decl = SYMBOL_REF_DECL (addr);
3023 if (decl
3024 && TREE_CODE (decl) == VAR_DECL
3025 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3026 return 1;
3028 return 0;
3032 spu_expand_mov (rtx * ops, enum machine_mode mode)
3034 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3035 abort ();
3037 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3039 rtx from = SUBREG_REG (ops[1]);
3040 enum machine_mode imode = GET_MODE (from);
3042 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3043 && GET_MODE_CLASS (imode) == MODE_INT
3044 && subreg_lowpart_p (ops[1]));
3046 if (GET_MODE_SIZE (imode) < 4)
3048 from = gen_rtx_SUBREG (SImode, from, 0);
3049 imode = SImode;
3052 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3054 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3055 emit_insn (GEN_FCN (icode) (ops[0], from));
3057 else
3058 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3059 return 1;
3062 /* At least one of the operands needs to be a register. */
3063 if ((reload_in_progress | reload_completed) == 0
3064 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3066 rtx temp = force_reg (mode, ops[1]);
3067 emit_move_insn (ops[0], temp);
3068 return 1;
3070 if (reload_in_progress || reload_completed)
3072 enum machine_mode mode = GET_MODE (ops[0]);
3073 if (GET_CODE (ops[1]) == CONST_INT
3074 && (mode == DImode || mode == TImode)
3075 && ((INTVAL (ops[1]) >> 32) & 0xffffffffll) !=
3076 (INTVAL (ops[1]) & 0xffffffffll))
3078 rtx mem = force_const_mem (mode, ops[1]);
3079 if (TARGET_LARGE_MEM)
3081 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
3082 emit_move_insn (addr, XEXP (mem, 0));
3083 mem = replace_equiv_address (mem, addr);
3085 emit_move_insn (ops[0], mem);
3086 return 1;
3088 else if ((GET_CODE (ops[1]) == CONST_INT
3089 || GET_CODE (ops[1]) == CONST_DOUBLE
3090 || GET_CODE (ops[1]) == CONST_VECTOR)
3091 && !immediate_load_p (ops[1], mode)
3092 && !fsmbi_const_p (ops[1]))
3094 unsigned char arrlo[16];
3095 unsigned char arrhi[16];
3096 rtx to = ops[0], hi, lo;
3097 int i;
3098 constant_to_array (mode, ops[1], arrhi);
3099 for (i = 0; i < 16; i += 4)
3101 arrlo[i + 2] = arrhi[i + 2];
3102 arrlo[i + 3] = arrhi[i + 3];
3103 arrlo[i + 0] = arrlo[i + 1] = 0;
3104 arrhi[i + 2] = arrhi[i + 3] = 0;
3106 if (mode == SFmode)
3108 to = spu_gen_subreg (SImode, ops[0]);
3109 mode = SImode;
3111 else if (mode == V4SFmode)
3113 to = spu_gen_subreg (V4SImode, ops[0]);
3114 mode = V4SImode;
3116 hi = array_to_constant (mode, arrhi);
3117 lo = array_to_constant (mode, arrlo);
3118 emit_move_insn (to, hi);
3119 emit_insn (gen_rtx_SET (VOIDmode, to, gen_rtx_IOR (mode, to, lo)));
3120 return 1;
3122 return 0;
3124 else
3126 if (GET_CODE (ops[0]) == MEM)
3128 if (!spu_valid_move (ops))
3130 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3131 gen_reg_rtx (TImode)));
3132 return 1;
3135 else if (GET_CODE (ops[1]) == MEM)
3137 if (!spu_valid_move (ops))
3139 emit_insn (gen_load
3140 (ops[0], ops[1], gen_reg_rtx (TImode),
3141 gen_reg_rtx (SImode)));
3142 return 1;
3145 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3146 extend them. */
3147 if (GET_CODE (ops[1]) == CONST_INT)
3149 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3150 if (val != INTVAL (ops[1]))
3152 emit_move_insn (ops[0], GEN_INT (val));
3153 return 1;
3157 return 0;
3160 static int
3161 reg_align (rtx reg)
3163 /* For now, only frame registers are known to be aligned at all times.
3164 We can't trust REGNO_POINTER_ALIGN because optimization will move
3165 registers around, potentially changing an "aligned" register in an
3166 address to an unaligned register, which would result in an invalid
3167 address. */
3168 int regno = REGNO (reg);
3169 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3172 void
3173 spu_split_load (rtx * ops)
3175 enum machine_mode mode = GET_MODE (ops[0]);
3176 rtx addr, load, rot, mem, p0, p1;
3177 int rot_amt;
3179 addr = XEXP (ops[1], 0);
3181 rot = 0;
3182 rot_amt = 0;
3183 if (GET_CODE (addr) == PLUS)
3185 /* 8 cases:
3186 aligned reg + aligned reg => lqx
3187 aligned reg + unaligned reg => lqx, rotqby
3188 aligned reg + aligned const => lqd
3189 aligned reg + unaligned const => lqd, rotqbyi
3190 unaligned reg + aligned reg => lqx, rotqby
3191 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3192 unaligned reg + aligned const => lqd, rotqby
3193 unaligned reg + unaligned const -> not allowed by legitimate address
3195 p0 = XEXP (addr, 0);
3196 p1 = XEXP (addr, 1);
3197 if (reg_align (p0) < 128)
3199 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3201 emit_insn (gen_addsi3 (ops[3], p0, p1));
3202 rot = ops[3];
3204 else
3205 rot = p0;
3207 else
3209 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3211 rot_amt = INTVAL (p1) & 15;
3212 p1 = GEN_INT (INTVAL (p1) & -16);
3213 addr = gen_rtx_PLUS (SImode, p0, p1);
3215 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3216 rot = p1;
3219 else if (GET_CODE (addr) == REG)
3221 if (reg_align (addr) < 128)
3222 rot = addr;
3224 else if (GET_CODE (addr) == CONST)
3226 if (GET_CODE (XEXP (addr, 0)) == PLUS
3227 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3228 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3230 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3231 if (rot_amt & -16)
3232 addr = gen_rtx_CONST (Pmode,
3233 gen_rtx_PLUS (Pmode,
3234 XEXP (XEXP (addr, 0), 0),
3235 GEN_INT (rot_amt & -16)));
3236 else
3237 addr = XEXP (XEXP (addr, 0), 0);
3239 else
3240 rot = addr;
3242 else if (GET_CODE (addr) == CONST_INT)
3244 rot_amt = INTVAL (addr);
3245 addr = GEN_INT (rot_amt & -16);
3247 else if (!ALIGNED_SYMBOL_REF_P (addr))
3248 rot = addr;
3250 if (GET_MODE_SIZE (mode) < 4)
3251 rot_amt += GET_MODE_SIZE (mode) - 4;
3253 rot_amt &= 15;
3255 if (rot && rot_amt)
3257 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3258 rot = ops[3];
3259 rot_amt = 0;
3262 load = ops[2];
3264 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3265 mem = change_address (ops[1], TImode, addr);
3267 emit_insn (gen_movti (load, mem));
3269 if (rot)
3270 emit_insn (gen_rotqby_ti (load, load, rot));
3271 else if (rot_amt)
3272 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3274 if (reload_completed)
3275 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3276 else
3277 emit_insn (gen_spu_convert (ops[0], load));
3280 void
3281 spu_split_store (rtx * ops)
3283 enum machine_mode mode = GET_MODE (ops[0]);
3284 rtx pat = ops[2];
3285 rtx reg = ops[3];
3286 rtx addr, p0, p1, p1_lo, smem;
3287 int aform;
3288 int scalar;
3290 addr = XEXP (ops[0], 0);
3292 if (GET_CODE (addr) == PLUS)
3294 /* 8 cases:
3295 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3296 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3297 aligned reg + aligned const => lqd, c?d, shuf, stqx
3298 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3299 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3300 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3301 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3302 unaligned reg + unaligned const -> not allowed by legitimate address
3304 aform = 0;
3305 p0 = XEXP (addr, 0);
3306 p1 = p1_lo = XEXP (addr, 1);
3307 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3309 p1_lo = GEN_INT (INTVAL (p1) & 15);
3310 p1 = GEN_INT (INTVAL (p1) & -16);
3311 addr = gen_rtx_PLUS (SImode, p0, p1);
3314 else if (GET_CODE (addr) == REG)
3316 aform = 0;
3317 p0 = addr;
3318 p1 = p1_lo = const0_rtx;
3320 else
3322 aform = 1;
3323 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3324 p1 = 0; /* aform doesn't use p1 */
3325 p1_lo = addr;
3326 if (ALIGNED_SYMBOL_REF_P (addr))
3327 p1_lo = const0_rtx;
3328 else if (GET_CODE (addr) == CONST)
3330 if (GET_CODE (XEXP (addr, 0)) == PLUS
3331 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3332 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3334 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3335 if ((v & -16) != 0)
3336 addr = gen_rtx_CONST (Pmode,
3337 gen_rtx_PLUS (Pmode,
3338 XEXP (XEXP (addr, 0), 0),
3339 GEN_INT (v & -16)));
3340 else
3341 addr = XEXP (XEXP (addr, 0), 0);
3342 p1_lo = GEN_INT (v & 15);
3345 else if (GET_CODE (addr) == CONST_INT)
3347 p1_lo = GEN_INT (INTVAL (addr) & 15);
3348 addr = GEN_INT (INTVAL (addr) & -16);
3352 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3354 scalar = store_with_one_insn_p (ops[0]);
3355 if (!scalar)
3357 /* We could copy the flags from the ops[0] MEM to mem here,
3358 but we don't, because we want this load to be optimized away if
3359 possible, and copying the flags will prevent that in certain
3360 cases, e.g. consider the volatile flag. */
3362 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3363 set_mem_alias_set (lmem, 0);
3364 emit_insn (gen_movti (reg, lmem));
3366 if (!p0 || reg_align (p0) >= 128)
3367 p0 = stack_pointer_rtx;
3368 if (!p1_lo)
3369 p1_lo = const0_rtx;
3371 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3372 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3374 else if (reload_completed)
3376 if (GET_CODE (ops[1]) == REG)
3377 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3378 else if (GET_CODE (ops[1]) == SUBREG)
3379 emit_move_insn (reg,
3380 gen_rtx_REG (GET_MODE (reg),
3381 REGNO (SUBREG_REG (ops[1]))));
3382 else
3383 abort ();
3385 else
3387 if (GET_CODE (ops[1]) == REG)
3388 emit_insn (gen_spu_convert (reg, ops[1]));
3389 else if (GET_CODE (ops[1]) == SUBREG)
3390 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3391 else
3392 abort ();
3395 if (GET_MODE_SIZE (mode) < 4 && scalar)
3396 emit_insn (gen_shlqby_ti
3397 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3399 smem = change_address (ops[0], TImode, addr);
3400 /* We can't use the previous alias set because the memory has changed
3401 size and can potentially overlap objects of other types. */
3402 set_mem_alias_set (smem, 0);
3404 emit_insn (gen_movti (smem, reg));
3407 /* Return TRUE if X is MEM which is a struct member reference
3408 and the member can safely be loaded and stored with a single
3409 instruction because it is padded. */
3410 static int
3411 mem_is_padded_component_ref (rtx x)
3413 tree t = MEM_EXPR (x);
3414 tree r;
3415 if (!t || TREE_CODE (t) != COMPONENT_REF)
3416 return 0;
3417 t = TREE_OPERAND (t, 1);
3418 if (!t || TREE_CODE (t) != FIELD_DECL
3419 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3420 return 0;
3421 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3422 r = DECL_FIELD_CONTEXT (t);
3423 if (!r || TREE_CODE (r) != RECORD_TYPE)
3424 return 0;
3425 /* Make sure they are the same mode */
3426 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3427 return 0;
3428 /* If there are no following fields then the field alignment assures
3429 the structure is padded to the alignment which means this field is
3430 padded too. */
3431 if (TREE_CHAIN (t) == 0)
3432 return 1;
3433 /* If the following field is also aligned then this field will be
3434 padded. */
3435 t = TREE_CHAIN (t);
3436 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3437 return 1;
3438 return 0;
3442 spu_valid_move (rtx * ops)
3444 enum machine_mode mode = GET_MODE (ops[0]);
3445 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3446 return 0;
3448 /* init_expr_once tries to recog against load and store insns to set
3449 the direct_load[] and direct_store[] arrays. We always want to
3450 consider those loads and stores valid. init_expr_once is called in
3451 the context of a dummy function which does not have a decl. */
3452 if (cfun->decl == 0)
3453 return 1;
3455 /* Don't allow loads/stores which would require more than 1 insn.
3456 During and after reload we assume loads and stores only take 1
3457 insn. */
3458 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3460 if (GET_CODE (ops[0]) == MEM
3461 && (GET_MODE_SIZE (mode) < 4
3462 || !(store_with_one_insn_p (ops[0])
3463 || mem_is_padded_component_ref (ops[0]))))
3464 return 0;
3465 if (GET_CODE (ops[1]) == MEM
3466 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3467 return 0;
3469 return 1;
3472 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3473 can be generated using the fsmbi instruction. */
3475 fsmbi_const_p (rtx x)
3477 enum machine_mode mode;
3478 unsigned char arr[16];
3479 int i;
3481 /* We can always choose DImode for a CONST_INT because the high bits of
3482 an SImode value will always be all 0s or all 1s, i.e., valid for fsmbi. */
3483 mode = GET_CODE (x) == CONST_INT ? DImode : GET_MODE (x);
3484 constant_to_array (mode, x, arr);
3486 for (i = 0; i < 16; i++)
3487 if (arr[i] != 0 && arr[i] != 0xff)
3488 return 0;
3489 return 1;
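/* Example constants for the test above (each of the 16 bytes must be
   0x00 or 0xff; illustrative values only):

     (const_int 255)         -> accepted, bytes ... 00 00 00 ff
     (const_int 0xff00ff00)  -> accepted, bytes are only 00 and ff
     (const_int 0x12)        -> rejected, byte 0x12 is neither 00 nor ff

   fsmbi expands a 16-bit immediate into such a mask, one bit per byte.  */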
3492 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3493 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3494 than 16 bytes, the value is repeated across the rest of the array. */
3495 void
3496 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3498 HOST_WIDE_INT val;
3499 int i, j, first;
3501 memset (arr, 0, 16);
3502 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3503 if (GET_CODE (x) == CONST_INT
3504 || (GET_CODE (x) == CONST_DOUBLE
3505 && (mode == SFmode || mode == DFmode)))
3507 gcc_assert (mode != VOIDmode && mode != BLKmode);
3509 if (GET_CODE (x) == CONST_DOUBLE)
3510 val = const_double_to_hwint (x);
3511 else
3512 val = INTVAL (x);
3513 first = GET_MODE_SIZE (mode) - 1;
3514 for (i = first; i >= 0; i--)
3516 arr[i] = val & 0xff;
3517 val >>= 8;
3519 /* Splat the constant across the whole array. */
3520 for (j = 0, i = first + 1; i < 16; i++)
3522 arr[i] = arr[j];
3523 j = (j == first) ? 0 : j + 1;
3526 else if (GET_CODE (x) == CONST_DOUBLE)
3528 val = CONST_DOUBLE_LOW (x);
3529 for (i = 15; i >= 8; i--)
3531 arr[i] = val & 0xff;
3532 val >>= 8;
3534 val = CONST_DOUBLE_HIGH (x);
3535 for (i = 7; i >= 0; i--)
3537 arr[i] = val & 0xff;
3538 val >>= 8;
3541 else if (GET_CODE (x) == CONST_VECTOR)
3543 int units;
3544 rtx elt;
3545 mode = GET_MODE_INNER (mode);
3546 units = CONST_VECTOR_NUNITS (x);
3547 for (i = 0; i < units; i++)
3549 elt = CONST_VECTOR_ELT (x, i);
3550 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3552 if (GET_CODE (elt) == CONST_DOUBLE)
3553 val = const_double_to_hwint (elt);
3554 else
3555 val = INTVAL (elt);
3556 first = GET_MODE_SIZE (mode) - 1;
3557 if (first + i * GET_MODE_SIZE (mode) > 16)
3558 abort ();
3559 for (j = first; j >= 0; j--)
3561 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3562 val >>= 8;
3567 else
3568 gcc_unreachable();
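/* For example (illustration only):

     constant_to_array (SImode, GEN_INT (0x01020304), arr)

   fills arr with the byte pattern 01 02 03 04 repeated four times, and

     constant_to_array (QImode, GEN_INT (0xab), arr)

   sets all 16 bytes to 0xab, since the value is splatted across the
   rest of the array.  */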
3571 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3572 smaller than 16 bytes, use the bytes that would represent that value
3573 in a register, e.g., for QImode return the value of arr[3]. */
3575 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3577 enum machine_mode inner_mode;
3578 rtvec v;
3579 int units, size, i, j, k;
3580 HOST_WIDE_INT val;
3582 if (GET_MODE_CLASS (mode) == MODE_INT
3583 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3585 j = GET_MODE_SIZE (mode);
3586 i = j < 4 ? 4 - j : 0;
3587 for (val = 0; i < j; i++)
3588 val = (val << 8) | arr[i];
3589 val = trunc_int_for_mode (val, mode);
3590 return GEN_INT (val);
3593 if (mode == TImode)
3595 HOST_WIDE_INT high;
3596 for (i = high = 0; i < 8; i++)
3597 high = (high << 8) | arr[i];
3598 for (i = 8, val = 0; i < 16; i++)
3599 val = (val << 8) | arr[i];
3600 return immed_double_const (val, high, TImode);
3602 if (mode == SFmode)
3604 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3605 val = trunc_int_for_mode (val, SImode);
3606 return hwint_to_const_double (val, SFmode);
3608 if (mode == DFmode)
3610 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3611 val <<= 32;
3612 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3613 return hwint_to_const_double (val, DFmode);
3616 if (!VECTOR_MODE_P (mode))
3617 abort ();
3619 units = GET_MODE_NUNITS (mode);
3620 size = GET_MODE_UNIT_SIZE (mode);
3621 inner_mode = GET_MODE_INNER (mode);
3622 v = rtvec_alloc (units);
3624 for (k = i = 0; i < units; ++i)
3626 val = 0;
3627 for (j = 0; j < size; j++, k++)
3628 val = (val << 8) | arr[k];
3630 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3631 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3632 else
3633 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3635 if (k > 16)
3636 abort ();
3638 return gen_rtx_CONST_VECTOR (mode, v);
3641 static void
3642 reloc_diagnostic (rtx x)
3644 tree loc_decl, decl = 0;
3645 const char *msg;
3646 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3647 return;
3649 if (GET_CODE (x) == SYMBOL_REF)
3650 decl = SYMBOL_REF_DECL (x);
3651 else if (GET_CODE (x) == CONST
3652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3653 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3655 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3656 if (decl && !DECL_P (decl))
3657 decl = 0;
3659 /* We use last_assemble_variable_decl to get line information. It's
3660 not always going to be right and might not even be close, but will
3661 be right for the more common cases. */
3662 if (!last_assemble_variable_decl)
3663 loc_decl = decl;
3664 else
3665 loc_decl = last_assemble_variable_decl;
3667 /* The decl could be a string constant. */
3668 if (decl && DECL_P (decl))
3669 msg = "%Jcreating run-time relocation for %qD";
3670 else
3671 msg = "creating run-time relocation";
3673 if (TARGET_WARN_RELOC)
3674 warning (0, msg, loc_decl, decl);
3675 else
3676 error (msg, loc_decl, decl);
3679 /* Hook into assemble_integer so we can generate an error for run-time
3680 relocations. The SPU ABI disallows them. */
3681 static bool
3682 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3684 /* By default run-time relocations aren't supported, but we allow them
3685 in case users support them in their own run-time loader, and we provide
3686 a warning for those users that don't. */
3687 if ((GET_CODE (x) == SYMBOL_REF)
3688 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3689 reloc_diagnostic (x);
3691 return default_assemble_integer (x, size, aligned_p);
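/* Example of a trigger for the diagnostic above (illustrative): with
   flag_pic set and TARGET_WARN_RELOC or TARGET_ERROR_RELOC enabled,
   a file-scope initializer such as

     extern int x;
     int *p = &x;

   stores the address of a symbol in static data, which requires a
   run-time relocation and is reported by reloc_diagnostic.  */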
3694 static void
3695 spu_asm_globalize_label (FILE * file, const char *name)
3697 fputs ("\t.global\t", file);
3698 assemble_name (file, name);
3699 fputs ("\n", file);
3702 static bool
3703 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3705 enum machine_mode mode = GET_MODE (x);
3706 int cost = COSTS_N_INSNS (2);
3708 /* Folding to a CONST_VECTOR will use extra space but there might
3709 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3710 only if it allows us to fold away multiple insns. Changing the cost
3711 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3712 because this cost will only be compared against a single insn.
3713 if (code == CONST_VECTOR)
3714 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3717 /* Use defaults for float operations. Not accurate but good enough. */
3718 if (mode == DFmode)
3720 *total = COSTS_N_INSNS (13);
3721 return true;
3723 if (mode == SFmode)
3725 *total = COSTS_N_INSNS (6);
3726 return true;
3728 switch (code)
3730 case CONST_INT:
3731 if (satisfies_constraint_K (x))
3732 *total = 0;
3733 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
3734 *total = COSTS_N_INSNS (1);
3735 else
3736 *total = COSTS_N_INSNS (3);
3737 return true;
3739 case CONST:
3740 *total = COSTS_N_INSNS (3);
3741 return true;
3743 case LABEL_REF:
3744 case SYMBOL_REF:
3745 *total = COSTS_N_INSNS (0);
3746 return true;
3748 case CONST_DOUBLE:
3749 *total = COSTS_N_INSNS (5);
3750 return true;
3752 case FLOAT_EXTEND:
3753 case FLOAT_TRUNCATE:
3754 case FLOAT:
3755 case UNSIGNED_FLOAT:
3756 case FIX:
3757 case UNSIGNED_FIX:
3758 *total = COSTS_N_INSNS (7);
3759 return true;
3761 case PLUS:
3762 if (mode == TImode)
3764 *total = COSTS_N_INSNS (9);
3765 return true;
3767 break;
3769 case MULT:
3770 cost =
3771 GET_CODE (XEXP (x, 0)) ==
3772 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
3773 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
3775 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3777 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3778 cost = COSTS_N_INSNS (14);
3779 if ((val & 0xffff) == 0)
3780 cost = COSTS_N_INSNS (9);
3781 else if (val > 0 && val < 0x10000)
3782 cost = COSTS_N_INSNS (11);
3785 *total = cost;
3786 return true;
3787 case DIV:
3788 case UDIV:
3789 case MOD:
3790 case UMOD:
3791 *total = COSTS_N_INSNS (20);
3792 return true;
3793 case ROTATE:
3794 case ROTATERT:
3795 case ASHIFT:
3796 case ASHIFTRT:
3797 case LSHIFTRT:
3798 *total = COSTS_N_INSNS (4);
3799 return true;
3800 case UNSPEC:
3801 if (XINT (x, 1) == UNSPEC_CONVERT)
3802 *total = COSTS_N_INSNS (0);
3803 else
3804 *total = COSTS_N_INSNS (4);
3805 return true;
3807 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
3808 if (GET_MODE_CLASS (mode) == MODE_INT
3809 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
3810 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
3811 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
3812 *total = cost;
3813 return true;
3816 enum machine_mode
3817 spu_eh_return_filter_mode (void)
3819 /* We would like this to be SImode, but sjlj exceptions seem to work
3820 only with word_mode. */
3821 return TImode;
3824 /* Decide whether we can make a sibling call to a function. DECL is the
3825 declaration of the function being targeted by the call and EXP is the
3826 CALL_EXPR representing the call. */
3827 static bool
3828 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3830 return decl && !TARGET_LARGE_MEM;
3833 /* We need to correctly update the back chain pointer and the Available
3834 Stack Size (which is in the second slot of the sp register). */
3835 void
3836 spu_allocate_stack (rtx op0, rtx op1)
3838 HOST_WIDE_INT v;
3839 rtx chain = gen_reg_rtx (V4SImode);
3840 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
3841 rtx sp = gen_reg_rtx (V4SImode);
3842 rtx splatted = gen_reg_rtx (V4SImode);
3843 rtx pat = gen_reg_rtx (TImode);
3845 /* copy the back chain so we can save it back again. */
3846 emit_move_insn (chain, stack_bot);
3848 op1 = force_reg (SImode, op1);
3850 v = 0x1020300010203ll;
3851 emit_move_insn (pat, immed_double_const (v, v, TImode));
3852 emit_insn (gen_shufb (splatted, op1, op1, pat));
3854 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
3855 emit_insn (gen_subv4si3 (sp, sp, splatted));
3857 if (flag_stack_check)
3859 rtx avail = gen_reg_rtx(SImode);
3860 rtx result = gen_reg_rtx(SImode);
3861 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
3862 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
3863 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
3866 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
3868 emit_move_insn (stack_bot, chain);
3870 emit_move_insn (op0, virtual_stack_dynamic_rtx);
3873 void
3874 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
3876 static unsigned char arr[16] =
3877 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
3878 rtx temp = gen_reg_rtx (SImode);
3879 rtx temp2 = gen_reg_rtx (SImode);
3880 rtx temp3 = gen_reg_rtx (V4SImode);
3881 rtx temp4 = gen_reg_rtx (V4SImode);
3882 rtx pat = gen_reg_rtx (TImode);
3883 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
3885 /* Restore the backchain from the first word, sp from the second. */
3886 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
3887 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
3889 emit_move_insn (pat, array_to_constant (TImode, arr));
3891 /* Compute Available Stack Size for sp */
3892 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
3893 emit_insn (gen_shufb (temp3, temp, temp, pat));
3895 /* Compute Available Stack Size for back chain */
3896 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
3897 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
3898 emit_insn (gen_addv4si3 (temp4, sp, temp4));
3900 emit_insn (gen_addv4si3 (sp, sp, temp3));
3901 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
3904 static void
3905 spu_init_libfuncs (void)
3907 set_optab_libfunc (smul_optab, DImode, "__muldi3");
3908 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
3909 set_optab_libfunc (smod_optab, DImode, "__moddi3");
3910 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
3911 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
3912 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
3913 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
3914 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
3915 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
3916 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
3917 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
3919 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
3920 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
3923 /* Make a subreg, stripping any existing subreg. We could possibly just
3924 call simplify_subreg, but in this case we know what we want. */
3926 spu_gen_subreg (enum machine_mode mode, rtx x)
3928 if (GET_CODE (x) == SUBREG)
3929 x = SUBREG_REG (x);
3930 if (GET_MODE (x) == mode)
3931 return x;
3932 return gen_rtx_SUBREG (mode, x, 0);
3935 static bool
3936 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
3938 return (TYPE_MODE (type) == BLKmode
3939 && ((type) == 0
3940 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3941 || int_size_in_bytes (type) >
3942 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
3945 /* Create the built-in types and functions */
3947 struct spu_builtin_description spu_builtins[] = {
3948 #define DEF_BUILTIN(fcode, icode, name, type, params) \
3949 {fcode, icode, name, type, params, NULL_TREE},
3950 #include "spu-builtins.def"
3951 #undef DEF_BUILTIN
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
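  /* An illustrative example of the loop below: a builtin whose parm
     array is { SPU_BTI_INTSI, SPU_BTI_INTSI, SPU_BTI_INTSI,
     SPU_BTI_END_OF_PARAMS } ends up with the prototype
     "int __builtin_NAME (int, int)" -- the parameters are consed
     backwards onto void_list_node and the first entry supplies the
     return type.  */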
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];		/* build_function will make a copy. */
      int parm;

      if (d->name == 0)
	continue;

      /* find last parm */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
	{
	}

      p = void_list_node;
      while (parm > 1)
	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
	add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
			      NULL, NULL_TREE);
    }
}
/* Return TRUE if CHANNEL names one of the DMA-related channels
   (21 through 27).  */
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return (channel >= 21 && channel <= 27);
}
/* Replicate scalar ops[1] into every element of vector ops[0].  */
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
    {
      rtvec v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = ops[1];
      RTVEC_ELT (v, 1) = ops[1];
      RTVEC_ELT (v, 2) = ops[1];
      RTVEC_ELT (v, 3) = ops[1];
      emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
	  && GET_CODE (ops[1]) != SUBREG)
	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
	{
	case V2DImode:
	case V2DFmode:
	  shuf =
	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
				TImode);
	  break;
	case V4SImode:
	case V4SFmode:
	  shuf =
	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
				TImode);
	  break;
	case V8HImode:
	  shuf =
	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
				TImode);
	  break;
	case V16QImode:
	  shuf =
	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
				TImode);
	  break;
	default:
	  abort ();
	}
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
/* Extract element ops[2] of vector ops[1] into scalar ops[0].  */
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
	  break;
	case V4SFmode:
	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
	  break;
	case V4SImode:
	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
	  break;
	case V2DImode:
	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
	  break;
	case V2DFmode:
	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
	  break;
	default:
	  abort ();
	}
      return;
    }

  /* The element index is variable: compute a byte rotate count that
     brings the requested element into the preferred slot.  */
  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
/* Insert scalar ops[1] into element ops[3] of vector ops[2], placing
   the result in ops[0].  */
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
/* Place scalar ops[1] into element ops[2] of vector ops[0]; the other
   elements are left undefined.  */
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case V4SFmode:
	case V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case V2DImode:
	case V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
/* Emit RTL to initialize the trampoline at address TRAMP with the
   function address FNADDR and static chain value CXT.  */
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage. */
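      /* A worked sketch of the encoding, assuming the usual RI18/RI16
	 instruction layouts: the first word of insna is
	 0x42000000 | STATIC_CHAIN_REGNUM, i.e. an "ila" with an empty
	 immediate field, and once the shuffle below has lined up
	 (cxt << 7) with that word, the ior forms "ila $chain,cxt".
	 The second word, 0x30000000 | (fnaddr << 5), similarly becomes
	 "bra fnaddr".  If cxt needed more than 18 bits, the shifted
	 value would spill into the opcode bits, which is the failure
	 the comment above warns about.  */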
      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
/* Sign-extend ops[1] into the wider mode of ops[0] by building a shufb
   pattern that replicates the sign bytes.  */
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
/* Expand vector initialization.  If there are any constant parts,
   load the constant parts first, then load any non-constant parts.  */
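/* Illustrative example (not actual emitted RTL): initializing a V4SI
   vector from { x, 7, 7, 7 }, where x is in a register, first loads the
   constant vector { 7, 7, 7, 7 } -- the variable slot is filled with
   the first constant so the recursive call can use a single splat --
   and then inserts x into element 0 with spu_builtin_insert.  */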
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var;
      else
	{
	  if (first_constant == NULL_RTX)
	    first_constant = x;
	}
      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* fill empty slots with the first constant, this increases
	     our chance of using splats in the recursive call below. */
	  for (i = 0; i < n_elts; ++i)
	    if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
	      XVECEXP (constant_parts_rtx, 0, i) = first_constant;

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!CONSTANT_P (x))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
/* Force OP into a register of mode MODE, reinterpreting it through a
   subreg when the sizes match and going through spu_convert otherwise.  */
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
/* Check that operand OP is valid for parameter type P of builtin D;
   diagnose out-of-range immediates and low-order bits that the
   instruction cannot encode.  */
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;
      if (!CONSTANT_P (op)
	  || (GET_CODE (op) == CONST_INT
	      && (INTVAL (op) < spu_builtin_range[range].low
		  || INTVAL (op) > spu_builtin_range[range].high)))
	error ("%s expects an integer literal in the range [%d, %d].",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa, and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned. */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr. */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored.", lsbits,
		 d->name);
    }
}
static void
expand_builtin_args (struct spu_builtin_description *d, tree arglist,
		     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0;

  /* Expand the arguments into rtl. */
  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (; i < insn_data[icode].n_operands; i++)
    {
      tree arg = TREE_VALUE (arglist);
      if (arg == 0)
	abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
      arglist = TREE_CHAIN (arglist);
    }
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree arglist, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  expand_builtin_args (d, arglist, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
	 use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies,
	 and when we are using all of the registers extra copies lead
	 to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
	 si_ai(), we sometimes need to convert the scalar argument to a
	 vector argument by splatting the scalar. */
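      /* For instance (a sketch of the intent): in si_ai (a, 10) the
	 literal 10 arrives here as a CONST_INT while the insn pattern
	 wants a vector operand, so spu_const builds the splatted
	 constant vector { 10, 10, 10, 10 }; a non-constant scalar is
	 instead copied to a register and splatted with spu_splats.  */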
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);

      spu_check_builtin_parm (d, ops[i], d->parm[p]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
	 the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
/* Expand a call EXP to an SPU builtin; TARGET is a suggested place for
   the result.  */
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  tree arglist = TREE_OPERAND (exp, 1);
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, arglist, target);
    }
  abort ();
}