[official-gcc.git] / gcc / config / spu / spu.c
1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "tree-gimple.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
56 #include "ddg.h"
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
61 int low, high;
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
100 static rtx get_branch_target (rtx branch);
101 static void insert_branch_hints (void);
102 static void insert_nops (void);
103 static void spu_machine_dependent_reorg (void);
104 static int spu_sched_issue_rate (void);
105 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107 static int get_pipe (rtx insn);
108 static int spu_sched_adjust_priority (rtx insn, int pri);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116 static int spu_naked_function_p (tree func);
117 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
119 static tree spu_build_builtin_va_list (void);
120 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
121 tree * post_p);
122 static int regno_aligned_for_load (int regno);
123 static int store_with_one_insn_p (rtx mem);
124 static int reg_align (rtx reg);
125 static int mem_is_padded_component_ref (rtx x);
126 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127 static void spu_asm_globalize_label (FILE * file, const char *name);
128 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
129 int *total);
130 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131 static void spu_init_libfuncs (void);
132 static bool spu_return_in_memory (const_tree type, const_tree fntype);
133 static void fix_range (const char *);
134 static void spu_encode_section_info (tree, rtx, int);
135 static tree spu_builtin_mul_widen_even (tree);
136 static tree spu_builtin_mul_widen_odd (tree);
137 static tree spu_builtin_mask_for_load (void);
138 static int spu_builtin_vectorization_cost (bool);
139 static bool spu_vector_alignment_reachable (const_tree, bool);
140 static int spu_sms_res_mii (struct ddg *g);
142 extern const char *reg_names[];
143 rtx spu_compare_op0, spu_compare_op1;
145 /* Which instruction set architecture to use. */
146 int spu_arch;
147 /* Which cpu are we tuning for. */
148 int spu_tune;
150 enum spu_immediate {
151 SPU_NONE,
152 SPU_IL,
153 SPU_ILA,
154 SPU_ILH,
155 SPU_ILHU,
156 SPU_ORI,
157 SPU_ORHI,
158 SPU_ORBI,
159 SPU_IOHL
161 enum immediate_class
163 IC_POOL, /* constant pool */
164 IC_IL1, /* one il* instruction */
165 IC_IL2, /* both ilhu and iohl instructions */
166 IC_IL1s, /* one il* instruction */
167 IC_IL2s, /* both ilhu and iohl instructions */
168 IC_FSMBI, /* the fsmbi instruction */
169 IC_CPAT, /* one of the c*d instructions */
170 IC_FSMBI2 /* fsmbi plus 1 other instruction */
173 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
174 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
175 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
176 static enum immediate_class classify_immediate (rtx op,
177 enum machine_mode mode);
179 static enum machine_mode
180 spu_libgcc_cmp_return_mode (void);
182 static enum machine_mode
183 spu_libgcc_shift_count_mode (void);
185 /* Built in types. */
186 tree spu_builtin_types[SPU_BTI_MAX];
188 /* TARGET overrides. */
190 #undef TARGET_INIT_BUILTINS
191 #define TARGET_INIT_BUILTINS spu_init_builtins
193 #undef TARGET_EXPAND_BUILTIN
194 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
196 #undef TARGET_EH_RETURN_FILTER_MODE
197 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
199 /* The .8byte directive doesn't seem to work well for a 32 bit
200 architecture. */
201 #undef TARGET_ASM_UNALIGNED_DI_OP
202 #define TARGET_ASM_UNALIGNED_DI_OP NULL
204 #undef TARGET_RTX_COSTS
205 #define TARGET_RTX_COSTS spu_rtx_costs
207 #undef TARGET_ADDRESS_COST
208 #define TARGET_ADDRESS_COST hook_int_rtx_0
210 #undef TARGET_SCHED_ISSUE_RATE
211 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
213 #undef TARGET_SCHED_VARIABLE_ISSUE
214 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
216 #undef TARGET_SCHED_ADJUST_PRIORITY
217 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
219 #undef TARGET_SCHED_ADJUST_COST
220 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
222 const struct attribute_spec spu_attribute_table[];
223 #undef TARGET_ATTRIBUTE_TABLE
224 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
226 #undef TARGET_ASM_INTEGER
227 #define TARGET_ASM_INTEGER spu_assemble_integer
229 #undef TARGET_SCALAR_MODE_SUPPORTED_P
230 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
232 #undef TARGET_VECTOR_MODE_SUPPORTED_P
233 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
235 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
236 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
238 #undef TARGET_ASM_GLOBALIZE_LABEL
239 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
241 #undef TARGET_PASS_BY_REFERENCE
242 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
244 #undef TARGET_MUST_PASS_IN_STACK
245 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
247 #undef TARGET_BUILD_BUILTIN_VA_LIST
248 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
250 #undef TARGET_SETUP_INCOMING_VARARGS
251 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
253 #undef TARGET_MACHINE_DEPENDENT_REORG
254 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
256 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
257 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
259 #undef TARGET_DEFAULT_TARGET_FLAGS
260 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
262 #undef TARGET_INIT_LIBFUNCS
263 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
265 #undef TARGET_RETURN_IN_MEMORY
266 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
268 #undef TARGET_ENCODE_SECTION_INFO
269 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
271 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
272 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
274 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
275 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
277 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
278 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
280 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
281 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
283 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
284 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
286 #undef TARGET_LIBGCC_CMP_RETURN_MODE
287 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
289 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
290 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
292 #undef TARGET_SCHED_SMS_RES_MII
293 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
295 struct gcc_target targetm = TARGET_INITIALIZER;
297 void
298 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
300 /* Override some of the default param values. With so many registers
301 larger values are better for these params. */
302 MAX_PENDING_LIST_LENGTH = 128;
304 /* With so many registers this is better on by default. */
305 flag_rename_registers = 1;
308 /* Sometimes certain combinations of command options do not make sense
309 on a particular target machine. You can define a macro
310 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
311 executed once just after all the command options have been parsed. */
312 void
313 spu_override_options (void)
315 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it
316 is more important to keep code small by default. */
317 if (!flag_unroll_loops && !flag_peel_loops
318 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
319 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
321 flag_omit_frame_pointer = 1;
323 if (align_functions < 8)
324 align_functions = 8;
326 if (spu_fixed_range_string)
327 fix_range (spu_fixed_range_string);
329 /* Determine processor architectural level. */
330 if (spu_arch_string)
332 if (strcmp (&spu_arch_string[0], "cell") == 0)
333 spu_arch = PROCESSOR_CELL;
334 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
335 spu_arch = PROCESSOR_CELLEDP;
336 else
337 error ("Unknown architecture '%s'", &spu_arch_string[0]);
340 /* Determine processor to tune for. */
341 if (spu_tune_string)
343 if (strcmp (&spu_tune_string[0], "cell") == 0)
344 spu_tune = PROCESSOR_CELL;
345 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
346 spu_tune = PROCESSOR_CELLEDP;
347 else
348 error ("Unknown architecture '%s'", &spu_tune_string[0]);
352 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
353 struct attribute_spec.handler. */
355 /* Table of machine attributes. */
356 const struct attribute_spec spu_attribute_table[] =
358 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
359 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
360 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
361 { NULL, 0, 0, false, false, false, NULL }
364 /* True if MODE is valid for the target. By "valid", we mean able to
365 be manipulated in non-trivial ways. In particular, this means all
366 the arithmetic is supported. */
367 static bool
368 spu_scalar_mode_supported_p (enum machine_mode mode)
370 switch (mode)
372 case QImode:
373 case HImode:
374 case SImode:
375 case SFmode:
376 case DImode:
377 case TImode:
378 case DFmode:
379 return true;
381 default:
382 return false;
386 /* Similarly for vector modes. "Supported" here is less strict. At
387 least some operations are supported; need to check optabs or builtins
388 for further details. */
389 static bool
390 spu_vector_mode_supported_p (enum machine_mode mode)
392 switch (mode)
394 case V16QImode:
395 case V8HImode:
396 case V4SImode:
397 case V2DImode:
398 case V4SFmode:
399 case V2DFmode:
400 return true;
402 default:
403 return false;
407 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
408 least significant bytes of the outer mode. This function returns
409 TRUE for the SUBREGs where this is correct. */
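/* For example, (subreg:SI (reg:HI)) passes because both modes fit in a
   single 4-byte slot, while a paradoxical (subreg:TI (reg:SI)) is rejected
   because the SImode value is not guaranteed to occupy the low bytes of
   the TImode register on SPU.  */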
411 valid_subreg (rtx op)
413 enum machine_mode om = GET_MODE (op);
414 enum machine_mode im = GET_MODE (SUBREG_REG (op));
415 return om != VOIDmode && im != VOIDmode
416 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
417 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
420 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
421 and adjust the start offset. */
422 static rtx
423 adjust_operand (rtx op, HOST_WIDE_INT * start)
425 enum machine_mode mode;
426 int op_size;
427 /* Strip any SUBREG */
428 if (GET_CODE (op) == SUBREG)
430 if (start)
431 *start -=
432 GET_MODE_BITSIZE (GET_MODE (op)) -
433 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
434 op = SUBREG_REG (op);
436 /* If it is smaller than SI, assure a SUBREG */
437 op_size = GET_MODE_BITSIZE (GET_MODE (op));
438 if (op_size < 32)
440 if (start)
441 *start += 32 - op_size;
442 op_size = 32;
444 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
445 mode = mode_for_size (op_size, MODE_INT, 0);
446 if (mode != GET_MODE (op))
447 op = gen_rtx_SUBREG (mode, op, 0);
448 return op;
451 void
452 spu_expand_extv (rtx ops[], int unsignedp)
454 HOST_WIDE_INT width = INTVAL (ops[2]);
455 HOST_WIDE_INT start = INTVAL (ops[3]);
456 HOST_WIDE_INT src_size, dst_size;
457 enum machine_mode src_mode, dst_mode;
458 rtx dst = ops[0], src = ops[1];
459 rtx s;
461 dst = adjust_operand (ops[0], 0);
462 dst_mode = GET_MODE (dst);
463 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
465 src = adjust_operand (src, &start);
466 src_mode = GET_MODE (src);
467 src_size = GET_MODE_BITSIZE (GET_MODE (src));
469 if (start > 0)
471 s = gen_reg_rtx (src_mode);
472 switch (src_mode)
474 case SImode:
475 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
476 break;
477 case DImode:
478 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
479 break;
480 case TImode:
481 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
482 break;
483 default:
484 abort ();
486 src = s;
489 if (width < src_size)
491 rtx pat;
492 int icode;
493 switch (src_mode)
495 case SImode:
496 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
497 break;
498 case DImode:
499 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
500 break;
501 case TImode:
502 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
503 break;
504 default:
505 abort ();
507 s = gen_reg_rtx (src_mode);
508 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
509 emit_insn (pat);
510 src = s;
513 convert_move (dst, src, unsignedp);
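/* Expand an insv pattern: insert the low OPS[1] bits of OPS[3] into OPS[0]
   at bit offset OPS[2], counted from the most significant bit.  When OPS[0]
   is a MEM, an aligned load / selb / store read-modify-write sequence is
   emitted instead of a plain register update.  */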
516 void
517 spu_expand_insv (rtx ops[])
519 HOST_WIDE_INT width = INTVAL (ops[1]);
520 HOST_WIDE_INT start = INTVAL (ops[2]);
521 HOST_WIDE_INT maskbits;
522 enum machine_mode dst_mode, src_mode;
523 rtx dst = ops[0], src = ops[3];
524 int dst_size, src_size;
525 rtx mask;
526 rtx shift_reg;
527 int shift;
530 if (GET_CODE (ops[0]) == MEM)
531 dst = gen_reg_rtx (TImode);
532 else
533 dst = adjust_operand (dst, &start);
534 dst_mode = GET_MODE (dst);
535 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
537 if (CONSTANT_P (src))
539 enum machine_mode m =
540 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
541 src = force_reg (m, convert_to_mode (m, src, 0));
543 src = adjust_operand (src, 0);
544 src_mode = GET_MODE (src);
545 src_size = GET_MODE_BITSIZE (GET_MODE (src));
547 mask = gen_reg_rtx (dst_mode);
548 shift_reg = gen_reg_rtx (dst_mode);
549 shift = dst_size - start - width;
551 /* It's not safe to use subreg here because the compiler assumes
552 that the SUBREG_REG is right justified in the SUBREG. */
553 convert_move (shift_reg, src, 1);
555 if (shift > 0)
557 switch (dst_mode)
559 case SImode:
560 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
561 break;
562 case DImode:
563 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
564 break;
565 case TImode:
566 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
567 break;
568 default:
569 abort ();
572 else if (shift < 0)
573 abort ();
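/* Build a mask of WIDTH consecutive one bits starting START bits below the
   most significant bit of DST; the selb below uses it to merge the shifted
   source into the destination.  */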
575 switch (dst_size)
577 case 32:
578 maskbits = (-1ll << (32 - width - start));
579 if (start)
580 maskbits += (1ll << (32 - start));
581 emit_move_insn (mask, GEN_INT (maskbits));
582 break;
583 case 64:
584 maskbits = (-1ll << (64 - width - start));
585 if (start)
586 maskbits += (1ll << (64 - start));
587 emit_move_insn (mask, GEN_INT (maskbits));
588 break;
589 case 128:
591 unsigned char arr[16];
592 int i = start / 8;
593 memset (arr, 0, sizeof (arr));
594 arr[i] = 0xff >> (start & 7);
595 for (i++; i <= (start + width - 1) / 8; i++)
596 arr[i] = 0xff;
597 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
598 emit_move_insn (mask, array_to_constant (TImode, arr));
600 break;
601 default:
602 abort ();
604 if (GET_CODE (ops[0]) == MEM)
606 rtx aligned = gen_reg_rtx (SImode);
607 rtx low = gen_reg_rtx (SImode);
608 rtx addr = gen_reg_rtx (SImode);
609 rtx rotl = gen_reg_rtx (SImode);
610 rtx mask0 = gen_reg_rtx (TImode);
611 rtx mem;
613 emit_move_insn (addr, XEXP (ops[0], 0));
614 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
615 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
616 emit_insn (gen_negsi2 (rotl, low));
617 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
618 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
619 mem = change_address (ops[0], TImode, aligned);
620 set_mem_alias_set (mem, 0);
621 emit_move_insn (dst, mem);
622 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
623 emit_move_insn (mem, dst);
624 if (start + width > MEM_ALIGN (ops[0]))
626 rtx shl = gen_reg_rtx (SImode);
627 rtx mask1 = gen_reg_rtx (TImode);
628 rtx dst1 = gen_reg_rtx (TImode);
629 rtx mem1;
630 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
631 emit_insn (gen_shlqby_ti (mask1, mask, shl));
632 mem1 = adjust_address (mem, TImode, 16);
633 set_mem_alias_set (mem1, 0);
634 emit_move_insn (dst1, mem1);
635 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
636 emit_move_insn (mem1, dst1);
639 else
640 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
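/* Expand a block copy of OPS[2] bytes from OPS[1] to OPS[0] with alignment
   OPS[3].  Returns 1 when the copy was emitted inline (only constant sizes
   up to MOVE_RATIO * 8 bytes with 16-byte alignment are handled here), and
   0 to let the generic movmem machinery take over.  */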
645 spu_expand_block_move (rtx ops[])
647 HOST_WIDE_INT bytes, align, offset;
648 rtx src, dst, sreg, dreg, target;
649 int i;
650 if (GET_CODE (ops[2]) != CONST_INT
651 || GET_CODE (ops[3]) != CONST_INT
652 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
653 return 0;
655 bytes = INTVAL (ops[2]);
656 align = INTVAL (ops[3]);
658 if (bytes <= 0)
659 return 1;
661 dst = ops[0];
662 src = ops[1];
664 if (align == 16)
666 for (offset = 0; offset + 16 <= bytes; offset += 16)
668 dst = adjust_address (ops[0], V16QImode, offset);
669 src = adjust_address (ops[1], V16QImode, offset);
670 emit_move_insn (dst, src);
672 if (offset < bytes)
674 rtx mask;
675 unsigned char arr[16] = { 0 };
676 for (i = 0; i < bytes - offset; i++)
677 arr[i] = 0xff;
678 dst = adjust_address (ops[0], V16QImode, offset);
679 src = adjust_address (ops[1], V16QImode, offset);
680 mask = gen_reg_rtx (V16QImode);
681 sreg = gen_reg_rtx (V16QImode);
682 dreg = gen_reg_rtx (V16QImode);
683 target = gen_reg_rtx (V16QImode);
684 emit_move_insn (mask, array_to_constant (V16QImode, arr));
685 emit_move_insn (dreg, dst);
686 emit_move_insn (sreg, src);
687 emit_insn (gen_selb (target, dreg, sreg, mask));
688 emit_move_insn (dst, target);
690 return 1;
692 return 0;
695 enum spu_comp_code
696 { SPU_EQ, SPU_GT, SPU_GTU };
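/* Indexed first by operand mode, in the order QI, HI, SI, DI, TI, SF, DF,
   V16QI, V8HI, V4SI, V4SF, V2DF (matching the index computed in
   spu_emit_branch_or_set), and then by spu_comp_code.  A zero entry means
   there is no instruction for that combination.  */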
698 int spu_comp_icode[12][3] = {
699 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
700 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
701 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
702 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
703 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
704 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
705 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
706 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
707 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
708 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
709 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
710 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
713 /* Generate a compare for CODE. Return a brand-new rtx that represents
714 the result of the compare. GCC can figure this out too if we don't
715 provide all variations of compares, but since GCC always wants to use
716 WORD_MODE, we can generate better code in most cases if we do it
717 ourselves. */
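/* IS_SET selects what is emitted: 0 generates a conditional branch to
   OPERANDS[0], 2 generates a selb-based conditional move of OPERANDS[2] and
   OPERANDS[3] into OPERANDS[0], and any other value stores the comparison
   result itself in OPERANDS[0].  */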
718 void
719 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
721 int reverse_compare = 0;
722 int reverse_test = 0;
723 rtx compare_result, eq_result;
724 rtx comp_rtx, eq_rtx;
725 rtx target = operands[0];
726 enum machine_mode comp_mode;
727 enum machine_mode op_mode;
728 enum spu_comp_code scode, eq_code, ior_code;
729 int index;
730 int eq_test = 0;
732 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
733 and so on, to keep the constant in operand 1. */
734 if (GET_CODE (spu_compare_op1) == CONST_INT)
736 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
737 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
738 switch (code)
740 case GE:
741 spu_compare_op1 = GEN_INT (val);
742 code = GT;
743 break;
744 case LT:
745 spu_compare_op1 = GEN_INT (val);
746 code = LE;
747 break;
748 case GEU:
749 spu_compare_op1 = GEN_INT (val);
750 code = GTU;
751 break;
752 case LTU:
753 spu_compare_op1 = GEN_INT (val);
754 code = LEU;
755 break;
756 default:
757 break;
761 comp_mode = SImode;
762 op_mode = GET_MODE (spu_compare_op0);
764 switch (code)
766 case GE:
767 scode = SPU_GT;
768 if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP)
770 reverse_compare = 0;
771 reverse_test = 0;
772 eq_test = 1;
773 eq_code = SPU_EQ;
775 else
777 reverse_compare = 1;
778 reverse_test = 1;
780 break;
781 case LE:
782 scode = SPU_GT;
783 if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP)
785 reverse_compare = 1;
786 reverse_test = 0;
787 eq_test = 1;
788 eq_code = SPU_EQ;
790 else
792 reverse_compare = 0;
793 reverse_test = 1;
795 break;
796 case LT:
797 reverse_compare = 1;
798 reverse_test = 0;
799 scode = SPU_GT;
800 break;
801 case GEU:
802 reverse_compare = 1;
803 reverse_test = 1;
804 scode = SPU_GTU;
805 break;
806 case LEU:
807 reverse_compare = 0;
808 reverse_test = 1;
809 scode = SPU_GTU;
810 break;
811 case LTU:
812 reverse_compare = 1;
813 reverse_test = 0;
814 scode = SPU_GTU;
815 break;
816 case NE:
817 reverse_compare = 0;
818 reverse_test = 1;
819 scode = SPU_EQ;
820 break;
822 case EQ:
823 scode = SPU_EQ;
824 break;
825 case GT:
826 scode = SPU_GT;
827 break;
828 case GTU:
829 scode = SPU_GTU;
830 break;
831 default:
832 scode = SPU_EQ;
833 break;
836 switch (op_mode)
838 case QImode:
839 index = 0;
840 comp_mode = QImode;
841 break;
842 case HImode:
843 index = 1;
844 comp_mode = HImode;
845 break;
846 case SImode:
847 index = 2;
848 break;
849 case DImode:
850 index = 3;
851 break;
852 case TImode:
853 index = 4;
854 break;
855 case SFmode:
856 index = 5;
857 break;
858 case DFmode:
859 index = 6;
860 break;
861 case V16QImode:
862 index = 7;
863 comp_mode = op_mode;
864 break;
865 case V8HImode:
866 index = 8;
867 comp_mode = op_mode;
868 break;
869 case V4SImode:
870 index = 9;
871 comp_mode = op_mode;
872 break;
873 case V4SFmode:
874 index = 10;
875 comp_mode = V4SImode;
876 break;
877 case V2DFmode:
878 index = 11;
879 comp_mode = V2DImode;
880 break;
881 case V2DImode:
882 default:
883 abort ();
886 if (GET_MODE (spu_compare_op1) == DFmode)
888 rtx reg = gen_reg_rtx (DFmode);
889 if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
890 || (scode != SPU_GT && scode != SPU_EQ))
891 abort ();
892 if (spu_arch == PROCESSOR_CELL)
894 if (reverse_compare)
895 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
896 else
897 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
898 reverse_compare = 0;
899 spu_compare_op0 = reg;
900 spu_compare_op1 = CONST0_RTX (DFmode);
904 if (is_set == 0 && spu_compare_op1 == const0_rtx
905 && (GET_MODE (spu_compare_op0) == SImode
906 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
908 /* Don't need to set a register with the result when we are
909 comparing against zero and branching. */
910 reverse_test = !reverse_test;
911 compare_result = spu_compare_op0;
913 else
915 compare_result = gen_reg_rtx (comp_mode);
917 if (reverse_compare)
919 rtx t = spu_compare_op1;
920 spu_compare_op1 = spu_compare_op0;
921 spu_compare_op0 = t;
924 if (spu_comp_icode[index][scode] == 0)
925 abort ();
927 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
928 (spu_compare_op0, op_mode))
929 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
930 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
931 (spu_compare_op1, op_mode))
932 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
933 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
934 spu_compare_op0,
935 spu_compare_op1);
936 if (comp_rtx == 0)
937 abort ();
938 emit_insn (comp_rtx);
940 if (eq_test)
942 eq_result = gen_reg_rtx (comp_mode);
943 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
944 spu_compare_op0,
945 spu_compare_op1);
946 if (eq_rtx == 0)
947 abort ();
948 emit_insn (eq_rtx);
949 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
950 gcc_assert (ior_code != CODE_FOR_nothing);
951 emit_insn (GEN_FCN (ior_code)
952 (compare_result, compare_result, eq_result));
956 if (is_set == 0)
958 rtx bcomp;
959 rtx loc_ref;
961 /* We don't have branch on QI compare insns, so we convert the
962 QI compare result to a HI result. */
963 if (comp_mode == QImode)
965 rtx old_res = compare_result;
966 compare_result = gen_reg_rtx (HImode);
967 comp_mode = HImode;
968 emit_insn (gen_extendqihi2 (compare_result, old_res));
971 if (reverse_test)
972 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
973 else
974 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
976 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
977 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
978 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
979 loc_ref, pc_rtx)));
981 else if (is_set == 2)
983 int compare_size = GET_MODE_BITSIZE (comp_mode);
984 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
985 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
986 rtx select_mask;
987 rtx op_t = operands[2];
988 rtx op_f = operands[3];
990 /* The result of the comparison can be SI, HI or QI mode. Create a
991 mask based on that result. */
992 if (target_size > compare_size)
994 select_mask = gen_reg_rtx (mode);
995 emit_insn (gen_extend_compare (select_mask, compare_result));
997 else if (target_size < compare_size)
998 select_mask =
999 gen_rtx_SUBREG (mode, compare_result,
1000 (compare_size - target_size) / BITS_PER_UNIT);
1001 else if (comp_mode != mode)
1002 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1003 else
1004 select_mask = compare_result;
1006 if (GET_MODE (target) != GET_MODE (op_t)
1007 || GET_MODE (target) != GET_MODE (op_f))
1008 abort ();
1010 if (reverse_test)
1011 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1012 else
1013 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1015 else
1017 if (reverse_test)
1018 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1019 gen_rtx_NOT (comp_mode, compare_result)));
1020 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1021 emit_insn (gen_extendhisi2 (target, compare_result));
1022 else if (GET_MODE (target) == SImode
1023 && GET_MODE (compare_result) == QImode)
1024 emit_insn (gen_extend_compare (target, compare_result));
1025 else
1026 emit_move_insn (target, compare_result);
1030 HOST_WIDE_INT
1031 const_double_to_hwint (rtx x)
1033 HOST_WIDE_INT val;
1034 REAL_VALUE_TYPE rv;
1035 if (GET_MODE (x) == SFmode)
1037 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1038 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1040 else if (GET_MODE (x) == DFmode)
1042 long l[2];
1043 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1044 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1045 val = l[0];
1046 val = (val << 32) | (l[1] & 0xffffffff);
1048 else
1049 abort ();
1050 return val;
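/* Inverse of the above: build an SFmode or DFmode CONST_DOUBLE whose
   target bit pattern is the low 32 or 64 bits of V.  */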
1054 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1056 long tv[2];
1057 REAL_VALUE_TYPE rv;
1058 gcc_assert (mode == SFmode || mode == DFmode);
1060 if (mode == SFmode)
1061 tv[0] = (v << 32) >> 32;
1062 else if (mode == DFmode)
1064 tv[1] = (v << 32) >> 32;
1065 tv[0] = v >> 32;
1067 real_from_target (&rv, tv, mode);
1068 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1071 void
1072 print_operand_address (FILE * file, register rtx addr)
1074 rtx reg;
1075 rtx offset;
1077 if (GET_CODE (addr) == AND
1078 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1079 && INTVAL (XEXP (addr, 1)) == -16)
1080 addr = XEXP (addr, 0);
1082 switch (GET_CODE (addr))
1084 case REG:
1085 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1086 break;
1088 case PLUS:
1089 reg = XEXP (addr, 0);
1090 offset = XEXP (addr, 1);
1091 if (GET_CODE (offset) == REG)
1093 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1094 reg_names[REGNO (offset)]);
1096 else if (GET_CODE (offset) == CONST_INT)
1098 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1099 INTVAL (offset), reg_names[REGNO (reg)]);
1101 else
1102 abort ();
1103 break;
1105 case CONST:
1106 case LABEL_REF:
1107 case SYMBOL_REF:
1108 case CONST_INT:
1109 output_addr_const (file, addr);
1110 break;
1112 default:
1113 debug_rtx (addr);
1114 abort ();
1118 void
1119 print_operand (FILE * file, rtx x, int code)
1121 enum machine_mode mode = GET_MODE (x);
1122 HOST_WIDE_INT val;
1123 unsigned char arr[16];
1124 int xcode = GET_CODE (x);
1125 int i, info;
1126 if (GET_MODE (x) == VOIDmode)
1127 switch (code)
1129 case 'L': /* 128 bits, signed */
1130 case 'm': /* 128 bits, signed */
1131 case 'T': /* 128 bits, signed */
1132 case 't': /* 128 bits, signed */
1133 mode = TImode;
1134 break;
1135 case 'K': /* 64 bits, signed */
1136 case 'k': /* 64 bits, signed */
1137 case 'D': /* 64 bits, signed */
1138 case 'd': /* 64 bits, signed */
1139 mode = DImode;
1140 break;
1141 case 'J': /* 32 bits, signed */
1142 case 'j': /* 32 bits, signed */
1143 case 's': /* 32 bits, signed */
1144 case 'S': /* 32 bits, signed */
1145 mode = SImode;
1146 break;
1148 switch (code)
1151 case 'j': /* 32 bits, signed */
1152 case 'k': /* 64 bits, signed */
1153 case 'm': /* 128 bits, signed */
1154 if (xcode == CONST_INT
1155 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1157 gcc_assert (logical_immediate_p (x, mode));
1158 constant_to_array (mode, x, arr);
1159 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1160 val = trunc_int_for_mode (val, SImode);
1161 switch (which_logical_immediate (val))
1163 case SPU_ORI:
1164 break;
1165 case SPU_ORHI:
1166 fprintf (file, "h");
1167 break;
1168 case SPU_ORBI:
1169 fprintf (file, "b");
1170 break;
1171 default:
1172 gcc_unreachable();
1175 else
1176 gcc_unreachable();
1177 return;
1179 case 'J': /* 32 bits, signed */
1180 case 'K': /* 64 bits, signed */
1181 case 'L': /* 128 bits, signed */
1182 if (xcode == CONST_INT
1183 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1185 gcc_assert (logical_immediate_p (x, mode)
1186 || iohl_immediate_p (x, mode));
1187 constant_to_array (mode, x, arr);
1188 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1189 val = trunc_int_for_mode (val, SImode);
1190 switch (which_logical_immediate (val))
1192 case SPU_ORI:
1193 case SPU_IOHL:
1194 break;
1195 case SPU_ORHI:
1196 val = trunc_int_for_mode (val, HImode);
1197 break;
1198 case SPU_ORBI:
1199 val = trunc_int_for_mode (val, QImode);
1200 break;
1201 default:
1202 gcc_unreachable();
1204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1206 else
1207 gcc_unreachable();
1208 return;
1210 case 't': /* 128 bits, signed */
1211 case 'd': /* 64 bits, signed */
1212 case 's': /* 32 bits, signed */
1213 if (CONSTANT_P (x))
1215 enum immediate_class c = classify_immediate (x, mode);
1216 switch (c)
1218 case IC_IL1:
1219 constant_to_array (mode, x, arr);
1220 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1221 val = trunc_int_for_mode (val, SImode);
1222 switch (which_immediate_load (val))
1224 case SPU_IL:
1225 break;
1226 case SPU_ILA:
1227 fprintf (file, "a");
1228 break;
1229 case SPU_ILH:
1230 fprintf (file, "h");
1231 break;
1232 case SPU_ILHU:
1233 fprintf (file, "hu");
1234 break;
1235 default:
1236 gcc_unreachable ();
1238 break;
1239 case IC_CPAT:
1240 constant_to_array (mode, x, arr);
1241 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1242 if (info == 1)
1243 fprintf (file, "b");
1244 else if (info == 2)
1245 fprintf (file, "h");
1246 else if (info == 4)
1247 fprintf (file, "w");
1248 else if (info == 8)
1249 fprintf (file, "d");
1250 break;
1251 case IC_IL1s:
1252 if (xcode == CONST_VECTOR)
1254 x = CONST_VECTOR_ELT (x, 0);
1255 xcode = GET_CODE (x);
1257 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1258 fprintf (file, "a");
1259 else if (xcode == HIGH)
1260 fprintf (file, "hu");
1261 break;
1262 case IC_FSMBI:
1263 case IC_FSMBI2:
1264 case IC_IL2:
1265 case IC_IL2s:
1266 case IC_POOL:
1267 abort ();
1270 else
1271 gcc_unreachable ();
1272 return;
1274 case 'T': /* 128 bits, signed */
1275 case 'D': /* 64 bits, signed */
1276 case 'S': /* 32 bits, signed */
1277 if (CONSTANT_P (x))
1279 enum immediate_class c = classify_immediate (x, mode);
1280 switch (c)
1282 case IC_IL1:
1283 constant_to_array (mode, x, arr);
1284 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1285 val = trunc_int_for_mode (val, SImode);
1286 switch (which_immediate_load (val))
1288 case SPU_IL:
1289 case SPU_ILA:
1290 break;
1291 case SPU_ILH:
1292 case SPU_ILHU:
1293 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1294 break;
1295 default:
1296 gcc_unreachable ();
1298 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1299 break;
1300 case IC_FSMBI:
1301 constant_to_array (mode, x, arr);
1302 val = 0;
1303 for (i = 0; i < 16; i++)
1305 val <<= 1;
1306 val |= arr[i] & 1;
1308 print_operand (file, GEN_INT (val), 0);
1309 break;
1310 case IC_CPAT:
1311 constant_to_array (mode, x, arr);
1312 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1313 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1314 break;
1315 case IC_IL1s:
1316 if (xcode == HIGH)
1317 x = XEXP (x, 0);
1318 if (GET_CODE (x) == CONST_VECTOR)
1319 x = CONST_VECTOR_ELT (x, 0);
1320 output_addr_const (file, x);
1321 if (xcode == HIGH)
1322 fprintf (file, "@h");
1323 break;
1324 case IC_IL2:
1325 case IC_IL2s:
1326 case IC_FSMBI2:
1327 case IC_POOL:
1328 abort ();
1331 else
1332 gcc_unreachable ();
1333 return;
1335 case 'C':
1336 if (xcode == CONST_INT)
1338 /* Only the 4 least significant bits are relevant for generating
1339 control word instructions. */
1340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1341 return;
1343 break;
1345 case 'M': /* print code for c*d */
1346 if (GET_CODE (x) == CONST_INT)
1347 switch (INTVAL (x))
1349 case 1:
1350 fprintf (file, "b");
1351 break;
1352 case 2:
1353 fprintf (file, "h");
1354 break;
1355 case 4:
1356 fprintf (file, "w");
1357 break;
1358 case 8:
1359 fprintf (file, "d");
1360 break;
1361 default:
1362 gcc_unreachable();
1364 else
1365 gcc_unreachable();
1366 return;
1368 case 'N': /* Negate the operand */
1369 if (xcode == CONST_INT)
1370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1371 else if (xcode == CONST_VECTOR)
1372 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1373 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1374 return;
1376 case 'I': /* enable/disable interrupts */
1377 if (xcode == CONST_INT)
1378 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1379 return;
1381 case 'b': /* branch modifiers */
1382 if (xcode == REG)
1383 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1384 else if (COMPARISON_P (x))
1385 fprintf (file, "%s", xcode == NE ? "n" : "");
1386 return;
1388 case 'i': /* indirect call */
1389 if (xcode == MEM)
1391 if (GET_CODE (XEXP (x, 0)) == REG)
1392 /* Used in indirect function calls. */
1393 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1394 else
1395 output_address (XEXP (x, 0));
1397 return;
1399 case 'p': /* load/store */
1400 if (xcode == MEM)
1402 x = XEXP (x, 0);
1403 xcode = GET_CODE (x);
1405 if (xcode == AND)
1407 x = XEXP (x, 0);
1408 xcode = GET_CODE (x);
1410 if (xcode == REG)
1411 fprintf (file, "d");
1412 else if (xcode == CONST_INT)
1413 fprintf (file, "a");
1414 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1415 fprintf (file, "r");
1416 else if (xcode == PLUS || xcode == LO_SUM)
1418 if (GET_CODE (XEXP (x, 1)) == REG)
1419 fprintf (file, "x");
1420 else
1421 fprintf (file, "d");
1423 return;
1425 case 'e':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val &= 0x7;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1431 case 'f':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val &= 0x1f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1437 case 'g':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val &= 0x3f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1443 case 'h':
1444 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1445 val = (val >> 3) & 0x1f;
1446 output_addr_const (file, GEN_INT (val));
1447 return;
1449 case 'E':
1450 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1451 val = -val;
1452 val &= 0x7;
1453 output_addr_const (file, GEN_INT (val));
1454 return;
1456 case 'F':
1457 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1458 val = -val;
1459 val &= 0x1f;
1460 output_addr_const (file, GEN_INT (val));
1461 return;
1463 case 'G':
1464 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1465 val = -val;
1466 val &= 0x3f;
1467 output_addr_const (file, GEN_INT (val));
1468 return;
1470 case 'H':
1471 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1472 val = -(val & -8ll);
1473 val = (val >> 3) & 0x1f;
1474 output_addr_const (file, GEN_INT (val));
1475 return;
1477 case 0:
1478 if (xcode == REG)
1479 fprintf (file, "%s", reg_names[REGNO (x)]);
1480 else if (xcode == MEM)
1481 output_address (XEXP (x, 0));
1482 else if (xcode == CONST_VECTOR)
1483 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1484 else
1485 output_addr_const (file, x);
1486 return;
1488 /* unused letters
1489 o qr uvw yz
1490 AB OPQR UVWXYZ */
1491 default:
1492 output_operand_lossage ("invalid %%xn code");
1494 gcc_unreachable ();
1497 extern char call_used_regs[];
1499 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1500 caller saved register. For leaf functions it is more efficient to
1501 use a volatile register because we won't need to save and restore the
1502 pic register. This routine is only valid after register allocation
1503 is completed, so we can pick an unused register. */
1504 static rtx
1505 get_pic_reg (void)
1507 rtx pic_reg = pic_offset_table_rtx;
1508 if (!reload_completed && !reload_in_progress)
1509 abort ();
1510 return pic_reg;
1513 /* Split constant addresses to handle cases that are too large.
1514 Add in the pic register when in PIC mode.
1515 Split immediates that require more than 1 instruction. */
1517 spu_split_immediate (rtx * ops)
1519 enum machine_mode mode = GET_MODE (ops[0]);
1520 enum immediate_class c = classify_immediate (ops[1], mode);
1522 switch (c)
1524 case IC_IL2:
1526 unsigned char arrhi[16];
1527 unsigned char arrlo[16];
1528 rtx to, hi, lo;
1529 int i;
1530 constant_to_array (mode, ops[1], arrhi);
1531 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
1532 for (i = 0; i < 16; i += 4)
1534 arrlo[i + 2] = arrhi[i + 2];
1535 arrlo[i + 3] = arrhi[i + 3];
1536 arrlo[i + 0] = arrlo[i + 1] = 0;
1537 arrhi[i + 2] = arrhi[i + 3] = 0;
1539 hi = array_to_constant (mode, arrhi);
1540 lo = array_to_constant (mode, arrlo);
1541 emit_move_insn (to, hi);
1542 emit_insn (gen_rtx_SET
1543 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1544 return 1;
1546 case IC_FSMBI2:
1548 unsigned char arr_fsmbi[16];
1549 unsigned char arr_andbi[16];
1550 rtx to, reg_fsmbi, reg_and;
1551 int i;
1552 enum machine_mode imode = mode;
1553 /* We need to do reals as ints because the constant used in the
1554 * AND might not be a legitimate real constant. */
1555 imode = int_mode_for_mode (mode);
1556 constant_to_array (mode, ops[1], arr_fsmbi);
1557 if (imode != mode)
1558 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1559 else
1560 to = ops[0];
1561 for (i = 0; i < 16; i++)
1562 if (arr_fsmbi[i] != 0)
1564 arr_andbi[0] = arr_fsmbi[i];
1565 arr_fsmbi[i] = 0xff;
1567 for (i = 1; i < 16; i++)
1568 arr_andbi[i] = arr_andbi[0];
1569 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1570 reg_and = array_to_constant (imode, arr_andbi);
1571 emit_move_insn (to, reg_fsmbi);
1572 emit_insn (gen_rtx_SET
1573 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1574 return 1;
1576 case IC_POOL:
1577 if (reload_in_progress || reload_completed)
1579 rtx mem = force_const_mem (mode, ops[1]);
1580 if (TARGET_LARGE_MEM)
1582 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1583 emit_move_insn (addr, XEXP (mem, 0));
1584 mem = replace_equiv_address (mem, addr);
1586 emit_move_insn (ops[0], mem);
1587 return 1;
1589 break;
1590 case IC_IL1s:
1591 case IC_IL2s:
1592 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1594 if (c == IC_IL2s)
1596 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1597 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1599 else if (flag_pic)
1600 emit_insn (gen_pic (ops[0], ops[1]));
1601 if (flag_pic)
1603 rtx pic_reg = get_pic_reg ();
1604 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1605 current_function_uses_pic_offset_table = 1;
1607 return flag_pic || c == IC_IL2s;
1609 break;
1610 case IC_IL1:
1611 case IC_FSMBI:
1612 case IC_CPAT:
1613 break;
1615 return 0;
1618 /* SAVING is TRUE when we are generating the actual load and store
1619 instructions for REGNO. When determining the size of the stack
1620 needed for saving registers, we must allocate enough space for the
1621 worst case, because we don't always have the information early enough
1622 to not allocate it. But we can at least eliminate the actual loads
1623 and stores during the prologue/epilogue. */
1624 static int
1625 need_to_save_reg (int regno, int saving)
1627 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1628 return 1;
1629 if (flag_pic
1630 && regno == PIC_OFFSET_TABLE_REGNUM
1631 && (!saving || current_function_uses_pic_offset_table)
1632 && (!saving
1633 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1634 return 1;
1635 return 0;
1638 /* This function is only correct starting with local register
1639 allocation */
1641 spu_saved_regs_size (void)
1643 int reg_save_size = 0;
1644 int regno;
1646 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1647 if (need_to_save_reg (regno, 0))
1648 reg_save_size += 0x10;
1649 return reg_save_size;
1652 static rtx
1653 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1655 rtx reg = gen_rtx_REG (V4SImode, regno);
1656 rtx mem =
1657 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1658 return emit_insn (gen_movv4si (mem, reg));
1661 static rtx
1662 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1664 rtx reg = gen_rtx_REG (V4SImode, regno);
1665 rtx mem =
1666 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1667 return emit_insn (gen_movv4si (reg, mem));
1670 /* This happens after reload, so we need to expand it. */
1671 static rtx
1672 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1674 rtx insn;
1675 if (satisfies_constraint_K (GEN_INT (imm)))
1677 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1679 else
1681 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1682 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1683 if (REGNO (src) == REGNO (scratch))
1684 abort ();
1686 return insn;
1689 /* Return nonzero if this function is known to have a null epilogue. */
1692 direct_return (void)
1694 if (reload_completed)
1696 if (cfun->static_chain_decl == 0
1697 && (spu_saved_regs_size ()
1698 + get_frame_size ()
1699 + current_function_outgoing_args_size
1700 + current_function_pretend_args_size == 0)
1701 && current_function_is_leaf)
1702 return 1;
1704 return 0;
1708 The stack frame looks like this:
1709 +-------------+
1710 | incoming |
1711 AP | args |
1712 +-------------+
1713 | $lr save |
1714 +-------------+
1715 prev SP | back chain |
1716 +-------------+
1717 | var args |
1718 | reg save | current_function_pretend_args_size bytes
1719 +-------------+
1720 | ... |
1721 | saved regs | spu_saved_regs_size() bytes
1722 +-------------+
1723 | ... |
1724 FP | vars | get_frame_size() bytes
1725 +-------------+
1726 | ... |
1727 | outgoing |
1728 | args | current_function_outgoing_args_size bytes
1729 +-------------+
1730 | $lr of next |
1731 | frame |
1732 +-------------+
1733 SP | back chain |
1734 +-------------+
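/* The prologue below allocates get_frame_size () plus spu_saved_regs_size ()
   plus the outgoing and pretend argument areas, and adds STACK_POINTER_OFFSET
   for any frame that is non-leaf, calls alloca, or is otherwise non-empty.  */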
1737 void
1738 spu_expand_prologue (void)
1740 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1741 HOST_WIDE_INT total_size;
1742 HOST_WIDE_INT saved_regs_size;
1743 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1744 rtx scratch_reg_0, scratch_reg_1;
1745 rtx insn, real;
1747 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1748 the "toplevel" insn chain. */
1749 emit_note (NOTE_INSN_DELETED);
1751 if (flag_pic && optimize == 0)
1752 current_function_uses_pic_offset_table = 1;
1754 if (spu_naked_function_p (current_function_decl))
1755 return;
1757 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1758 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1760 saved_regs_size = spu_saved_regs_size ();
1761 total_size = size + saved_regs_size
1762 + current_function_outgoing_args_size
1763 + current_function_pretend_args_size;
1765 if (!current_function_is_leaf
1766 || current_function_calls_alloca || total_size > 0)
1767 total_size += STACK_POINTER_OFFSET;
1769 /* Save this first because code after this might use the link
1770 register as a scratch register. */
1771 if (!current_function_is_leaf)
1773 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1774 RTX_FRAME_RELATED_P (insn) = 1;
1777 if (total_size > 0)
1779 offset = -current_function_pretend_args_size;
1780 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1781 if (need_to_save_reg (regno, 1))
1783 offset -= 16;
1784 insn = frame_emit_store (regno, sp_reg, offset);
1785 RTX_FRAME_RELATED_P (insn) = 1;
1789 if (flag_pic && current_function_uses_pic_offset_table)
1791 rtx pic_reg = get_pic_reg ();
1792 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1793 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1796 if (total_size > 0)
1798 if (flag_stack_check)
1800 /* We compare against total_size-1 because
1801 ($sp >= total_size) <=> ($sp > total_size-1) */
1802 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1803 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1804 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1805 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1807 emit_move_insn (scratch_v4si, size_v4si);
1808 size_v4si = scratch_v4si;
1810 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1811 emit_insn (gen_vec_extractv4si
1812 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1813 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1816 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1817 the value of the previous $sp because we save it as the back
1818 chain. */
1819 if (total_size <= 2000)
1821 /* In this case we save the back chain first. */
1822 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1823 insn =
1824 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1826 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1828 insn = emit_move_insn (scratch_reg_0, sp_reg);
1829 insn =
1830 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1832 else
1834 insn = emit_move_insn (scratch_reg_0, sp_reg);
1835 insn =
1836 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1838 RTX_FRAME_RELATED_P (insn) = 1;
1839 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1840 REG_NOTES (insn) =
1841 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1843 if (total_size > 2000)
1845 /* Save the back chain ptr */
1846 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1849 if (frame_pointer_needed)
1851 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1852 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1853 + current_function_outgoing_args_size;
1854 /* Set the new frame_pointer */
1855 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1856 RTX_FRAME_RELATED_P (insn) = 1;
1857 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1858 REG_NOTES (insn) =
1859 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1860 real, REG_NOTES (insn));
1861 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1865 emit_note (NOTE_INSN_DELETED);
1868 void
1869 spu_expand_epilogue (bool sibcall_p)
1871 int size = get_frame_size (), offset, regno;
1872 HOST_WIDE_INT saved_regs_size, total_size;
1873 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1874 rtx jump, scratch_reg_0;
1876 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1877 the "toplevel" insn chain. */
1878 emit_note (NOTE_INSN_DELETED);
1880 if (spu_naked_function_p (current_function_decl))
1881 return;
1883 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1885 saved_regs_size = spu_saved_regs_size ();
1886 total_size = size + saved_regs_size
1887 + current_function_outgoing_args_size
1888 + current_function_pretend_args_size;
1890 if (!current_function_is_leaf
1891 || current_function_calls_alloca || total_size > 0)
1892 total_size += STACK_POINTER_OFFSET;
1894 if (total_size > 0)
1896 if (current_function_calls_alloca)
1897 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1898 else
1899 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1902 if (saved_regs_size > 0)
1904 offset = -current_function_pretend_args_size;
1905 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1906 if (need_to_save_reg (regno, 1))
1908 offset -= 0x10;
1909 frame_emit_load (regno, sp_reg, offset);
1914 if (!current_function_is_leaf)
1915 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1917 if (!sibcall_p)
1919 emit_insn (gen_rtx_USE
1920 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1921 jump = emit_jump_insn (gen__return ());
1922 emit_barrier_after (jump);
1925 emit_note (NOTE_INSN_DELETED);
1929 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1931 if (count != 0)
1932 return 0;
1933 /* This is inefficient because it ends up copying to a save-register
1934 which then gets saved even though $lr has already been saved. But
1935 it does generate better code for leaf functions and we don't need
1936 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1937 used for __builtin_return_address anyway, so maybe we don't care if
1938 it's inefficient. */
1939 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1943 /* Given VAL, generate a constant appropriate for MODE.
1944 If MODE is a vector mode, every element will be VAL.
1945 For TImode, VAL will be zero extended to 128 bits. */
1947 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1949 rtx inner;
1950 rtvec v;
1951 int units, i;
1953 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1954 || GET_MODE_CLASS (mode) == MODE_FLOAT
1955 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1956 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1958 if (GET_MODE_CLASS (mode) == MODE_INT)
1959 return immed_double_const (val, 0, mode);
1961 /* val is the bit representation of the float */
1962 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1963 return hwint_to_const_double (mode, val);
1965 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1966 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1967 else
1968 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1970 units = GET_MODE_NUNITS (mode);
1972 v = rtvec_alloc (units);
1974 for (i = 0; i < units; ++i)
1975 RTVEC_ELT (v, i) = inner;
1977 return gen_rtx_CONST_VECTOR (mode, v);
1980 /* branch hint stuff */
1982 /* The hardware requires 8 insns between a hint and the branch it
1983 affects. This variable describes how many rtl instructions the
1984 compiler needs to see before inserting a hint. (FIXME: We should
1985 accept less and insert nops to enforce it because hinting is always
1986 profitable for performance, but we do need to be careful of code
1987 size.) */
1988 int spu_hint_dist = (8 * 4);
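/* I.e. 8 instructions of 4 bytes each.  */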
1990 /* Create a MODE vector constant from 4 ints. */
1992 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1994 unsigned char arr[16];
1995 arr[0] = (a >> 24) & 0xff;
1996 arr[1] = (a >> 16) & 0xff;
1997 arr[2] = (a >> 8) & 0xff;
1998 arr[3] = (a >> 0) & 0xff;
1999 arr[4] = (b >> 24) & 0xff;
2000 arr[5] = (b >> 16) & 0xff;
2001 arr[6] = (b >> 8) & 0xff;
2002 arr[7] = (b >> 0) & 0xff;
2003 arr[8] = (c >> 24) & 0xff;
2004 arr[9] = (c >> 16) & 0xff;
2005 arr[10] = (c >> 8) & 0xff;
2006 arr[11] = (c >> 0) & 0xff;
2007 arr[12] = (d >> 24) & 0xff;
2008 arr[13] = (d >> 16) & 0xff;
2009 arr[14] = (d >> 8) & 0xff;
2010 arr[15] = (d >> 0) & 0xff;
2011 return array_to_constant(mode, arr);
2014 /* An array of these is used to propagate hints to predecessor blocks. */
2015 struct spu_bb_info
2017 rtx prop_jump; /* propagated from another block */
2018 basic_block bb; /* the original block. */
2021 /* The special $hbr register is used to prevent the insn scheduler from
2022 moving hbr insns across instructions which invalidate them. It
2023 should only be used in a clobber, and this function searches for
2024 insns which clobber it. */
2025 static bool
2026 insn_clobbers_hbr (rtx insn)
2028 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2030 rtx parallel = PATTERN (insn);
2031 rtx clobber;
2032 int j;
2033 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2035 clobber = XVECEXP (parallel, 0, j);
2036 if (GET_CODE (clobber) == CLOBBER
2037 && GET_CODE (XEXP (clobber, 0)) == REG
2038 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2039 return 1;
2042 return 0;
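/* Emit an hbr insn before BEFORE, hinting that BRANCH will go to TARGET.
   Hints are skipped when any operand is missing or when DISTANCE, the byte
   distance to the branch, is more than 600.  */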
2045 static void
2046 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2048 rtx branch_label;
2049 rtx hint, insn, prev, next;
2051 if (before == 0 || branch == 0 || target == 0)
2052 return;
2054 if (distance > 600)
2055 return;
2058 branch_label = gen_label_rtx ();
2059 LABEL_NUSES (branch_label)++;
2060 LABEL_PRESERVE_P (branch_label) = 1;
2061 insn = emit_label_before (branch_label, branch);
2062 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2064 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2065 the current insn is pipe0, dual issue with it. */
2066 prev = prev_active_insn (before);
2067 if (prev && get_pipe (prev) == 0)
2068 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2069 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2071 next = next_active_insn (before);
2072 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2073 if (next)
2074 PUT_MODE (next, TImode);
2076 else
2078 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2079 PUT_MODE (hint, TImode);
2081 recog_memoized (hint);
2084 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2085 the rtx for the branch target. */
2086 static rtx
2087 get_branch_target (rtx branch)
2089 if (GET_CODE (branch) == JUMP_INSN)
2091 rtx set, src;
2093 /* Return statements */
2094 if (GET_CODE (PATTERN (branch)) == RETURN)
2095 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2097 /* jump table */
2098 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2099 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2100 return 0;
2102 set = single_set (branch);
2103 src = SET_SRC (set);
2104 if (GET_CODE (SET_DEST (set)) != PC)
2105 abort ();
2107 if (GET_CODE (src) == IF_THEN_ELSE)
2109 rtx lab = 0;
2110 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2111 if (note)
2113 /* If the more probable case is not a fall through, then
2114 try a branch hint. */
2115 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2116 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2117 && GET_CODE (XEXP (src, 1)) != PC)
2118 lab = XEXP (src, 1);
2119 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2120 && GET_CODE (XEXP (src, 2)) != PC)
2121 lab = XEXP (src, 2);
2123 if (lab)
2125 if (GET_CODE (lab) == RETURN)
2126 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2127 return lab;
2129 return 0;
2132 return src;
2134 else if (GET_CODE (branch) == CALL_INSN)
2136 rtx call;
2137 /* All of our call patterns are in a PARALLEL and the CALL is
2138 the first pattern in the PARALLEL. */
2139 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2140 abort ();
2141 call = XVECEXP (PATTERN (branch), 0, 0);
2142 if (GET_CODE (call) == SET)
2143 call = SET_SRC (call);
2144 if (GET_CODE (call) != CALL)
2145 abort ();
2146 return XEXP (XEXP (call, 0), 0);
2148 return 0;
2151 static void
2152 insert_branch_hints (void)
2154 struct spu_bb_info *spu_bb_info;
2155 rtx branch, insn, next;
2156 rtx branch_target = 0;
2157 int branch_addr = 0, insn_addr, head_addr;
2158 basic_block bb;
2159 unsigned int j;
2161 spu_bb_info =
2162 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2163 sizeof (struct spu_bb_info));
2165 /* We need exact insn addresses and lengths. */
2166 shorten_branches (get_insns ());
2168 FOR_EACH_BB_REVERSE (bb)
2170 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2171 branch = 0;
2172 if (spu_bb_info[bb->index].prop_jump)
2174 branch = spu_bb_info[bb->index].prop_jump;
2175 branch_target = get_branch_target (branch);
2176 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2178 /* Search from end of a block to beginning. In this loop, find
2179 jumps which need a branch hint and emit the hint only when:
2180 - it's an indirect branch and we're at the insn which sets
2181 the register
2182 - we're at an insn that will invalidate the hint. e.g., a
2183 call, another hint insn, inline asm that clobbers $hbr, and
2184 some inlined operations (divmodsi4). Don't consider jumps
2185 because they are only at the end of a block and are
2186 considered when we are deciding whether to propagate
2187 - we're getting too far away from the branch. The hbr insns
2188 only have a signed 10-bit offset
2189 We go back as far as possible so the branch will be considered
2190 for propagation when we get to the beginning of the block. */
2191 next = 0;
2192 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2194 if (INSN_P (insn))
2196 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2197 if (branch && next
2198 && ((GET_CODE (branch_target) == REG
2199 && set_of (branch_target, insn) != NULL_RTX)
2200 || insn_clobbers_hbr (insn)
2201 || branch_addr - insn_addr > 600))
2203 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2204 if (insn != BB_END (bb)
2205 && branch_addr - next_addr >= spu_hint_dist)
2207 if (dump_file)
2208 fprintf (dump_file,
2209 "hint for %i in block %i before %i\n",
2210 INSN_UID (branch), bb->index, INSN_UID (next));
2211 spu_emit_branch_hint (next, branch, branch_target,
2212 branch_addr - next_addr);
2214 branch = 0;
2217 /* JUMP_P will only be true at the end of a block. When
2218 branch is already set it means we've previously decided
2219 to propagate a hint for that branch into this block. */
2220 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2222 branch = 0;
2223 if ((branch_target = get_branch_target (insn)))
2225 branch = insn;
2226 branch_addr = insn_addr;
2230 /* When a branch hint is emitted it will be inserted
2231 before "next". Make sure next is the beginning of a
2232 cycle to minimize impact on the scheduled insns. */
2233 if (GET_MODE (insn) == TImode)
2234 next = insn;
2236 if (insn == BB_HEAD (bb))
2237 break;
2240 if (branch)
2242 /* If we haven't emitted a hint for this branch yet, it might
2243 be profitable to emit it in one of the predecessor blocks,
2244 especially for loops. */
2245 rtx bbend;
2246 basic_block prev = 0, prop = 0, prev2 = 0;
2247 int loop_exit = 0, simple_loop = 0;
2248 int next_addr = 0;
2249 if (next)
2250 next_addr = INSN_ADDRESSES (INSN_UID (next));
2252 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2253 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2254 prev = EDGE_PRED (bb, j)->src;
2255 else
2256 prev2 = EDGE_PRED (bb, j)->src;
2258 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2259 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2260 loop_exit = 1;
2261 else if (EDGE_SUCC (bb, j)->dest == bb)
2262 simple_loop = 1;
2264 /* If this branch is a loop exit then propagate to previous
2265 fallthru block. This catches the cases when it is a simple
2266 loop or when there is an initial branch into the loop. */
2267 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2268 prop = prev;
2270 /* If there is only one adjacent predecessor, don't propagate
2271 outside this loop. This loop_depth test isn't perfect, but
2272 I'm not sure the loop_father member is valid at this point. */
2273 else if (prev && single_pred_p (bb)
2274 && prev->loop_depth == bb->loop_depth)
2275 prop = prev;
2277 /* If this is the JOIN block of a simple IF-THEN then
2278 propagate the hint to the HEADER block. */
2279 else if (prev && prev2
2280 && EDGE_COUNT (bb->preds) == 2
2281 && EDGE_COUNT (prev->preds) == 1
2282 && EDGE_PRED (prev, 0)->src == prev2
2283 && prev2->loop_depth == bb->loop_depth
2284 && GET_CODE (branch_target) != REG)
2285 prop = prev;
2287 /* Don't propagate when:
2288 - this is a simple loop and the hint would be too far
2289 - this is not a simple loop and there are 16 insns in
2290 this block already
2291 - the predecessor block ends in a branch that will be
2292 hinted
2293 - the predecessor block ends in an insn that invalidates
2294 the hint */
2295 if (prop
2296 && prop->index >= 0
2297 && (bbend = BB_END (prop))
2298 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2299 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2300 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2302 if (dump_file)
2303 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2304 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2305 bb->index, prop->index, bb->loop_depth,
2306 INSN_UID (branch), loop_exit, simple_loop,
2307 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2309 spu_bb_info[prop->index].prop_jump = branch;
2310 spu_bb_info[prop->index].bb = bb;
2312 else if (next && branch_addr - next_addr >= spu_hint_dist)
2314 if (dump_file)
2315 fprintf (dump_file, "hint for %i in block %i before %i\n",
2316 INSN_UID (branch), bb->index, INSN_UID (next));
2317 spu_emit_branch_hint (next, branch, branch_target,
2318 branch_addr - next_addr);
2320 branch = 0;
2323 free (spu_bb_info);
2326 /* Emit a nop for INSN such that the two will dual issue. This assumes
2327 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2328 We check for TImode to handle a MULTI1 insn which has dual issued its
2329 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2330 ADDR_VEC insns. */
2331 static void
2332 emit_nop_for_insn (rtx insn)
2334 int p;
2335 rtx new_insn;
2336 p = get_pipe (insn);
2337 if (p == 1 && GET_MODE (insn) == TImode)
2339 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2340 PUT_MODE (new_insn, TImode);
2341 PUT_MODE (insn, VOIDmode);
2343 else
2344 new_insn = emit_insn_after (gen_lnop (), insn);
2347 /* Insert nops in basic blocks to meet dual issue alignment
2348 requirements. */
2349 static void
2350 insert_nops (void)
2352 rtx insn, next_insn, prev_insn;
2353 int length;
2354 int addr;
2356 /* This sets up INSN_ADDRESSES. */
2357 shorten_branches (get_insns ());
2359 /* Keep track of length added by nops. */
2360 length = 0;
2362 prev_insn = 0;
2363 for (insn = get_insns (); insn; insn = next_insn)
2365 next_insn = next_active_insn (insn);
2366 addr = INSN_ADDRESSES (INSN_UID (insn));
2367 if (GET_MODE (insn) == TImode
2368 && next_insn
2369 && GET_MODE (next_insn) != TImode
2370 && ((addr + length) & 7) != 0)
2372 /* prev_insn will always be set because the first insn is
2373 always 8-byte aligned. */
2374 emit_nop_for_insn (prev_insn);
2375 length += 4;
2377 prev_insn = insn;
2381 static void
2382 spu_machine_dependent_reorg (void)
2384 if (optimize > 0)
2386 if (TARGET_BRANCH_HINTS)
2387 insert_branch_hints ();
2388 insert_nops ();
2393 /* Insn scheduling routines, primarily for dual issue. */
2394 static int
2395 spu_sched_issue_rate (void)
2397 return 2;
2400 static int
2401 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2402 int verbose ATTRIBUTE_UNUSED, rtx insn,
2403 int can_issue_more)
2405 if (GET_CODE (PATTERN (insn)) != USE
2406 && GET_CODE (PATTERN (insn)) != CLOBBER
2407 && get_pipe (insn) != -2)
2408 can_issue_more--;
2409 return can_issue_more;
2412 static int
2413 get_pipe (rtx insn)
2415 enum attr_type t;
2416 /* Handle inline asm */
2417 if (INSN_CODE (insn) == -1)
2418 return -1;
2419 t = get_attr_type (insn);
2420 switch (t)
2422 case TYPE_CONVERT:
2423 return -2;
2424 case TYPE_MULTI0:
2425 return -1;
2427 case TYPE_FX2:
2428 case TYPE_FX3:
2429 case TYPE_SPR:
2430 case TYPE_NOP:
2431 case TYPE_FXB:
2432 case TYPE_FPD:
2433 case TYPE_FP6:
2434 case TYPE_FP7:
2435 case TYPE_IPREFETCH:
2436 return 0;
2438 case TYPE_LNOP:
2439 case TYPE_SHUF:
2440 case TYPE_LOAD:
2441 case TYPE_STORE:
2442 case TYPE_BR:
2443 case TYPE_MULTI1:
2444 case TYPE_HBR:
2445 return 1;
2446 default:
2447 abort ();
2451 static int
2452 spu_sched_adjust_priority (rtx insn, int pri)
2454 int p = get_pipe (insn);
2455 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2456 * scheduling. */
2457 if (GET_CODE (PATTERN (insn)) == USE
2458 || GET_CODE (PATTERN (insn)) == CLOBBER
2459 || p == -2)
2460 return pri + 100;
2461 /* Schedule pipe0 insns early for greedier dual issue. */
2462 if (p != 1)
2463 return pri + 50;
2464 return pri;
2467 /* INSN is dependent on DEP_INSN. */
2468 static int
2469 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2470 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2472 if (GET_CODE (insn) == CALL_INSN)
2473 return cost - 2;
2474 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2475 scheduler makes every insn in a block anti-dependent on the final
2476 jump_insn. We adjust here so higher cost insns will get scheduled
2477 earlier. */
2478 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2479 return insn_cost (dep_insn) - 3;
2480 return cost;
2483 /* Create a CONST_DOUBLE from a string. */
2484 struct rtx_def *
2485 spu_float_const (const char *string, enum machine_mode mode)
2487 REAL_VALUE_TYPE value;
2488 value = REAL_VALUE_ATOF (string, mode);
2489 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2492 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2493 CONST_INT fits constraint 'K', i.e., is small. */
2495 legitimate_const (rtx x, int aligned)
2497 /* We can never know if the resulting address fits in 18 bits and can be
2498 loaded with ila. Instead we should use the HI and LO relocations to
2499 load a 32-bit address. */
2500 rtx sym, cst;
2502 gcc_assert (GET_CODE (x) == CONST);
2504 if (GET_CODE (XEXP (x, 0)) != PLUS)
2505 return 0;
2506 sym = XEXP (XEXP (x, 0), 0);
2507 cst = XEXP (XEXP (x, 0), 1);
2508 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2509 return 0;
2510 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2511 return 0;
2512 return satisfies_constraint_K (cst);
2516 spu_constant_address_p (rtx x)
2518 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2519 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2520 || GET_CODE (x) == HIGH);
2523 static enum spu_immediate
2524 which_immediate_load (HOST_WIDE_INT val)
2526 gcc_assert (val == trunc_int_for_mode (val, SImode));
2528 if (val >= -0x8000 && val <= 0x7fff)
2529 return SPU_IL;
2530 if (val >= 0 && val <= 0x3ffff)
2531 return SPU_ILA;
2532 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2533 return SPU_ILH;
2534 if ((val & 0xffff) == 0)
2535 return SPU_ILHU;
2537 return SPU_NONE;
2540 /* Return true when OP can be loaded by one of the il instructions, or
2541 when epilogue generation is not yet complete and OP can be loaded using ilhu and iohl. */
2543 immediate_load_p (rtx op, enum machine_mode mode)
2545 if (CONSTANT_P (op))
2547 enum immediate_class c = classify_immediate (op, mode);
2548 return c == IC_IL1 || c == IC_IL1s
2549 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
2551 return 0;
2554 /* Return true if the first SIZE bytes of arr form a constant that can be
2555 generated with cbd, chd, cwd or cdd. When non-NULL, *PRUN and *PSTART
2556 are set to the element size and byte offset to use for the instruction. */
2557 static int
2558 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2560 int cpat, run, i, start;
2561 cpat = 1;
2562 run = 0;
2563 start = -1;
2564 for (i = 0; i < size && cpat; i++)
2565 if (arr[i] != i+16)
2567 if (!run)
2569 start = i;
2570 if (arr[i] == 3)
2571 run = 1;
2572 else if (arr[i] == 2 && arr[i+1] == 3)
2573 run = 2;
2574 else if (arr[i] == 0)
2576 while (arr[i+run] == run && i+run < 16)
2577 run++;
2578 if (run != 4 && run != 8)
2579 cpat = 0;
2581 else
2582 cpat = 0;
2583 if ((i & (run-1)) != 0)
2584 cpat = 0;
2585 i += run;
2587 else
2588 cpat = 0;
2590 if (cpat && (run || size < 16))
2592 if (run == 0)
2593 run = 1;
2594 if (prun)
2595 *prun = run;
2596 if (pstart)
2597 *pstart = start == -1 ? 16-run : start;
2598 return 1;
2600 return 0;
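/* For example, the pattern gen_cpat_const builds for a 4-byte (cwd-style)
   insert at byte offset 4 is { 16,17,18,19, 0,1,2,3, 24,...,31 }: identity
   bytes i+16 everywhere except a run 0..3 at the insertion point, which is
   exactly the shape the loop above recognizes. */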
2603 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2604 it into a register. MODE is only valid when OP is a CONST_INT. */
2605 static enum immediate_class
2606 classify_immediate (rtx op, enum machine_mode mode)
2608 HOST_WIDE_INT val;
2609 unsigned char arr[16];
2610 int i, j, repeated, fsmbi, repeat;
2612 gcc_assert (CONSTANT_P (op));
2614 if (GET_MODE (op) != VOIDmode)
2615 mode = GET_MODE (op);
2617 /* A V4SI const_vector with all identical symbols is ok. */
2618 if (!flag_pic
2619 && mode == V4SImode
2620 && GET_CODE (op) == CONST_VECTOR
2621 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2622 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2623 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2624 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2625 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2626 op = CONST_VECTOR_ELT (op, 0);
2628 switch (GET_CODE (op))
2630 case SYMBOL_REF:
2631 case LABEL_REF:
2632 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2634 case CONST:
2635 return TARGET_LARGE_MEM
2636 || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;
2638 case HIGH:
2639 return IC_IL1s;
2641 case CONST_VECTOR:
2642 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2643 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2644 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2645 return IC_POOL;
2646 /* Fall through. */
2648 case CONST_INT:
2649 case CONST_DOUBLE:
2650 constant_to_array (mode, op, arr);
2652 /* Check that each 4-byte slot is identical. */
2653 repeated = 1;
2654 for (i = 4; i < 16; i += 4)
2655 for (j = 0; j < 4; j++)
2656 if (arr[j] != arr[i + j])
2657 repeated = 0;
2659 if (repeated)
2661 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2662 val = trunc_int_for_mode (val, SImode);
2664 if (which_immediate_load (val) != SPU_NONE)
2665 return IC_IL1;
2668 /* Any mode of 2 bytes or smaller can be loaded with an il
2669 instruction. */
2670 gcc_assert (GET_MODE_SIZE (mode) > 2);
2672 fsmbi = 1;
2673 repeat = 0;
2674 for (i = 0; i < 16 && fsmbi; i++)
2675 if (arr[i] != 0 && repeat == 0)
2676 repeat = arr[i];
2677 else if (arr[i] != 0 && arr[i] != repeat)
2678 fsmbi = 0;
2679 if (fsmbi)
2680 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
2682 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2683 return IC_CPAT;
2685 if (repeated)
2686 return IC_IL2;
2688 return IC_POOL;
2689 default:
2690 break;
2692 gcc_unreachable ();
2695 static enum spu_immediate
2696 which_logical_immediate (HOST_WIDE_INT val)
2698 gcc_assert (val == trunc_int_for_mode (val, SImode));
2700 if (val >= -0x200 && val <= 0x1ff)
2701 return SPU_ORI;
2702 if (val >= 0 && val <= 0xffff)
2703 return SPU_IOHL;
2704 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2706 val = trunc_int_for_mode (val, HImode);
2707 if (val >= -0x200 && val <= 0x1ff)
2708 return SPU_ORHI;
2709 if ((val & 0xff) == ((val >> 8) & 0xff))
2711 val = trunc_int_for_mode (val, QImode);
2712 if (val >= -0x200 && val <= 0x1ff)
2713 return SPU_ORBI;
2716 return SPU_NONE;
2719 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2720 CONST_DOUBLEs. */
2721 static int
2722 const_vector_immediate_p (rtx x)
2724 int i;
2725 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2726 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2727 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2728 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2729 return 0;
2730 return 1;
2734 logical_immediate_p (rtx op, enum machine_mode mode)
2736 HOST_WIDE_INT val;
2737 unsigned char arr[16];
2738 int i, j;
2740 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2741 || GET_CODE (op) == CONST_VECTOR);
2743 if (GET_CODE (op) == CONST_VECTOR
2744 && !const_vector_immediate_p (op))
2745 return 0;
2747 if (GET_MODE (op) != VOIDmode)
2748 mode = GET_MODE (op);
2750 constant_to_array (mode, op, arr);
2752 /* Check that bytes are repeated. */
2753 for (i = 4; i < 16; i += 4)
2754 for (j = 0; j < 4; j++)
2755 if (arr[j] != arr[i + j])
2756 return 0;
2758 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2759 val = trunc_int_for_mode (val, SImode);
2761 i = which_logical_immediate (val);
2762 return i != SPU_NONE && i != SPU_IOHL;
2766 iohl_immediate_p (rtx op, enum machine_mode mode)
2768 HOST_WIDE_INT val;
2769 unsigned char arr[16];
2770 int i, j;
2772 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2773 || GET_CODE (op) == CONST_VECTOR);
2775 if (GET_CODE (op) == CONST_VECTOR
2776 && !const_vector_immediate_p (op))
2777 return 0;
2779 if (GET_MODE (op) != VOIDmode)
2780 mode = GET_MODE (op);
2782 constant_to_array (mode, op, arr);
2784 /* Check that bytes are repeated. */
2785 for (i = 4; i < 16; i += 4)
2786 for (j = 0; j < 4; j++)
2787 if (arr[j] != arr[i + j])
2788 return 0;
2790 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2791 val = trunc_int_for_mode (val, SImode);
2793 return val >= 0 && val <= 0xffff;
2797 arith_immediate_p (rtx op, enum machine_mode mode,
2798 HOST_WIDE_INT low, HOST_WIDE_INT high)
2800 HOST_WIDE_INT val;
2801 unsigned char arr[16];
2802 int bytes, i, j;
2804 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2805 || GET_CODE (op) == CONST_VECTOR);
2807 if (GET_CODE (op) == CONST_VECTOR
2808 && !const_vector_immediate_p (op))
2809 return 0;
2811 if (GET_MODE (op) != VOIDmode)
2812 mode = GET_MODE (op);
2814 constant_to_array (mode, op, arr);
2816 if (VECTOR_MODE_P (mode))
2817 mode = GET_MODE_INNER (mode);
2819 bytes = GET_MODE_SIZE (mode);
2820 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2822 /* Check that bytes are repeated. */
2823 for (i = bytes; i < 16; i += bytes)
2824 for (j = 0; j < bytes; j++)
2825 if (arr[j] != arr[i + j])
2826 return 0;
2828 val = arr[0];
2829 for (j = 1; j < bytes; j++)
2830 val = (val << 8) | arr[j];
2832 val = trunc_int_for_mode (val, mode);
2834 return val >= low && val <= high;
2837 /* We accept:
2838 - any 32-bit constant (SImode, SFmode)
2839 - any constant that can be generated with fsmbi (any mode)
2840 - a 64-bit constant where the high and low bits are identical
2841 (DImode, DFmode)
2842 - a 128-bit constant where the four 32-bit words match. */
2844 spu_legitimate_constant_p (rtx x)
2846 if (GET_CODE (x) == HIGH)
2847 x = XEXP (x, 0);
2848 /* V4SI with all identical symbols is valid. */
2849 if (!flag_pic
2850 && GET_MODE (x) == V4SImode
2851 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2852 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2853 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
2854 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2855 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2856 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2858 if (GET_CODE (x) == CONST_VECTOR
2859 && !const_vector_immediate_p (x))
2860 return 0;
2861 return 1;
2864 /* Valid addresses are:
2865 - symbol_ref, label_ref, const
2866 - reg
2867 - reg + const, where either reg or const is 16 byte aligned
2868 - reg + reg, alignment doesn't matter
2869 The alignment matters in the reg+const case because lqd and stqd
2870 ignore the 4 least significant bits of the const. (TODO: It might be
2871 preferable to allow any alignment and fix it up when splitting.) */
2873 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2874 rtx x, int reg_ok_strict)
2876 if (mode == TImode && GET_CODE (x) == AND
2877 && GET_CODE (XEXP (x, 1)) == CONST_INT
2878 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2879 x = XEXP (x, 0);
2880 switch (GET_CODE (x))
2882 case SYMBOL_REF:
2883 case LABEL_REF:
2884 return !TARGET_LARGE_MEM;
2886 case CONST:
2887 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
2889 case CONST_INT:
2890 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2892 case SUBREG:
2893 x = XEXP (x, 0);
2894 gcc_assert (GET_CODE (x) == REG);
2896 case REG:
2897 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2899 case PLUS:
2900 case LO_SUM:
2902 rtx op0 = XEXP (x, 0);
2903 rtx op1 = XEXP (x, 1);
2904 if (GET_CODE (op0) == SUBREG)
2905 op0 = XEXP (op0, 0);
2906 if (GET_CODE (op1) == SUBREG)
2907 op1 = XEXP (op1, 0);
2908 /* We can't just accept any aligned register because CSE can
2909 change it to a register that is not marked aligned and then
2910 recog will fail. So we only accept frame registers because
2911 they will only be changed to other frame registers. */
2912 if (GET_CODE (op0) == REG
2913 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2914 && GET_CODE (op1) == CONST_INT
2915 && INTVAL (op1) >= -0x2000
2916 && INTVAL (op1) <= 0x1fff
2917 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2918 return 1;
2919 if (GET_CODE (op0) == REG
2920 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2921 && GET_CODE (op1) == REG
2922 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2923 return 1;
2925 break;
2927 default:
2928 break;
2930 return 0;
2933 /* When the address is reg + const_int, force the const_int into a
2934 register. */
2936 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2937 enum machine_mode mode)
2939 rtx op0, op1;
2940 /* Make sure both operands are registers. */
2941 if (GET_CODE (x) == PLUS)
2943 op0 = XEXP (x, 0);
2944 op1 = XEXP (x, 1);
2945 if (ALIGNED_SYMBOL_REF_P (op0))
2947 op0 = force_reg (Pmode, op0);
2948 mark_reg_pointer (op0, 128);
2950 else if (GET_CODE (op0) != REG)
2951 op0 = force_reg (Pmode, op0);
2952 if (ALIGNED_SYMBOL_REF_P (op1))
2954 op1 = force_reg (Pmode, op1);
2955 mark_reg_pointer (op1, 128);
2957 else if (GET_CODE (op1) != REG)
2958 op1 = force_reg (Pmode, op1);
2959 x = gen_rtx_PLUS (Pmode, op0, op1);
2960 if (spu_legitimate_address (mode, x, 0))
2961 return x;
2963 return NULL_RTX;
2966 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2967 struct attribute_spec.handler. */
2968 static tree
2969 spu_handle_fndecl_attribute (tree * node,
2970 tree name,
2971 tree args ATTRIBUTE_UNUSED,
2972 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2974 if (TREE_CODE (*node) != FUNCTION_DECL)
2976 warning (0, "`%s' attribute only applies to functions",
2977 IDENTIFIER_POINTER (name));
2978 *no_add_attrs = true;
2981 return NULL_TREE;
2984 /* Handle the "vector" attribute. */
2985 static tree
2986 spu_handle_vector_attribute (tree * node, tree name,
2987 tree args ATTRIBUTE_UNUSED,
2988 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2990 tree type = *node, result = NULL_TREE;
2991 enum machine_mode mode;
2992 int unsigned_p;
2994 while (POINTER_TYPE_P (type)
2995 || TREE_CODE (type) == FUNCTION_TYPE
2996 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2997 type = TREE_TYPE (type);
2999 mode = TYPE_MODE (type);
3001 unsigned_p = TYPE_UNSIGNED (type);
3002 switch (mode)
3004 case DImode:
3005 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3006 break;
3007 case SImode:
3008 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3009 break;
3010 case HImode:
3011 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3012 break;
3013 case QImode:
3014 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3015 break;
3016 case SFmode:
3017 result = V4SF_type_node;
3018 break;
3019 case DFmode:
3020 result = V2DF_type_node;
3021 break;
3022 default:
3023 break;
3026 /* Propagate qualifiers attached to the element type
3027 onto the vector type. */
3028 if (result && result != type && TYPE_QUALS (type))
3029 result = build_qualified_type (result, TYPE_QUALS (type));
3031 *no_add_attrs = true; /* No need to hang on to the attribute. */
3033 if (!result)
3034 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3035 else
3036 *node = reconstruct_complex_type (*node, result);
3038 return NULL_TREE;
3041 /* Return nonzero if FUNC is a naked function. */
3042 static int
3043 spu_naked_function_p (tree func)
3045 tree a;
3047 if (TREE_CODE (func) != FUNCTION_DECL)
3048 abort ();
3050 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3051 return a != NULL_TREE;
3055 spu_initial_elimination_offset (int from, int to)
3057 int saved_regs_size = spu_saved_regs_size ();
3058 int sp_offset = 0;
3059 if (!current_function_is_leaf || current_function_outgoing_args_size
3060 || get_frame_size () || saved_regs_size)
3061 sp_offset = STACK_POINTER_OFFSET;
3062 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3063 return (sp_offset + current_function_outgoing_args_size);
3064 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3065 return 0;
3066 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3067 return sp_offset + current_function_outgoing_args_size
3068 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3069 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3070 return get_frame_size () + saved_regs_size + sp_offset;
3071 return 0;
3075 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3077 enum machine_mode mode = TYPE_MODE (type);
3078 int byte_size = ((mode == BLKmode)
3079 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3081 /* Make sure small structs are left justified in a register. */
3082 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3083 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3085 enum machine_mode smode;
3086 rtvec v;
3087 int i;
3088 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3089 int n = byte_size / UNITS_PER_WORD;
3090 v = rtvec_alloc (nregs);
3091 for (i = 0; i < n; i++)
3093 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3094 gen_rtx_REG (TImode,
3095 FIRST_RETURN_REGNUM
3096 + i),
3097 GEN_INT (UNITS_PER_WORD * i));
3098 byte_size -= UNITS_PER_WORD;
3101 if (n < nregs)
3103 if (byte_size < 4)
3104 byte_size = 4;
3105 smode =
3106 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3107 RTVEC_ELT (v, n) =
3108 gen_rtx_EXPR_LIST (VOIDmode,
3109 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3110 GEN_INT (UNITS_PER_WORD * n));
3112 return gen_rtx_PARALLEL (mode, v);
3114 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3118 spu_function_arg (CUMULATIVE_ARGS cum,
3119 enum machine_mode mode,
3120 tree type, int named ATTRIBUTE_UNUSED)
3122 int byte_size;
3124 if (cum >= MAX_REGISTER_ARGS)
3125 return 0;
3127 byte_size = ((mode == BLKmode)
3128 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3130 /* The ABI does not allow parameters to be passed partially in
3131 registers and partially on the stack. */
3132 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3133 return 0;
3135 /* Make sure small structs are left justified in a register. */
3136 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3137 && byte_size < UNITS_PER_WORD && byte_size > 0)
3139 enum machine_mode smode;
3140 rtx gr_reg;
3141 if (byte_size < 4)
3142 byte_size = 4;
3143 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3144 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3145 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3146 const0_rtx);
3147 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3149 else
3150 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3153 /* Variable sized types are passed by reference. */
3154 static bool
3155 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3156 enum machine_mode mode ATTRIBUTE_UNUSED,
3157 const_tree type, bool named ATTRIBUTE_UNUSED)
3159 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3163 /* Var args. */
3165 /* Create and return the va_list datatype.
3167 On SPU, va_list is an array type equivalent to
3169 typedef struct __va_list_tag
3171 void *__args __attribute__((__aligned(16)));
3172 void *__skip __attribute__((__aligned(16)));
3174 } va_list[1];
3176 where __args points to the arg that will be returned by the next
3177 va_arg(), and __skip points to the previous stack frame such that
3178 when __args == __skip we should advance __args by 32 bytes. */
3179 static tree
3180 spu_build_builtin_va_list (void)
3182 tree f_args, f_skip, record, type_decl;
3183 bool owp;
3185 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3187 type_decl =
3188 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3190 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3191 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3193 DECL_FIELD_CONTEXT (f_args) = record;
3194 DECL_ALIGN (f_args) = 128;
3195 DECL_USER_ALIGN (f_args) = 1;
3197 DECL_FIELD_CONTEXT (f_skip) = record;
3198 DECL_ALIGN (f_skip) = 128;
3199 DECL_USER_ALIGN (f_skip) = 1;
3201 TREE_CHAIN (record) = type_decl;
3202 TYPE_NAME (record) = type_decl;
3203 TYPE_FIELDS (record) = f_args;
3204 TREE_CHAIN (f_args) = f_skip;
3206 /* We know this is being padded and we want it that way. It is an internal
3207 type so hide the warnings from the user. */
3208 owp = warn_padded;
3209 warn_padded = false;
3211 layout_type (record);
3213 warn_padded = owp;
3215 /* The correct type is an array type of one element. */
3216 return build_array_type (record, build_index_type (size_zero_node));
3219 /* Implement va_start by filling the va_list structure VALIST.
3220 NEXTARG points to the first anonymous stack argument.
3222 The following global variables are used to initialize
3223 the va_list structure:
3225 current_function_args_info;
3226 the CUMULATIVE_ARGS for this function
3228 current_function_arg_offset_rtx:
3229 holds the offset of the first anonymous stack argument
3230 (relative to the virtual arg pointer). */
3232 void
3233 spu_va_start (tree valist, rtx nextarg)
3235 tree f_args, f_skip;
3236 tree args, skip, t;
3238 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3239 f_skip = TREE_CHAIN (f_args);
3241 valist = build_va_arg_indirect_ref (valist);
3242 args =
3243 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3244 skip =
3245 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3247 /* Find the __args area. */
3248 t = make_tree (TREE_TYPE (args), nextarg);
3249 if (current_function_pretend_args_size > 0)
3250 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3251 size_int (-STACK_POINTER_OFFSET));
3252 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3253 TREE_SIDE_EFFECTS (t) = 1;
3254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3256 /* Find the __skip area. */
3257 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3258 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3259 size_int (current_function_pretend_args_size
3260 - STACK_POINTER_OFFSET));
3261 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3262 TREE_SIDE_EFFECTS (t) = 1;
3263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3266 /* Gimplify va_arg by updating the va_list structure
3267 VALIST as required to retrieve an argument of type
3268 TYPE, and returning that argument.
3270 ret = va_arg(VALIST, TYPE);
3272 generates code equivalent to:
3274 paddedsize = (sizeof(TYPE) + 15) & -16;
3275 if (VALIST.__args + paddedsize > VALIST.__skip
3276 && VALIST.__args <= VALIST.__skip)
3277 addr = VALIST.__skip + 32;
3278 else
3279 addr = VALIST.__args;
3280 VALIST.__args = addr + paddedsize;
3281 ret = *(TYPE *)addr; */
3283 static tree
3284 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3285 tree * post_p ATTRIBUTE_UNUSED)
3287 tree f_args, f_skip;
3288 tree args, skip;
3289 HOST_WIDE_INT size, rsize;
3290 tree paddedsize, addr, tmp;
3291 bool pass_by_reference_p;
3293 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3294 f_skip = TREE_CHAIN (f_args);
3296 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3297 args =
3298 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3299 skip =
3300 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3302 addr = create_tmp_var (ptr_type_node, "va_arg");
3303 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3305 /* if an object is dynamically sized, a pointer to it is passed
3306 instead of the object itself. */
3307 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3308 false);
3309 if (pass_by_reference_p)
3310 type = build_pointer_type (type);
3311 size = int_size_in_bytes (type);
3312 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3314 /* build conditional expression to calculate addr. The expression
3315 will be gimplified later. */
3316 paddedsize = size_int (rsize);
3317 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
3318 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3319 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3320 build2 (LE_EXPR, boolean_type_node, args, skip));
3322 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3323 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3324 size_int (32)), args);
3326 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3327 gimplify_and_add (tmp, pre_p);
3329 /* update VALIST.__args */
3330 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3331 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3332 gimplify_and_add (tmp, pre_p);
3334 addr = fold_convert (build_pointer_type (type), addr);
3336 if (pass_by_reference_p)
3337 addr = build_va_arg_indirect_ref (addr);
3339 return build_va_arg_indirect_ref (addr);
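/* Note that with UNITS_PER_WORD being 16 bytes on SPU, rsize (the
   "paddedsize" above) rounds every argument up to a full quadword, so
   __args always advances in 16-byte steps. */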
3342 /* Save parameter registers starting with the register that corresponds
3343 to the first unnamed parameters. If the first unnamed parameter is
3344 in the stack then save no registers. Set pretend_args_size to the
3345 amount of space needed to save the registers. */
3346 void
3347 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3348 tree type, int *pretend_size, int no_rtl)
3350 if (!no_rtl)
3352 rtx tmp;
3353 int regno;
3354 int offset;
3355 int ncum = *cum;
3357 /* cum currently points to the last named argument, we want to
3358 start at the next argument. */
3359 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3361 offset = -STACK_POINTER_OFFSET;
3362 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3364 tmp = gen_frame_mem (V4SImode,
3365 plus_constant (virtual_incoming_args_rtx,
3366 offset));
3367 emit_move_insn (tmp,
3368 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3369 offset += 16;
3371 *pretend_size = offset + STACK_POINTER_OFFSET;
3375 void
3376 spu_conditional_register_usage (void)
3378 if (flag_pic)
3380 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3381 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3385 /* This is called to decide when we can simplify a load instruction. We
3386 must only return true for registers which we know will always be
3387 aligned, taking into account that CSE might replace this reg with
3388 another one that has not been marked aligned.
3389 So this is really only true for frame, stack and virtual registers,
3390 which we know are always aligned and should not be adversely affected
3391 by CSE. */
3392 static int
3393 regno_aligned_for_load (int regno)
3395 return regno == FRAME_POINTER_REGNUM
3396 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
3397 || regno == STACK_POINTER_REGNUM
3398 || (regno >= FIRST_VIRTUAL_REGISTER
3399 && regno <= LAST_VIRTUAL_REGISTER);
3402 /* Return TRUE when mem is known to be 16-byte aligned. */
3404 aligned_mem_p (rtx mem)
3406 if (MEM_ALIGN (mem) >= 128)
3407 return 1;
3408 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3409 return 1;
3410 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3412 rtx p0 = XEXP (XEXP (mem, 0), 0);
3413 rtx p1 = XEXP (XEXP (mem, 0), 1);
3414 if (regno_aligned_for_load (REGNO (p0)))
3416 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3417 return 1;
3418 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3419 return 1;
3422 else if (GET_CODE (XEXP (mem, 0)) == REG)
3424 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3425 return 1;
3427 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3428 return 1;
3429 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3431 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3432 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3433 if (GET_CODE (p0) == SYMBOL_REF
3434 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3435 return 1;
3437 return 0;
3440 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3441 into its SYMBOL_REF_FLAGS. */
3442 static void
3443 spu_encode_section_info (tree decl, rtx rtl, int first)
3445 default_encode_section_info (decl, rtl, first);
3447 /* If a variable has a forced alignment to < 16 bytes, mark it with
3448 SYMBOL_FLAG_ALIGN1. */
3449 if (TREE_CODE (decl) == VAR_DECL
3450 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3451 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3454 /* Return TRUE if we are certain the mem refers to a complete object
3455 which is both 16-byte aligned and padded to a 16-byte boundary. This
3456 would make it safe to store with a single instruction.
3457 We guarantee the alignment and padding for static objects by aligning
3458 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3459 FIXME: We currently cannot guarantee this for objects on the stack
3460 because assign_parm_setup_stack calls assign_stack_local with the
3461 alignment of the parameter mode and in that case the alignment never
3462 gets adjusted by LOCAL_ALIGNMENT. */
3463 static int
3464 store_with_one_insn_p (rtx mem)
3466 rtx addr = XEXP (mem, 0);
3467 if (GET_MODE (mem) == BLKmode)
3468 return 0;
3469 /* Only static objects. */
3470 if (GET_CODE (addr) == SYMBOL_REF)
3472 /* We use the associated declaration to make sure the access is
3473 referring to the whole object.
3474 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3475 if it is necessary. Will there be cases where one exists, and
3476 the other does not? Will there be cases where both exist, but
3477 have different types? */
3478 tree decl = MEM_EXPR (mem);
3479 if (decl
3480 && TREE_CODE (decl) == VAR_DECL
3481 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3482 return 1;
3483 decl = SYMBOL_REF_DECL (addr);
3484 if (decl
3485 && TREE_CODE (decl) == VAR_DECL
3486 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3487 return 1;
3489 return 0;
3493 spu_expand_mov (rtx * ops, enum machine_mode mode)
3495 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3496 abort ();
3498 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3500 rtx from = SUBREG_REG (ops[1]);
3501 enum machine_mode imode = GET_MODE (from);
3503 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3504 && GET_MODE_CLASS (imode) == MODE_INT
3505 && subreg_lowpart_p (ops[1]));
3507 if (GET_MODE_SIZE (imode) < 4)
3509 from = gen_rtx_SUBREG (SImode, from, 0);
3510 imode = SImode;
3513 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3515 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
3516 emit_insn (GEN_FCN (icode) (ops[0], from));
3518 else
3519 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3520 return 1;
3523 /* At least one of the operands needs to be a register. */
3524 if ((reload_in_progress | reload_completed) == 0
3525 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3527 rtx temp = force_reg (mode, ops[1]);
3528 emit_move_insn (ops[0], temp);
3529 return 1;
3531 if (reload_in_progress || reload_completed)
3533 if (CONSTANT_P (ops[1]))
3534 return spu_split_immediate (ops);
3535 return 0;
3537 else
3539 if (GET_CODE (ops[0]) == MEM)
3541 if (!spu_valid_move (ops))
3543 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3544 gen_reg_rtx (TImode)));
3545 return 1;
3548 else if (GET_CODE (ops[1]) == MEM)
3550 if (!spu_valid_move (ops))
3552 emit_insn (gen_load
3553 (ops[0], ops[1], gen_reg_rtx (TImode),
3554 gen_reg_rtx (SImode)));
3555 return 1;
3558 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3559 extend them. */
3560 if (GET_CODE (ops[1]) == CONST_INT)
3562 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3563 if (val != INTVAL (ops[1]))
3565 emit_move_insn (ops[0], GEN_INT (val));
3566 return 1;
3570 return 0;
3573 static int
3574 reg_align (rtx reg)
3576 /* For now, only frame registers are known to be aligned at all times.
3577 We can't trust REGNO_POINTER_ALIGN because optimization will move
3578 registers around, potentially changing an "aligned" register in an
3579 address to an unaligned register, which would result in an invalid
3580 address. */
3581 int regno = REGNO (reg);
3582 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
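/* Split a load from an unaligned or scalar MEM: load the enclosing aligned
   quadword, then rotate the wanted bytes into the preferred slot -- with
   rotqby when the byte offset is only known in a register (ops[3] is the
   SImode scratch used to compute it) and with an immediate rotate when the
   offset is a compile-time constant.  ops[2] is the TImode scratch that
   receives the quadword. */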
3585 void
3586 spu_split_load (rtx * ops)
3588 enum machine_mode mode = GET_MODE (ops[0]);
3589 rtx addr, load, rot, mem, p0, p1;
3590 int rot_amt;
3592 addr = XEXP (ops[1], 0);
3594 rot = 0;
3595 rot_amt = 0;
3596 if (GET_CODE (addr) == PLUS)
3598 /* 8 cases:
3599 aligned reg + aligned reg => lqx
3600 aligned reg + unaligned reg => lqx, rotqby
3601 aligned reg + aligned const => lqd
3602 aligned reg + unaligned const => lqd, rotqbyi
3603 unaligned reg + aligned reg => lqx, rotqby
3604 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3605 unaligned reg + aligned const => lqd, rotqby
3606 unaligned reg + unaligned const -> not allowed by legitimate address */
3608 p0 = XEXP (addr, 0);
3609 p1 = XEXP (addr, 1);
3610 if (reg_align (p0) < 128)
3612 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3614 emit_insn (gen_addsi3 (ops[3], p0, p1));
3615 rot = ops[3];
3617 else
3618 rot = p0;
3620 else
3622 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3624 rot_amt = INTVAL (p1) & 15;
3625 p1 = GEN_INT (INTVAL (p1) & -16);
3626 addr = gen_rtx_PLUS (SImode, p0, p1);
3628 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3629 rot = p1;
3632 else if (GET_CODE (addr) == REG)
3634 if (reg_align (addr) < 128)
3635 rot = addr;
3637 else if (GET_CODE (addr) == CONST)
3639 if (GET_CODE (XEXP (addr, 0)) == PLUS
3640 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3641 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3643 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3644 if (rot_amt & -16)
3645 addr = gen_rtx_CONST (Pmode,
3646 gen_rtx_PLUS (Pmode,
3647 XEXP (XEXP (addr, 0), 0),
3648 GEN_INT (rot_amt & -16)));
3649 else
3650 addr = XEXP (XEXP (addr, 0), 0);
3652 else
3653 rot = addr;
3655 else if (GET_CODE (addr) == CONST_INT)
3657 rot_amt = INTVAL (addr);
3658 addr = GEN_INT (rot_amt & -16);
3660 else if (!ALIGNED_SYMBOL_REF_P (addr))
3661 rot = addr;
3663 if (GET_MODE_SIZE (mode) < 4)
3664 rot_amt += GET_MODE_SIZE (mode) - 4;
3666 rot_amt &= 15;
3668 if (rot && rot_amt)
3670 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3671 rot = ops[3];
3672 rot_amt = 0;
3675 load = ops[2];
3677 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3678 mem = change_address (ops[1], TImode, addr);
3680 emit_insn (gen_movti (load, mem));
3682 if (rot)
3683 emit_insn (gen_rotqby_ti (load, load, rot));
3684 else if (rot_amt)
3685 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3687 if (reload_completed)
3688 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3689 else
3690 emit_insn (gen_spu_convert (ops[0], load));
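/* Split a store to an unaligned or scalar MEM.  Unless store_with_one_insn_p
   says the MEM covers a whole padded object, this is a read-modify-write:
   load the enclosing quadword, build an insertion mask with cpat, shuffle
   the new value into place and store the whole quadword back.  ops[2] and
   ops[3] are the TImode scratch registers from the expander. */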
3693 void
3694 spu_split_store (rtx * ops)
3696 enum machine_mode mode = GET_MODE (ops[0]);
3697 rtx pat = ops[2];
3698 rtx reg = ops[3];
3699 rtx addr, p0, p1, p1_lo, smem;
3700 int aform;
3701 int scalar;
3703 addr = XEXP (ops[0], 0);
3705 if (GET_CODE (addr) == PLUS)
3707 /* 8 cases:
3708 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3709 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3710 aligned reg + aligned const => lqd, c?d, shuf, stqx
3711 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3712 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3713 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3714 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3715 unaligned reg + unaligned const -> not allowed by legitimate address */
3717 aform = 0;
3718 p0 = XEXP (addr, 0);
3719 p1 = p1_lo = XEXP (addr, 1);
3720 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3722 p1_lo = GEN_INT (INTVAL (p1) & 15);
3723 p1 = GEN_INT (INTVAL (p1) & -16);
3724 addr = gen_rtx_PLUS (SImode, p0, p1);
3727 else if (GET_CODE (addr) == REG)
3729 aform = 0;
3730 p0 = addr;
3731 p1 = p1_lo = const0_rtx;
3733 else
3735 aform = 1;
3736 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3737 p1 = 0; /* aform doesn't use p1 */
3738 p1_lo = addr;
3739 if (ALIGNED_SYMBOL_REF_P (addr))
3740 p1_lo = const0_rtx;
3741 else if (GET_CODE (addr) == CONST)
3743 if (GET_CODE (XEXP (addr, 0)) == PLUS
3744 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3745 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3747 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3748 if ((v & -16) != 0)
3749 addr = gen_rtx_CONST (Pmode,
3750 gen_rtx_PLUS (Pmode,
3751 XEXP (XEXP (addr, 0), 0),
3752 GEN_INT (v & -16)));
3753 else
3754 addr = XEXP (XEXP (addr, 0), 0);
3755 p1_lo = GEN_INT (v & 15);
3758 else if (GET_CODE (addr) == CONST_INT)
3760 p1_lo = GEN_INT (INTVAL (addr) & 15);
3761 addr = GEN_INT (INTVAL (addr) & -16);
3765 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3767 scalar = store_with_one_insn_p (ops[0]);
3768 if (!scalar)
3770 /* We could copy the flags from the ops[0] MEM to lmem here, but
3771 we don't because we want this load to be optimized away if
3772 possible, and copying the flags will prevent that in certain
3773 cases, e.g. consider the volatile flag. */
3775 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3776 set_mem_alias_set (lmem, 0);
3777 emit_insn (gen_movti (reg, lmem));
3779 if (!p0 || reg_align (p0) >= 128)
3780 p0 = stack_pointer_rtx;
3781 if (!p1_lo)
3782 p1_lo = const0_rtx;
3784 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3785 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3787 else if (reload_completed)
3789 if (GET_CODE (ops[1]) == REG)
3790 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3791 else if (GET_CODE (ops[1]) == SUBREG)
3792 emit_move_insn (reg,
3793 gen_rtx_REG (GET_MODE (reg),
3794 REGNO (SUBREG_REG (ops[1]))));
3795 else
3796 abort ();
3798 else
3800 if (GET_CODE (ops[1]) == REG)
3801 emit_insn (gen_spu_convert (reg, ops[1]));
3802 else if (GET_CODE (ops[1]) == SUBREG)
3803 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3804 else
3805 abort ();
3808 if (GET_MODE_SIZE (mode) < 4 && scalar)
3809 emit_insn (gen_shlqby_ti
3810 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3812 smem = change_address (ops[0], TImode, addr);
3813 /* We can't use the previous alias set because the memory has changed
3814 size and can potentially overlap objects of other types. */
3815 set_mem_alias_set (smem, 0);
3817 emit_insn (gen_movti (smem, reg));
3820 /* Return TRUE if X is MEM which is a struct member reference
3821 and the member can safely be loaded and stored with a single
3822 instruction because it is padded. */
3823 static int
3824 mem_is_padded_component_ref (rtx x)
3826 tree t = MEM_EXPR (x);
3827 tree r;
3828 if (!t || TREE_CODE (t) != COMPONENT_REF)
3829 return 0;
3830 t = TREE_OPERAND (t, 1);
3831 if (!t || TREE_CODE (t) != FIELD_DECL
3832 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3833 return 0;
3834 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3835 r = DECL_FIELD_CONTEXT (t);
3836 if (!r || TREE_CODE (r) != RECORD_TYPE)
3837 return 0;
3838 /* Make sure they are the same mode */
3839 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3840 return 0;
3841 /* If there are no following fields then the field alignment ensures
3842 the structure is padded to that alignment, which means this field is
3843 padded too. */
3844 if (TREE_CHAIN (t) == 0)
3845 return 1;
3846 /* If the following field is also aligned then this field will be
3847 padded. */
3848 t = TREE_CHAIN (t);
3849 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3850 return 1;
3851 return 0;
3854 /* Parse the -mfixed-range= option string. */
3855 static void
3856 fix_range (const char *const_str)
3858 int i, first, last;
3859 char *str, *dash, *comma;
3861 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3862 REG2 are either register names or register numbers. The effect
3863 of this option is to mark the registers in the range from REG1 to
3864 REG2 as ``fixed'' so they won't be used by the compiler. */
3866 i = strlen (const_str);
3867 str = (char *) alloca (i + 1);
3868 memcpy (str, const_str, i + 1);
3870 while (1)
3872 dash = strchr (str, '-');
3873 if (!dash)
3875 warning (0, "value of -mfixed-range must have form REG1-REG2");
3876 return;
3878 *dash = '\0';
3879 comma = strchr (dash + 1, ',');
3880 if (comma)
3881 *comma = '\0';
3883 first = decode_reg_name (str);
3884 if (first < 0)
3886 warning (0, "unknown register name: %s", str);
3887 return;
3890 last = decode_reg_name (dash + 1);
3891 if (last < 0)
3893 warning (0, "unknown register name: %s", dash + 1);
3894 return;
3897 *dash = '-';
3899 if (first > last)
3901 warning (0, "%s-%s is an empty range", str, dash + 1);
3902 return;
3905 for (i = first; i <= last; ++i)
3906 fixed_regs[i] = call_used_regs[i] = 1;
3908 if (!comma)
3909 break;
3911 *comma = ',';
3912 str = comma + 1;
3917 spu_valid_move (rtx * ops)
3919 enum machine_mode mode = GET_MODE (ops[0]);
3920 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3921 return 0;
3923 /* init_expr_once tries to recog against load and store insns to set
3924 the direct_load[] and direct_store[] arrays. We always want to
3925 consider those loads and stores valid. init_expr_once is called in
3926 the context of a dummy function which does not have a decl. */
3927 if (cfun->decl == 0)
3928 return 1;
3930 /* Don't allow loads/stores which would require more than 1 insn.
3931 During and after reload we assume loads and stores only take 1
3932 insn. */
3933 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3935 if (GET_CODE (ops[0]) == MEM
3936 && (GET_MODE_SIZE (mode) < 4
3937 || !(store_with_one_insn_p (ops[0])
3938 || mem_is_padded_component_ref (ops[0]))))
3939 return 0;
3940 if (GET_CODE (ops[1]) == MEM
3941 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3942 return 0;
3944 return 1;
3947 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3948 can be generated using the fsmbi instruction. */
3950 fsmbi_const_p (rtx x)
3952 if (CONSTANT_P (x))
3954 /* We can always choose TImode for CONST_INT because the high bits
3955 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3956 enum immediate_class c = classify_immediate (x, TImode);
3957 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
3959 return 0;
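/* fsmbi materializes a quadword in which every byte is either 0x00 or 0xff,
   so, for example, the V4SImode constant { -1, 0, -1, 0 } classifies as
   IC_FSMBI above. */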
3962 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3963 can be generated using the cbd, chd, cwd or cdd instruction. */
3965 cpat_const_p (rtx x, enum machine_mode mode)
3967 if (CONSTANT_P (x))
3969 enum immediate_class c = classify_immediate (x, mode);
3970 return c == IC_CPAT;
3972 return 0;
3976 gen_cpat_const (rtx * ops)
3978 unsigned char dst[16];
3979 int i, offset, shift, isize;
3980 if (GET_CODE (ops[3]) != CONST_INT
3981 || GET_CODE (ops[2]) != CONST_INT
3982 || (GET_CODE (ops[1]) != CONST_INT
3983 && GET_CODE (ops[1]) != REG))
3984 return 0;
3985 if (GET_CODE (ops[1]) == REG
3986 && (!REG_POINTER (ops[1])
3987 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3988 return 0;
3990 for (i = 0; i < 16; i++)
3991 dst[i] = i + 16;
3992 isize = INTVAL (ops[3]);
3993 if (isize == 1)
3994 shift = 3;
3995 else if (isize == 2)
3996 shift = 2;
3997 else
3998 shift = 0;
3999 offset = (INTVAL (ops[2]) +
4000 (GET_CODE (ops[1]) ==
4001 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4002 for (i = 0; i < isize; i++)
4003 dst[offset + i] = i + shift;
4004 return array_to_constant (TImode, dst);
4007 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4008 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4009 than 16 bytes, the value is repeated across the rest of the array. */
4010 void
4011 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4013 HOST_WIDE_INT val;
4014 int i, j, first;
4016 memset (arr, 0, 16);
4017 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4018 if (GET_CODE (x) == CONST_INT
4019 || (GET_CODE (x) == CONST_DOUBLE
4020 && (mode == SFmode || mode == DFmode)))
4022 gcc_assert (mode != VOIDmode && mode != BLKmode);
4024 if (GET_CODE (x) == CONST_DOUBLE)
4025 val = const_double_to_hwint (x);
4026 else
4027 val = INTVAL (x);
4028 first = GET_MODE_SIZE (mode) - 1;
4029 for (i = first; i >= 0; i--)
4031 arr[i] = val & 0xff;
4032 val >>= 8;
4034 /* Splat the constant across the whole array. */
4035 for (j = 0, i = first + 1; i < 16; i++)
4037 arr[i] = arr[j];
4038 j = (j == first) ? 0 : j + 1;
4041 else if (GET_CODE (x) == CONST_DOUBLE)
4043 val = CONST_DOUBLE_LOW (x);
4044 for (i = 15; i >= 8; i--)
4046 arr[i] = val & 0xff;
4047 val >>= 8;
4049 val = CONST_DOUBLE_HIGH (x);
4050 for (i = 7; i >= 0; i--)
4052 arr[i] = val & 0xff;
4053 val >>= 8;
4056 else if (GET_CODE (x) == CONST_VECTOR)
4058 int units;
4059 rtx elt;
4060 mode = GET_MODE_INNER (mode);
4061 units = CONST_VECTOR_NUNITS (x);
4062 for (i = 0; i < units; i++)
4064 elt = CONST_VECTOR_ELT (x, i);
4065 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4067 if (GET_CODE (elt) == CONST_DOUBLE)
4068 val = const_double_to_hwint (elt);
4069 else
4070 val = INTVAL (elt);
4071 first = GET_MODE_SIZE (mode) - 1;
4072 if (first + i * GET_MODE_SIZE (mode) > 16)
4073 abort ();
4074 for (j = first; j >= 0; j--)
4076 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4077 val >>= 8;
4082 else
4083 gcc_unreachable();
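/* For example, constant_to_array (SImode, GEN_INT (0x12345678), arr) fills
   arr with { 0x12, 0x34, 0x56, 0x78 } repeated four times. */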
4086 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4087 smaller than 16 bytes, use the bytes that would represent that value
4088 in a register, e.g., for QImode return the value of arr[3]. */
4090 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4092 enum machine_mode inner_mode;
4093 rtvec v;
4094 int units, size, i, j, k;
4095 HOST_WIDE_INT val;
4097 if (GET_MODE_CLASS (mode) == MODE_INT
4098 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4100 j = GET_MODE_SIZE (mode);
4101 i = j < 4 ? 4 - j : 0;
4102 for (val = 0; i < j; i++)
4103 val = (val << 8) | arr[i];
4104 val = trunc_int_for_mode (val, mode);
4105 return GEN_INT (val);
4108 if (mode == TImode)
4110 HOST_WIDE_INT high;
4111 for (i = high = 0; i < 8; i++)
4112 high = (high << 8) | arr[i];
4113 for (i = 8, val = 0; i < 16; i++)
4114 val = (val << 8) | arr[i];
4115 return immed_double_const (val, high, TImode);
4117 if (mode == SFmode)
4119 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4120 val = trunc_int_for_mode (val, SImode);
4121 return hwint_to_const_double (SFmode, val);
4123 if (mode == DFmode)
4125 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4126 val <<= 32;
4127 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
4128 return hwint_to_const_double (DFmode, val);
4131 if (!VECTOR_MODE_P (mode))
4132 abort ();
4134 units = GET_MODE_NUNITS (mode);
4135 size = GET_MODE_UNIT_SIZE (mode);
4136 inner_mode = GET_MODE_INNER (mode);
4137 v = rtvec_alloc (units);
4139 for (k = i = 0; i < units; ++i)
4141 val = 0;
4142 for (j = 0; j < size; j++, k++)
4143 val = (val << 8) | arr[k];
4145 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4146 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4147 else
4148 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4150 if (k > 16)
4151 abort ();
4153 return gen_rtx_CONST_VECTOR (mode, v);
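/* Diagnose a symbolic constant X that would need a run-time relocation,
   honoring -mwarn-reloc / -merror-reloc.  */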
4156 static void
4157 reloc_diagnostic (rtx x)
4159 tree loc_decl, decl = 0;
4160 const char *msg;
4161 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4162 return;
4164 if (GET_CODE (x) == SYMBOL_REF)
4165 decl = SYMBOL_REF_DECL (x);
4166 else if (GET_CODE (x) == CONST
4167 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4168 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4170 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4171 if (decl && !DECL_P (decl))
4172 decl = 0;
4174 /* We use last_assemble_variable_decl to get line information. It's
4175 not always going to be right and might not even be close, but will
4176 be right for the more common cases. */
4177 if (!last_assemble_variable_decl || in_section == ctors_section)
4178 loc_decl = decl;
4179 else
4180 loc_decl = last_assemble_variable_decl;
4182 /* The decl could be a string constant. */
4183 if (decl && DECL_P (decl))
4184 msg = "%Jcreating run-time relocation for %qD";
4185 else
4186 msg = "creating run-time relocation";
4188 if (TARGET_WARN_RELOC)
4189 warning (0, msg, loc_decl, decl);
4190 else
4191 error (msg, loc_decl, decl);
4194 /* Hook into assemble_integer so we can generate an error for run-time
4195 relocations. The SPU ABI disallows them. */
4196 static bool
4197 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4199 /* By default run-time relocations aren't supported, but we allow them
 4200 in case users support them in their own run-time loader. And we provide
4201 a warning for those users that don't. */
4202 if ((GET_CODE (x) == SYMBOL_REF)
4203 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4204 reloc_diagnostic (x);
4206 return default_assemble_integer (x, size, aligned_p);
4209 static void
4210 spu_asm_globalize_label (FILE * file, const char *name)
4212 fputs ("\t.global\t", file);
4213 assemble_name (file, name);
4214 fputs ("\n", file);
4217 static bool
4218 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4220 enum machine_mode mode = GET_MODE (x);
4221 int cost = COSTS_N_INSNS (2);
4223 /* Folding to a CONST_VECTOR will use extra space but there might
4224 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4225 only if it allows us to fold away multiple insns. Changing the cost
4226 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4227 because this cost will only be compared against a single insn.
4228 if (code == CONST_VECTOR)
 4229 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
 4230 */
4232 /* Use defaults for float operations. Not accurate but good enough. */
4233 if (mode == DFmode)
4235 *total = COSTS_N_INSNS (13);
4236 return true;
4238 if (mode == SFmode)
4240 *total = COSTS_N_INSNS (6);
4241 return true;
4243 switch (code)
4245 case CONST_INT:
4246 if (satisfies_constraint_K (x))
4247 *total = 0;
4248 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4249 *total = COSTS_N_INSNS (1);
4250 else
4251 *total = COSTS_N_INSNS (3);
4252 return true;
4254 case CONST:
4255 *total = COSTS_N_INSNS (3);
4256 return true;
4258 case LABEL_REF:
4259 case SYMBOL_REF:
4260 *total = COSTS_N_INSNS (0);
4261 return true;
4263 case CONST_DOUBLE:
4264 *total = COSTS_N_INSNS (5);
4265 return true;
4267 case FLOAT_EXTEND:
4268 case FLOAT_TRUNCATE:
4269 case FLOAT:
4270 case UNSIGNED_FLOAT:
4271 case FIX:
4272 case UNSIGNED_FIX:
4273 *total = COSTS_N_INSNS (7);
4274 return true;
4276 case PLUS:
4277 if (mode == TImode)
4279 *total = COSTS_N_INSNS (9);
4280 return true;
4282 break;
4284 case MULT:
4285 cost =
4286 GET_CODE (XEXP (x, 0)) ==
4287 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4288 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4290 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4292 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4293 cost = COSTS_N_INSNS (14);
4294 if ((val & 0xffff) == 0)
4295 cost = COSTS_N_INSNS (9);
4296 else if (val > 0 && val < 0x10000)
4297 cost = COSTS_N_INSNS (11);
4300 *total = cost;
4301 return true;
4302 case DIV:
4303 case UDIV:
4304 case MOD:
4305 case UMOD:
4306 *total = COSTS_N_INSNS (20);
4307 return true;
4308 case ROTATE:
4309 case ROTATERT:
4310 case ASHIFT:
4311 case ASHIFTRT:
4312 case LSHIFTRT:
4313 *total = COSTS_N_INSNS (4);
4314 return true;
4315 case UNSPEC:
4316 if (XINT (x, 1) == UNSPEC_CONVERT)
4317 *total = COSTS_N_INSNS (0);
4318 else
4319 *total = COSTS_N_INSNS (4);
4320 return true;
4322 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4323 if (GET_MODE_CLASS (mode) == MODE_INT
4324 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4325 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4326 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4327 *total = cost;
4328 return true;
4331 enum machine_mode
4332 spu_eh_return_filter_mode (void)
 4334 /* We would like this to be SImode, but sjlj exceptions seem to work
4335 only with word_mode. */
4336 return TImode;
4339 /* Decide whether we can make a sibling call to a function. DECL is the
4340 declaration of the function being targeted by the call and EXP is the
4341 CALL_EXPR representing the call. */
4342 static bool
4343 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4345 return decl && !TARGET_LARGE_MEM;
4348 /* We need to correctly update the back chain pointer and the Available
4349 Stack Size (which is in the second slot of the sp register.) */
4350 void
4351 spu_allocate_stack (rtx op0, rtx op1)
4353 HOST_WIDE_INT v;
4354 rtx chain = gen_reg_rtx (V4SImode);
4355 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4356 rtx sp = gen_reg_rtx (V4SImode);
4357 rtx splatted = gen_reg_rtx (V4SImode);
4358 rtx pat = gen_reg_rtx (TImode);
4360 /* copy the back chain so we can save it back again. */
4361 emit_move_insn (chain, stack_bot);
4363 op1 = force_reg (SImode, op1);
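  /* The shuffle pattern { 0,1,2,3, 0,1,2,3, ... } replicates the
     preferred slot of op1 into every word, so the vector subtract below
     decrements both the stack pointer (slot 0) and the Available Stack
     Size (slot 1) by the allocation size.  */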
4365 v = 0x1020300010203ll;
4366 emit_move_insn (pat, immed_double_const (v, v, TImode));
4367 emit_insn (gen_shufb (splatted, op1, op1, pat));
4369 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4370 emit_insn (gen_subv4si3 (sp, sp, splatted));
4372 if (flag_stack_check)
4374 rtx avail = gen_reg_rtx(SImode);
4375 rtx result = gen_reg_rtx(SImode);
4376 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4377 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4378 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4381 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4383 emit_move_insn (stack_bot, chain);
4385 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4388 void
4389 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4391 static unsigned char arr[16] =
4392 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4393 rtx temp = gen_reg_rtx (SImode);
4394 rtx temp2 = gen_reg_rtx (SImode);
4395 rtx temp3 = gen_reg_rtx (V4SImode);
4396 rtx temp4 = gen_reg_rtx (V4SImode);
4397 rtx pat = gen_reg_rtx (TImode);
4398 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4400 /* Restore the backchain from the first word, sp from the second. */
4401 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4402 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4404 emit_move_insn (pat, array_to_constant (TImode, arr));
4406 /* Compute Available Stack Size for sp */
4407 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4408 emit_insn (gen_shufb (temp3, temp, temp, pat));
4410 /* Compute Available Stack Size for back chain */
4411 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4412 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4413 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4415 emit_insn (gen_addv4si3 (sp, sp, temp3));
4416 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4419 static void
4420 spu_init_libfuncs (void)
4422 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4423 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4424 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4425 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4426 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4427 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4428 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4429 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4430 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4431 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4432 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4434 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4435 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4438 /* Make a subreg, stripping any existing subreg. We could possibly just
4439 call simplify_subreg, but in this case we know what we want. */
4441 spu_gen_subreg (enum machine_mode mode, rtx x)
4443 if (GET_CODE (x) == SUBREG)
4444 x = SUBREG_REG (x);
4445 if (GET_MODE (x) == mode)
4446 return x;
4447 return gen_rtx_SUBREG (mode, x, 0);
4450 static bool
4451 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4453 return (TYPE_MODE (type) == BLKmode
4454 && ((type) == 0
4455 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4456 || int_size_in_bytes (type) >
4457 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4460 /* Create the built-in types and functions */
4462 struct spu_builtin_description spu_builtins[] = {
4463 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4464 {fcode, icode, name, type, params, NULL_TREE},
4465 #include "spu-builtins.def"
4466 #undef DEF_BUILTIN
4469 static void
4470 spu_init_builtins (void)
4472 struct spu_builtin_description *d;
4473 unsigned int i;
4475 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4476 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4477 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4478 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4479 V4SF_type_node = build_vector_type (float_type_node, 4);
4480 V2DF_type_node = build_vector_type (double_type_node, 2);
4482 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4483 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4484 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4485 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4487 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4489 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4490 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4491 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4492 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4493 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4495 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4496 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4497 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4498 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4499 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4500 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4502 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4503 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4504 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4505 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4506 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4507 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4508 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4509 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4511 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4512 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4514 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4516 spu_builtin_types[SPU_BTI_PTR] =
4517 build_pointer_type (build_qualified_type
4518 (void_type_node,
4519 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4521 /* For each builtin we build a new prototype. The tree code will make
4522 sure nodes are shared. */
4523 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4525 tree p;
4526 char name[64]; /* build_function will make a copy. */
4527 int parm;
4529 if (d->name == 0)
4530 continue;
4532 /* find last parm */
4533 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4537 p = void_list_node;
4538 while (parm > 1)
4539 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4541 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4543 sprintf (name, "__builtin_%s", d->name);
4544 d->fndecl =
4545 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4546 NULL, NULL_TREE);
4547 if (d->fcode == SPU_MASK_FOR_LOAD)
4548 TREE_READONLY (d->fndecl) = 1;
4552 void
4553 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4555 static unsigned char arr[16] =
4556 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4558 rtx temp = gen_reg_rtx (Pmode);
4559 rtx temp2 = gen_reg_rtx (V4SImode);
4560 rtx temp3 = gen_reg_rtx (V4SImode);
4561 rtx pat = gen_reg_rtx (TImode);
4562 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4564 emit_move_insn (pat, array_to_constant (TImode, arr));
4566 /* Restore the sp. */
4567 emit_move_insn (temp, op1);
4568 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4570 /* Compute available stack size for sp. */
4571 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4572 emit_insn (gen_shufb (temp3, temp, temp, pat));
4574 emit_insn (gen_addv4si3 (sp, sp, temp3));
4575 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
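/* Return TRUE if CHANNEL is one of the channels numbered 21 through 27,
   which appear to be the MFC command, tag and status channels used for
   DMA transfers.  */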
4579 spu_safe_dma (HOST_WIDE_INT channel)
4581 return (channel >= 21 && channel <= 27);
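/* Splat OPS[1] across every element of the vector OPS[0].  Constants are
   folded to a CONST_VECTOR; otherwise a shufb replicates the scalar's
   preferred slot (bytes 0-3 for words, 2-3 for halfwords, byte 3 for
   bytes, 0-7 for doublewords) over the whole quadword.  */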
4584 void
4585 spu_builtin_splats (rtx ops[])
4587 enum machine_mode mode = GET_MODE (ops[0]);
4588 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4590 unsigned char arr[16];
4591 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4592 emit_move_insn (ops[0], array_to_constant (mode, arr));
4594 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4596 rtvec v = rtvec_alloc (4);
4597 RTVEC_ELT (v, 0) = ops[1];
4598 RTVEC_ELT (v, 1) = ops[1];
4599 RTVEC_ELT (v, 2) = ops[1];
4600 RTVEC_ELT (v, 3) = ops[1];
4601 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4603 else
4605 rtx reg = gen_reg_rtx (TImode);
4606 rtx shuf;
4607 if (GET_CODE (ops[1]) != REG
4608 && GET_CODE (ops[1]) != SUBREG)
4609 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4610 switch (mode)
4612 case V2DImode:
4613 case V2DFmode:
4614 shuf =
4615 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4616 TImode);
4617 break;
4618 case V4SImode:
4619 case V4SFmode:
4620 shuf =
4621 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4622 TImode);
4623 break;
4624 case V8HImode:
4625 shuf =
4626 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4627 TImode);
4628 break;
4629 case V16QImode:
4630 shuf =
4631 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4632 TImode);
4633 break;
4634 default:
4635 abort ();
4637 emit_move_insn (reg, shuf);
4638 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
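/* Extract element OPS[2] of vector OPS[1] into the scalar OPS[0].  A
   constant index uses the vec_extract patterns directly; a variable
   index rotates the quadword so the requested element lands in the
   preferred slot (rotate count i-3 for bytes, 2*i-2 for halfwords,
   4*i for words, 8*i for doublewords).  */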
4642 void
4643 spu_builtin_extract (rtx ops[])
4645 enum machine_mode mode;
4646 rtx rot, from, tmp;
4648 mode = GET_MODE (ops[1]);
4650 if (GET_CODE (ops[2]) == CONST_INT)
4652 switch (mode)
4654 case V16QImode:
4655 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4656 break;
4657 case V8HImode:
4658 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4659 break;
4660 case V4SFmode:
4661 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4662 break;
4663 case V4SImode:
4664 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4665 break;
4666 case V2DImode:
4667 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4668 break;
4669 case V2DFmode:
4670 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4671 break;
4672 default:
4673 abort ();
4675 return;
4678 from = spu_gen_subreg (TImode, ops[1]);
4679 rot = gen_reg_rtx (TImode);
4680 tmp = gen_reg_rtx (SImode);
4682 switch (mode)
4684 case V16QImode:
4685 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4686 break;
4687 case V8HImode:
4688 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4689 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4690 break;
4691 case V4SFmode:
4692 case V4SImode:
4693 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4694 break;
4695 case V2DImode:
4696 case V2DFmode:
4697 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4698 break;
4699 default:
4700 abort ();
4702 emit_insn (gen_rotqby_ti (rot, from, tmp));
4704 emit_insn (gen_spu_convert (ops[0], rot));
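/* Insert the scalar OPS[1] into element OPS[3] of vector OPS[2] and
   store the result in OPS[0].  A cpat instruction builds the shufb
   control word that merges the new element into the old vector.  */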
4707 void
4708 spu_builtin_insert (rtx ops[])
4710 enum machine_mode mode = GET_MODE (ops[0]);
4711 enum machine_mode imode = GET_MODE_INNER (mode);
4712 rtx mask = gen_reg_rtx (TImode);
4713 rtx offset;
4715 if (GET_CODE (ops[3]) == CONST_INT)
4716 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4717 else
4719 offset = gen_reg_rtx (SImode);
4720 emit_insn (gen_mulsi3
4721 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4723 emit_insn (gen_cpat
4724 (mask, stack_pointer_rtx, offset,
4725 GEN_INT (GET_MODE_SIZE (imode))));
4726 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
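/* Expand spu_promote: move the scalar OPS[1] into element OPS[2] of the
   vector OPS[0] by rotating the quadword so the preferred slot lines up
   with the requested element; the remaining elements are unspecified.  */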
4729 void
4730 spu_builtin_promote (rtx ops[])
4732 enum machine_mode mode, imode;
4733 rtx rot, from, offset;
4734 HOST_WIDE_INT pos;
4736 mode = GET_MODE (ops[0]);
4737 imode = GET_MODE_INNER (mode);
4739 from = gen_reg_rtx (TImode);
4740 rot = spu_gen_subreg (TImode, ops[0]);
4742 emit_insn (gen_spu_convert (from, ops[1]));
4744 if (GET_CODE (ops[2]) == CONST_INT)
4746 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4747 if (GET_MODE_SIZE (imode) < 4)
4748 pos += 4 - GET_MODE_SIZE (imode);
4749 offset = GEN_INT (pos & 15);
4751 else
4753 offset = gen_reg_rtx (SImode);
4754 switch (mode)
4756 case V16QImode:
4757 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4758 break;
4759 case V8HImode:
4760 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4761 emit_insn (gen_addsi3 (offset, offset, offset));
4762 break;
4763 case V4SFmode:
4764 case V4SImode:
4765 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4766 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4767 break;
4768 case V2DImode:
4769 case V2DFmode:
4770 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4771 break;
4772 default:
4773 abort ();
4776 emit_insn (gen_rotqby_ti (rot, from, offset));
4779 void
4780 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4782 rtx shuf = gen_reg_rtx (V4SImode);
4783 rtx insn = gen_reg_rtx (V4SImode);
4784 rtx shufc;
4785 rtx insnc;
4786 rtx mem;
4788 fnaddr = force_reg (SImode, fnaddr);
4789 cxt = force_reg (SImode, cxt);
4791 if (TARGET_LARGE_MEM)
4793 rtx rotl = gen_reg_rtx (V4SImode);
4794 rtx mask = gen_reg_rtx (V4SImode);
4795 rtx bi = gen_reg_rtx (SImode);
4796 unsigned char shufa[16] = {
4797 2, 3, 0, 1, 18, 19, 16, 17,
4798 0, 1, 2, 3, 16, 17, 18, 19
4800 unsigned char insna[16] = {
4801 0x41, 0, 0, 79,
4802 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4803 0x60, 0x80, 0, 79,
4804 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4807 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4808 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4810 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4811 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4812 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4813 emit_insn (gen_selb (insn, insnc, rotl, mask));
4815 mem = memory_address (Pmode, tramp);
4816 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4818 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4819 mem = memory_address (Pmode, plus_constant (tramp, 16));
4820 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4822 else
4824 rtx scxt = gen_reg_rtx (SImode);
4825 rtx sfnaddr = gen_reg_rtx (SImode);
4826 unsigned char insna[16] = {
4827 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4828 0x30, 0, 0, 0,
4829 0, 0, 0, 0,
4830 0, 0, 0, 0
4833 shufc = gen_reg_rtx (TImode);
4834 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
 4836 /* By or'ing all of cxt with the ila opcode we are assuming cxt
 4837 fits in 18 bits and its last 4 bits are zeros. This will be true if
4838 the stack pointer is initialized to 0x3fff0 at program start,
4839 otherwise the ila instruction will be garbage. */
4841 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4842 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4843 emit_insn (gen_cpat
4844 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4845 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4846 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4848 mem = memory_address (Pmode, tramp);
4849 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4852 emit_insn (gen_sync ());
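/* Sign extend OPS[1] into OPS[0] with a shufb whose pattern replicates
   the sign byte over the high-order bytes of the result and copies the
   original value into the low-order bytes.  For example (illustrative),
   SImode -> DImode uses { 10 10 10 10 00 01 02 03 ... }: bytes 0-3 take
   the sign byte and bytes 4-7 take the original word.  */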
4855 void
4856 spu_expand_sign_extend (rtx ops[])
4858 unsigned char arr[16];
4859 rtx pat = gen_reg_rtx (TImode);
4860 rtx sign, c;
4861 int i, last;
4862 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4863 if (GET_MODE (ops[1]) == QImode)
4865 sign = gen_reg_rtx (HImode);
4866 emit_insn (gen_extendqihi2 (sign, ops[1]));
4867 for (i = 0; i < 16; i++)
4868 arr[i] = 0x12;
4869 arr[last] = 0x13;
4871 else
4873 for (i = 0; i < 16; i++)
4874 arr[i] = 0x10;
4875 switch (GET_MODE (ops[1]))
4877 case HImode:
4878 sign = gen_reg_rtx (SImode);
4879 emit_insn (gen_extendhisi2 (sign, ops[1]));
4880 arr[last] = 0x03;
4881 arr[last - 1] = 0x02;
4882 break;
4883 case SImode:
4884 sign = gen_reg_rtx (SImode);
4885 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4886 for (i = 0; i < 4; i++)
4887 arr[last - i] = 3 - i;
4888 break;
4889 case DImode:
4890 sign = gen_reg_rtx (SImode);
4891 c = gen_reg_rtx (SImode);
4892 emit_insn (gen_spu_convert (c, ops[1]));
4893 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4894 for (i = 0; i < 8; i++)
4895 arr[last - i] = 7 - i;
4896 break;
4897 default:
4898 abort ();
4901 emit_move_insn (pat, array_to_constant (TImode, arr));
4902 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
 4905 /* Expand vector initialization. If there are any constant parts,
 4906 load the constant parts first, then load any non-constant parts. */
4907 void
4908 spu_expand_vector_init (rtx target, rtx vals)
4910 enum machine_mode mode = GET_MODE (target);
4911 int n_elts = GET_MODE_NUNITS (mode);
4912 int n_var = 0;
4913 bool all_same = true;
4914 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4915 int i;
4917 first = XVECEXP (vals, 0, 0);
4918 for (i = 0; i < n_elts; ++i)
4920 x = XVECEXP (vals, 0, i);
4921 if (!CONSTANT_P (x))
4922 ++n_var;
4923 else
4925 if (first_constant == NULL_RTX)
4926 first_constant = x;
4928 if (i > 0 && !rtx_equal_p (x, first))
4929 all_same = false;
4932 /* if all elements are the same, use splats to repeat elements */
4933 if (all_same)
4935 if (!CONSTANT_P (first)
4936 && !register_operand (first, GET_MODE (x)))
4937 first = force_reg (GET_MODE (first), first);
4938 emit_insn (gen_spu_splats (target, first));
4939 return;
4942 /* load constant parts */
4943 if (n_var != n_elts)
4945 if (n_var == 0)
4947 emit_move_insn (target,
4948 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4950 else
4952 rtx constant_parts_rtx = copy_rtx (vals);
4954 gcc_assert (first_constant != NULL_RTX);
4955 /* fill empty slots with the first constant, this increases
4956 our chance of using splats in the recursive call below. */
4957 for (i = 0; i < n_elts; ++i)
4958 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4959 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4961 spu_expand_vector_init (target, constant_parts_rtx);
4965 /* load variable parts */
4966 if (n_var != 0)
4968 rtx insert_operands[4];
4970 insert_operands[0] = target;
4971 insert_operands[2] = target;
4972 for (i = 0; i < n_elts; ++i)
4974 x = XVECEXP (vals, 0, i);
4975 if (!CONSTANT_P (x))
4977 if (!register_operand (x, GET_MODE (x)))
4978 x = force_reg (GET_MODE (x), x);
4979 insert_operands[1] = x;
4980 insert_operands[3] = GEN_INT (i);
4981 spu_builtin_insert (insert_operands);
4987 /* Return insn index for the vector compare instruction for given CODE,
4988 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4990 static int
4991 get_vec_cmp_insn (enum rtx_code code,
4992 enum machine_mode dest_mode,
4993 enum machine_mode op_mode)
4996 switch (code)
4998 case EQ:
4999 if (dest_mode == V16QImode && op_mode == V16QImode)
5000 return CODE_FOR_ceq_v16qi;
5001 if (dest_mode == V8HImode && op_mode == V8HImode)
5002 return CODE_FOR_ceq_v8hi;
5003 if (dest_mode == V4SImode && op_mode == V4SImode)
5004 return CODE_FOR_ceq_v4si;
5005 if (dest_mode == V4SImode && op_mode == V4SFmode)
5006 return CODE_FOR_ceq_v4sf;
5007 if (dest_mode == V2DImode && op_mode == V2DFmode)
5008 return CODE_FOR_ceq_v2df;
5009 break;
5010 case GT:
5011 if (dest_mode == V16QImode && op_mode == V16QImode)
5012 return CODE_FOR_cgt_v16qi;
5013 if (dest_mode == V8HImode && op_mode == V8HImode)
5014 return CODE_FOR_cgt_v8hi;
5015 if (dest_mode == V4SImode && op_mode == V4SImode)
5016 return CODE_FOR_cgt_v4si;
5017 if (dest_mode == V4SImode && op_mode == V4SFmode)
5018 return CODE_FOR_cgt_v4sf;
5019 if (dest_mode == V2DImode && op_mode == V2DFmode)
5020 return CODE_FOR_cgt_v2df;
5021 break;
5022 case GTU:
5023 if (dest_mode == V16QImode && op_mode == V16QImode)
5024 return CODE_FOR_clgt_v16qi;
5025 if (dest_mode == V8HImode && op_mode == V8HImode)
5026 return CODE_FOR_clgt_v8hi;
5027 if (dest_mode == V4SImode && op_mode == V4SImode)
5028 return CODE_FOR_clgt_v4si;
5029 break;
5030 default:
5031 break;
5033 return -1;
5036 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5037 DMODE is expected destination mode. This is a recursive function. */
5039 static rtx
5040 spu_emit_vector_compare (enum rtx_code rcode,
5041 rtx op0, rtx op1,
5042 enum machine_mode dmode)
5044 int vec_cmp_insn;
5045 rtx mask;
5046 enum machine_mode dest_mode;
5047 enum machine_mode op_mode = GET_MODE (op1);
5049 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
 5051 /* Floating point vector compare instructions use destination V4SImode.
 5052 Double floating point vector compare instructions use destination V2DImode.
5053 Move destination to appropriate mode later. */
5054 if (dmode == V4SFmode)
5055 dest_mode = V4SImode;
5056 else if (dmode == V2DFmode)
5057 dest_mode = V2DImode;
5058 else
5059 dest_mode = dmode;
5061 mask = gen_reg_rtx (dest_mode);
5062 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5064 if (vec_cmp_insn == -1)
5066 bool swap_operands = false;
5067 bool try_again = false;
5068 switch (rcode)
5070 case LT:
5071 rcode = GT;
5072 swap_operands = true;
5073 try_again = true;
5074 break;
5075 case LTU:
5076 rcode = GTU;
5077 swap_operands = true;
5078 try_again = true;
5079 break;
5080 case NE:
5081 /* Treat A != B as ~(A==B). */
5083 enum insn_code nor_code;
5084 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5085 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5086 gcc_assert (nor_code != CODE_FOR_nothing);
5087 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5088 if (dmode != dest_mode)
5090 rtx temp = gen_reg_rtx (dest_mode);
5091 convert_move (temp, mask, 0);
5092 return temp;
5094 return mask;
5096 break;
5097 case GE:
5098 case GEU:
5099 case LE:
5100 case LEU:
5101 /* Try GT/GTU/LT/LTU OR EQ */
5103 rtx c_rtx, eq_rtx;
5104 enum insn_code ior_code;
5105 enum rtx_code new_code;
5107 switch (rcode)
5109 case GE: new_code = GT; break;
5110 case GEU: new_code = GTU; break;
5111 case LE: new_code = LT; break;
5112 case LEU: new_code = LTU; break;
5113 default:
5114 gcc_unreachable ();
5117 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5118 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5120 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5121 gcc_assert (ior_code != CODE_FOR_nothing);
5122 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5123 if (dmode != dest_mode)
5125 rtx temp = gen_reg_rtx (dest_mode);
5126 convert_move (temp, mask, 0);
5127 return temp;
5129 return mask;
5131 break;
5132 default:
5133 gcc_unreachable ();
5136 /* You only get two chances. */
5137 if (try_again)
5138 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5140 gcc_assert (vec_cmp_insn != -1);
5142 if (swap_operands)
5144 rtx tmp;
5145 tmp = op0;
5146 op0 = op1;
5147 op1 = tmp;
5151 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5152 if (dmode != dest_mode)
5154 rtx temp = gen_reg_rtx (dest_mode);
5155 convert_move (temp, mask, 0);
5156 return temp;
5158 return mask;
5162 /* Emit vector conditional expression.
5163 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5164 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5167 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5168 rtx cond, rtx cc_op0, rtx cc_op1)
5170 enum machine_mode dest_mode = GET_MODE (dest);
5171 enum rtx_code rcode = GET_CODE (cond);
5172 rtx mask;
5174 /* Get the vector mask for the given relational operations. */
5175 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5177 emit_insn(gen_selb (dest, op2, op1, mask));
5179 return 1;
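/* Force OP into a register of mode MODE: constants are converted with
   convert_to_mode, same-size modes are handled with a subreg, and
   anything else goes through a spu_convert.  */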
5182 static rtx
5183 spu_force_reg (enum machine_mode mode, rtx op)
5185 rtx x, r;
5186 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5188 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5189 || GET_MODE (op) == BLKmode)
5190 return force_reg (mode, convert_to_mode (mode, op, 0));
5191 abort ();
5194 r = force_reg (GET_MODE (op), op);
5195 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5197 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5198 if (x)
5199 return x;
5202 x = gen_reg_rtx (mode);
5203 emit_insn (gen_spu_convert (x, r));
5204 return x;
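/* Diagnose out-of-range or misaligned immediate operand OP of builtin D.
   P is the expected operand type, one of the SPU_BTI_* codes.  */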
5207 static void
5208 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5210 HOST_WIDE_INT v = 0;
5211 int lsbits;
5212 /* Check the range of immediate operands. */
5213 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5215 int range = p - SPU_BTI_7;
5217 if (!CONSTANT_P (op))
5218 error ("%s expects an integer literal in the range [%d, %d].",
5219 d->name,
5220 spu_builtin_range[range].low, spu_builtin_range[range].high);
5222 if (GET_CODE (op) == CONST
5223 && (GET_CODE (XEXP (op, 0)) == PLUS
5224 || GET_CODE (XEXP (op, 0)) == MINUS))
5226 v = INTVAL (XEXP (XEXP (op, 0), 1));
5227 op = XEXP (XEXP (op, 0), 0);
5229 else if (GET_CODE (op) == CONST_INT)
5230 v = INTVAL (op);
5231 else if (GET_CODE (op) == CONST_VECTOR
5232 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5233 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5235 /* The default for v is 0 which is valid in every range. */
5236 if (v < spu_builtin_range[range].low
5237 || v > spu_builtin_range[range].high)
5238 error ("%s expects an integer literal in the range [%d, %d]. ("
5239 HOST_WIDE_INT_PRINT_DEC ")",
5240 d->name,
5241 spu_builtin_range[range].low, spu_builtin_range[range].high,
5244 switch (p)
5246 case SPU_BTI_S10_4:
5247 lsbits = 4;
5248 break;
5249 case SPU_BTI_U16_2:
 5250 /* This is only used in lqa and stqa. Even though the insns
5251 encode 16 bits of the address (all but the 2 least
5252 significant), only 14 bits are used because it is masked to
5253 be 16 byte aligned. */
5254 lsbits = 4;
5255 break;
5256 case SPU_BTI_S16_2:
5257 /* This is used for lqr and stqr. */
5258 lsbits = 2;
5259 break;
5260 default:
5261 lsbits = 0;
5264 if (GET_CODE (op) == LABEL_REF
5265 || (GET_CODE (op) == SYMBOL_REF
5266 && SYMBOL_REF_FUNCTION_P (op))
5267 || (v & ((1 << lsbits) - 1)) != 0)
5268 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5269 d->name);
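/* Expand the argument list of the builtin call EXP into OPS[], storing
   TARGET in OPS[0] first when the builtin returns a value.  */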
5274 static void
5275 expand_builtin_args (struct spu_builtin_description *d, tree exp,
5276 rtx target, rtx ops[])
5278 enum insn_code icode = d->icode;
5279 int i = 0, a;
5281 /* Expand the arguments into rtl. */
5283 if (d->parm[0] != SPU_BTI_VOID)
5284 ops[i++] = target;
5286 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
5288 tree arg = CALL_EXPR_ARG (exp, a);
5289 if (arg == 0)
5290 abort ();
5291 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
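/* Expand a call EXP to the builtin described by D: set up the operands,
   emit the insn D->icode and return the result (or TARGET).  */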
5295 static rtx
5296 spu_expand_builtin_1 (struct spu_builtin_description *d,
5297 tree exp, rtx target)
5299 rtx pat;
5300 rtx ops[8];
5301 enum insn_code icode = d->icode;
5302 enum machine_mode mode, tmode;
5303 int i, p;
5304 tree return_type;
5306 /* Set up ops[] with values from arglist. */
5307 expand_builtin_args (d, exp, target, ops);
5309 /* Handle the target operand which must be operand 0. */
5310 i = 0;
5311 if (d->parm[0] != SPU_BTI_VOID)
 5314 /* We prefer the mode specified for the match_operand; otherwise we
5315 use the mode from the builtin function prototype. */
5316 tmode = insn_data[d->icode].operand[0].mode;
5317 if (tmode == VOIDmode)
5318 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5320 /* Try to use target because not using it can lead to extra copies
 5321 and when we are using all of the registers, extra copies lead
5322 to extra spills. */
5323 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5324 ops[0] = target;
5325 else
5326 target = ops[0] = gen_reg_rtx (tmode);
5328 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5329 abort ();
5331 i++;
5334 if (d->fcode == SPU_MASK_FOR_LOAD)
5336 enum machine_mode mode = insn_data[icode].operand[1].mode;
5337 tree arg;
5338 rtx addr, op, pat;
5340 /* get addr */
5341 arg = CALL_EXPR_ARG (exp, 0);
5342 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5343 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5344 addr = memory_address (mode, op);
5346 /* negate addr */
5347 op = gen_reg_rtx (GET_MODE (addr));
5348 emit_insn (gen_rtx_SET (VOIDmode, op,
5349 gen_rtx_NEG (GET_MODE (addr), addr)));
5350 op = gen_rtx_MEM (mode, op);
5352 pat = GEN_FCN (icode) (target, op);
5353 if (!pat)
5354 return 0;
5355 emit_insn (pat);
5356 return target;
 5359 /* Ignore align_hint, but still expand its args in case they have
5360 side effects. */
5361 if (icode == CODE_FOR_spu_align_hint)
5362 return 0;
5364 /* Handle the rest of the operands. */
5365 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5367 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5368 mode = insn_data[d->icode].operand[i].mode;
5369 else
5370 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5372 /* mode can be VOIDmode here for labels */
5374 /* For specific intrinsics with an immediate operand, e.g.,
5375 si_ai(), we sometimes need to convert the scalar argument to a
5376 vector argument by splatting the scalar. */
5377 if (VECTOR_MODE_P (mode)
5378 && (GET_CODE (ops[i]) == CONST_INT
5379 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5380 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
5382 if (GET_CODE (ops[i]) == CONST_INT)
5383 ops[i] = spu_const (mode, INTVAL (ops[i]));
5384 else
5386 rtx reg = gen_reg_rtx (mode);
5387 enum machine_mode imode = GET_MODE_INNER (mode);
5388 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5389 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5390 if (imode != GET_MODE (ops[i]))
5391 ops[i] = convert_to_mode (imode, ops[i],
5392 TYPE_UNSIGNED (spu_builtin_types
5393 [d->parm[i]]));
5394 emit_insn (gen_spu_splats (reg, ops[i]));
5395 ops[i] = reg;
5399 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5401 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5402 ops[i] = spu_force_reg (mode, ops[i]);
5405 switch (insn_data[icode].n_operands)
5407 case 0:
5408 pat = GEN_FCN (icode) (0);
5409 break;
5410 case 1:
5411 pat = GEN_FCN (icode) (ops[0]);
5412 break;
5413 case 2:
5414 pat = GEN_FCN (icode) (ops[0], ops[1]);
5415 break;
5416 case 3:
5417 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5418 break;
5419 case 4:
5420 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5421 break;
5422 case 5:
5423 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5424 break;
5425 case 6:
5426 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5427 break;
5428 default:
5429 abort ();
5432 if (!pat)
5433 abort ();
5435 if (d->type == B_CALL || d->type == B_BISLED)
5436 emit_call_insn (pat);
5437 else if (d->type == B_JUMP)
5439 emit_jump_insn (pat);
5440 emit_barrier ();
5442 else
5443 emit_insn (pat);
5445 return_type = spu_builtin_types[d->parm[0]];
5446 if (d->parm[0] != SPU_BTI_VOID
5447 && GET_MODE (target) != TYPE_MODE (return_type))
 5449 /* target is the return value. It should always have the mode of
 5450 the builtin function prototype. */
5451 target = spu_force_reg (TYPE_MODE (return_type), target);
5454 return target;
5458 spu_expand_builtin (tree exp,
5459 rtx target,
5460 rtx subtarget ATTRIBUTE_UNUSED,
5461 enum machine_mode mode ATTRIBUTE_UNUSED,
5462 int ignore ATTRIBUTE_UNUSED)
5464 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5465 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5466 struct spu_builtin_description *d;
5468 if (fcode < NUM_SPU_BUILTINS)
5470 d = &spu_builtins[fcode];
5472 return spu_expand_builtin_1 (d, exp, target);
5474 abort ();
5477 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5478 static tree
5479 spu_builtin_mul_widen_even (tree type)
5481 switch (TYPE_MODE (type))
5483 case V8HImode:
5484 if (TYPE_UNSIGNED (type))
5485 return spu_builtins[SPU_MULE_0].fndecl;
5486 else
5487 return spu_builtins[SPU_MULE_1].fndecl;
5488 break;
5489 default:
5490 return NULL_TREE;
5494 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5495 static tree
5496 spu_builtin_mul_widen_odd (tree type)
5498 switch (TYPE_MODE (type))
5500 case V8HImode:
5501 if (TYPE_UNSIGNED (type))
5502 return spu_builtins[SPU_MULO_1].fndecl;
5503 else
5504 return spu_builtins[SPU_MULO_0].fndecl;
5505 break;
5506 default:
5507 return NULL_TREE;
5511 /* Implement targetm.vectorize.builtin_mask_for_load. */
5512 static tree
5513 spu_builtin_mask_for_load (void)
5515 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5516 gcc_assert (d);
5517 return d->fndecl;
5520 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5521 static int
5522 spu_builtin_vectorization_cost (bool runtime_test)
 5524 /* If the branch of the runtime test is taken, i.e., the vectorized
 5525 version is skipped, this incurs a misprediction cost (because the
5526 vectorized version is expected to be the fall-through). So we subtract
5527 the latency of a mispredicted branch from the costs that are incurred
5528 when the vectorized version is executed. */
5529 if (runtime_test)
5530 return -19;
5531 else
5532 return 0;
 5535 /* Return true iff a data reference of TYPE can reach vector alignment (16)
 5536 after applying N iterations. This routine does not determine
 5537 how many iterations are required to reach the desired alignment. */
5539 static bool
5540 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5542 if (is_packed)
5543 return false;
5545 /* All other types are naturally aligned. */
5546 return true;
5549 /* Count the total number of instructions in each pipe and return the
5550 maximum, which is used as the Minimum Iteration Interval (MII)
5551 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 5552 -2 means the instruction can go in either pipe0 or pipe1. */
5553 static int
5554 spu_sms_res_mii (struct ddg *g)
5556 int i;
5557 unsigned t[4] = {0, 0, 0, 0};
5559 for (i = 0; i < g->num_nodes; i++)
5561 rtx insn = g->nodes[i].insn;
5562 int p = get_pipe (insn) + 2;
5564 assert (p >= 0);
5565 assert (p < 4);
5567 t[p]++;
5568 if (dump_file && INSN_P (insn))
5569 fprintf (dump_file, "i%d %s %d %d\n",
5570 INSN_UID (insn),
5571 insn_data[INSN_CODE(insn)].name,
5572 p, t[p]);
5574 if (dump_file)
5575 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5577 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5581 void
5582 spu_init_expanders (void)
 5584 /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
5585 * frame_pointer_needed is true. We don't know that until we're
5586 * expanding the prologue. */
5587 if (cfun)
5588 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
5591 static enum machine_mode
5592 spu_libgcc_cmp_return_mode (void)
 5595 /* For SPU, word_mode is TImode, so it is better to use SImode
 5596 for compare returns. */
5597 return SImode;
5600 static enum machine_mode
5601 spu_libgcc_shift_count_mode (void)
 5603 /* For SPU, word_mode is TImode, so it is better to use SImode
 5604 for shift counts. */
5605 return SImode;