/* gcc/config/spu/spu.c */
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "gimple.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
56 #include "ddg.h"
57 #include "sbitmap.h"
58 #include "timevar.h"
59 #include "df.h"
61 /* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  int low, high;
};
67 static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
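/* Illustrative reading of the table above: SPU_BTI_S10 spans
   -0x200..0x1ff, the signed 10-bit immediate range of instructions
   such as "ai" and "cgti"; constant builtin arguments are checked
   against the bounds of their row.  */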
83 /* Target specific attribute specifications. */
84 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
86 /* Prototypes and external defs. */
87 static void spu_init_builtins (void);
88 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91 static rtx get_pic_reg (void);
92 static int need_to_save_reg (int regno, int saving);
93 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97 static void emit_nop_for_insn (rtx insn);
98 static bool insn_clobbers_hbr (rtx insn);
99 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
100 int distance, sbitmap blocks);
101 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
103 static rtx get_branch_target (rtx branch);
104 static void spu_machine_dependent_reorg (void);
105 static int spu_sched_issue_rate (void);
106 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108 static int get_pipe (rtx insn);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static void spu_sched_init_global (FILE *, int, int);
111 static void spu_sched_init (FILE *, int, int);
112 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
113 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119 static int spu_naked_function_p (tree func);
120 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
122 static tree spu_build_builtin_va_list (void);
123 static void spu_va_start (tree, rtx);
124 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
126 static int regno_aligned_for_load (int regno);
127 static int store_with_one_insn_p (rtx mem);
128 static int mem_is_padded_component_ref (rtx x);
129 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130 static void spu_asm_globalize_label (FILE * file, const char *name);
131 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
132 int *total, bool speed);
133 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134 static void spu_init_libfuncs (void);
135 static bool spu_return_in_memory (const_tree type, const_tree fntype);
136 static void fix_range (const char *);
137 static void spu_encode_section_info (tree, rtx, int);
138 static tree spu_builtin_mul_widen_even (tree);
139 static tree spu_builtin_mul_widen_odd (tree);
140 static tree spu_builtin_mask_for_load (void);
141 static int spu_builtin_vectorization_cost (bool);
142 static bool spu_vector_alignment_reachable (const_tree, bool);
143 static tree spu_builtin_vec_perm (tree, tree *);
144 static int spu_sms_res_mii (struct ddg *g);
145 static void asm_file_start (void);
146 static unsigned int spu_section_type_flags (tree, const char *, int);
148 extern const char *reg_names[];
149 rtx spu_compare_op0, spu_compare_op1;
151 /* Which instruction set architecture to use. */
152 int spu_arch;
153 /* Which cpu are we tuning for. */
154 int spu_tune;
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
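/* For example: with the default of 2 nops allowed, spu_hint_dist is
   8*4 - 2*4 = 24 bytes, i.e. a hint is only inserted when at least 6
   instructions (24 bytes) separate it from the branch, and up to 2
   nops may then be added to reach the required 8 instructions.  */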
164 /* Determines whether we run variable tracking in machine dependent
165 reorganization. */
166 static int spu_flag_var_tracking;
168 enum spu_immediate {
169 SPU_NONE,
170 SPU_IL,
171 SPU_ILA,
172 SPU_ILH,
173 SPU_ILHU,
174 SPU_ORI,
175 SPU_ORHI,
176 SPU_ORBI,
177 SPU_IOHL
179 enum immediate_class
181 IC_POOL, /* constant pool */
182 IC_IL1, /* one il* instruction */
183 IC_IL2, /* both ilhu and iohl instructions */
184 IC_IL1s, /* one il* instruction */
185 IC_IL2s, /* both ilhu and iohl instructions */
186 IC_FSMBI, /* the fsmbi instruction */
187 IC_CPAT, /* one of the c*d instructions */
188 IC_FSMBI2 /* fsmbi plus 1 other instruction */
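/* Illustrative classification: a constant such as 0x1234 fits a single
   "il" and is IC_IL1, while 0x12345678 needs the two-instruction
   sequence
       ilhu  rt, 0x1234
       iohl  rt, 0x5678
   and is IC_IL2.  (Sketch based on the SPU immediate forms; see
   which_immediate_load and classify_immediate below.)  */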
191 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
192 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
193 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
194 static enum immediate_class classify_immediate (rtx op,
195 enum machine_mode mode);
197 static enum machine_mode spu_unwind_word_mode (void);
199 static enum machine_mode
200 spu_libgcc_cmp_return_mode (void);
202 static enum machine_mode
203 spu_libgcc_shift_count_mode (void);
205 /* Built in types. */
206 tree spu_builtin_types[SPU_BTI_MAX];
208 /* TARGET overrides. */
210 #undef TARGET_INIT_BUILTINS
211 #define TARGET_INIT_BUILTINS spu_init_builtins
213 #undef TARGET_EXPAND_BUILTIN
214 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
216 #undef TARGET_UNWIND_WORD_MODE
217 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
219 /* The .8byte directive doesn't seem to work well for a 32 bit
220 architecture. */
221 #undef TARGET_ASM_UNALIGNED_DI_OP
222 #define TARGET_ASM_UNALIGNED_DI_OP NULL
224 #undef TARGET_RTX_COSTS
225 #define TARGET_RTX_COSTS spu_rtx_costs
227 #undef TARGET_ADDRESS_COST
228 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
230 #undef TARGET_SCHED_ISSUE_RATE
231 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
233 #undef TARGET_SCHED_INIT_GLOBAL
234 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
236 #undef TARGET_SCHED_INIT
237 #define TARGET_SCHED_INIT spu_sched_init
239 #undef TARGET_SCHED_VARIABLE_ISSUE
240 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
242 #undef TARGET_SCHED_REORDER
243 #define TARGET_SCHED_REORDER spu_sched_reorder
245 #undef TARGET_SCHED_REORDER2
246 #define TARGET_SCHED_REORDER2 spu_sched_reorder
248 #undef TARGET_SCHED_ADJUST_COST
249 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
251 const struct attribute_spec spu_attribute_table[];
252 #undef TARGET_ATTRIBUTE_TABLE
253 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
255 #undef TARGET_ASM_INTEGER
256 #define TARGET_ASM_INTEGER spu_assemble_integer
258 #undef TARGET_SCALAR_MODE_SUPPORTED_P
259 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
261 #undef TARGET_VECTOR_MODE_SUPPORTED_P
262 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
264 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
265 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
267 #undef TARGET_ASM_GLOBALIZE_LABEL
268 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
270 #undef TARGET_PASS_BY_REFERENCE
271 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
273 #undef TARGET_MUST_PASS_IN_STACK
274 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
276 #undef TARGET_BUILD_BUILTIN_VA_LIST
277 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
279 #undef TARGET_EXPAND_BUILTIN_VA_START
280 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
282 #undef TARGET_SETUP_INCOMING_VARARGS
283 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
285 #undef TARGET_MACHINE_DEPENDENT_REORG
286 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
288 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
289 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
291 #undef TARGET_DEFAULT_TARGET_FLAGS
292 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
294 #undef TARGET_INIT_LIBFUNCS
295 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
297 #undef TARGET_RETURN_IN_MEMORY
298 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
300 #undef TARGET_ENCODE_SECTION_INFO
301 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
303 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
304 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
306 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
307 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
309 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
310 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
312 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
313 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
315 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
316 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
318 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
319 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
321 #undef TARGET_LIBGCC_CMP_RETURN_MODE
322 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
324 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
325 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
327 #undef TARGET_SCHED_SMS_RES_MII
328 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
330 #undef TARGET_ASM_FILE_START
331 #define TARGET_ASM_FILE_START asm_file_start
333 #undef TARGET_SECTION_TYPE_FLAGS
334 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
336 struct gcc_target targetm = TARGET_INITIALIZER;
338 void
339 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
341 /* Override some of the default param values. With so many registers
342 larger values are better for these params. */
343 MAX_PENDING_LIST_LENGTH = 128;
345 /* With so many registers this is better on by default. */
346 flag_rename_registers = 1;
349 /* Sometimes certain combinations of command options do not make sense
350 on a particular target machine. You can define a macro
351 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
352 executed once just after all the command options have been parsed. */
353 void
354 spu_override_options (void)
/* Small loops will be completely unrolled at -O3.  For SPU it is more
   important to keep code small by default. */
358 if (!flag_unroll_loops && !flag_peel_loops
359 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
360 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
362 flag_omit_frame_pointer = 1;
364 /* Functions must be 8 byte aligned so we correctly handle dual issue */
365 if (align_functions < 8)
366 align_functions = 8;
368 spu_hint_dist = 8*4 - spu_max_nops*4;
369 if (spu_hint_dist < 0)
370 spu_hint_dist = 0;
372 if (spu_fixed_range_string)
373 fix_range (spu_fixed_range_string);
375 /* Determine processor architectural level. */
376 if (spu_arch_string)
378 if (strcmp (&spu_arch_string[0], "cell") == 0)
379 spu_arch = PROCESSOR_CELL;
380 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
381 spu_arch = PROCESSOR_CELLEDP;
382 else
383 error ("Unknown architecture '%s'", &spu_arch_string[0]);
386 /* Determine processor to tune for. */
387 if (spu_tune_string)
389 if (strcmp (&spu_tune_string[0], "cell") == 0)
390 spu_tune = PROCESSOR_CELL;
391 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
392 spu_tune = PROCESSOR_CELLEDP;
393 else
394 error ("Unknown architecture '%s'", &spu_tune_string[0]);
397 /* Change defaults according to the processor architecture. */
398 if (spu_arch == PROCESSOR_CELLEDP)
400 /* If no command line option has been otherwise specified, change
401 the default to -mno-safe-hints on celledp -- only the original
402 Cell/B.E. processors require this workaround. */
403 if (!(target_flags_explicit & MASK_SAFE_HINTS))
404 target_flags &= ~MASK_SAFE_HINTS;
407 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
410 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
411 struct attribute_spec.handler. */
413 /* Table of machine attributes. */
414 const struct attribute_spec spu_attribute_table[] =
416 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
417 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
418 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
419 { NULL, 0, 0, false, false, false, NULL }
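/* Usage sketch (illustrative, not from this file): the attributes above
   are attached in user source code, for example
       void handler (void) __attribute__ ((naked));
       typedef int vint __attribute__ ((spu_vector));
   "naked" requires a FUNCTION_DECL and suppresses prologue/epilogue
   emission (see spu_naked_function_p), while "spu_vector" applies to
   types and marks them as SPU vector types.  */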
422 /* True if MODE is valid for the target. By "valid", we mean able to
423 be manipulated in non-trivial ways. In particular, this means all
424 the arithmetic is supported. */
425 static bool
426 spu_scalar_mode_supported_p (enum machine_mode mode)
428 switch (mode)
430 case QImode:
431 case HImode:
432 case SImode:
433 case SFmode:
434 case DImode:
435 case TImode:
436 case DFmode:
437 return true;
439 default:
440 return false;
444 /* Similarly for vector modes. "Supported" here is less strict. At
445 least some operations are supported; need to check optabs or builtins
446 for further details. */
447 static bool
448 spu_vector_mode_supported_p (enum machine_mode mode)
450 switch (mode)
452 case V16QImode:
453 case V8HImode:
454 case V4SImode:
455 case V2DImode:
456 case V4SFmode:
457 case V2DFmode:
458 return true;
460 default:
461 return false;
465 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
466 least significant bytes of the outer mode. This function returns
467 TRUE for the SUBREG's where this is correct. */
469 valid_subreg (rtx op)
471 enum machine_mode om = GET_MODE (op);
472 enum machine_mode im = GET_MODE (SUBREG_REG (op));
473 return om != VOIDmode && im != VOIDmode
474 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
475 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
476 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
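/* Illustrative cases: (subreg:SI (reg:QI)) and (subreg:QI (reg:SI)) are
   accepted (both modes are at most 4 bytes), and (subreg:TI (reg:V4SI))
   is accepted (both are 16 bytes), but (subreg:DI (reg:SI)) is rejected
   because the SImode value does not end up in the least significant
   bytes of the DImode view on SPU.  */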
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
481 static rtx
482 adjust_operand (rtx op, HOST_WIDE_INT * start)
484 enum machine_mode mode;
485 int op_size;
486 /* Strip any paradoxical SUBREG. */
487 if (GET_CODE (op) == SUBREG
488 && (GET_MODE_BITSIZE (GET_MODE (op))
489 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
491 if (start)
492 *start -=
493 GET_MODE_BITSIZE (GET_MODE (op)) -
494 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
495 op = SUBREG_REG (op);
/* If it is smaller than SI, ensure a SUBREG to at least SImode.  */
498 op_size = GET_MODE_BITSIZE (GET_MODE (op));
499 if (op_size < 32)
501 if (start)
502 *start += 32 - op_size;
503 op_size = 32;
505 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
506 mode = mode_for_size (op_size, MODE_INT, 0);
507 if (mode != GET_MODE (op))
508 op = gen_rtx_SUBREG (mode, op, 0);
509 return op;
512 void
513 spu_expand_extv (rtx ops[], int unsignedp)
515 HOST_WIDE_INT width = INTVAL (ops[2]);
516 HOST_WIDE_INT start = INTVAL (ops[3]);
517 HOST_WIDE_INT src_size, dst_size;
518 enum machine_mode src_mode, dst_mode;
519 rtx dst = ops[0], src = ops[1];
520 rtx s;
522 dst = adjust_operand (ops[0], 0);
523 dst_mode = GET_MODE (dst);
524 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
526 src = adjust_operand (src, &start);
527 src_mode = GET_MODE (src);
528 src_size = GET_MODE_BITSIZE (GET_MODE (src));
530 if (start > 0)
532 s = gen_reg_rtx (src_mode);
533 switch (src_mode)
535 case SImode:
536 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
537 break;
538 case DImode:
539 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
540 break;
541 case TImode:
542 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
543 break;
544 default:
545 abort ();
547 src = s;
550 if (width < src_size)
552 rtx pat;
553 int icode;
554 switch (src_mode)
556 case SImode:
557 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
558 break;
559 case DImode:
560 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
561 break;
562 case TImode:
563 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
564 break;
565 default:
566 abort ();
568 s = gen_reg_rtx (src_mode);
569 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
570 emit_insn (pat);
571 src = s;
574 convert_move (dst, src, unsignedp);
577 void
578 spu_expand_insv (rtx ops[])
580 HOST_WIDE_INT width = INTVAL (ops[1]);
581 HOST_WIDE_INT start = INTVAL (ops[2]);
582 HOST_WIDE_INT maskbits;
583 enum machine_mode dst_mode, src_mode;
584 rtx dst = ops[0], src = ops[3];
585 int dst_size, src_size;
586 rtx mask;
587 rtx shift_reg;
588 int shift;
591 if (GET_CODE (ops[0]) == MEM)
592 dst = gen_reg_rtx (TImode);
593 else
594 dst = adjust_operand (dst, &start);
595 dst_mode = GET_MODE (dst);
596 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
598 if (CONSTANT_P (src))
600 enum machine_mode m =
601 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
602 src = force_reg (m, convert_to_mode (m, src, 0));
604 src = adjust_operand (src, 0);
605 src_mode = GET_MODE (src);
606 src_size = GET_MODE_BITSIZE (GET_MODE (src));
608 mask = gen_reg_rtx (dst_mode);
609 shift_reg = gen_reg_rtx (dst_mode);
610 shift = dst_size - start - width;
612 /* It's not safe to use subreg here because the compiler assumes
613 that the SUBREG_REG is right justified in the SUBREG. */
614 convert_move (shift_reg, src, 1);
616 if (shift > 0)
618 switch (dst_mode)
620 case SImode:
621 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
622 break;
623 case DImode:
624 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
625 break;
626 case TImode:
627 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
628 break;
629 default:
630 abort ();
633 else if (shift < 0)
634 abort ();
636 switch (dst_size)
638 case 32:
639 maskbits = (-1ll << (32 - width - start));
640 if (start)
641 maskbits += (1ll << (32 - start));
642 emit_move_insn (mask, GEN_INT (maskbits));
643 break;
644 case 64:
645 maskbits = (-1ll << (64 - width - start));
646 if (start)
647 maskbits += (1ll << (64 - start));
648 emit_move_insn (mask, GEN_INT (maskbits));
649 break;
650 case 128:
652 unsigned char arr[16];
653 int i = start / 8;
654 memset (arr, 0, sizeof (arr));
655 arr[i] = 0xff >> (start & 7);
656 for (i++; i <= (start + width - 1) / 8; i++)
657 arr[i] = 0xff;
658 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
659 emit_move_insn (mask, array_to_constant (TImode, arr));
661 break;
662 default:
663 abort ();
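/* Worked example for the 32-bit case above: with start = 8 and
   width = 16,
     maskbits = (-1ll << 8) + (1ll << 24) = 0x00ffff00,
   i.e. ones exactly in the 16 destination bits being replaced, counting
   the start offset from the most significant bit.  */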
665 if (GET_CODE (ops[0]) == MEM)
667 rtx aligned = gen_reg_rtx (SImode);
668 rtx low = gen_reg_rtx (SImode);
669 rtx addr = gen_reg_rtx (SImode);
670 rtx rotl = gen_reg_rtx (SImode);
671 rtx mask0 = gen_reg_rtx (TImode);
672 rtx mem;
674 emit_move_insn (addr, XEXP (ops[0], 0));
675 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
676 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
677 emit_insn (gen_negsi2 (rotl, low));
678 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
679 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
680 mem = change_address (ops[0], TImode, aligned);
681 set_mem_alias_set (mem, 0);
682 emit_move_insn (dst, mem);
683 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
684 emit_move_insn (mem, dst);
685 if (start + width > MEM_ALIGN (ops[0]))
687 rtx shl = gen_reg_rtx (SImode);
688 rtx mask1 = gen_reg_rtx (TImode);
689 rtx dst1 = gen_reg_rtx (TImode);
690 rtx mem1;
691 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
692 emit_insn (gen_shlqby_ti (mask1, mask, shl));
693 mem1 = adjust_address (mem, TImode, 16);
694 set_mem_alias_set (mem1, 0);
695 emit_move_insn (dst1, mem1);
696 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
697 emit_move_insn (mem1, dst1);
700 else
701 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
706 spu_expand_block_move (rtx ops[])
708 HOST_WIDE_INT bytes, align, offset;
709 rtx src, dst, sreg, dreg, target;
710 int i;
711 if (GET_CODE (ops[2]) != CONST_INT
712 || GET_CODE (ops[3]) != CONST_INT
713 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
714 return 0;
716 bytes = INTVAL (ops[2]);
717 align = INTVAL (ops[3]);
719 if (bytes <= 0)
720 return 1;
722 dst = ops[0];
723 src = ops[1];
725 if (align == 16)
727 for (offset = 0; offset + 16 <= bytes; offset += 16)
729 dst = adjust_address (ops[0], V16QImode, offset);
730 src = adjust_address (ops[1], V16QImode, offset);
731 emit_move_insn (dst, src);
733 if (offset < bytes)
735 rtx mask;
736 unsigned char arr[16] = { 0 };
737 for (i = 0; i < bytes - offset; i++)
738 arr[i] = 0xff;
739 dst = adjust_address (ops[0], V16QImode, offset);
740 src = adjust_address (ops[1], V16QImode, offset);
741 mask = gen_reg_rtx (V16QImode);
742 sreg = gen_reg_rtx (V16QImode);
743 dreg = gen_reg_rtx (V16QImode);
744 target = gen_reg_rtx (V16QImode);
745 emit_move_insn (mask, array_to_constant (V16QImode, arr));
746 emit_move_insn (dreg, dst);
747 emit_move_insn (sreg, src);
748 emit_insn (gen_selb (target, dreg, sreg, mask));
749 emit_move_insn (dst, target);
751 return 1;
753 return 0;
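/* Illustrative case: a 20-byte copy with 16-byte alignment emits one
   full 16-byte V16QI move for bytes 0..15, then for the remaining 4
   bytes loads both the source and destination quadwords and merges
   them with selb using a mask whose first 4 bytes are 0xff, so only
   those bytes of the destination are overwritten.  */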
756 enum spu_comp_code
757 { SPU_EQ, SPU_GT, SPU_GTU };
759 int spu_comp_icode[12][3] = {
760 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
761 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
762 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
763 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
764 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
765 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
766 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
767 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
768 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
769 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
770 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
771 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
779 void
780 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
782 int reverse_compare = 0;
783 int reverse_test = 0;
784 rtx compare_result, eq_result;
785 rtx comp_rtx, eq_rtx;
786 rtx target = operands[0];
787 enum machine_mode comp_mode;
788 enum machine_mode op_mode;
789 enum spu_comp_code scode, eq_code;
790 enum insn_code ior_code;
791 int index;
792 int eq_test = 0;
794 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
795 and so on, to keep the constant in operand 1. */
796 if (GET_CODE (spu_compare_op1) == CONST_INT)
798 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
799 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
800 switch (code)
802 case GE:
803 spu_compare_op1 = GEN_INT (val);
804 code = GT;
805 break;
806 case LT:
807 spu_compare_op1 = GEN_INT (val);
808 code = LE;
809 break;
810 case GEU:
811 spu_compare_op1 = GEN_INT (val);
812 code = GTU;
813 break;
814 case LTU:
815 spu_compare_op1 = GEN_INT (val);
816 code = LEU;
817 break;
818 default:
819 break;
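/* For example: (x >= 5) becomes (x > 4) and (x < 5) becomes (x <= 4),
   keeping the constant in operand 1 so the GT/LE/GTU/LEU handling
   below applies.  */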
823 comp_mode = SImode;
824 op_mode = GET_MODE (spu_compare_op0);
826 switch (code)
828 case GE:
829 scode = SPU_GT;
830 if (HONOR_NANS (op_mode))
832 reverse_compare = 0;
833 reverse_test = 0;
834 eq_test = 1;
835 eq_code = SPU_EQ;
837 else
839 reverse_compare = 1;
840 reverse_test = 1;
842 break;
843 case LE:
844 scode = SPU_GT;
845 if (HONOR_NANS (op_mode))
847 reverse_compare = 1;
848 reverse_test = 0;
849 eq_test = 1;
850 eq_code = SPU_EQ;
852 else
854 reverse_compare = 0;
855 reverse_test = 1;
857 break;
858 case LT:
859 reverse_compare = 1;
860 reverse_test = 0;
861 scode = SPU_GT;
862 break;
863 case GEU:
864 reverse_compare = 1;
865 reverse_test = 1;
866 scode = SPU_GTU;
867 break;
868 case LEU:
869 reverse_compare = 0;
870 reverse_test = 1;
871 scode = SPU_GTU;
872 break;
873 case LTU:
874 reverse_compare = 1;
875 reverse_test = 0;
876 scode = SPU_GTU;
877 break;
878 case NE:
879 reverse_compare = 0;
880 reverse_test = 1;
881 scode = SPU_EQ;
882 break;
884 case EQ:
885 scode = SPU_EQ;
886 break;
887 case GT:
888 scode = SPU_GT;
889 break;
890 case GTU:
891 scode = SPU_GTU;
892 break;
893 default:
894 scode = SPU_EQ;
895 break;
898 switch (op_mode)
900 case QImode:
901 index = 0;
902 comp_mode = QImode;
903 break;
904 case HImode:
905 index = 1;
906 comp_mode = HImode;
907 break;
908 case SImode:
909 index = 2;
910 break;
911 case DImode:
912 index = 3;
913 break;
914 case TImode:
915 index = 4;
916 break;
917 case SFmode:
918 index = 5;
919 break;
920 case DFmode:
921 index = 6;
922 break;
923 case V16QImode:
924 index = 7;
925 comp_mode = op_mode;
926 break;
927 case V8HImode:
928 index = 8;
929 comp_mode = op_mode;
930 break;
931 case V4SImode:
932 index = 9;
933 comp_mode = op_mode;
934 break;
935 case V4SFmode:
936 index = 10;
937 comp_mode = V4SImode;
938 break;
939 case V2DFmode:
940 index = 11;
941 comp_mode = V2DImode;
942 break;
943 case V2DImode:
944 default:
945 abort ();
948 if (GET_MODE (spu_compare_op1) == DFmode
949 && (scode != SPU_GT && scode != SPU_EQ))
950 abort ();
952 if (is_set == 0 && spu_compare_op1 == const0_rtx
953 && (GET_MODE (spu_compare_op0) == SImode
954 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
956 /* Don't need to set a register with the result when we are
957 comparing against zero and branching. */
958 reverse_test = !reverse_test;
959 compare_result = spu_compare_op0;
961 else
963 compare_result = gen_reg_rtx (comp_mode);
965 if (reverse_compare)
967 rtx t = spu_compare_op1;
968 spu_compare_op1 = spu_compare_op0;
969 spu_compare_op0 = t;
972 if (spu_comp_icode[index][scode] == 0)
973 abort ();
975 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
976 (spu_compare_op0, op_mode))
977 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
978 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
979 (spu_compare_op1, op_mode))
980 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
981 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
982 spu_compare_op0,
983 spu_compare_op1);
984 if (comp_rtx == 0)
985 abort ();
986 emit_insn (comp_rtx);
988 if (eq_test)
990 eq_result = gen_reg_rtx (comp_mode);
991 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
992 spu_compare_op0,
993 spu_compare_op1);
994 if (eq_rtx == 0)
995 abort ();
996 emit_insn (eq_rtx);
997 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
998 gcc_assert (ior_code != CODE_FOR_nothing);
999 emit_insn (GEN_FCN (ior_code)
1000 (compare_result, compare_result, eq_result));
1004 if (is_set == 0)
1006 rtx bcomp;
1007 rtx loc_ref;
1009 /* We don't have branch on QI compare insns, so we convert the
1010 QI compare result to a HI result. */
1011 if (comp_mode == QImode)
1013 rtx old_res = compare_result;
1014 compare_result = gen_reg_rtx (HImode);
1015 comp_mode = HImode;
1016 emit_insn (gen_extendqihi2 (compare_result, old_res));
1019 if (reverse_test)
1020 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1021 else
1022 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1024 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1025 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1026 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1027 loc_ref, pc_rtx)));
1029 else if (is_set == 2)
1031 int compare_size = GET_MODE_BITSIZE (comp_mode);
1032 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1033 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1034 rtx select_mask;
1035 rtx op_t = operands[2];
1036 rtx op_f = operands[3];
1038 /* The result of the comparison can be SI, HI or QI mode. Create a
1039 mask based on that result. */
1040 if (target_size > compare_size)
1042 select_mask = gen_reg_rtx (mode);
1043 emit_insn (gen_extend_compare (select_mask, compare_result));
1045 else if (target_size < compare_size)
1046 select_mask =
1047 gen_rtx_SUBREG (mode, compare_result,
1048 (compare_size - target_size) / BITS_PER_UNIT);
1049 else if (comp_mode != mode)
1050 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1051 else
1052 select_mask = compare_result;
1054 if (GET_MODE (target) != GET_MODE (op_t)
1055 || GET_MODE (target) != GET_MODE (op_f))
1056 abort ();
1058 if (reverse_test)
1059 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1060 else
1061 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1063 else
1065 if (reverse_test)
1066 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1067 gen_rtx_NOT (comp_mode, compare_result)));
1068 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1069 emit_insn (gen_extendhisi2 (target, compare_result));
1070 else if (GET_MODE (target) == SImode
1071 && GET_MODE (compare_result) == QImode)
1072 emit_insn (gen_extend_compare (target, compare_result));
1073 else
1074 emit_move_insn (target, compare_result);
1078 HOST_WIDE_INT
1079 const_double_to_hwint (rtx x)
1081 HOST_WIDE_INT val;
1082 REAL_VALUE_TYPE rv;
1083 if (GET_MODE (x) == SFmode)
1085 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1086 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1088 else if (GET_MODE (x) == DFmode)
1090 long l[2];
1091 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1092 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1093 val = l[0];
1094 val = (val << 32) | (l[1] & 0xffffffff);
1096 else
1097 abort ();
1098 return val;
1102 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1104 long tv[2];
1105 REAL_VALUE_TYPE rv;
1106 gcc_assert (mode == SFmode || mode == DFmode);
1108 if (mode == SFmode)
1109 tv[0] = (v << 32) >> 32;
1110 else if (mode == DFmode)
1112 tv[1] = (v << 32) >> 32;
1113 tv[0] = v >> 32;
1115 real_from_target (&rv, tv, mode);
1116 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
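/* Illustrative round trip: 1.0f has the SFmode bit pattern 0x3f800000,
   so const_double_to_hwint returns 0x3f800000 for it and
   hwint_to_const_double (SFmode, 0x3f800000) rebuilds the same
   CONST_DOUBLE.  */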
1119 void
1120 print_operand_address (FILE * file, register rtx addr)
1122 rtx reg;
1123 rtx offset;
1125 if (GET_CODE (addr) == AND
1126 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1127 && INTVAL (XEXP (addr, 1)) == -16)
1128 addr = XEXP (addr, 0);
1130 switch (GET_CODE (addr))
1132 case REG:
1133 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1134 break;
1136 case PLUS:
1137 reg = XEXP (addr, 0);
1138 offset = XEXP (addr, 1);
1139 if (GET_CODE (offset) == REG)
1141 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1142 reg_names[REGNO (offset)]);
1144 else if (GET_CODE (offset) == CONST_INT)
1146 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1147 INTVAL (offset), reg_names[REGNO (reg)]);
1149 else
1150 abort ();
1151 break;
1153 case CONST:
1154 case LABEL_REF:
1155 case SYMBOL_REF:
1156 case CONST_INT:
1157 output_addr_const (file, addr);
1158 break;
1160 default:
1161 debug_rtx (addr);
1162 abort ();
1166 void
1167 print_operand (FILE * file, rtx x, int code)
1169 enum machine_mode mode = GET_MODE (x);
1170 HOST_WIDE_INT val;
1171 unsigned char arr[16];
1172 int xcode = GET_CODE (x);
1173 int i, info;
1174 if (GET_MODE (x) == VOIDmode)
1175 switch (code)
1177 case 'L': /* 128 bits, signed */
1178 case 'm': /* 128 bits, signed */
1179 case 'T': /* 128 bits, signed */
1180 case 't': /* 128 bits, signed */
1181 mode = TImode;
1182 break;
1183 case 'K': /* 64 bits, signed */
1184 case 'k': /* 64 bits, signed */
1185 case 'D': /* 64 bits, signed */
1186 case 'd': /* 64 bits, signed */
1187 mode = DImode;
1188 break;
1189 case 'J': /* 32 bits, signed */
1190 case 'j': /* 32 bits, signed */
1191 case 's': /* 32 bits, signed */
1192 case 'S': /* 32 bits, signed */
1193 mode = SImode;
1194 break;
1196 switch (code)
1199 case 'j': /* 32 bits, signed */
1200 case 'k': /* 64 bits, signed */
1201 case 'm': /* 128 bits, signed */
1202 if (xcode == CONST_INT
1203 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1205 gcc_assert (logical_immediate_p (x, mode));
1206 constant_to_array (mode, x, arr);
1207 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1208 val = trunc_int_for_mode (val, SImode);
1209 switch (which_logical_immediate (val))
1211 case SPU_ORI:
1212 break;
1213 case SPU_ORHI:
1214 fprintf (file, "h");
1215 break;
1216 case SPU_ORBI:
1217 fprintf (file, "b");
1218 break;
1219 default:
1220 gcc_unreachable();
1223 else
1224 gcc_unreachable();
1225 return;
1227 case 'J': /* 32 bits, signed */
1228 case 'K': /* 64 bits, signed */
1229 case 'L': /* 128 bits, signed */
1230 if (xcode == CONST_INT
1231 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1233 gcc_assert (logical_immediate_p (x, mode)
1234 || iohl_immediate_p (x, mode));
1235 constant_to_array (mode, x, arr);
1236 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1237 val = trunc_int_for_mode (val, SImode);
1238 switch (which_logical_immediate (val))
1240 case SPU_ORI:
1241 case SPU_IOHL:
1242 break;
1243 case SPU_ORHI:
1244 val = trunc_int_for_mode (val, HImode);
1245 break;
1246 case SPU_ORBI:
1247 val = trunc_int_for_mode (val, QImode);
1248 break;
1249 default:
1250 gcc_unreachable();
1252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1254 else
1255 gcc_unreachable();
1256 return;
1258 case 't': /* 128 bits, signed */
1259 case 'd': /* 64 bits, signed */
1260 case 's': /* 32 bits, signed */
1261 if (CONSTANT_P (x))
1263 enum immediate_class c = classify_immediate (x, mode);
1264 switch (c)
1266 case IC_IL1:
1267 constant_to_array (mode, x, arr);
1268 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1269 val = trunc_int_for_mode (val, SImode);
1270 switch (which_immediate_load (val))
1272 case SPU_IL:
1273 break;
1274 case SPU_ILA:
1275 fprintf (file, "a");
1276 break;
1277 case SPU_ILH:
1278 fprintf (file, "h");
1279 break;
1280 case SPU_ILHU:
1281 fprintf (file, "hu");
1282 break;
1283 default:
1284 gcc_unreachable ();
1286 break;
1287 case IC_CPAT:
1288 constant_to_array (mode, x, arr);
1289 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1290 if (info == 1)
1291 fprintf (file, "b");
1292 else if (info == 2)
1293 fprintf (file, "h");
1294 else if (info == 4)
1295 fprintf (file, "w");
1296 else if (info == 8)
1297 fprintf (file, "d");
1298 break;
1299 case IC_IL1s:
1300 if (xcode == CONST_VECTOR)
1302 x = CONST_VECTOR_ELT (x, 0);
1303 xcode = GET_CODE (x);
1305 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1306 fprintf (file, "a");
1307 else if (xcode == HIGH)
1308 fprintf (file, "hu");
1309 break;
1310 case IC_FSMBI:
1311 case IC_FSMBI2:
1312 case IC_IL2:
1313 case IC_IL2s:
1314 case IC_POOL:
1315 abort ();
1318 else
1319 gcc_unreachable ();
1320 return;
1322 case 'T': /* 128 bits, signed */
1323 case 'D': /* 64 bits, signed */
1324 case 'S': /* 32 bits, signed */
1325 if (CONSTANT_P (x))
1327 enum immediate_class c = classify_immediate (x, mode);
1328 switch (c)
1330 case IC_IL1:
1331 constant_to_array (mode, x, arr);
1332 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1333 val = trunc_int_for_mode (val, SImode);
1334 switch (which_immediate_load (val))
1336 case SPU_IL:
1337 case SPU_ILA:
1338 break;
1339 case SPU_ILH:
1340 case SPU_ILHU:
1341 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1342 break;
1343 default:
1344 gcc_unreachable ();
1346 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1347 break;
1348 case IC_FSMBI:
1349 constant_to_array (mode, x, arr);
1350 val = 0;
1351 for (i = 0; i < 16; i++)
1353 val <<= 1;
1354 val |= arr[i] & 1;
1356 print_operand (file, GEN_INT (val), 0);
1357 break;
1358 case IC_CPAT:
1359 constant_to_array (mode, x, arr);
1360 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1362 break;
1363 case IC_IL1s:
1364 if (xcode == HIGH)
1365 x = XEXP (x, 0);
1366 if (GET_CODE (x) == CONST_VECTOR)
1367 x = CONST_VECTOR_ELT (x, 0);
1368 output_addr_const (file, x);
1369 if (xcode == HIGH)
1370 fprintf (file, "@h");
1371 break;
1372 case IC_IL2:
1373 case IC_IL2s:
1374 case IC_FSMBI2:
1375 case IC_POOL:
1376 abort ();
1379 else
1380 gcc_unreachable ();
1381 return;
1383 case 'C':
1384 if (xcode == CONST_INT)
/* Only the 4 least significant bits are relevant for generating
   control word instructions. */
1388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1389 return;
1391 break;
1393 case 'M': /* print code for c*d */
1394 if (GET_CODE (x) == CONST_INT)
1395 switch (INTVAL (x))
1397 case 1:
1398 fprintf (file, "b");
1399 break;
1400 case 2:
1401 fprintf (file, "h");
1402 break;
1403 case 4:
1404 fprintf (file, "w");
1405 break;
1406 case 8:
1407 fprintf (file, "d");
1408 break;
1409 default:
1410 gcc_unreachable();
1412 else
1413 gcc_unreachable();
1414 return;
1416 case 'N': /* Negate the operand */
1417 if (xcode == CONST_INT)
1418 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1419 else if (xcode == CONST_VECTOR)
1420 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1421 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1422 return;
1424 case 'I': /* enable/disable interrupts */
1425 if (xcode == CONST_INT)
1426 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1427 return;
1429 case 'b': /* branch modifiers */
1430 if (xcode == REG)
1431 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1432 else if (COMPARISON_P (x))
1433 fprintf (file, "%s", xcode == NE ? "n" : "");
1434 return;
1436 case 'i': /* indirect call */
1437 if (xcode == MEM)
1439 if (GET_CODE (XEXP (x, 0)) == REG)
1440 /* Used in indirect function calls. */
1441 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1442 else
1443 output_address (XEXP (x, 0));
1445 return;
1447 case 'p': /* load/store */
1448 if (xcode == MEM)
1450 x = XEXP (x, 0);
1451 xcode = GET_CODE (x);
1453 if (xcode == AND)
1455 x = XEXP (x, 0);
1456 xcode = GET_CODE (x);
1458 if (xcode == REG)
1459 fprintf (file, "d");
1460 else if (xcode == CONST_INT)
1461 fprintf (file, "a");
1462 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1463 fprintf (file, "r");
1464 else if (xcode == PLUS || xcode == LO_SUM)
1466 if (GET_CODE (XEXP (x, 1)) == REG)
1467 fprintf (file, "x");
1468 else
1469 fprintf (file, "d");
1471 return;
1473 case 'e':
1474 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1475 val &= 0x7;
1476 output_addr_const (file, GEN_INT (val));
1477 return;
1479 case 'f':
1480 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1481 val &= 0x1f;
1482 output_addr_const (file, GEN_INT (val));
1483 return;
1485 case 'g':
1486 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1487 val &= 0x3f;
1488 output_addr_const (file, GEN_INT (val));
1489 return;
1491 case 'h':
1492 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1493 val = (val >> 3) & 0x1f;
1494 output_addr_const (file, GEN_INT (val));
1495 return;
1497 case 'E':
1498 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1499 val = -val;
1500 val &= 0x7;
1501 output_addr_const (file, GEN_INT (val));
1502 return;
1504 case 'F':
1505 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1506 val = -val;
1507 val &= 0x1f;
1508 output_addr_const (file, GEN_INT (val));
1509 return;
1511 case 'G':
1512 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1513 val = -val;
1514 val &= 0x3f;
1515 output_addr_const (file, GEN_INT (val));
1516 return;
1518 case 'H':
1519 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1520 val = -(val & -8ll);
1521 val = (val >> 3) & 0x1f;
1522 output_addr_const (file, GEN_INT (val));
1523 return;
1525 case 0:
1526 if (xcode == REG)
1527 fprintf (file, "%s", reg_names[REGNO (x)]);
1528 else if (xcode == MEM)
1529 output_address (XEXP (x, 0));
1530 else if (xcode == CONST_VECTOR)
1531 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1532 else
1533 output_addr_const (file, x);
1534 return;
1536 /* unused letters
1537 o qr uvw yz
1538 AB OPQR UVWXYZ */
1539 default:
1540 output_operand_lossage ("invalid %%xn code");
1542 gcc_unreachable ();
1545 extern char call_used_regs[];
1547 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1548 caller saved register. For leaf functions it is more efficient to
1549 use a volatile register because we won't need to save and restore the
1550 pic register. This routine is only valid after register allocation
1551 is completed, so we can pick an unused register. */
1552 static rtx
1553 get_pic_reg (void)
1555 rtx pic_reg = pic_offset_table_rtx;
1556 if (!reload_completed && !reload_in_progress)
1557 abort ();
1558 return pic_reg;
1561 /* Split constant addresses to handle cases that are too large.
1562 Add in the pic register when in PIC mode.
1563 Split immediates that require more than 1 instruction. */
1565 spu_split_immediate (rtx * ops)
1567 enum machine_mode mode = GET_MODE (ops[0]);
1568 enum immediate_class c = classify_immediate (ops[1], mode);
1570 switch (c)
1572 case IC_IL2:
1574 unsigned char arrhi[16];
1575 unsigned char arrlo[16];
1576 rtx to, temp, hi, lo;
1577 int i;
1578 enum machine_mode imode = mode;
1579 /* We need to do reals as ints because the constant used in the
1580 IOR might not be a legitimate real constant. */
1581 imode = int_mode_for_mode (mode);
1582 constant_to_array (mode, ops[1], arrhi);
1583 if (imode != mode)
1584 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1585 else
1586 to = ops[0];
1587 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1588 for (i = 0; i < 16; i += 4)
1590 arrlo[i + 2] = arrhi[i + 2];
1591 arrlo[i + 3] = arrhi[i + 3];
1592 arrlo[i + 0] = arrlo[i + 1] = 0;
1593 arrhi[i + 2] = arrhi[i + 3] = 0;
1595 hi = array_to_constant (imode, arrhi);
1596 lo = array_to_constant (imode, arrlo);
1597 emit_move_insn (temp, hi);
1598 emit_insn (gen_rtx_SET
1599 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1600 return 1;
1602 case IC_FSMBI2:
1604 unsigned char arr_fsmbi[16];
1605 unsigned char arr_andbi[16];
1606 rtx to, reg_fsmbi, reg_and;
1607 int i;
1608 enum machine_mode imode = mode;
1609 /* We need to do reals as ints because the constant used in the
1610 * AND might not be a legitimate real constant. */
1611 imode = int_mode_for_mode (mode);
1612 constant_to_array (mode, ops[1], arr_fsmbi);
1613 if (imode != mode)
1614 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1615 else
1616 to = ops[0];
1617 for (i = 0; i < 16; i++)
1618 if (arr_fsmbi[i] != 0)
1620 arr_andbi[0] = arr_fsmbi[i];
1621 arr_fsmbi[i] = 0xff;
1623 for (i = 1; i < 16; i++)
1624 arr_andbi[i] = arr_andbi[0];
1625 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1626 reg_and = array_to_constant (imode, arr_andbi);
1627 emit_move_insn (to, reg_fsmbi);
1628 emit_insn (gen_rtx_SET
1629 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1630 return 1;
1632 case IC_POOL:
1633 if (reload_in_progress || reload_completed)
1635 rtx mem = force_const_mem (mode, ops[1]);
1636 if (TARGET_LARGE_MEM)
1638 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1639 emit_move_insn (addr, XEXP (mem, 0));
1640 mem = replace_equiv_address (mem, addr);
1642 emit_move_insn (ops[0], mem);
1643 return 1;
1645 break;
1646 case IC_IL1s:
1647 case IC_IL2s:
1648 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1650 if (c == IC_IL2s)
1652 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1653 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1655 else if (flag_pic)
1656 emit_insn (gen_pic (ops[0], ops[1]));
1657 if (flag_pic)
1659 rtx pic_reg = get_pic_reg ();
1660 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1661 crtl->uses_pic_offset_table = 1;
1663 return flag_pic || c == IC_IL2s;
1665 break;
1666 case IC_IL1:
1667 case IC_FSMBI:
1668 case IC_CPAT:
1669 break;
1671 return 0;
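/* Illustrative IC_FSMBI2 split: for a constant where every nonzero byte
   has the same value, the code above emits an fsmbi that sets exactly
   those bytes to 0xff and then ANDs with that byte value replicated
   across the register.  */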
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
1680 static int
1681 need_to_save_reg (int regno, int saving)
1683 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1684 return 1;
1685 if (flag_pic
1686 && regno == PIC_OFFSET_TABLE_REGNUM
1687 && (!saving || crtl->uses_pic_offset_table)
1688 && (!saving
1689 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1690 return 1;
1691 return 0;
1694 /* This function is only correct starting with local register
1695 allocation */
1697 spu_saved_regs_size (void)
1699 int reg_save_size = 0;
1700 int regno;
1702 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1703 if (need_to_save_reg (regno, 0))
1704 reg_save_size += 0x10;
1705 return reg_save_size;
1708 static rtx
1709 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1711 rtx reg = gen_rtx_REG (V4SImode, regno);
1712 rtx mem =
1713 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1714 return emit_insn (gen_movv4si (mem, reg));
1717 static rtx
1718 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1720 rtx reg = gen_rtx_REG (V4SImode, regno);
1721 rtx mem =
1722 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1723 return emit_insn (gen_movv4si (reg, mem));
1726 /* This happens after reload, so we need to expand it. */
1727 static rtx
1728 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1730 rtx insn;
1731 if (satisfies_constraint_K (GEN_INT (imm)))
1733 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1735 else
1737 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1738 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1739 if (REGNO (src) == REGNO (scratch))
1740 abort ();
1742 return insn;
1745 /* Return nonzero if this function is known to have a null epilogue. */
1748 direct_return (void)
1750 if (reload_completed)
1752 if (cfun->static_chain_decl == 0
1753 && (spu_saved_regs_size ()
1754 + get_frame_size ()
1755 + crtl->outgoing_args_size
1756 + crtl->args.pretend_args_size == 0)
1757 && current_function_is_leaf)
1758 return 1;
1760 return 0;
1764 The stack frame looks like this:
1765 +-------------+
1766 | incoming |
1767 | args |
1768 AP -> +-------------+
1769 | $lr save |
1770 +-------------+
1771 prev SP | back chain |
1772 +-------------+
1773 | var args |
1774 | reg save | crtl->args.pretend_args_size bytes
1775 +-------------+
1776 | ... |
1777 | saved regs | spu_saved_regs_size() bytes
1778 FP -> +-------------+
1779 | ... |
1780 | vars | get_frame_size() bytes
1781 HFP -> +-------------+
1782 | ... |
1783 | outgoing |
1784 | args | crtl->outgoing_args_size bytes
1785 +-------------+
1786 | $lr of next |
1787 | frame |
1788 +-------------+
1789 | back chain |
1790 SP -> +-------------+
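/* Illustrative sizing: for a non-leaf function with 32 bytes of local
   variables, two saved registers and no pretend or outgoing args,
   spu_expand_prologue below computes
   total_size = 32 + 2*16 + STACK_POINTER_OFFSET, and since each
   register save slot is padded to 16 bytes the frame keeps the
   required 16-byte stack alignment.  */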
1793 void
1794 spu_expand_prologue (void)
1796 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1797 HOST_WIDE_INT total_size;
1798 HOST_WIDE_INT saved_regs_size;
1799 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1800 rtx scratch_reg_0, scratch_reg_1;
1801 rtx insn, real;
1803 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1804 the "toplevel" insn chain. */
1805 emit_note (NOTE_INSN_DELETED);
1807 if (flag_pic && optimize == 0)
1808 crtl->uses_pic_offset_table = 1;
1810 if (spu_naked_function_p (current_function_decl))
1811 return;
1813 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1814 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1816 saved_regs_size = spu_saved_regs_size ();
1817 total_size = size + saved_regs_size
1818 + crtl->outgoing_args_size
1819 + crtl->args.pretend_args_size;
1821 if (!current_function_is_leaf
1822 || cfun->calls_alloca || total_size > 0)
1823 total_size += STACK_POINTER_OFFSET;
1825 /* Save this first because code after this might use the link
1826 register as a scratch register. */
1827 if (!current_function_is_leaf)
1829 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1830 RTX_FRAME_RELATED_P (insn) = 1;
1833 if (total_size > 0)
1835 offset = -crtl->args.pretend_args_size;
1836 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1837 if (need_to_save_reg (regno, 1))
1839 offset -= 16;
1840 insn = frame_emit_store (regno, sp_reg, offset);
1841 RTX_FRAME_RELATED_P (insn) = 1;
1845 if (flag_pic && crtl->uses_pic_offset_table)
1847 rtx pic_reg = get_pic_reg ();
1848 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1849 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1852 if (total_size > 0)
1854 if (flag_stack_check)
1856 /* We compare against total_size-1 because
1857 ($sp >= total_size) <=> ($sp > total_size-1) */
1858 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1859 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1860 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1861 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1863 emit_move_insn (scratch_v4si, size_v4si);
1864 size_v4si = scratch_v4si;
1866 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1867 emit_insn (gen_vec_extractv4si
1868 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1869 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1872 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1873 the value of the previous $sp because we save it as the back
1874 chain. */
1875 if (total_size <= 2000)
1877 /* In this case we save the back chain first. */
1878 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1879 insn =
1880 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1882 else
1884 insn = emit_move_insn (scratch_reg_0, sp_reg);
1885 insn =
1886 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1888 RTX_FRAME_RELATED_P (insn) = 1;
1889 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1890 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1892 if (total_size > 2000)
1894 /* Save the back chain ptr */
1895 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1898 if (frame_pointer_needed)
1900 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1901 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1902 + crtl->outgoing_args_size;
1903 /* Set the new frame_pointer */
1904 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1905 RTX_FRAME_RELATED_P (insn) = 1;
1906 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1907 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1908 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1912 emit_note (NOTE_INSN_DELETED);
1915 void
1916 spu_expand_epilogue (bool sibcall_p)
1918 int size = get_frame_size (), offset, regno;
1919 HOST_WIDE_INT saved_regs_size, total_size;
1920 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1921 rtx jump, scratch_reg_0;
1923 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1924 the "toplevel" insn chain. */
1925 emit_note (NOTE_INSN_DELETED);
1927 if (spu_naked_function_p (current_function_decl))
1928 return;
1930 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1932 saved_regs_size = spu_saved_regs_size ();
1933 total_size = size + saved_regs_size
1934 + crtl->outgoing_args_size
1935 + crtl->args.pretend_args_size;
1937 if (!current_function_is_leaf
1938 || cfun->calls_alloca || total_size > 0)
1939 total_size += STACK_POINTER_OFFSET;
1941 if (total_size > 0)
1943 if (cfun->calls_alloca)
1944 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1945 else
1946 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1949 if (saved_regs_size > 0)
1951 offset = -crtl->args.pretend_args_size;
1952 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1953 if (need_to_save_reg (regno, 1))
1955 offset -= 0x10;
1956 frame_emit_load (regno, sp_reg, offset);
1961 if (!current_function_is_leaf)
1962 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1964 if (!sibcall_p)
1966 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1967 jump = emit_jump_insn (gen__return ());
1968 emit_barrier_after (jump);
1971 emit_note (NOTE_INSN_DELETED);
1975 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1977 if (count != 0)
1978 return 0;
1979 /* This is inefficient because it ends up copying to a save-register
1980 which then gets saved even though $lr has already been saved. But
1981 it does generate better code for leaf functions and we don't need
1982 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1983 used for __builtin_return_address anyway, so maybe we don't care if
1984 it's inefficient. */
1985 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1989 /* Given VAL, generate a constant appropriate for MODE.
1990 If MODE is a vector mode, every element will be VAL.
1991 For TImode, VAL will be zero extended to 128 bits. */
1993 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1995 rtx inner;
1996 rtvec v;
1997 int units, i;
1999 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2000 || GET_MODE_CLASS (mode) == MODE_FLOAT
2001 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2002 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2004 if (GET_MODE_CLASS (mode) == MODE_INT)
2005 return immed_double_const (val, 0, mode);
2007 /* val is the bit representation of the float */
2008 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2009 return hwint_to_const_double (mode, val);
2011 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2012 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2013 else
2014 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2016 units = GET_MODE_NUNITS (mode);
2018 v = rtvec_alloc (units);
2020 for (i = 0; i < units; ++i)
2021 RTVEC_ELT (v, i) = inner;
2023 return gen_rtx_CONST_VECTOR (mode, v);
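/* For instance: spu_const (V4SImode, 1) builds the CONST_VECTOR
   {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) builds the
   CONST_DOUBLE for 1.0f, since for float modes VAL is interpreted as
   the bit pattern.  */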
2026 /* Create a MODE vector constant from 4 ints. */
2028 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2030 unsigned char arr[16];
2031 arr[0] = (a >> 24) & 0xff;
2032 arr[1] = (a >> 16) & 0xff;
2033 arr[2] = (a >> 8) & 0xff;
2034 arr[3] = (a >> 0) & 0xff;
2035 arr[4] = (b >> 24) & 0xff;
2036 arr[5] = (b >> 16) & 0xff;
2037 arr[6] = (b >> 8) & 0xff;
2038 arr[7] = (b >> 0) & 0xff;
2039 arr[8] = (c >> 24) & 0xff;
2040 arr[9] = (c >> 16) & 0xff;
2041 arr[10] = (c >> 8) & 0xff;
2042 arr[11] = (c >> 0) & 0xff;
2043 arr[12] = (d >> 24) & 0xff;
2044 arr[13] = (d >> 16) & 0xff;
2045 arr[14] = (d >> 8) & 0xff;
2046 arr[15] = (d >> 0) & 0xff;
2047 return array_to_constant(mode, arr);
2050 /* branch hint stuff */
2052 /* An array of these is used to propagate hints to predecessor blocks. */
2053 struct spu_bb_info
2055 rtx prop_jump; /* propagated from another block */
2056 int bb_index; /* the original block. */
2058 static struct spu_bb_info *spu_bb_info;
2060 #define STOP_HINT_P(INSN) \
2061 (GET_CODE(INSN) == CALL_INSN \
2062 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2063 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2065 /* 1 when RTX is a hinted branch or its target. We keep track of
2066 what has been hinted so the safe-hint code can test it easily. */
2067 #define HINTED_P(RTX) \
2068 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2070 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2071 #define SCHED_ON_EVEN_P(RTX) \
2072 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2074 /* Emit a nop for INSN such that the two will dual issue. This assumes
2075 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2076 We check for TImode to handle a MULTI1 insn which has dual issued its
2077 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2078 ADDR_VEC insns. */
2079 static void
2080 emit_nop_for_insn (rtx insn)
2082 int p;
2083 rtx new_insn;
2084 p = get_pipe (insn);
2085 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2086 new_insn = emit_insn_after (gen_lnop (), insn);
2087 else if (p == 1 && GET_MODE (insn) == TImode)
2089 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2090 PUT_MODE (new_insn, TImode);
2091 PUT_MODE (insn, VOIDmode);
2093 else
2094 new_insn = emit_insn_after (gen_lnop (), insn);
2095 recog_memoized (new_insn);
2098 /* Insert nops in basic blocks to meet dual issue alignment
2099 requirements. Also make sure hbrp and hint instructions are at least
2100 one cycle apart, possibly inserting a nop. */
2101 static void
2102 pad_bb(void)
2104 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2105 int length;
2106 int addr;
2108 /* This sets up INSN_ADDRESSES. */
2109 shorten_branches (get_insns ());
2111 /* Keep track of length added by nops. */
2112 length = 0;
2114 prev_insn = 0;
2115 insn = get_insns ();
2116 if (!active_insn_p (insn))
2117 insn = next_active_insn (insn);
2118 for (; insn; insn = next_insn)
2120 next_insn = next_active_insn (insn);
2121 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2122 || INSN_CODE (insn) == CODE_FOR_hbr)
2124 if (hbr_insn)
2126 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2127 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2128 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2129 || (a1 - a0 == 4))
2131 prev_insn = emit_insn_before (gen_lnop (), insn);
2132 PUT_MODE (prev_insn, GET_MODE (insn));
2133 PUT_MODE (insn, TImode);
2134 length += 4;
2137 hbr_insn = insn;
2139 if (INSN_CODE (insn) == CODE_FOR_blockage)
2141 if (GET_MODE (insn) == TImode)
2142 PUT_MODE (next_insn, TImode);
2143 insn = next_insn;
2144 next_insn = next_active_insn (insn);
2146 addr = INSN_ADDRESSES (INSN_UID (insn));
2147 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2149 if (((addr + length) & 7) != 0)
2151 emit_nop_for_insn (prev_insn);
2152 length += 4;
2155 else if (GET_MODE (insn) == TImode
2156 && ((next_insn && GET_MODE (next_insn) != TImode)
2157 || get_attr_type (insn) == TYPE_MULTI0)
2158 && ((addr + length) & 7) != 0)
2160 /* prev_insn will always be set because the first insn is
2161 always 8-byte aligned. */
2162 emit_nop_for_insn (prev_insn);
2163 length += 4;
2165 prev_insn = insn;
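/* Illustrative sketch (editorial): SPU instructions are 4 bytes and a
   dual-issue pair occupies an 8-byte aligned slot, so the test used in
   pad_bb above, ((addr + length) & 7) != 0, asks whether an insn that
   must lead a pair has drifted off an 8-byte boundary once the nops
   already inserted (tracked in 'length') are accounted for. */
static int
needs_alignment_nop (int insn_addr, int pad_bytes_so_far)
{
  return ((insn_addr + pad_bytes_so_far) & 7) != 0;
}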
2170 /* Routines for branch hints. */
2172 static void
2173 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2174 int distance, sbitmap blocks)
2176 rtx branch_label = 0;
2177 rtx hint;
2178 rtx insn;
2179 rtx table;
2181 if (before == 0 || branch == 0 || target == 0)
2182 return;
2184 /* While scheduling we require hints to be no further than 600 bytes, so
2185 we need to enforce that here too. */
2186 if (distance > 600)
2187 return;
2189 /* If BEFORE is a basic block note, emit the hint after the note instead. */
2190 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2191 before = NEXT_INSN (before);
2193 branch_label = gen_label_rtx ();
2194 LABEL_NUSES (branch_label)++;
2195 LABEL_PRESERVE_P (branch_label) = 1;
2196 insn = emit_label_before (branch_label, branch);
2197 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2198 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2200 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2201 recog_memoized (hint);
2202 HINTED_P (branch) = 1;
2204 if (GET_CODE (target) == LABEL_REF)
2205 HINTED_P (XEXP (target, 0)) = 1;
2206 else if (tablejump_p (branch, 0, &table))
2208 rtvec vec;
2209 int j;
2210 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2211 vec = XVEC (PATTERN (table), 0);
2212 else
2213 vec = XVEC (PATTERN (table), 1);
2214 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2215 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2218 if (distance >= 588)
2220 /* Make sure the hint isn't scheduled any earlier than this point,
2221 which could make it too far for the branch offset to fit. */
2222 recog_memoized (emit_insn_before (gen_blockage (), hint));
2224 else if (distance <= 8 * 4)
2226 /* To guarantee at least 8 insns between the hint and branch we
2227 insert nops. */
2228 int d;
2229 for (d = distance; d < 8 * 4; d += 4)
2231 insn =
2232 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2233 recog_memoized (insn);
2236 /* Make sure any nops inserted aren't scheduled before the hint. */
2237 recog_memoized (emit_insn_after (gen_blockage (), hint));
2239 /* Make sure any nops inserted aren't scheduled after the call. */
2240 if (CALL_P (branch) && distance < 8 * 4)
2241 recog_memoized (emit_insn_before (gen_blockage (), branch));
2245 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2246 the rtx for the branch target. */
2247 static rtx
2248 get_branch_target (rtx branch)
2250 if (GET_CODE (branch) == JUMP_INSN)
2252 rtx set, src;
2254 /* Return statements */
2255 if (GET_CODE (PATTERN (branch)) == RETURN)
2256 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2258 /* jump table */
2259 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2260 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2261 return 0;
2263 set = single_set (branch);
2264 src = SET_SRC (set);
2265 if (GET_CODE (SET_DEST (set)) != PC)
2266 abort ();
2268 if (GET_CODE (src) == IF_THEN_ELSE)
2270 rtx lab = 0;
2271 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2272 if (note)
2274 /* If the more probable case is not a fall through, then
2275 try a branch hint. */
2276 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2277 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2278 && GET_CODE (XEXP (src, 1)) != PC)
2279 lab = XEXP (src, 1);
2280 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2281 && GET_CODE (XEXP (src, 2)) != PC)
2282 lab = XEXP (src, 2);
2284 if (lab)
2286 if (GET_CODE (lab) == RETURN)
2287 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2288 return lab;
2290 return 0;
2293 return src;
2295 else if (GET_CODE (branch) == CALL_INSN)
2297 rtx call;
2298 /* All of our call patterns are in a PARALLEL and the CALL is
2299 the first pattern in the PARALLEL. */
2300 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2301 abort ();
2302 call = XVECEXP (PATTERN (branch), 0, 0);
2303 if (GET_CODE (call) == SET)
2304 call = SET_SRC (call);
2305 if (GET_CODE (call) != CALL)
2306 abort ();
2307 return XEXP (XEXP (call, 0), 0);
2309 return 0;
2312 /* The special $hbr register is used to prevent the insn scheduler from
2313 moving hbr insns across instructions which invalidate them. It
2314 should only be used in a clobber, and this function searches for
2315 insns which clobber it. */
2316 static bool
2317 insn_clobbers_hbr (rtx insn)
2319 if (INSN_P (insn)
2320 && GET_CODE (PATTERN (insn)) == PARALLEL)
2322 rtx parallel = PATTERN (insn);
2323 rtx clobber;
2324 int j;
2325 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2327 clobber = XVECEXP (parallel, 0, j);
2328 if (GET_CODE (clobber) == CLOBBER
2329 && GET_CODE (XEXP (clobber, 0)) == REG
2330 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2331 return 1;
2334 return 0;
2337 /* Search up to 32 insns starting at FIRST:
2338 - at any kind of hinted branch, just return
2339 - at any unconditional branch in the first 15 insns, just return
2340 - at a call or indirect branch, after the first 15 insns, force it to
2341 an even address and return
2342 - at any unconditional branch, after the first 15 insns, force it to
2343 an even address.
2344 At the end of the search, insert an hbrp within 4 insns of FIRST,
2345 and an hbrp within 16 instructions of FIRST. */
2347 static void
2348 insert_hbrp_for_ilb_runout (rtx first)
2350 rtx insn, before_4 = 0, before_16 = 0;
2351 int addr = 0, length, first_addr = -1;
2352 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2353 int insert_lnop_after = 0;
2354 for (insn = first; insn; insn = NEXT_INSN (insn))
2355 if (INSN_P (insn))
2357 if (first_addr == -1)
2358 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2359 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2360 length = get_attr_length (insn);
2362 if (before_4 == 0 && addr + length >= 4 * 4)
2363 before_4 = insn;
2364 /* We test for 14 instructions because the first hbrp will add
2365 up to 2 instructions. */
2366 if (before_16 == 0 && addr + length >= 14 * 4)
2367 before_16 = insn;
2369 if (INSN_CODE (insn) == CODE_FOR_hbr)
2371 /* Make sure an hbrp is at least 2 cycles away from a hint.
2372 Insert an lnop after the hbrp when necessary. */
2373 if (before_4 == 0 && addr > 0)
2375 before_4 = insn;
2376 insert_lnop_after |= 1;
2378 else if (before_4 && addr <= 4 * 4)
2379 insert_lnop_after |= 1;
2380 if (before_16 == 0 && addr > 10 * 4)
2382 before_16 = insn;
2383 insert_lnop_after |= 2;
2385 else if (before_16 && addr <= 14 * 4)
2386 insert_lnop_after |= 2;
2389 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2391 if (addr < hbrp_addr0)
2392 hbrp_addr0 = addr;
2393 else if (addr < hbrp_addr1)
2394 hbrp_addr1 = addr;
2397 if (CALL_P (insn) || JUMP_P (insn))
2399 if (HINTED_P (insn))
2400 return;
2402 /* Any branch after the first 15 insns should be on an even
2403 address to avoid a special case branch. There might be
2404 some nops and/or hbrps inserted, so we test after 10
2405 insns. */
2406 if (addr > 10 * 4)
2407 SCHED_ON_EVEN_P (insn) = 1;
2410 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2411 return;
2414 if (addr + length >= 32 * 4)
2416 gcc_assert (before_4 && before_16);
2417 if (hbrp_addr0 > 4 * 4)
2419 insn =
2420 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2421 recog_memoized (insn);
2422 INSN_ADDRESSES_NEW (insn,
2423 INSN_ADDRESSES (INSN_UID (before_4)));
2424 PUT_MODE (insn, GET_MODE (before_4));
2425 PUT_MODE (before_4, TImode);
2426 if (insert_lnop_after & 1)
2428 insn = emit_insn_before (gen_lnop (), before_4);
2429 recog_memoized (insn);
2430 INSN_ADDRESSES_NEW (insn,
2431 INSN_ADDRESSES (INSN_UID (before_4)));
2432 PUT_MODE (insn, TImode);
2435 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2436 && hbrp_addr1 > 16 * 4)
2438 insn =
2439 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2440 recog_memoized (insn);
2441 INSN_ADDRESSES_NEW (insn,
2442 INSN_ADDRESSES (INSN_UID (before_16)));
2443 PUT_MODE (insn, GET_MODE (before_16));
2444 PUT_MODE (before_16, TImode);
2445 if (insert_lnop_after & 2)
2447 insn = emit_insn_before (gen_lnop (), before_16);
2448 recog_memoized (insn);
2449 INSN_ADDRESSES_NEW (insn,
2450 INSN_ADDRESSES (INSN_UID
2451 (before_16)));
2452 PUT_MODE (insn, TImode);
2455 return;
2458 else if (BARRIER_P (insn))
2459 return;
2463 /* The SPU might hang when it executes 48 inline instructions after a
2464 hinted branch jumps to its hinted target. The beginning of a
2465 function and the return from a call might have been hinted, and must
2466 be handled as well. To prevent a hang we insert 2 hbrps. The first
2467 should be within 6 insns of the branch target. The second should be
2468 within 22 insns of the branch target. When determining if hbrps are
2469 necessary, we look for only 32 inline instructions, because up to
2470 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2471 new hbrps, we insert them within 4 and 16 insns of the target. */
2472 static void
2473 insert_hbrp (void)
2475 rtx insn;
2476 if (TARGET_SAFE_HINTS)
2478 shorten_branches (get_insns ());
2479 /* Insert hbrp at beginning of function */
2480 insn = next_active_insn (get_insns ());
2481 if (insn)
2482 insert_hbrp_for_ilb_runout (insn);
2483 /* Insert hbrp after hinted targets. */
2484 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2485 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2486 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2490 static int in_spu_reorg;
2492 /* Insert branch hints. There are no branch optimizations after this
2493 pass, so it's safe to set our branch hints now. */
2494 static void
2495 spu_machine_dependent_reorg (void)
2497 sbitmap blocks;
2498 basic_block bb;
2499 rtx branch, insn;
2500 rtx branch_target = 0;
2501 int branch_addr = 0, insn_addr, required_dist = 0;
2502 int i;
2503 unsigned int j;
2505 if (!TARGET_BRANCH_HINTS || optimize == 0)
2507 /* We still do it for unoptimized code because an external
2508 function might have hinted a call or return. */
2509 insert_hbrp ();
2510 pad_bb ();
2511 return;
2514 blocks = sbitmap_alloc (last_basic_block);
2515 sbitmap_zero (blocks);
2517 in_spu_reorg = 1;
2518 compute_bb_for_insn ();
2520 compact_blocks ();
2522 spu_bb_info =
2523 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2524 sizeof (struct spu_bb_info));
2526 /* We need exact insn addresses and lengths. */
2527 shorten_branches (get_insns ());
2529 for (i = n_basic_blocks - 1; i >= 0; i--)
2531 bb = BASIC_BLOCK (i);
2532 branch = 0;
2533 if (spu_bb_info[i].prop_jump)
2535 branch = spu_bb_info[i].prop_jump;
2536 branch_target = get_branch_target (branch);
2537 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2538 required_dist = spu_hint_dist;
2540 /* Search from the end of a block to its beginning. In this loop, find
2541 jumps which need a branch hint and emit the hint only when:
2542 - it's an indirect branch and we're at the insn which sets
2543 the register
2544 - we're at an insn that will invalidate the hint. e.g., a
2545 call, another hint insn, inline asm that clobbers $hbr, and
2546 some inlined operations (divmodsi4). Don't consider jumps
2547 because they are only at the end of a block and are
2548 considered when we are deciding whether to propagate
2549 - we're getting too far away from the branch. The hbr insns
2550 only have a signed 10 bit offset
2551 We go back as far as possible so the branch will be considered
2552 for propagation when we get to the beginning of the block. */
2553 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2555 if (INSN_P (insn))
2557 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2558 if (branch
2559 && ((GET_CODE (branch_target) == REG
2560 && set_of (branch_target, insn) != NULL_RTX)
2561 || insn_clobbers_hbr (insn)
2562 || branch_addr - insn_addr > 600))
2564 rtx next = NEXT_INSN (insn);
2565 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2566 if (insn != BB_END (bb)
2567 && branch_addr - next_addr >= required_dist)
2569 if (dump_file)
2570 fprintf (dump_file,
2571 "hint for %i in block %i before %i\n",
2572 INSN_UID (branch), bb->index,
2573 INSN_UID (next));
2574 spu_emit_branch_hint (next, branch, branch_target,
2575 branch_addr - next_addr, blocks);
2577 branch = 0;
2580 /* JUMP_P will only be true at the end of a block. When
2581 branch is already set it means we've previously decided
2582 to propagate a hint for that branch into this block. */
2583 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2585 branch = 0;
2586 if ((branch_target = get_branch_target (insn)))
2588 branch = insn;
2589 branch_addr = insn_addr;
2590 required_dist = spu_hint_dist;
2594 if (insn == BB_HEAD (bb))
2595 break;
2598 if (branch)
2600 /* If we haven't emitted a hint for this branch yet, it might
2601 be profitable to emit it in one of the predecessor blocks,
2602 especially for loops. */
2603 rtx bbend;
2604 basic_block prev = 0, prop = 0, prev2 = 0;
2605 int loop_exit = 0, simple_loop = 0;
2606 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2608 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2609 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2610 prev = EDGE_PRED (bb, j)->src;
2611 else
2612 prev2 = EDGE_PRED (bb, j)->src;
2614 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2615 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2616 loop_exit = 1;
2617 else if (EDGE_SUCC (bb, j)->dest == bb)
2618 simple_loop = 1;
2620 /* If this branch is a loop exit then propagate to previous
2621 fallthru block. This catches the cases when it is a simple
2622 loop or when there is an initial branch into the loop. */
2623 if (prev && (loop_exit || simple_loop)
2624 && prev->loop_depth <= bb->loop_depth)
2625 prop = prev;
2627 /* If there is only one adjacent predecessor, don't propagate
2628 outside this loop. This loop_depth test isn't perfect, but
2629 I'm not sure the loop_father member is valid at this point. */
2630 else if (prev && single_pred_p (bb)
2631 && prev->loop_depth == bb->loop_depth)
2632 prop = prev;
2634 /* If this is the JOIN block of a simple IF-THEN then
2635 propagate the hint to the HEADER block. */
2636 else if (prev && prev2
2637 && EDGE_COUNT (bb->preds) == 2
2638 && EDGE_COUNT (prev->preds) == 1
2639 && EDGE_PRED (prev, 0)->src == prev2
2640 && prev2->loop_depth == bb->loop_depth
2641 && GET_CODE (branch_target) != REG)
2642 prop = prev;
2644 /* Don't propagate when:
2645 - this is a simple loop and the hint would be too far
2646 - this is not a simple loop and there are 16 insns in
2647 this block already
2648 - the predecessor block ends in a branch that will be
2649 hinted
2650 - the predecessor block ends in an insn that invalidates
2651 the hint */
2652 if (prop
2653 && prop->index >= 0
2654 && (bbend = BB_END (prop))
2655 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2656 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2657 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2659 if (dump_file)
2660 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2661 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2662 bb->index, prop->index, bb->loop_depth,
2663 INSN_UID (branch), loop_exit, simple_loop,
2664 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2666 spu_bb_info[prop->index].prop_jump = branch;
2667 spu_bb_info[prop->index].bb_index = i;
2669 else if (branch_addr - next_addr >= required_dist)
2671 if (dump_file)
2672 fprintf (dump_file, "hint for %i in block %i before %i\n",
2673 INSN_UID (branch), bb->index,
2674 INSN_UID (NEXT_INSN (insn)));
2675 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2676 branch_addr - next_addr, blocks);
2678 branch = 0;
2681 free (spu_bb_info);
2683 if (!sbitmap_empty_p (blocks))
2684 find_many_sub_basic_blocks (blocks);
2686 /* We have to schedule to make sure alignment is ok. */
2687 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2689 /* The hints need to be scheduled, so call it again. */
2690 schedule_insns ();
2692 insert_hbrp ();
2694 pad_bb ();
2697 if (spu_flag_var_tracking)
2699 df_analyze ();
2700 timevar_push (TV_VAR_TRACKING);
2701 variable_tracking_main ();
2702 timevar_pop (TV_VAR_TRACKING);
2703 df_finish_pass (false);
2706 free_bb_for_insn ();
2708 in_spu_reorg = 0;
2712 /* Insn scheduling routines, primarily for dual issue. */
2713 static int
2714 spu_sched_issue_rate (void)
2716 return 2;
2719 static int
2720 uses_ls_unit(rtx insn)
2722 rtx set = single_set (insn);
2723 if (set != 0
2724 && (GET_CODE (SET_DEST (set)) == MEM
2725 || GET_CODE (SET_SRC (set)) == MEM))
2726 return 1;
2727 return 0;
2730 static int
2731 get_pipe (rtx insn)
2733 enum attr_type t;
2734 /* Handle inline asm */
2735 if (INSN_CODE (insn) == -1)
2736 return -1;
2737 t = get_attr_type (insn);
2738 switch (t)
2740 case TYPE_CONVERT:
2741 return -2;
2742 case TYPE_MULTI0:
2743 return -1;
2745 case TYPE_FX2:
2746 case TYPE_FX3:
2747 case TYPE_SPR:
2748 case TYPE_NOP:
2749 case TYPE_FXB:
2750 case TYPE_FPD:
2751 case TYPE_FP6:
2752 case TYPE_FP7:
2753 return 0;
2755 case TYPE_LNOP:
2756 case TYPE_SHUF:
2757 case TYPE_LOAD:
2758 case TYPE_STORE:
2759 case TYPE_BR:
2760 case TYPE_MULTI1:
2761 case TYPE_HBR:
2762 case TYPE_IPREFETCH:
2763 return 1;
2764 default:
2765 abort ();
2770 /* haifa-sched.c has a static variable that keeps track of the current
2771 cycle. It is passed to spu_sched_reorder, and we record it here for
2772 use by spu_sched_variable_issue. It won't be accurate if the
2773 scheduler updates its clock_var between the two calls. */
2774 static int clock_var;
2776 /* This is used to keep track of insn alignment. Set to 0 at the
2777 beginning of each block and increased by the "length" attr of each
2778 insn scheduled. */
2779 static int spu_sched_length;
2781 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2782 ready list appropriately in spu_sched_reorder(). */
2783 static int pipe0_clock;
2784 static int pipe1_clock;
2786 static int prev_clock_var;
2788 static int prev_priority;
2790 /* The SPU needs to load the next ilb sometime during the execution of
2791 the previous ilb. There is a potential conflict if every cycle has a
2792 load or store. To avoid the conflict we make sure the load/store
2793 unit is free for at least one cycle during the execution of insns in
2794 the previous ilb. */
2795 static int spu_ls_first;
2796 static int prev_ls_clock;
2798 static void
2799 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2800 int max_ready ATTRIBUTE_UNUSED)
2802 spu_sched_length = 0;
2805 static void
2806 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2807 int max_ready ATTRIBUTE_UNUSED)
2809 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2811 /* When any block might be at least 8-byte aligned, assume they
2812 will all be at least 8-byte aligned to make sure dual issue
2813 works out correctly. */
2814 spu_sched_length = 0;
2816 spu_ls_first = INT_MAX;
2817 clock_var = -1;
2818 prev_ls_clock = -1;
2819 pipe0_clock = -1;
2820 pipe1_clock = -1;
2821 prev_clock_var = -1;
2822 prev_priority = -1;
2825 static int
2826 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2827 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2829 int len;
2830 int p;
2831 if (GET_CODE (PATTERN (insn)) == USE
2832 || GET_CODE (PATTERN (insn)) == CLOBBER
2833 || (len = get_attr_length (insn)) == 0)
2834 return more;
2836 spu_sched_length += len;
2838 /* Reset on inline asm */
2839 if (INSN_CODE (insn) == -1)
2841 spu_ls_first = INT_MAX;
2842 pipe0_clock = -1;
2843 pipe1_clock = -1;
2844 return 0;
2846 p = get_pipe (insn);
2847 if (p == 0)
2848 pipe0_clock = clock_var;
2849 else
2850 pipe1_clock = clock_var;
2852 if (in_spu_reorg)
2854 if (clock_var - prev_ls_clock > 1
2855 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2856 spu_ls_first = INT_MAX;
2857 if (uses_ls_unit (insn))
2859 if (spu_ls_first == INT_MAX)
2860 spu_ls_first = spu_sched_length;
2861 prev_ls_clock = clock_var;
2864 /* The scheduler hasn't inserted the nop, but we will later on.
2865 Include those nops in spu_sched_length. */
2866 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2867 spu_sched_length += 4;
2868 prev_clock_var = clock_var;
2870 /* more is -1 when called from spu_sched_reorder for new insns
2871 that don't have INSN_PRIORITY */
2872 if (more >= 0)
2873 prev_priority = INSN_PRIORITY (insn);
2876 /* Always try issuing more insns. spu_sched_reorder will decide
2877 when the cycle should be advanced. */
2878 return 1;
2881 /* This function is called for both TARGET_SCHED_REORDER and
2882 TARGET_SCHED_REORDER2. */
2883 static int
2884 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2885 rtx *ready, int *nreadyp, int clock)
2887 int i, nready = *nreadyp;
2888 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2889 rtx insn;
2891 clock_var = clock;
2893 if (nready <= 0 || pipe1_clock >= clock)
2894 return 0;
2896 /* Find any rtl insns that don't generate assembly insns and schedule
2897 them first. */
2898 for (i = nready - 1; i >= 0; i--)
2900 insn = ready[i];
2901 if (INSN_CODE (insn) == -1
2902 || INSN_CODE (insn) == CODE_FOR_blockage
2903 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2905 ready[i] = ready[nready - 1];
2906 ready[nready - 1] = insn;
2907 return 1;
2911 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2912 for (i = 0; i < nready; i++)
2913 if (INSN_CODE (ready[i]) != -1)
2915 insn = ready[i];
2916 switch (get_attr_type (insn))
2918 default:
2919 case TYPE_MULTI0:
2920 case TYPE_CONVERT:
2921 case TYPE_FX2:
2922 case TYPE_FX3:
2923 case TYPE_SPR:
2924 case TYPE_NOP:
2925 case TYPE_FXB:
2926 case TYPE_FPD:
2927 case TYPE_FP6:
2928 case TYPE_FP7:
2929 pipe_0 = i;
2930 break;
2931 case TYPE_LOAD:
2932 case TYPE_STORE:
2933 pipe_ls = i;
2934 case TYPE_LNOP:
2935 case TYPE_SHUF:
2936 case TYPE_BR:
2937 case TYPE_MULTI1:
2938 case TYPE_HBR:
2939 pipe_1 = i;
2940 break;
2941 case TYPE_IPREFETCH:
2942 pipe_hbrp = i;
2943 break;
2947 /* In the first scheduling phase, schedule loads and stores together
2948 to increase the chance they will get merged during postreload CSE. */
2949 if (!reload_completed && pipe_ls >= 0)
2951 insn = ready[pipe_ls];
2952 ready[pipe_ls] = ready[nready - 1];
2953 ready[nready - 1] = insn;
2954 return 1;
2957 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2958 if (pipe_hbrp >= 0)
2959 pipe_1 = pipe_hbrp;
2961 /* When we have loads/stores in every cycle of the last 15 insns and
2962 we are about to schedule another load/store, emit an hbrp insn
2963 instead. */
2964 if (in_spu_reorg
2965 && spu_sched_length - spu_ls_first >= 4 * 15
2966 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2968 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2969 recog_memoized (insn);
2970 if (pipe0_clock < clock)
2971 PUT_MODE (insn, TImode);
2972 spu_sched_variable_issue (file, verbose, insn, -1);
2973 return 0;
2976 /* In general, we want to emit nops to increase dual issue, but dual
2977 issue isn't faster when one of the insns could be scheduled later
2978 without affecting the critical path. We look at INSN_PRIORITY to
2979 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2980 used to affect it. */
2981 if (in_spu_reorg && spu_dual_nops < 10)
2983 /* When we are at an even address and we are not issuing nops to
2984 improve scheduling, we need to advance the cycle. */
2985 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2986 && (spu_dual_nops == 0
2987 || (pipe_1 != -1
2988 && prev_priority >
2989 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2990 return 0;
2992 /* When at an odd address, schedule the highest priority insn
2993 without considering pipeline. */
2994 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2995 && (spu_dual_nops == 0
2996 || (prev_priority >
2997 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2998 return 1;
3002 /* If we haven't issued a pipe0 insn yet this cycle and there is a
3003 pipe0 insn in the ready list, schedule it. */
3004 if (pipe0_clock < clock && pipe_0 >= 0)
3005 schedule_i = pipe_0;
3007 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3008 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3009 else
3010 schedule_i = pipe_1;
3012 if (schedule_i > -1)
3014 insn = ready[schedule_i];
3015 ready[schedule_i] = ready[nready - 1];
3016 ready[nready - 1] = insn;
3017 return 1;
3019 return 0;
3022 /* INSN is dependent on DEP_INSN. */
3023 static int
3024 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3026 rtx set;
3028 /* The blockage pattern is used to prevent instructions from being
3029 moved across it and has no cost. */
3030 if (INSN_CODE (insn) == CODE_FOR_blockage
3031 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3032 return 0;
3034 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3035 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3036 return 0;
3038 /* Make sure hbrps are spread out. */
3039 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3040 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3041 return 8;
3043 /* Make sure hints and hbrps are 2 cycles apart. */
3044 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3045 || INSN_CODE (insn) == CODE_FOR_hbr)
3046 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3047 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3048 return 2;
3050 /* An hbrp has no real dependency on other insns. */
3051 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3052 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3053 return 0;
3055 /* Assuming that it is unlikely an argument register will be used in
3056 the first cycle of the called function, we reduce the cost for
3057 slightly better scheduling of dep_insn. When not hinted, the
3058 mispredicted branch would hide the cost as well. */
3059 if (CALL_P (insn))
3061 rtx target = get_branch_target (insn);
3062 if (GET_CODE (target) != REG || !set_of (target, insn))
3063 return cost - 2;
3064 return cost;
3067 /* And when returning from a function, let's assume the return values
3068 are completed sooner too. */
3069 if (CALL_P (dep_insn))
3070 return cost - 2;
3072 /* Make sure an instruction that loads from the back chain is scheduled
3073 away from the return instruction so a hint is more likely to get
3074 issued. */
3075 if (INSN_CODE (insn) == CODE_FOR__return
3076 && (set = single_set (dep_insn))
3077 && GET_CODE (SET_DEST (set)) == REG
3078 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3079 return 20;
3081 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3082 scheduler makes every insn in a block anti-dependent on the final
3083 jump_insn. We adjust here so higher cost insns will get scheduled
3084 earlier. */
3085 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3086 return insn_cost (dep_insn) - 3;
3088 return cost;
3091 /* Create a CONST_DOUBLE from a string. */
3092 struct rtx_def *
3093 spu_float_const (const char *string, enum machine_mode mode)
3095 REAL_VALUE_TYPE value;
3096 value = REAL_VALUE_ATOF (string, mode);
3097 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3101 spu_constant_address_p (rtx x)
3103 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3104 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3105 || GET_CODE (x) == HIGH);
3108 static enum spu_immediate
3109 which_immediate_load (HOST_WIDE_INT val)
3111 gcc_assert (val == trunc_int_for_mode (val, SImode));
3113 if (val >= -0x8000 && val <= 0x7fff)
3114 return SPU_IL;
3115 if (val >= 0 && val <= 0x3ffff)
3116 return SPU_ILA;
3117 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3118 return SPU_ILH;
3119 if ((val & 0xffff) == 0)
3120 return SPU_ILHU;
3122 return SPU_NONE;
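/* Illustrative sketch (editorial, standalone restatement of the
   classification above over a plain 32-bit value): il takes a
   sign-extended 16-bit immediate, ila an unsigned 18-bit one, ilh a
   value whose two halfwords are equal, and ilhu a value whose low
   halfword is zero.  Anything else needs ilhu+iohl or the constant
   pool. */
enum imm_load_kind { IMM_IL, IMM_ILA, IMM_ILH, IMM_ILHU, IMM_NONE };

static enum imm_load_kind
classify_imm_load (long long val)   /* VAL already truncated to SImode */
{
  if (val >= -0x8000 && val <= 0x7fff)
    return IMM_IL;
  if (val >= 0 && val <= 0x3ffff)
    return IMM_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return IMM_ILH;                 /* halfword splat */
  if ((val & 0xffff) == 0)
    return IMM_ILHU;                /* only the upper halfword is set */
  return IMM_NONE;
}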
3125 /* Return true when OP can be loaded by one of the il instructions, or
3126 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3128 immediate_load_p (rtx op, enum machine_mode mode)
3130 if (CONSTANT_P (op))
3132 enum immediate_class c = classify_immediate (op, mode);
3133 return c == IC_IL1 || c == IC_IL1s
3134 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3136 return 0;
3139 /* Return true if the first SIZE bytes of ARR form a constant that can be
3140 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3141 represent the size and offset of the instruction to use. */
3142 static int
3143 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3145 int cpat, run, i, start;
3146 cpat = 1;
3147 run = 0;
3148 start = -1;
3149 for (i = 0; i < size && cpat; i++)
3150 if (arr[i] != i+16)
3152 if (!run)
3154 start = i;
3155 if (arr[i] == 3)
3156 run = 1;
3157 else if (arr[i] == 2 && arr[i+1] == 3)
3158 run = 2;
3159 else if (arr[i] == 0)
3161 while (arr[i+run] == run && i+run < 16)
3162 run++;
3163 if (run != 4 && run != 8)
3164 cpat = 0;
3166 else
3167 cpat = 0;
3168 if ((i & (run-1)) != 0)
3169 cpat = 0;
3170 i += run;
3172 else
3173 cpat = 0;
3175 if (cpat && (run || size < 16))
3177 if (run == 0)
3178 run = 1;
3179 if (prun)
3180 *prun = run;
3181 if (pstart)
3182 *pstart = start == -1 ? 16-run : start;
3183 return 1;
3185 return 0;
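/* Illustrative sketch (editorial): the constants cpat_info recognizes
   are the shuffle-control patterns produced by the cbd/chd/cwd/cdd
   "generate controls for insertion" instructions: the identity bytes
   0x10..0x1f with a run of 1, 2, 4 or 8 insertion bytes patched in at a
   run-aligned offset.  For example, a cwd-style pattern at offset 4 is
     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f.
   Standalone generator under that assumption: */
static void
make_cpat_pattern (unsigned char arr[16], int run, int start)
{
  int i;
  for (i = 0; i < 16; i++)
    arr[i] = i + 0x10;                /* keep the existing quadword byte */
  for (i = 0; i < run; i++)
    arr[start + i] = (run == 1 ? 0x03
                      : run == 2 ? 0x02 + i
                      : i);           /* bytes taken from the inserted value */
}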
3188 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3189 it into a register. MODE is only valid when OP is a CONST_INT. */
3190 static enum immediate_class
3191 classify_immediate (rtx op, enum machine_mode mode)
3193 HOST_WIDE_INT val;
3194 unsigned char arr[16];
3195 int i, j, repeated, fsmbi, repeat;
3197 gcc_assert (CONSTANT_P (op));
3199 if (GET_MODE (op) != VOIDmode)
3200 mode = GET_MODE (op);
3202 /* A V4SI const_vector with all identical symbols is ok. */
3203 if (!flag_pic
3204 && mode == V4SImode
3205 && GET_CODE (op) == CONST_VECTOR
3206 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3207 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3208 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3209 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3210 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3211 op = CONST_VECTOR_ELT (op, 0);
3213 switch (GET_CODE (op))
3215 case SYMBOL_REF:
3216 case LABEL_REF:
3217 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3219 case CONST:
3220 /* We can never know if the resulting address fits in 18 bits and can be
3221 loaded with ila. For now, assume the address will not overflow if
3222 the displacement is "small" (fits 'K' constraint). */
3223 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3225 rtx sym = XEXP (XEXP (op, 0), 0);
3226 rtx cst = XEXP (XEXP (op, 0), 1);
3228 if (GET_CODE (sym) == SYMBOL_REF
3229 && GET_CODE (cst) == CONST_INT
3230 && satisfies_constraint_K (cst))
3231 return IC_IL1s;
3233 return IC_IL2s;
3235 case HIGH:
3236 return IC_IL1s;
3238 case CONST_VECTOR:
3239 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3240 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3241 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3242 return IC_POOL;
3243 /* Fall through. */
3245 case CONST_INT:
3246 case CONST_DOUBLE:
3247 constant_to_array (mode, op, arr);
3249 /* Check that each 4-byte slot is identical. */
3250 repeated = 1;
3251 for (i = 4; i < 16; i += 4)
3252 for (j = 0; j < 4; j++)
3253 if (arr[j] != arr[i + j])
3254 repeated = 0;
3256 if (repeated)
3258 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3259 val = trunc_int_for_mode (val, SImode);
3261 if (which_immediate_load (val) != SPU_NONE)
3262 return IC_IL1;
3265 /* Any mode of 2 bytes or smaller can be loaded with an il
3266 instruction. */
3267 gcc_assert (GET_MODE_SIZE (mode) > 2);
3269 fsmbi = 1;
3270 repeat = 0;
3271 for (i = 0; i < 16 && fsmbi; i++)
3272 if (arr[i] != 0 && repeat == 0)
3273 repeat = arr[i];
3274 else if (arr[i] != 0 && arr[i] != repeat)
3275 fsmbi = 0;
3276 if (fsmbi)
3277 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3279 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3280 return IC_CPAT;
3282 if (repeated)
3283 return IC_IL2;
3285 return IC_POOL;
3286 default:
3287 break;
3289 gcc_unreachable ();
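/* Illustrative sketch (editorial): the fsmbi test in classify_immediate
   above accepts any 16-byte constant whose bytes are all either 0x00 or
   one single repeated value; when that value is 0xff the constant is
   exactly an fsmbi mask (IC_FSMBI), otherwise a second instruction is
   needed (IC_FSMBI2).  Standalone restatement: */
static int
fsmbi_class (const unsigned char arr[16])
{
  int i;
  unsigned char repeat = 0;
  for (i = 0; i < 16; i++)
    if (arr[i] != 0 && repeat == 0)
      repeat = arr[i];
    else if (arr[i] != 0 && arr[i] != repeat)
      return 0;                     /* not an fsmbi-style constant */
  return repeat == 0xff ? 1 : 2;    /* 1 ~ IC_FSMBI, 2 ~ IC_FSMBI2 */
}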
3292 static enum spu_immediate
3293 which_logical_immediate (HOST_WIDE_INT val)
3295 gcc_assert (val == trunc_int_for_mode (val, SImode));
3297 if (val >= -0x200 && val <= 0x1ff)
3298 return SPU_ORI;
3299 if (val >= 0 && val <= 0xffff)
3300 return SPU_IOHL;
3301 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3303 val = trunc_int_for_mode (val, HImode);
3304 if (val >= -0x200 && val <= 0x1ff)
3305 return SPU_ORHI;
3306 if ((val & 0xff) == ((val >> 8) & 0xff))
3308 val = trunc_int_for_mode (val, QImode);
3309 if (val >= -0x200 && val <= 0x1ff)
3310 return SPU_ORBI;
3313 return SPU_NONE;
3316 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3317 CONST_DOUBLEs. */
3318 static int
3319 const_vector_immediate_p (rtx x)
3321 int i;
3322 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3323 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3324 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3325 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3326 return 0;
3327 return 1;
3331 logical_immediate_p (rtx op, enum machine_mode mode)
3333 HOST_WIDE_INT val;
3334 unsigned char arr[16];
3335 int i, j;
3337 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 || GET_CODE (op) == CONST_VECTOR);
3340 if (GET_CODE (op) == CONST_VECTOR
3341 && !const_vector_immediate_p (op))
3342 return 0;
3344 if (GET_MODE (op) != VOIDmode)
3345 mode = GET_MODE (op);
3347 constant_to_array (mode, op, arr);
3349 /* Check that bytes are repeated. */
3350 for (i = 4; i < 16; i += 4)
3351 for (j = 0; j < 4; j++)
3352 if (arr[j] != arr[i + j])
3353 return 0;
3355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356 val = trunc_int_for_mode (val, SImode);
3358 i = which_logical_immediate (val);
3359 return i != SPU_NONE && i != SPU_IOHL;
3363 iohl_immediate_p (rtx op, enum machine_mode mode)
3365 HOST_WIDE_INT val;
3366 unsigned char arr[16];
3367 int i, j;
3369 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 || GET_CODE (op) == CONST_VECTOR);
3372 if (GET_CODE (op) == CONST_VECTOR
3373 && !const_vector_immediate_p (op))
3374 return 0;
3376 if (GET_MODE (op) != VOIDmode)
3377 mode = GET_MODE (op);
3379 constant_to_array (mode, op, arr);
3381 /* Check that bytes are repeated. */
3382 for (i = 4; i < 16; i += 4)
3383 for (j = 0; j < 4; j++)
3384 if (arr[j] != arr[i + j])
3385 return 0;
3387 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3388 val = trunc_int_for_mode (val, SImode);
3390 return val >= 0 && val <= 0xffff;
3394 arith_immediate_p (rtx op, enum machine_mode mode,
3395 HOST_WIDE_INT low, HOST_WIDE_INT high)
3397 HOST_WIDE_INT val;
3398 unsigned char arr[16];
3399 int bytes, i, j;
3401 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3402 || GET_CODE (op) == CONST_VECTOR);
3404 if (GET_CODE (op) == CONST_VECTOR
3405 && !const_vector_immediate_p (op))
3406 return 0;
3408 if (GET_MODE (op) != VOIDmode)
3409 mode = GET_MODE (op);
3411 constant_to_array (mode, op, arr);
3413 if (VECTOR_MODE_P (mode))
3414 mode = GET_MODE_INNER (mode);
3416 bytes = GET_MODE_SIZE (mode);
3417 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3419 /* Check that bytes are repeated. */
3420 for (i = bytes; i < 16; i += bytes)
3421 for (j = 0; j < bytes; j++)
3422 if (arr[j] != arr[i + j])
3423 return 0;
3425 val = arr[0];
3426 for (j = 1; j < bytes; j++)
3427 val = (val << 8) | arr[j];
3429 val = trunc_int_for_mode (val, mode);
3431 return val >= low && val <= high;
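/* Illustrative sketch (editorial): logical_immediate_p, iohl_immediate_p
   and arith_immediate_p above all start from the same "splat" check --
   the 16-byte image of the constant must be a single element repeated
   across the quadword.  The first two use a 4-byte element; for
   arith_immediate_p, ELEM_BYTES is the mode's element size (1, 2, 4
   or 8): */
static int
is_splat_constant (const unsigned char arr[16], int elem_bytes)
{
  int i, j;
  for (i = elem_bytes; i < 16; i += elem_bytes)
    for (j = 0; j < elem_bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;
  return 1;
}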
3434 /* We accept:
3435 - any 32-bit constant (SImode, SFmode)
3436 - any constant that can be generated with fsmbi (any mode)
3437 - a 64-bit constant where the high and low bits are identical
3438 (DImode, DFmode)
3439 - a 128-bit constant where the four 32-bit words match. */
3441 spu_legitimate_constant_p (rtx x)
3443 if (GET_CODE (x) == HIGH)
3444 x = XEXP (x, 0);
3445 /* V4SI with all identical symbols is valid. */
3446 if (!flag_pic
3447 && GET_MODE (x) == V4SImode
3448 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3449 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3450 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3451 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3452 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3453 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3455 if (GET_CODE (x) == CONST_VECTOR
3456 && !const_vector_immediate_p (x))
3457 return 0;
3458 return 1;
3461 /* Valid addresses are:
3462 - symbol_ref, label_ref, const
3463 - reg
3464 - reg + const, where either reg or const is 16 byte aligned
3465 - reg + reg, alignment doesn't matter
3466 The alignment matters in the reg+const case because lqd and stqd
3467 ignore the 4 least significant bits of the const. (TODO: It might be
3468 preferable to allow any alignment and fix it up when splitting.) */
3470 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3471 rtx x, int reg_ok_strict)
3473 if (mode == TImode && GET_CODE (x) == AND
3474 && GET_CODE (XEXP (x, 1)) == CONST_INT
3475 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3476 x = XEXP (x, 0);
3477 switch (GET_CODE (x))
3479 case SYMBOL_REF:
3480 case LABEL_REF:
3481 return !TARGET_LARGE_MEM;
3483 case CONST:
3484 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3486 rtx sym = XEXP (XEXP (x, 0), 0);
3487 rtx cst = XEXP (XEXP (x, 0), 1);
3489 /* Accept any symbol_ref + constant, assuming it does not
3490 wrap around the local store addressability limit. */
3491 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3492 return 1;
3494 return 0;
3496 case CONST_INT:
3497 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3499 case SUBREG:
3500 x = XEXP (x, 0);
3501 gcc_assert (GET_CODE (x) == REG);
3503 case REG:
3504 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3506 case PLUS:
3507 case LO_SUM:
3509 rtx op0 = XEXP (x, 0);
3510 rtx op1 = XEXP (x, 1);
3511 if (GET_CODE (op0) == SUBREG)
3512 op0 = XEXP (op0, 0);
3513 if (GET_CODE (op1) == SUBREG)
3514 op1 = XEXP (op1, 0);
3515 /* We can't just accept any aligned register because CSE can
3516 change it to a register that is not marked aligned and then
3517 recog will fail. So we only accept frame registers because
3518 they will only be changed to other frame registers. */
3519 if (GET_CODE (op0) == REG
3520 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3521 && GET_CODE (op1) == CONST_INT
3522 && INTVAL (op1) >= -0x2000
3523 && INTVAL (op1) <= 0x1fff
3524 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3525 return 1;
3526 if (GET_CODE (op0) == REG
3527 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3528 && GET_CODE (op1) == REG
3529 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3530 return 1;
3532 break;
3534 default:
3535 break;
3537 return 0;
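/* Illustrative sketch (editorial, simplified per the comment above
   spu_legitimate_address): in the reg+const d-form, lqd/stqd drop the
   low 4 bits of the displacement, so an unaligned offset silently
   addresses the enclosing quadword instead of the intended bytes.
   That is why either the base register or the offset must be known to
   be 16-byte aligned. */
static unsigned int
dform_effective_address (unsigned int base, int offset)
{
  return base + (offset & ~15);   /* offset 4 behaves like offset 0 */
}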
3540 /* When the address is reg + const_int, force the const_int into a
3541 register. */
3543 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3544 enum machine_mode mode)
3546 rtx op0, op1;
3547 /* Make sure both operands are registers. */
3548 if (GET_CODE (x) == PLUS)
3550 op0 = XEXP (x, 0);
3551 op1 = XEXP (x, 1);
3552 if (ALIGNED_SYMBOL_REF_P (op0))
3554 op0 = force_reg (Pmode, op0);
3555 mark_reg_pointer (op0, 128);
3557 else if (GET_CODE (op0) != REG)
3558 op0 = force_reg (Pmode, op0);
3559 if (ALIGNED_SYMBOL_REF_P (op1))
3561 op1 = force_reg (Pmode, op1);
3562 mark_reg_pointer (op1, 128);
3564 else if (GET_CODE (op1) != REG)
3565 op1 = force_reg (Pmode, op1);
3566 x = gen_rtx_PLUS (Pmode, op0, op1);
3567 if (spu_legitimate_address (mode, x, 0))
3568 return x;
3570 return NULL_RTX;
3573 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3574 struct attribute_spec.handler. */
3575 static tree
3576 spu_handle_fndecl_attribute (tree * node,
3577 tree name,
3578 tree args ATTRIBUTE_UNUSED,
3579 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3581 if (TREE_CODE (*node) != FUNCTION_DECL)
3583 warning (0, "`%s' attribute only applies to functions",
3584 IDENTIFIER_POINTER (name));
3585 *no_add_attrs = true;
3588 return NULL_TREE;
3591 /* Handle the "vector" attribute. */
3592 static tree
3593 spu_handle_vector_attribute (tree * node, tree name,
3594 tree args ATTRIBUTE_UNUSED,
3595 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3597 tree type = *node, result = NULL_TREE;
3598 enum machine_mode mode;
3599 int unsigned_p;
3601 while (POINTER_TYPE_P (type)
3602 || TREE_CODE (type) == FUNCTION_TYPE
3603 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3604 type = TREE_TYPE (type);
3606 mode = TYPE_MODE (type);
3608 unsigned_p = TYPE_UNSIGNED (type);
3609 switch (mode)
3611 case DImode:
3612 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3613 break;
3614 case SImode:
3615 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3616 break;
3617 case HImode:
3618 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3619 break;
3620 case QImode:
3621 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3622 break;
3623 case SFmode:
3624 result = V4SF_type_node;
3625 break;
3626 case DFmode:
3627 result = V2DF_type_node;
3628 break;
3629 default:
3630 break;
3633 /* Propagate qualifiers attached to the element type
3634 onto the vector type. */
3635 if (result && result != type && TYPE_QUALS (type))
3636 result = build_qualified_type (result, TYPE_QUALS (type));
3638 *no_add_attrs = true; /* No need to hang on to the attribute. */
3640 if (!result)
3641 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3642 else
3643 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3645 return NULL_TREE;
3648 /* Return nonzero if FUNC is a naked function. */
3649 static int
3650 spu_naked_function_p (tree func)
3652 tree a;
3654 if (TREE_CODE (func) != FUNCTION_DECL)
3655 abort ();
3657 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3658 return a != NULL_TREE;
3662 spu_initial_elimination_offset (int from, int to)
3664 int saved_regs_size = spu_saved_regs_size ();
3665 int sp_offset = 0;
3666 if (!current_function_is_leaf || crtl->outgoing_args_size
3667 || get_frame_size () || saved_regs_size)
3668 sp_offset = STACK_POINTER_OFFSET;
3669 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3670 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3671 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3672 return get_frame_size ();
3673 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3674 return sp_offset + crtl->outgoing_args_size
3675 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3676 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3677 return get_frame_size () + saved_regs_size + sp_offset;
3678 else
3679 gcc_unreachable ();
3683 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3685 enum machine_mode mode = TYPE_MODE (type);
3686 int byte_size = ((mode == BLKmode)
3687 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3689 /* Make sure small structs are left justified in a register. */
3690 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3691 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3693 enum machine_mode smode;
3694 rtvec v;
3695 int i;
3696 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3697 int n = byte_size / UNITS_PER_WORD;
3698 v = rtvec_alloc (nregs);
3699 for (i = 0; i < n; i++)
3701 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3702 gen_rtx_REG (TImode,
3703 FIRST_RETURN_REGNUM
3704 + i),
3705 GEN_INT (UNITS_PER_WORD * i));
3706 byte_size -= UNITS_PER_WORD;
3709 if (n < nregs)
3711 if (byte_size < 4)
3712 byte_size = 4;
3713 smode =
3714 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3715 RTVEC_ELT (v, n) =
3716 gen_rtx_EXPR_LIST (VOIDmode,
3717 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3718 GEN_INT (UNITS_PER_WORD * n));
3720 return gen_rtx_PARALLEL (mode, v);
3722 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
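/* Illustrative sketch (editorial, assuming UNITS_PER_WORD is 16 on SPU,
   i.e. one 128-bit register per word): the PARALLEL built above splits
   a left-justified aggregate return into whole TImode registers plus,
   when the size is not a multiple of 16, one final register in the
   smallest integer mode covering the tail (at least SImode).  E.g. a
   20-byte struct uses one full register and an SImode piece of the
   next. */
static void
split_return_value (int byte_size, int *full_regs, int *tail_bytes)
{
  *full_regs = byte_size / 16;              /* complete TImode pieces */
  *tail_bytes = byte_size % 16;             /* leftover bytes, if any */
  if (*tail_bytes > 0)
    {
      int mode_bytes = 4;                   /* SImode at minimum */
      while (mode_bytes < *tail_bytes)
        mode_bytes *= 2;                    /* DImode, then TImode */
      *tail_bytes = mode_bytes;
    }
}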
3726 spu_function_arg (CUMULATIVE_ARGS cum,
3727 enum machine_mode mode,
3728 tree type, int named ATTRIBUTE_UNUSED)
3730 int byte_size;
3732 if (cum >= MAX_REGISTER_ARGS)
3733 return 0;
3735 byte_size = ((mode == BLKmode)
3736 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3738 /* The ABI does not allow parameters to be passed partially in a
3739 register and partially on the stack. */
3740 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3741 return 0;
3743 /* Make sure small structs are left justified in a register. */
3744 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3745 && byte_size < UNITS_PER_WORD && byte_size > 0)
3747 enum machine_mode smode;
3748 rtx gr_reg;
3749 if (byte_size < 4)
3750 byte_size = 4;
3751 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3752 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3753 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3754 const0_rtx);
3755 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3757 else
3758 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3761 /* Variable-sized types are passed by reference. */
3762 static bool
3763 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3764 enum machine_mode mode ATTRIBUTE_UNUSED,
3765 const_tree type, bool named ATTRIBUTE_UNUSED)
3767 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3771 /* Var args. */
3773 /* Create and return the va_list datatype.
3775 On SPU, va_list is an array type equivalent to
3777 typedef struct __va_list_tag
3779 void *__args __attribute__((__aligned(16)));
3780 void *__skip __attribute__((__aligned(16)));
3782 } va_list[1];
3784 where __args points to the arg that will be returned by the next
3785 va_arg(), and __skip points to the previous stack frame such that
3786 when __args == __skip we should advance __args by 32 bytes. */
3787 static tree
3788 spu_build_builtin_va_list (void)
3790 tree f_args, f_skip, record, type_decl;
3791 bool owp;
3793 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3795 type_decl =
3796 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3798 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3799 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3801 DECL_FIELD_CONTEXT (f_args) = record;
3802 DECL_ALIGN (f_args) = 128;
3803 DECL_USER_ALIGN (f_args) = 1;
3805 DECL_FIELD_CONTEXT (f_skip) = record;
3806 DECL_ALIGN (f_skip) = 128;
3807 DECL_USER_ALIGN (f_skip) = 1;
3809 TREE_CHAIN (record) = type_decl;
3810 TYPE_NAME (record) = type_decl;
3811 TYPE_FIELDS (record) = f_args;
3812 TREE_CHAIN (f_args) = f_skip;
3814 /* We know this is being padded and we want it that way. It is an internal
3815 type, so hide the warnings from the user. */
3816 owp = warn_padded;
3817 warn_padded = false;
3819 layout_type (record);
3821 warn_padded = owp;
3823 /* The correct type is an array type of one element. */
3824 return build_array_type (record, build_index_type (size_zero_node));
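/* Illustrative sketch (editorial, host-compiler model of the record
   built above, assuming GNU-style alignment attributes): each field
   gets its own 16-byte aligned slot, so one va_list element is 32
   bytes with __skip at offset 16. */
struct illustrative_spu_va_list_tag
{
  void *__args __attribute__ ((__aligned__ (16)));
  void *__skip __attribute__ ((__aligned__ (16)));
};
/* offsetof (struct illustrative_spu_va_list_tag, __skip) == 16,
   sizeof (struct illustrative_spu_va_list_tag) == 32.  */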
3827 /* Implement va_start by filling the va_list structure VALIST.
3828 NEXTARG points to the first anonymous stack argument.
3830 The following global variables are used to initialize
3831 the va_list structure:
3833 crtl->args.info;
3834 the CUMULATIVE_ARGS for this function
3836 crtl->args.arg_offset_rtx:
3837 holds the offset of the first anonymous stack argument
3838 (relative to the virtual arg pointer). */
3840 static void
3841 spu_va_start (tree valist, rtx nextarg)
3843 tree f_args, f_skip;
3844 tree args, skip, t;
3846 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3847 f_skip = TREE_CHAIN (f_args);
3849 valist = build_va_arg_indirect_ref (valist);
3850 args =
3851 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3852 skip =
3853 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3855 /* Find the __args area. */
3856 t = make_tree (TREE_TYPE (args), nextarg);
3857 if (crtl->args.pretend_args_size > 0)
3858 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3859 size_int (-STACK_POINTER_OFFSET));
3860 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3861 TREE_SIDE_EFFECTS (t) = 1;
3862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3864 /* Find the __skip area. */
3865 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3866 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3867 size_int (crtl->args.pretend_args_size
3868 - STACK_POINTER_OFFSET));
3869 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3870 TREE_SIDE_EFFECTS (t) = 1;
3871 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3874 /* Gimplify va_arg by updating the va_list structure
3875 VALIST as required to retrieve an argument of type
3876 TYPE, and returning that argument.
3878 ret = va_arg(VALIST, TYPE);
3880 generates code equivalent to:
3882 paddedsize = (sizeof(TYPE) + 15) & -16;
3883 if (VALIST.__args + paddedsize > VALIST.__skip
3884 && VALIST.__args <= VALIST.__skip)
3885 addr = VALIST.__skip + 32;
3886 else
3887 addr = VALIST.__args;
3888 VALIST.__args = addr + paddedsize;
3889 ret = *(TYPE *)addr;
3891 static tree
3892 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3893 gimple_seq * post_p ATTRIBUTE_UNUSED)
3895 tree f_args, f_skip;
3896 tree args, skip;
3897 HOST_WIDE_INT size, rsize;
3898 tree paddedsize, addr, tmp;
3899 bool pass_by_reference_p;
3901 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3902 f_skip = TREE_CHAIN (f_args);
3904 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3905 args =
3906 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3907 skip =
3908 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3910 addr = create_tmp_var (ptr_type_node, "va_arg");
3911 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3913 /* if an object is dynamically sized, a pointer to it is passed
3914 instead of the object itself. */
3915 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3916 false);
3917 if (pass_by_reference_p)
3918 type = build_pointer_type (type);
3919 size = int_size_in_bytes (type);
3920 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3922 /* build conditional expression to calculate addr. The expression
3923 will be gimplified later. */
3924 paddedsize = size_int (rsize);
3925 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
3926 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3927 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3928 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3929 unshare_expr (skip)));
3931 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3932 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3933 size_int (32)), unshare_expr (args));
3935 gimplify_assign (addr, tmp, pre_p);
3937 /* update VALIST.__args */
3938 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3939 gimplify_assign (unshare_expr (args), tmp, pre_p);
3941 addr = fold_convert (build_pointer_type (type), addr);
3943 if (pass_by_reference_p)
3944 addr = build_va_arg_indirect_ref (addr);
3946 return build_va_arg_indirect_ref (addr);
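/* Illustrative sketch (editorial): the pseudocode in the comment above
   spu_gimplify_va_arg_expr, restated as a plain C address computation.
   ARGS walks the argument area; when the next (16-byte padded) argument
   would cross SKIP, the pointer jumps past the documented 32-byte area
   at the start of the caller's frame. */
static void *
spu_va_arg_model (char **args, char *skip, unsigned long type_size)
{
  unsigned long padded = (type_size + 15) & ~15UL;
  char *addr;
  if (*args + padded > skip && *args <= skip)
    addr = skip + 32;
  else
    addr = *args;
  *args = addr + padded;
  return addr;
}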
3949 /* Save parameter registers starting with the register that corresponds
3950 to the first unnamed parameter. If the first unnamed parameter is
3951 in the stack then save no registers. Set pretend_args_size to the
3952 amount of space needed to save the registers. */
3953 void
3954 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3955 tree type, int *pretend_size, int no_rtl)
3957 if (!no_rtl)
3959 rtx tmp;
3960 int regno;
3961 int offset;
3962 int ncum = *cum;
3964 /* cum currently points to the last named argument; we want to
3965 start at the next argument. */
3966 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3968 offset = -STACK_POINTER_OFFSET;
3969 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3971 tmp = gen_frame_mem (V4SImode,
3972 plus_constant (virtual_incoming_args_rtx,
3973 offset));
3974 emit_move_insn (tmp,
3975 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3976 offset += 16;
3978 *pretend_size = offset + STACK_POINTER_OFFSET;
3982 void
3983 spu_conditional_register_usage (void)
3985 if (flag_pic)
3987 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3988 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3992 /* This is called to decide when we can simplify a load instruction. We
3993 must only return true for registers which we know will always be
3994 aligned, taking into account that CSE might replace this reg with
3995 another one that has not been marked aligned.
3996 So this is really only true for frame, stack and virtual registers,
3997 which we know are always aligned and should not be adversely affected
3998 by CSE. */
3999 static int
4000 regno_aligned_for_load (int regno)
4002 return regno == FRAME_POINTER_REGNUM
4003 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4004 || regno == ARG_POINTER_REGNUM
4005 || regno == STACK_POINTER_REGNUM
4006 || (regno >= FIRST_VIRTUAL_REGISTER
4007 && regno <= LAST_VIRTUAL_REGISTER);
4010 /* Return TRUE when mem is known to be 16-byte aligned. */
4012 aligned_mem_p (rtx mem)
4014 if (MEM_ALIGN (mem) >= 128)
4015 return 1;
4016 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4017 return 1;
4018 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4020 rtx p0 = XEXP (XEXP (mem, 0), 0);
4021 rtx p1 = XEXP (XEXP (mem, 0), 1);
4022 if (regno_aligned_for_load (REGNO (p0)))
4024 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4025 return 1;
4026 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4027 return 1;
4030 else if (GET_CODE (XEXP (mem, 0)) == REG)
4032 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4033 return 1;
4035 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4036 return 1;
4037 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4039 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4040 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4041 if (GET_CODE (p0) == SYMBOL_REF
4042 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4043 return 1;
4045 return 0;
4048 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4049 into its SYMBOL_REF_FLAGS. */
4050 static void
4051 spu_encode_section_info (tree decl, rtx rtl, int first)
4053 default_encode_section_info (decl, rtl, first);
4055 /* If a variable has a forced alignment to < 16 bytes, mark it with
4056 SYMBOL_FLAG_ALIGN1. */
4057 if (TREE_CODE (decl) == VAR_DECL
4058 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4059 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4062 /* Return TRUE if we are certain the mem refers to a complete object
4063 which is both 16-byte aligned and padded to a 16-byte boundary. This
4064 would make it safe to store with a single instruction.
4065 We guarantee the alignment and padding for static objects by aligning
4066 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4067 FIXME: We currently cannot guarantee this for objects on the stack
4068 because assign_parm_setup_stack calls assign_stack_local with the
4069 alignment of the parameter mode and in that case the alignment never
4070 gets adjusted by LOCAL_ALIGNMENT. */
4071 static int
4072 store_with_one_insn_p (rtx mem)
4074 rtx addr = XEXP (mem, 0);
4075 if (GET_MODE (mem) == BLKmode)
4076 return 0;
4077 /* Only static objects. */
4078 if (GET_CODE (addr) == SYMBOL_REF)
4080 /* We use the associated declaration to make sure the access is
4081 referring to the whole object.
4082 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4083 if it is necessary. Will there be cases where one exists, and
4084 the other does not? Will there be cases where both exist, but
4085 have different types? */
4086 tree decl = MEM_EXPR (mem);
4087 if (decl
4088 && TREE_CODE (decl) == VAR_DECL
4089 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4090 return 1;
4091 decl = SYMBOL_REF_DECL (addr);
4092 if (decl
4093 && TREE_CODE (decl) == VAR_DECL
4094 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4095 return 1;
4097 return 0;
4101 spu_expand_mov (rtx * ops, enum machine_mode mode)
4103 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4104 abort ();
4106 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4108 rtx from = SUBREG_REG (ops[1]);
4109 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4111 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4112 && GET_MODE_CLASS (imode) == MODE_INT
4113 && subreg_lowpart_p (ops[1]));
4115 if (GET_MODE_SIZE (imode) < 4)
4116 imode = SImode;
4117 if (imode != GET_MODE (from))
4118 from = gen_rtx_SUBREG (imode, from, 0);
4120 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4122 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4123 emit_insn (GEN_FCN (icode) (ops[0], from));
4125 else
4126 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4127 return 1;
4130 /* At least one of the operands needs to be a register. */
4131 if ((reload_in_progress | reload_completed) == 0
4132 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4134 rtx temp = force_reg (mode, ops[1]);
4135 emit_move_insn (ops[0], temp);
4136 return 1;
4138 if (reload_in_progress || reload_completed)
4140 if (CONSTANT_P (ops[1]))
4141 return spu_split_immediate (ops);
4142 return 0;
4144 else
4146 if (GET_CODE (ops[0]) == MEM)
4148 if (!spu_valid_move (ops))
4150 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4151 gen_reg_rtx (TImode)));
4152 return 1;
4155 else if (GET_CODE (ops[1]) == MEM)
4157 if (!spu_valid_move (ops))
4159 emit_insn (gen_load
4160 (ops[0], ops[1], gen_reg_rtx (TImode),
4161 gen_reg_rtx (SImode)));
4162 return 1;
4165 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4166 extend them. */
4167 if (GET_CODE (ops[1]) == CONST_INT)
4169 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4170 if (val != INTVAL (ops[1]))
4172 emit_move_insn (ops[0], GEN_INT (val));
4173 return 1;
4177 return 0;
4180 void
4181 spu_split_load (rtx * ops)
4183 enum machine_mode mode = GET_MODE (ops[0]);
4184 rtx addr, load, rot, mem, p0, p1;
4185 int rot_amt;
4187 addr = XEXP (ops[1], 0);
4189 rot = 0;
4190 rot_amt = 0;
4191 if (GET_CODE (addr) == PLUS)
4193 /* 8 cases:
4194 aligned reg + aligned reg => lqx
4195 aligned reg + unaligned reg => lqx, rotqby
4196 aligned reg + aligned const => lqd
4197 aligned reg + unaligned const => lqd, rotqbyi
4198 unaligned reg + aligned reg => lqx, rotqby
4199 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4200 unaligned reg + aligned const => lqd, rotqby
4201 unaligned reg + unaligned const -> not allowed by legitimate address */
4203 p0 = XEXP (addr, 0);
4204 p1 = XEXP (addr, 1);
4205 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4207 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4209 emit_insn (gen_addsi3 (ops[3], p0, p1));
4210 rot = ops[3];
4212 else
4213 rot = p0;
4215 else
4217 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4219 rot_amt = INTVAL (p1) & 15;
4220 p1 = GEN_INT (INTVAL (p1) & -16);
4221 addr = gen_rtx_PLUS (SImode, p0, p1);
4223 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4224 rot = p1;
4227 else if (GET_CODE (addr) == REG)
4229 if (!regno_aligned_for_load (REGNO (addr)))
4230 rot = addr;
4232 else if (GET_CODE (addr) == CONST)
4234 if (GET_CODE (XEXP (addr, 0)) == PLUS
4235 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4236 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4238 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4239 if (rot_amt & -16)
4240 addr = gen_rtx_CONST (Pmode,
4241 gen_rtx_PLUS (Pmode,
4242 XEXP (XEXP (addr, 0), 0),
4243 GEN_INT (rot_amt & -16)));
4244 else
4245 addr = XEXP (XEXP (addr, 0), 0);
4247 else
4248 rot = addr;
4250 else if (GET_CODE (addr) == CONST_INT)
4252 rot_amt = INTVAL (addr);
4253 addr = GEN_INT (rot_amt & -16);
4255 else if (!ALIGNED_SYMBOL_REF_P (addr))
4256 rot = addr;
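  /* For scalars narrower than a word the value lives right-justified in
     the first word of the register (e.g. a QImode value in byte 3), so
     the rotate amount is reduced accordingly below.  */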
4258 if (GET_MODE_SIZE (mode) < 4)
4259 rot_amt += GET_MODE_SIZE (mode) - 4;
4261 rot_amt &= 15;
4263 if (rot && rot_amt)
4265 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4266 rot = ops[3];
4267 rot_amt = 0;
4270 load = ops[2];
4272 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4273 mem = change_address (ops[1], TImode, addr);
4275 emit_insn (gen_movti (load, mem));
4277 if (rot)
4278 emit_insn (gen_rotqby_ti (load, load, rot));
4279 else if (rot_amt)
4280 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4282 if (reload_completed)
4283 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4284 else
4285 emit_insn (gen_spu_convert (ops[0], load));
4288 void
4289 spu_split_store (rtx * ops)
4291 enum machine_mode mode = GET_MODE (ops[0]);
4292 rtx pat = ops[2];
4293 rtx reg = ops[3];
4294 rtx addr, p0, p1, p1_lo, smem;
4295 int aform;
4296 int scalar;
4298 addr = XEXP (ops[0], 0);
4300 if (GET_CODE (addr) == PLUS)
4302 /* 8 cases:
4303 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4304 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4305 aligned reg + aligned const => lqd, c?d, shuf, stqx
4306 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4307 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4308 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4309 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4310 unaligned reg + unaligned const -> not allowed by legitimate address */
4312 aform = 0;
4313 p0 = XEXP (addr, 0);
4314 p1 = p1_lo = XEXP (addr, 1);
4315 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4317 p1_lo = GEN_INT (INTVAL (p1) & 15);
4318 p1 = GEN_INT (INTVAL (p1) & -16);
4319 addr = gen_rtx_PLUS (SImode, p0, p1);
4322 else if (GET_CODE (addr) == REG)
4324 aform = 0;
4325 p0 = addr;
4326 p1 = p1_lo = const0_rtx;
4328 else
4330 aform = 1;
4331 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4332 p1 = 0; /* aform doesn't use p1 */
4333 p1_lo = addr;
4334 if (ALIGNED_SYMBOL_REF_P (addr))
4335 p1_lo = const0_rtx;
4336 else if (GET_CODE (addr) == CONST)
4338 if (GET_CODE (XEXP (addr, 0)) == PLUS
4339 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4340 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4342 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4343 if ((v & -16) != 0)
4344 addr = gen_rtx_CONST (Pmode,
4345 gen_rtx_PLUS (Pmode,
4346 XEXP (XEXP (addr, 0), 0),
4347 GEN_INT (v & -16)));
4348 else
4349 addr = XEXP (XEXP (addr, 0), 0);
4350 p1_lo = GEN_INT (v & 15);
4353 else if (GET_CODE (addr) == CONST_INT)
4355 p1_lo = GEN_INT (INTVAL (addr) & 15);
4356 addr = GEN_INT (INTVAL (addr) & -16);
4360 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4362 scalar = store_with_one_insn_p (ops[0]);
4363 if (!scalar)
4365 /* We could copy the flags from the ops[0] MEM to mem here, but
4366 we don't because we want this load to be optimized away if
4367 possible, and copying the flags will prevent that in certain
4368 cases, e.g. consider the volatile flag. */
4370 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4371 set_mem_alias_set (lmem, 0);
4372 emit_insn (gen_movti (reg, lmem));
4374 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4375 p0 = stack_pointer_rtx;
4376 if (!p1_lo)
4377 p1_lo = const0_rtx;
4379 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4380 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4382 else if (reload_completed)
4384 if (GET_CODE (ops[1]) == REG)
4385 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4386 else if (GET_CODE (ops[1]) == SUBREG)
4387 emit_move_insn (reg,
4388 gen_rtx_REG (GET_MODE (reg),
4389 REGNO (SUBREG_REG (ops[1]))));
4390 else
4391 abort ();
4393 else
4395 if (GET_CODE (ops[1]) == REG)
4396 emit_insn (gen_spu_convert (reg, ops[1]));
4397 else if (GET_CODE (ops[1]) == SUBREG)
4398 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4399 else
4400 abort ();
4403 if (GET_MODE_SIZE (mode) < 4 && scalar)
4404 emit_insn (gen_shlqby_ti
4405 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4407 smem = change_address (ops[0], TImode, addr);
4408 /* We can't use the previous alias set because the memory has changed
4409 size and can potentially overlap objects of other types. */
4410 set_mem_alias_set (smem, 0);
4412 emit_insn (gen_movti (smem, reg));
4415 /* Return TRUE if X is MEM which is a struct member reference
4416 and the member can safely be loaded and stored with a single
4417 instruction because it is padded. */
4418 static int
4419 mem_is_padded_component_ref (rtx x)
4421 tree t = MEM_EXPR (x);
4422 tree r;
4423 if (!t || TREE_CODE (t) != COMPONENT_REF)
4424 return 0;
4425 t = TREE_OPERAND (t, 1);
4426 if (!t || TREE_CODE (t) != FIELD_DECL
4427 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4428 return 0;
4429 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4430 r = DECL_FIELD_CONTEXT (t);
4431 if (!r || TREE_CODE (r) != RECORD_TYPE)
4432 return 0;
4433 /* Make sure they are the same mode */
4434 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4435 return 0;
4436 /* If there are no following fields then the field alignment assures
4437 the structure is padded to the alignment which means this field is
4438 padded too. */
4439 if (TREE_CHAIN (t) == 0)
4440 return 1;
4441 /* If the following field is also aligned then this field will be
4442 padded. */
4443 t = TREE_CHAIN (t);
4444 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4445 return 1;
4446 return 0;
4449 /* Parse the -mfixed-range= option string. */
4450 static void
4451 fix_range (const char *const_str)
4453 int i, first, last;
4454 char *str, *dash, *comma;
4456 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4457 REG2 are either register names or register numbers. The effect
4458 of this option is to mark the registers in the range from REG1 to
4459 REG2 as ``fixed'' so they won't be used by the compiler. */
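   /* For example, -mfixed-range=80-127 (register numbers, or the
      corresponding register names) would keep those registers out of
      register allocation.  Example values only.  */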
4461 i = strlen (const_str);
4462 str = (char *) alloca (i + 1);
4463 memcpy (str, const_str, i + 1);
4465 while (1)
4467 dash = strchr (str, '-');
4468 if (!dash)
4470 warning (0, "value of -mfixed-range must have form REG1-REG2");
4471 return;
4473 *dash = '\0';
4474 comma = strchr (dash + 1, ',');
4475 if (comma)
4476 *comma = '\0';
4478 first = decode_reg_name (str);
4479 if (first < 0)
4481 warning (0, "unknown register name: %s", str);
4482 return;
4485 last = decode_reg_name (dash + 1);
4486 if (last < 0)
4488 warning (0, "unknown register name: %s", dash + 1);
4489 return;
4492 *dash = '-';
4494 if (first > last)
4496 warning (0, "%s-%s is an empty range", str, dash + 1);
4497 return;
4500 for (i = first; i <= last; ++i)
4501 fixed_regs[i] = call_used_regs[i] = 1;
4503 if (!comma)
4504 break;
4506 *comma = ',';
4507 str = comma + 1;
4512 spu_valid_move (rtx * ops)
4514 enum machine_mode mode = GET_MODE (ops[0]);
4515 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4516 return 0;
4518 /* init_expr_once tries to recog against load and store insns to set
4519 the direct_load[] and direct_store[] arrays. We always want to
4520 consider those loads and stores valid. init_expr_once is called in
4521 the context of a dummy function which does not have a decl. */
4522 if (cfun->decl == 0)
4523 return 1;
4525 /* Don't allow loads/stores which would require more than 1 insn.
4526 During and after reload we assume loads and stores only take 1
4527 insn. */
4528 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4530 if (GET_CODE (ops[0]) == MEM
4531 && (GET_MODE_SIZE (mode) < 4
4532 || !(store_with_one_insn_p (ops[0])
4533 || mem_is_padded_component_ref (ops[0]))))
4534 return 0;
4535 if (GET_CODE (ops[1]) == MEM
4536 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4537 return 0;
4539 return 1;
4542 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4543 can be generated using the fsmbi instruction. */
4545 fsmbi_const_p (rtx x)
4547 if (CONSTANT_P (x))
4549 /* We can always choose TImode for CONST_INT because the high bits
4550 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4551 enum immediate_class c = classify_immediate (x, TImode);
4552 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4554 return 0;
4557 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4558 can be generated using the cbd, chd, cwd or cdd instruction. */
4560 cpat_const_p (rtx x, enum machine_mode mode)
4562 if (CONSTANT_P (x))
4564 enum immediate_class c = classify_immediate (x, mode);
4565 return c == IC_CPAT;
4567 return 0;
4571 gen_cpat_const (rtx * ops)
4573 unsigned char dst[16];
4574 int i, offset, shift, isize;
4575 if (GET_CODE (ops[3]) != CONST_INT
4576 || GET_CODE (ops[2]) != CONST_INT
4577 || (GET_CODE (ops[1]) != CONST_INT
4578 && GET_CODE (ops[1]) != REG))
4579 return 0;
4580 if (GET_CODE (ops[1]) == REG
4581 && (!REG_POINTER (ops[1])
4582 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4583 return 0;
4585 for (i = 0; i < 16; i++)
4586 dst[i] = i + 16;
4587 isize = INTVAL (ops[3]);
4588 if (isize == 1)
4589 shift = 3;
4590 else if (isize == 2)
4591 shift = 2;
4592 else
4593 shift = 0;
4594 offset = (INTVAL (ops[2]) +
4595 (GET_CODE (ops[1]) ==
4596 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
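   /* E.g. for a 4-byte insert at byte offset 4 the loop below produces the
      selector { 0x10 0x11 0x12 0x13  0x00 0x01 0x02 0x03  0x18 ... 0x1f },
      essentially the pattern a cwd instruction would generate
      (illustrative).  */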
4597 for (i = 0; i < isize; i++)
4598 dst[offset + i] = i + shift;
4599 return array_to_constant (TImode, dst);
4602 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4603 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4604 than 16 bytes, the value is repeated across the rest of the array. */
4605 void
4606 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4608 HOST_WIDE_INT val;
4609 int i, j, first;
4611 memset (arr, 0, 16);
4612 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4613 if (GET_CODE (x) == CONST_INT
4614 || (GET_CODE (x) == CONST_DOUBLE
4615 && (mode == SFmode || mode == DFmode)))
4617 gcc_assert (mode != VOIDmode && mode != BLKmode);
4619 if (GET_CODE (x) == CONST_DOUBLE)
4620 val = const_double_to_hwint (x);
4621 else
4622 val = INTVAL (x);
4623 first = GET_MODE_SIZE (mode) - 1;
4624 for (i = first; i >= 0; i--)
4626 arr[i] = val & 0xff;
4627 val >>= 8;
4629 /* Splat the constant across the whole array. */
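      /* E.g. an SImode 0x01020304 becomes
	 { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }
	 (illustrative).  */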
4630 for (j = 0, i = first + 1; i < 16; i++)
4632 arr[i] = arr[j];
4633 j = (j == first) ? 0 : j + 1;
4636 else if (GET_CODE (x) == CONST_DOUBLE)
4638 val = CONST_DOUBLE_LOW (x);
4639 for (i = 15; i >= 8; i--)
4641 arr[i] = val & 0xff;
4642 val >>= 8;
4644 val = CONST_DOUBLE_HIGH (x);
4645 for (i = 7; i >= 0; i--)
4647 arr[i] = val & 0xff;
4648 val >>= 8;
4651 else if (GET_CODE (x) == CONST_VECTOR)
4653 int units;
4654 rtx elt;
4655 mode = GET_MODE_INNER (mode);
4656 units = CONST_VECTOR_NUNITS (x);
4657 for (i = 0; i < units; i++)
4659 elt = CONST_VECTOR_ELT (x, i);
4660 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4662 if (GET_CODE (elt) == CONST_DOUBLE)
4663 val = const_double_to_hwint (elt);
4664 else
4665 val = INTVAL (elt);
4666 first = GET_MODE_SIZE (mode) - 1;
4667 if (first + i * GET_MODE_SIZE (mode) > 16)
4668 abort ();
4669 for (j = first; j >= 0; j--)
4671 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4672 val >>= 8;
4677 else
4678 gcc_unreachable();
4681 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4682 smaller than 16 bytes, use the bytes that would represent that value
4683 in a register, e.g., for QImode return the value of arr[3]. */
4685 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4687 enum machine_mode inner_mode;
4688 rtvec v;
4689 int units, size, i, j, k;
4690 HOST_WIDE_INT val;
4692 if (GET_MODE_CLASS (mode) == MODE_INT
4693 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4695 j = GET_MODE_SIZE (mode);
4696 i = j < 4 ? 4 - j : 0;
4697 for (val = 0; i < j; i++)
4698 val = (val << 8) | arr[i];
4699 val = trunc_int_for_mode (val, mode);
4700 return GEN_INT (val);
4703 if (mode == TImode)
4705 HOST_WIDE_INT high;
4706 for (i = high = 0; i < 8; i++)
4707 high = (high << 8) | arr[i];
4708 for (i = 8, val = 0; i < 16; i++)
4709 val = (val << 8) | arr[i];
4710 return immed_double_const (val, high, TImode);
4712 if (mode == SFmode)
4714 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4715 val = trunc_int_for_mode (val, SImode);
4716 return hwint_to_const_double (SFmode, val);
4718 if (mode == DFmode)
4720 for (i = 0, val = 0; i < 8; i++)
4721 val = (val << 8) | arr[i];
4722 return hwint_to_const_double (DFmode, val);
4725 if (!VECTOR_MODE_P (mode))
4726 abort ();
4728 units = GET_MODE_NUNITS (mode);
4729 size = GET_MODE_UNIT_SIZE (mode);
4730 inner_mode = GET_MODE_INNER (mode);
4731 v = rtvec_alloc (units);
4733 for (k = i = 0; i < units; ++i)
4735 val = 0;
4736 for (j = 0; j < size; j++, k++)
4737 val = (val << 8) | arr[k];
4739 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4740 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4741 else
4742 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4744 if (k > 16)
4745 abort ();
4747 return gen_rtx_CONST_VECTOR (mode, v);
4750 static void
4751 reloc_diagnostic (rtx x)
4753 tree loc_decl, decl = 0;
4754 const char *msg;
4755 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4756 return;
4758 if (GET_CODE (x) == SYMBOL_REF)
4759 decl = SYMBOL_REF_DECL (x);
4760 else if (GET_CODE (x) == CONST
4761 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4762 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4764 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4765 if (decl && !DECL_P (decl))
4766 decl = 0;
4768 /* We use last_assemble_variable_decl to get line information. It's
4769 not always going to be right and might not even be close, but will
4770 be right for the more common cases. */
4771 if (!last_assemble_variable_decl || in_section == ctors_section)
4772 loc_decl = decl;
4773 else
4774 loc_decl = last_assemble_variable_decl;
4776 /* The decl could be a string constant. */
4777 if (decl && DECL_P (decl))
4778 msg = "%Jcreating run-time relocation for %qD";
4779 else
4780 msg = "creating run-time relocation";
4782 if (TARGET_WARN_RELOC)
4783 warning (0, msg, loc_decl, decl);
4784 else
4785 error (msg, loc_decl, decl);
4788 /* Hook into assemble_integer so we can generate an error for run-time
4789 relocations. The SPU ABI disallows them. */
4790 static bool
4791 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4793 /* By default run-time relocations aren't supported, but we allow them
4794 in case users support it in their own run-time loader. And we provide
4795 a warning for those users that don't. */
4796 if ((GET_CODE (x) == SYMBOL_REF)
4797 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4798 reloc_diagnostic (x);
4800 return default_assemble_integer (x, size, aligned_p);
4803 static void
4804 spu_asm_globalize_label (FILE * file, const char *name)
4806 fputs ("\t.global\t", file);
4807 assemble_name (file, name);
4808 fputs ("\n", file);
4811 static bool
4812 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4813 bool speed ATTRIBUTE_UNUSED)
4815 enum machine_mode mode = GET_MODE (x);
4816 int cost = COSTS_N_INSNS (2);
4818 /* Folding to a CONST_VECTOR will use extra space but there might
4819 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4820 only if it allows us to fold away multiple insns. Changing the cost
4821 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4822 because this cost will only be compared against a single insn.
4823 if (code == CONST_VECTOR)
4824 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6); */
4827 /* Use defaults for float operations. Not accurate but good enough. */
4828 if (mode == DFmode)
4830 *total = COSTS_N_INSNS (13);
4831 return true;
4833 if (mode == SFmode)
4835 *total = COSTS_N_INSNS (6);
4836 return true;
4838 switch (code)
4840 case CONST_INT:
4841 if (satisfies_constraint_K (x))
4842 *total = 0;
4843 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4844 *total = COSTS_N_INSNS (1);
4845 else
4846 *total = COSTS_N_INSNS (3);
4847 return true;
4849 case CONST:
4850 *total = COSTS_N_INSNS (3);
4851 return true;
4853 case LABEL_REF:
4854 case SYMBOL_REF:
4855 *total = COSTS_N_INSNS (0);
4856 return true;
4858 case CONST_DOUBLE:
4859 *total = COSTS_N_INSNS (5);
4860 return true;
4862 case FLOAT_EXTEND:
4863 case FLOAT_TRUNCATE:
4864 case FLOAT:
4865 case UNSIGNED_FLOAT:
4866 case FIX:
4867 case UNSIGNED_FIX:
4868 *total = COSTS_N_INSNS (7);
4869 return true;
4871 case PLUS:
4872 if (mode == TImode)
4874 *total = COSTS_N_INSNS (9);
4875 return true;
4877 break;
4879 case MULT:
4880 cost =
4881 GET_CODE (XEXP (x, 0)) ==
4882 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4883 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4885 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4887 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4888 cost = COSTS_N_INSNS (14);
4889 if ((val & 0xffff) == 0)
4890 cost = COSTS_N_INSNS (9);
4891 else if (val > 0 && val < 0x10000)
4892 cost = COSTS_N_INSNS (11);
4895 *total = cost;
4896 return true;
4897 case DIV:
4898 case UDIV:
4899 case MOD:
4900 case UMOD:
4901 *total = COSTS_N_INSNS (20);
4902 return true;
4903 case ROTATE:
4904 case ROTATERT:
4905 case ASHIFT:
4906 case ASHIFTRT:
4907 case LSHIFTRT:
4908 *total = COSTS_N_INSNS (4);
4909 return true;
4910 case UNSPEC:
4911 if (XINT (x, 1) == UNSPEC_CONVERT)
4912 *total = COSTS_N_INSNS (0);
4913 else
4914 *total = COSTS_N_INSNS (4);
4915 return true;
4917 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4918 if (GET_MODE_CLASS (mode) == MODE_INT
4919 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4920 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4921 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
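  /* E.g. a DImode operation is costed at 4x and a TImode operation at
     16x the corresponding SImode cost.  */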
4922 *total = cost;
4923 return true;
4926 static enum machine_mode
4927 spu_unwind_word_mode (void)
4929 return SImode;
4932 /* Decide whether we can make a sibling call to a function. DECL is the
4933 declaration of the function being targeted by the call and EXP is the
4934 CALL_EXPR representing the call. */
4935 static bool
4936 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4938 return decl && !TARGET_LARGE_MEM;
4941 /* We need to correctly update the back chain pointer and the Available
4942 Stack Size (which is in the second slot of the sp register). */
4943 void
4944 spu_allocate_stack (rtx op0, rtx op1)
4946 HOST_WIDE_INT v;
4947 rtx chain = gen_reg_rtx (V4SImode);
4948 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4949 rtx sp = gen_reg_rtx (V4SImode);
4950 rtx splatted = gen_reg_rtx (V4SImode);
4951 rtx pat = gen_reg_rtx (TImode);
4953 /* copy the back chain so we can save it back again. */
4954 emit_move_insn (chain, stack_bot);
4956 op1 = force_reg (SImode, op1);
4958 v = 0x1020300010203ll;
4959 emit_move_insn (pat, immed_double_const (v, v, TImode));
4960 emit_insn (gen_shufb (splatted, op1, op1, pat));
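   /* The pattern above splats op1's preferred slot across all four words,
      so the vector subtract below decrements both the stack pointer
      (slot 0) and the Available Stack Size (slot 1) at once.  */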
4962 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4963 emit_insn (gen_subv4si3 (sp, sp, splatted));
4965 if (flag_stack_check)
4967 rtx avail = gen_reg_rtx(SImode);
4968 rtx result = gen_reg_rtx(SImode);
4969 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4970 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4971 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
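      /* The sequence above halts (heq) when the new Available Stack Size is
	 negative, i.e. the allocation ran past the end of the stack.  */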
4974 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4976 emit_move_insn (stack_bot, chain);
4978 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4981 void
4982 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4984 static unsigned char arr[16] =
4985 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4986 rtx temp = gen_reg_rtx (SImode);
4987 rtx temp2 = gen_reg_rtx (SImode);
4988 rtx temp3 = gen_reg_rtx (V4SImode);
4989 rtx temp4 = gen_reg_rtx (V4SImode);
4990 rtx pat = gen_reg_rtx (TImode);
4991 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4993 /* Restore the backchain from the first word, sp from the second. */
4994 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4995 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4997 emit_move_insn (pat, array_to_constant (TImode, arr));
4999 /* Compute Available Stack Size for sp */
5000 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5001 emit_insn (gen_shufb (temp3, temp, temp, pat));
5003 /* Compute Available Stack Size for back chain */
5004 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5005 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5006 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5008 emit_insn (gen_addv4si3 (sp, sp, temp3));
5009 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5012 static void
5013 spu_init_libfuncs (void)
5015 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5016 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5017 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5018 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5019 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5020 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5021 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5022 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5023 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5024 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5025 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5027 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5028 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5030 set_optab_libfunc (smul_optab, TImode, "__multi3");
5031 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5032 set_optab_libfunc (smod_optab, TImode, "__modti3");
5033 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5034 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5035 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5038 /* Make a subreg, stripping any existing subreg. We could possibly just
5039 call simplify_subreg, but in this case we know what we want. */
5041 spu_gen_subreg (enum machine_mode mode, rtx x)
5043 if (GET_CODE (x) == SUBREG)
5044 x = SUBREG_REG (x);
5045 if (GET_MODE (x) == mode)
5046 return x;
5047 return gen_rtx_SUBREG (mode, x, 0);
5050 static bool
5051 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5053 return (TYPE_MODE (type) == BLKmode
5054 && ((type) == 0
5055 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5056 || int_size_in_bytes (type) >
5057 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5060 /* Create the built-in types and functions */
5062 struct spu_builtin_description spu_builtins[] = {
5063 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5064 {fcode, icode, name, type, params, NULL_TREE},
5065 #include "spu-builtins.def"
5066 #undef DEF_BUILTIN
5069 static void
5070 spu_init_builtins (void)
5072 struct spu_builtin_description *d;
5073 unsigned int i;
5075 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5076 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5077 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5078 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5079 V4SF_type_node = build_vector_type (float_type_node, 4);
5080 V2DF_type_node = build_vector_type (double_type_node, 2);
5082 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5083 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5084 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5085 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5087 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5089 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5090 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5091 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5092 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5093 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5094 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5095 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5096 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5097 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5098 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5102 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5103 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5104 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5105 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5106 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5107 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5108 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5109 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5111 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5112 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5114 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5116 spu_builtin_types[SPU_BTI_PTR] =
5117 build_pointer_type (build_qualified_type
5118 (void_type_node,
5119 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5121 /* For each builtin we build a new prototype. The tree code will make
5122 sure nodes are shared. */
5123 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5125 tree p;
5126 char name[64]; /* build_function will make a copy. */
5127 int parm;
5129 if (d->name == 0)
5130 continue;
5132 /* Find last parm. */
5133 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5136 p = void_list_node;
5137 while (parm > 1)
5138 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5140 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5142 sprintf (name, "__builtin_%s", d->name);
5143 d->fndecl =
5144 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5145 NULL, NULL_TREE);
5146 if (d->fcode == SPU_MASK_FOR_LOAD)
5147 TREE_READONLY (d->fndecl) = 1;
5149 /* These builtins don't throw. */
5150 TREE_NOTHROW (d->fndecl) = 1;
5154 void
5155 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5157 static unsigned char arr[16] =
5158 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5160 rtx temp = gen_reg_rtx (Pmode);
5161 rtx temp2 = gen_reg_rtx (V4SImode);
5162 rtx temp3 = gen_reg_rtx (V4SImode);
5163 rtx pat = gen_reg_rtx (TImode);
5164 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5166 emit_move_insn (pat, array_to_constant (TImode, arr));
5168 /* Restore the sp. */
5169 emit_move_insn (temp, op1);
5170 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5172 /* Compute available stack size for sp. */
5173 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5174 emit_insn (gen_shufb (temp3, temp, temp, pat));
5176 emit_insn (gen_addv4si3 (sp, sp, temp3));
5177 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5181 spu_safe_dma (HOST_WIDE_INT channel)
5183 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5186 void
5187 spu_builtin_splats (rtx ops[])
5189 enum machine_mode mode = GET_MODE (ops[0]);
5190 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5192 unsigned char arr[16];
5193 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5194 emit_move_insn (ops[0], array_to_constant (mode, arr));
5196 else
5198 rtx reg = gen_reg_rtx (TImode);
5199 rtx shuf;
5200 if (GET_CODE (ops[1]) != REG
5201 && GET_CODE (ops[1]) != SUBREG)
5202 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5203 switch (mode)
5205 case V2DImode:
5206 case V2DFmode:
5207 shuf =
5208 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5209 TImode);
5210 break;
5211 case V4SImode:
5212 case V4SFmode:
5213 shuf =
5214 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5215 TImode);
5216 break;
5217 case V8HImode:
5218 shuf =
5219 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5220 TImode);
5221 break;
5222 case V16QImode:
5223 shuf =
5224 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5225 TImode);
5226 break;
5227 default:
5228 abort ();
5230 emit_move_insn (reg, shuf);
5231 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
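      /* Each shuffle pattern above repeats the byte indices of ops[1]'s
	 preferred slot; e.g. the V4SI pattern 00 01 02 03 repeated four
	 times copies word 0 into every element.  */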
5235 void
5236 spu_builtin_extract (rtx ops[])
5238 enum machine_mode mode;
5239 rtx rot, from, tmp;
5241 mode = GET_MODE (ops[1]);
5243 if (GET_CODE (ops[2]) == CONST_INT)
5245 switch (mode)
5247 case V16QImode:
5248 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5249 break;
5250 case V8HImode:
5251 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5252 break;
5253 case V4SFmode:
5254 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5255 break;
5256 case V4SImode:
5257 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5258 break;
5259 case V2DImode:
5260 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5261 break;
5262 case V2DFmode:
5263 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5264 break;
5265 default:
5266 abort ();
5268 return;
5271 from = spu_gen_subreg (TImode, ops[1]);
5272 rot = gen_reg_rtx (TImode);
5273 tmp = gen_reg_rtx (SImode);
5275 switch (mode)
5277 case V16QImode:
5278 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5279 break;
5280 case V8HImode:
5281 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5282 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5283 break;
5284 case V4SFmode:
5285 case V4SImode:
5286 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5287 break;
5288 case V2DImode:
5289 case V2DFmode:
5290 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5291 break;
5292 default:
5293 abort ();
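   /* tmp is now the byte rotate count that brings the selected element
      into the preferred slot; e.g. for V4SImode element i it is 4*i.  */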
5295 emit_insn (gen_rotqby_ti (rot, from, tmp));
5297 emit_insn (gen_spu_convert (ops[0], rot));
5300 void
5301 spu_builtin_insert (rtx ops[])
5303 enum machine_mode mode = GET_MODE (ops[0]);
5304 enum machine_mode imode = GET_MODE_INNER (mode);
5305 rtx mask = gen_reg_rtx (TImode);
5306 rtx offset;
5308 if (GET_CODE (ops[3]) == CONST_INT)
5309 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5310 else
5312 offset = gen_reg_rtx (SImode);
5313 emit_insn (gen_mulsi3
5314 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5316 emit_insn (gen_cpat
5317 (mask, stack_pointer_rtx, offset,
5318 GEN_INT (GET_MODE_SIZE (imode))));
5319 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5322 void
5323 spu_builtin_promote (rtx ops[])
5325 enum machine_mode mode, imode;
5326 rtx rot, from, offset;
5327 HOST_WIDE_INT pos;
5329 mode = GET_MODE (ops[0]);
5330 imode = GET_MODE_INNER (mode);
5332 from = gen_reg_rtx (TImode);
5333 rot = spu_gen_subreg (TImode, ops[0]);
5335 emit_insn (gen_spu_convert (from, ops[1]));
5337 if (GET_CODE (ops[2]) == CONST_INT)
5339 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5340 if (GET_MODE_SIZE (imode) < 4)
5341 pos += 4 - GET_MODE_SIZE (imode);
5342 offset = GEN_INT (pos & 15);
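      /* E.g. for a V4SImode destination and element 2, pos is -8 and the
	 rotate count becomes 8, moving the scalar from the preferred slot
	 into element 2 (illustrative).  */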
5344 else
5346 offset = gen_reg_rtx (SImode);
5347 switch (mode)
5349 case V16QImode:
5350 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5351 break;
5352 case V8HImode:
5353 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5354 emit_insn (gen_addsi3 (offset, offset, offset));
5355 break;
5356 case V4SFmode:
5357 case V4SImode:
5358 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5359 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5360 break;
5361 case V2DImode:
5362 case V2DFmode:
5363 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5364 break;
5365 default:
5366 abort ();
5369 emit_insn (gen_rotqby_ti (rot, from, offset));
5372 void
5373 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5375 rtx shuf = gen_reg_rtx (V4SImode);
5376 rtx insn = gen_reg_rtx (V4SImode);
5377 rtx shufc;
5378 rtx insnc;
5379 rtx mem;
5381 fnaddr = force_reg (SImode, fnaddr);
5382 cxt = force_reg (SImode, cxt);
5384 if (TARGET_LARGE_MEM)
5386 rtx rotl = gen_reg_rtx (V4SImode);
5387 rtx mask = gen_reg_rtx (V4SImode);
5388 rtx bi = gen_reg_rtx (SImode);
5389 unsigned char shufa[16] = {
5390 2, 3, 0, 1, 18, 19, 16, 17,
5391 0, 1, 2, 3, 16, 17, 18, 19
5393 unsigned char insna[16] = {
5394 0x41, 0, 0, 79,
5395 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5396 0x60, 0x80, 0, 79,
5397 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5400 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5401 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5403 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5404 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5405 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5406 emit_insn (gen_selb (insn, insnc, rotl, mask));
5408 mem = memory_address (Pmode, tramp);
5409 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5411 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5412 mem = memory_address (Pmode, plus_constant (tramp, 16));
5413 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5415 else
5417 rtx scxt = gen_reg_rtx (SImode);
5418 rtx sfnaddr = gen_reg_rtx (SImode);
5419 unsigned char insna[16] = {
5420 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5421 0x30, 0, 0, 0,
5422 0, 0, 0, 0,
5423 0, 0, 0, 0
5426 shufc = gen_reg_rtx (TImode);
5427 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5429 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5430 fits 18 bits and the last 4 are zeros. This will be true if
5431 the stack pointer is initialized to 0x3fff0 at program start;
5432 otherwise the ila instruction will be garbage. */
5434 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5435 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5436 emit_insn (gen_cpat
5437 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5438 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5439 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5441 mem = memory_address (Pmode, tramp);
5442 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5445 emit_insn (gen_sync ());
5448 void
5449 spu_expand_sign_extend (rtx ops[])
5451 unsigned char arr[16];
5452 rtx pat = gen_reg_rtx (TImode);
5453 rtx sign, c;
5454 int i, last;
5455 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5456 if (GET_MODE (ops[1]) == QImode)
5458 sign = gen_reg_rtx (HImode);
5459 emit_insn (gen_extendqihi2 (sign, ops[1]));
5460 for (i = 0; i < 16; i++)
5461 arr[i] = 0x12;
5462 arr[last] = 0x13;
5464 else
5466 for (i = 0; i < 16; i++)
5467 arr[i] = 0x10;
5468 switch (GET_MODE (ops[1]))
5470 case HImode:
5471 sign = gen_reg_rtx (SImode);
5472 emit_insn (gen_extendhisi2 (sign, ops[1]));
5473 arr[last] = 0x03;
5474 arr[last - 1] = 0x02;
5475 break;
5476 case SImode:
5477 sign = gen_reg_rtx (SImode);
5478 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5479 for (i = 0; i < 4; i++)
5480 arr[last - i] = 3 - i;
5481 break;
5482 case DImode:
5483 sign = gen_reg_rtx (SImode);
5484 c = gen_reg_rtx (SImode);
5485 emit_insn (gen_spu_convert (c, ops[1]));
5486 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5487 for (i = 0; i < 8; i++)
5488 arr[last - i] = 7 - i;
5489 break;
5490 default:
5491 abort ();
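   /* In the shuffle pattern, selector bytes 0x00-0x0f pick bytes of ops[1]
      and 0x10-0x1f pick bytes of the sign word; e.g. extending SImode to
      DImode yields four copies of the sign byte followed by the four
      value bytes.  */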
5494 emit_move_insn (pat, array_to_constant (TImode, arr));
5495 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5498 /* expand vector initialization. If there are any constant parts,
5499 load constant parts first. Then load any non-constant parts. */
5500 void
5501 spu_expand_vector_init (rtx target, rtx vals)
5503 enum machine_mode mode = GET_MODE (target);
5504 int n_elts = GET_MODE_NUNITS (mode);
5505 int n_var = 0;
5506 bool all_same = true;
5507 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5508 int i;
5510 first = XVECEXP (vals, 0, 0);
5511 for (i = 0; i < n_elts; ++i)
5513 x = XVECEXP (vals, 0, i);
5514 if (!(CONST_INT_P (x)
5515 || GET_CODE (x) == CONST_DOUBLE
5516 || GET_CODE (x) == CONST_FIXED))
5517 ++n_var;
5518 else
5520 if (first_constant == NULL_RTX)
5521 first_constant = x;
5523 if (i > 0 && !rtx_equal_p (x, first))
5524 all_same = false;
5527 /* if all elements are the same, use splats to repeat elements */
5528 if (all_same)
5530 if (!CONSTANT_P (first)
5531 && !register_operand (first, GET_MODE (x)))
5532 first = force_reg (GET_MODE (first), first);
5533 emit_insn (gen_spu_splats (target, first));
5534 return;
5537 /* load constant parts */
5538 if (n_var != n_elts)
5540 if (n_var == 0)
5542 emit_move_insn (target,
5543 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5545 else
5547 rtx constant_parts_rtx = copy_rtx (vals);
5549 gcc_assert (first_constant != NULL_RTX);
5550 /* fill empty slots with the first constant, this increases
5551 our chance of using splats in the recursive call below. */
5552 for (i = 0; i < n_elts; ++i)
5554 x = XVECEXP (constant_parts_rtx, 0, i);
5555 if (!(CONST_INT_P (x)
5556 || GET_CODE (x) == CONST_DOUBLE
5557 || GET_CODE (x) == CONST_FIXED))
5558 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5561 spu_expand_vector_init (target, constant_parts_rtx);
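	  /* E.g. initializing { x, 1, 2, 3 } first loads the constant vector
	     { 1, 1, 2, 3 } (the variable slot filled with the first constant)
	     and then inserts x into element 0 below (illustrative).  */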
5565 /* load variable parts */
5566 if (n_var != 0)
5568 rtx insert_operands[4];
5570 insert_operands[0] = target;
5571 insert_operands[2] = target;
5572 for (i = 0; i < n_elts; ++i)
5574 x = XVECEXP (vals, 0, i);
5575 if (!(CONST_INT_P (x)
5576 || GET_CODE (x) == CONST_DOUBLE
5577 || GET_CODE (x) == CONST_FIXED))
5579 if (!register_operand (x, GET_MODE (x)))
5580 x = force_reg (GET_MODE (x), x);
5581 insert_operands[1] = x;
5582 insert_operands[3] = GEN_INT (i);
5583 spu_builtin_insert (insert_operands);
5589 /* Return insn index for the vector compare instruction for given CODE,
5590 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5592 static int
5593 get_vec_cmp_insn (enum rtx_code code,
5594 enum machine_mode dest_mode,
5595 enum machine_mode op_mode)
5598 switch (code)
5600 case EQ:
5601 if (dest_mode == V16QImode && op_mode == V16QImode)
5602 return CODE_FOR_ceq_v16qi;
5603 if (dest_mode == V8HImode && op_mode == V8HImode)
5604 return CODE_FOR_ceq_v8hi;
5605 if (dest_mode == V4SImode && op_mode == V4SImode)
5606 return CODE_FOR_ceq_v4si;
5607 if (dest_mode == V4SImode && op_mode == V4SFmode)
5608 return CODE_FOR_ceq_v4sf;
5609 if (dest_mode == V2DImode && op_mode == V2DFmode)
5610 return CODE_FOR_ceq_v2df;
5611 break;
5612 case GT:
5613 if (dest_mode == V16QImode && op_mode == V16QImode)
5614 return CODE_FOR_cgt_v16qi;
5615 if (dest_mode == V8HImode && op_mode == V8HImode)
5616 return CODE_FOR_cgt_v8hi;
5617 if (dest_mode == V4SImode && op_mode == V4SImode)
5618 return CODE_FOR_cgt_v4si;
5619 if (dest_mode == V4SImode && op_mode == V4SFmode)
5620 return CODE_FOR_cgt_v4sf;
5621 if (dest_mode == V2DImode && op_mode == V2DFmode)
5622 return CODE_FOR_cgt_v2df;
5623 break;
5624 case GTU:
5625 if (dest_mode == V16QImode && op_mode == V16QImode)
5626 return CODE_FOR_clgt_v16qi;
5627 if (dest_mode == V8HImode && op_mode == V8HImode)
5628 return CODE_FOR_clgt_v8hi;
5629 if (dest_mode == V4SImode && op_mode == V4SImode)
5630 return CODE_FOR_clgt_v4si;
5631 break;
5632 default:
5633 break;
5635 return -1;
5638 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5639 DMODE is expected destination mode. This is a recursive function. */
5641 static rtx
5642 spu_emit_vector_compare (enum rtx_code rcode,
5643 rtx op0, rtx op1,
5644 enum machine_mode dmode)
5646 int vec_cmp_insn;
5647 rtx mask;
5648 enum machine_mode dest_mode;
5649 enum machine_mode op_mode = GET_MODE (op1);
5651 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5653 /* Floating point vector compare instructions use destination V4SImode.
5654 Double floating point vector compare instructions use destination V2DImode.
5655 Move the result to the appropriate mode later. */
5656 if (dmode == V4SFmode)
5657 dest_mode = V4SImode;
5658 else if (dmode == V2DFmode)
5659 dest_mode = V2DImode;
5660 else
5661 dest_mode = dmode;
5663 mask = gen_reg_rtx (dest_mode);
5664 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5666 if (vec_cmp_insn == -1)
5668 bool swap_operands = false;
5669 bool try_again = false;
5670 switch (rcode)
5672 case LT:
5673 rcode = GT;
5674 swap_operands = true;
5675 try_again = true;
5676 break;
5677 case LTU:
5678 rcode = GTU;
5679 swap_operands = true;
5680 try_again = true;
5681 break;
5682 case NE:
5683 /* Treat A != B as ~(A==B). */
5685 enum insn_code nor_code;
5686 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5687 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5688 gcc_assert (nor_code != CODE_FOR_nothing);
5689 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5690 if (dmode != dest_mode)
5692 rtx temp = gen_reg_rtx (dest_mode);
5693 convert_move (temp, mask, 0);
5694 return temp;
5696 return mask;
5698 break;
5699 case GE:
5700 case GEU:
5701 case LE:
5702 case LEU:
5703 /* Try GT/GTU/LT/LTU OR EQ */
5705 rtx c_rtx, eq_rtx;
5706 enum insn_code ior_code;
5707 enum rtx_code new_code;
5709 switch (rcode)
5711 case GE: new_code = GT; break;
5712 case GEU: new_code = GTU; break;
5713 case LE: new_code = LT; break;
5714 case LEU: new_code = LTU; break;
5715 default:
5716 gcc_unreachable ();
5719 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5720 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5722 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5723 gcc_assert (ior_code != CODE_FOR_nothing);
5724 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5725 if (dmode != dest_mode)
5727 rtx temp = gen_reg_rtx (dest_mode);
5728 convert_move (temp, mask, 0);
5729 return temp;
5731 return mask;
5733 break;
5734 default:
5735 gcc_unreachable ();
5738 /* You only get two chances. */
5739 if (try_again)
5740 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5742 gcc_assert (vec_cmp_insn != -1);
5744 if (swap_operands)
5746 rtx tmp;
5747 tmp = op0;
5748 op0 = op1;
5749 op1 = tmp;
5753 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5754 if (dmode != dest_mode)
5756 rtx temp = gen_reg_rtx (dest_mode);
5757 convert_move (temp, mask, 0);
5758 return temp;
5760 return mask;
5764 /* Emit vector conditional expression.
5765 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5766 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5769 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5770 rtx cond, rtx cc_op0, rtx cc_op1)
5772 enum machine_mode dest_mode = GET_MODE (dest);
5773 enum rtx_code rcode = GET_CODE (cond);
5774 rtx mask;
5776 /* Get the vector mask for the given relational operations. */
5777 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5779 emit_insn(gen_selb (dest, op2, op1, mask));
5781 return 1;
5784 static rtx
5785 spu_force_reg (enum machine_mode mode, rtx op)
5787 rtx x, r;
5788 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5790 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5791 || GET_MODE (op) == BLKmode)
5792 return force_reg (mode, convert_to_mode (mode, op, 0));
5793 abort ();
5796 r = force_reg (GET_MODE (op), op);
5797 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5799 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5800 if (x)
5801 return x;
5804 x = gen_reg_rtx (mode);
5805 emit_insn (gen_spu_convert (x, r));
5806 return x;
5809 static void
5810 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5812 HOST_WIDE_INT v = 0;
5813 int lsbits;
5814 /* Check the range of immediate operands. */
5815 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5817 int range = p - SPU_BTI_7;
5819 if (!CONSTANT_P (op))
5820 error ("%s expects an integer literal in the range [%d, %d].",
5821 d->name,
5822 spu_builtin_range[range].low, spu_builtin_range[range].high);
5824 if (GET_CODE (op) == CONST
5825 && (GET_CODE (XEXP (op, 0)) == PLUS
5826 || GET_CODE (XEXP (op, 0)) == MINUS))
5828 v = INTVAL (XEXP (XEXP (op, 0), 1));
5829 op = XEXP (XEXP (op, 0), 0);
5831 else if (GET_CODE (op) == CONST_INT)
5832 v = INTVAL (op);
5833 else if (GET_CODE (op) == CONST_VECTOR
5834 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5835 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5837 /* The default for v is 0 which is valid in every range. */
5838 if (v < spu_builtin_range[range].low
5839 || v > spu_builtin_range[range].high)
5840 error ("%s expects an integer literal in the range [%d, %d]. ("
5841 HOST_WIDE_INT_PRINT_DEC ")",
5842 d->name,
5843 spu_builtin_range[range].low, spu_builtin_range[range].high,
5846 switch (p)
5848 case SPU_BTI_S10_4:
5849 lsbits = 4;
5850 break;
5851 case SPU_BTI_U16_2:
5852 /* This is only used in lqa and stqa. Even though the insns
5853 encode 16 bits of the address (all but the 2 least
5854 significant), only 14 bits are used because it is masked to
5855 be 16 byte aligned. */
5856 lsbits = 4;
5857 break;
5858 case SPU_BTI_S16_2:
5859 /* This is used for lqr and stqr. */
5860 lsbits = 2;
5861 break;
5862 default:
5863 lsbits = 0;
5866 if (GET_CODE (op) == LABEL_REF
5867 || (GET_CODE (op) == SYMBOL_REF
5868 && SYMBOL_REF_FUNCTION_P (op))
5869 || (v & ((1 << lsbits) - 1)) != 0)
5870 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5871 d->name);
5876 static int
5877 expand_builtin_args (struct spu_builtin_description *d, tree exp,
5878 rtx target, rtx ops[])
5880 enum insn_code icode = d->icode;
5881 int i = 0, a;
5883 /* Expand the arguments into rtl. */
5885 if (d->parm[0] != SPU_BTI_VOID)
5886 ops[i++] = target;
5888 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
5890 tree arg = CALL_EXPR_ARG (exp, a);
5891 if (arg == 0)
5892 abort ();
5893 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5896 /* The insn pattern may have additional operands (SCRATCH).
5897 Return the number of actual non-SCRATCH operands. */
5898 gcc_assert (i <= insn_data[icode].n_operands);
5899 return i;
5902 static rtx
5903 spu_expand_builtin_1 (struct spu_builtin_description *d,
5904 tree exp, rtx target)
5906 rtx pat;
5907 rtx ops[8];
5908 enum insn_code icode = d->icode;
5909 enum machine_mode mode, tmode;
5910 int i, p;
5911 int n_operands;
5912 tree return_type;
5914 /* Set up ops[] with values from arglist. */
5915 n_operands = expand_builtin_args (d, exp, target, ops);
5917 /* Handle the target operand which must be operand 0. */
5918 i = 0;
5919 if (d->parm[0] != SPU_BTI_VOID)
5922 /* We prefer the mode specified for the match_operand; otherwise
5923 we use the mode from the builtin function prototype. */
5924 tmode = insn_data[d->icode].operand[0].mode;
5925 if (tmode == VOIDmode)
5926 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5928 /* Try to use target because not using it can lead to extra copies,
5929 and when all of the registers are in use, extra copies lead
5930 to extra spills. */
5931 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5932 ops[0] = target;
5933 else
5934 target = ops[0] = gen_reg_rtx (tmode);
5936 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5937 abort ();
5939 i++;
5942 if (d->fcode == SPU_MASK_FOR_LOAD)
5944 enum machine_mode mode = insn_data[icode].operand[1].mode;
5945 tree arg;
5946 rtx addr, op, pat;
5948 /* get addr */
5949 arg = CALL_EXPR_ARG (exp, 0);
5950 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5951 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5952 addr = memory_address (mode, op);
5954 /* negate addr */
5955 op = gen_reg_rtx (GET_MODE (addr));
5956 emit_insn (gen_rtx_SET (VOIDmode, op,
5957 gen_rtx_NEG (GET_MODE (addr), addr)));
5958 op = gen_rtx_MEM (mode, op);
5960 pat = GEN_FCN (icode) (target, op);
5961 if (!pat)
5962 return 0;
5963 emit_insn (pat);
5964 return target;
5967 /* Ignore align_hint, but still expand its args in case they have
5968 side effects. */
5969 if (icode == CODE_FOR_spu_align_hint)
5970 return 0;
5972 /* Handle the rest of the operands. */
5973 for (p = 1; i < n_operands; i++, p++)
5975 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5976 mode = insn_data[d->icode].operand[i].mode;
5977 else
5978 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5980 /* mode can be VOIDmode here for labels */
5982 /* For specific intrinsics with an immediate operand, e.g.,
5983 si_ai(), we sometimes need to convert the scalar argument to a
5984 vector argument by splatting the scalar. */
5985 if (VECTOR_MODE_P (mode)
5986 && (GET_CODE (ops[i]) == CONST_INT
5987 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5988 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
5990 if (GET_CODE (ops[i]) == CONST_INT)
5991 ops[i] = spu_const (mode, INTVAL (ops[i]));
5992 else
5994 rtx reg = gen_reg_rtx (mode);
5995 enum machine_mode imode = GET_MODE_INNER (mode);
5996 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5997 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5998 if (imode != GET_MODE (ops[i]))
5999 ops[i] = convert_to_mode (imode, ops[i],
6000 TYPE_UNSIGNED (spu_builtin_types
6001 [d->parm[i]]));
6002 emit_insn (gen_spu_splats (reg, ops[i]));
6003 ops[i] = reg;
6007 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6009 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6010 ops[i] = spu_force_reg (mode, ops[i]);
6013 switch (n_operands)
6015 case 0:
6016 pat = GEN_FCN (icode) (0);
6017 break;
6018 case 1:
6019 pat = GEN_FCN (icode) (ops[0]);
6020 break;
6021 case 2:
6022 pat = GEN_FCN (icode) (ops[0], ops[1]);
6023 break;
6024 case 3:
6025 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6026 break;
6027 case 4:
6028 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6029 break;
6030 case 5:
6031 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6032 break;
6033 case 6:
6034 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6035 break;
6036 default:
6037 abort ();
6040 if (!pat)
6041 abort ();
6043 if (d->type == B_CALL || d->type == B_BISLED)
6044 emit_call_insn (pat);
6045 else if (d->type == B_JUMP)
6047 emit_jump_insn (pat);
6048 emit_barrier ();
6050 else
6051 emit_insn (pat);
6053 return_type = spu_builtin_types[d->parm[0]];
6054 if (d->parm[0] != SPU_BTI_VOID
6055 && GET_MODE (target) != TYPE_MODE (return_type))
6057 /* target is the return value. It should always have the mode of
6058 the builtin function prototype. */
6059 target = spu_force_reg (TYPE_MODE (return_type), target);
6062 return target;
6066 spu_expand_builtin (tree exp,
6067 rtx target,
6068 rtx subtarget ATTRIBUTE_UNUSED,
6069 enum machine_mode mode ATTRIBUTE_UNUSED,
6070 int ignore ATTRIBUTE_UNUSED)
6072 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6073 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
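/* The subtraction of END_BUILTINS above implies that the SPU builtins
   were registered with DECL_FUNCTION_CODE values offset by END_BUILTINS
   (presumably in spu_init_builtins), so FCODE indexes spu_builtins
   directly.  */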
6074 struct spu_builtin_description *d;
6076 if (fcode < NUM_SPU_BUILTINS)
6078 d = &spu_builtins[fcode];
6080 return spu_expand_builtin_1 (d, exp, target);
6082 abort ();
6085 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6086 static tree
6087 spu_builtin_mul_widen_even (tree type)
6089 switch (TYPE_MODE (type))
6091 case V8HImode:
6092 if (TYPE_UNSIGNED (type))
6093 return spu_builtins[SPU_MULE_0].fndecl;
6094 else
6095 return spu_builtins[SPU_MULE_1].fndecl;
6096 break;
6097 default:
6098 return NULL_TREE;
6102 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6103 static tree
6104 spu_builtin_mul_widen_odd (tree type)
6106 switch (TYPE_MODE (type))
6108 case V8HImode:
6109 if (TYPE_UNSIGNED (type))
6110 return spu_builtins[SPU_MULO_1].fndecl;
6111 else
6112 return spu_builtins[SPU_MULO_0].fndecl;
6113 break;
6114 default:
6115 return NULL_TREE;
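/* Note on the two hooks above (a description of the intent, inferred
   from the spu_builtins table): the vectorizer uses them to synthesize a
   full V8HImode x V8HImode -> V4SImode widening multiply from two
   builtins, one multiplying the even-numbered halfword elements and one
   the odd-numbered elements, mapped here to the SPU_MULE_* and
   SPU_MULO_* entries.  */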
6119 /* Implement targetm.vectorize.builtin_mask_for_load. */
6120 static tree
6121 spu_builtin_mask_for_load (void)
6123 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6124 gcc_assert (d);
6125 return d->fndecl;
6128 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6129 static int
6130 spu_builtin_vectorization_cost (bool runtime_test)
6132 /* If the branch of the runtime test is taken (i.e. the vectorized
6133 version is skipped), this incurs a misprediction cost, because the
6134 vectorized version is expected to be the fall-through. So we subtract
6135 the latency of a mispredicted branch from the costs that are incurred
6136 when the vectorized version is executed. */
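/* The value 19 below presumably approximates the latency of a
   mispredicted branch on the SPU (on the order of 18-19 cycles); it is
   returned negated per the reasoning in the comment above.  */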
6137 if (runtime_test)
6138 return -19;
6139 else
6140 return 0;
6143 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6144 after applying some number of peeling iterations. This routine does not
6145 determine how many iterations are required to reach the desired alignment. */
6147 static bool
6148 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6150 if (is_packed)
6151 return false;
6153 /* All other types are naturally aligned. */
6154 return true;
6157 /* Implement targetm.vectorize.builtin_vec_perm. */
6158 tree
6159 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6161 struct spu_builtin_description *d;
6163 *mask_element_type = unsigned_char_type_node;
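/* The mask elements are bytes because these SPU_SHUFFLE_* builtins
   presumably expand to the shuffle-bytes (shufb) instruction, which
   assembles its result byte by byte from the 32-byte concatenation of
   its two data operands under a 16-byte control vector.  For example, a
   control vector of { 1, 2, ..., 16 } would select a result shifted
   left by one byte across the two inputs (a sketch of the semantics,
   not tied to the patterns defined elsewhere).  */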
6165 switch (TYPE_MODE (type))
6167 case V16QImode:
6168 if (TYPE_UNSIGNED (type))
6169 d = &spu_builtins[SPU_SHUFFLE_0];
6170 else
6171 d = &spu_builtins[SPU_SHUFFLE_1];
6172 break;
6174 case V8HImode:
6175 if (TYPE_UNSIGNED (type))
6176 d = &spu_builtins[SPU_SHUFFLE_2];
6177 else
6178 d = &spu_builtins[SPU_SHUFFLE_3];
6179 break;
6181 case V4SImode:
6182 if (TYPE_UNSIGNED (type))
6183 d = &spu_builtins[SPU_SHUFFLE_4];
6184 else
6185 d = &spu_builtins[SPU_SHUFFLE_5];
6186 break;
6188 case V2DImode:
6189 if (TYPE_UNSIGNED (type))
6190 d = &spu_builtins[SPU_SHUFFLE_6];
6191 else
6192 d = &spu_builtins[SPU_SHUFFLE_7];
6193 break;
6195 case V4SFmode:
6196 d = &spu_builtins[SPU_SHUFFLE_8];
6197 break;
6199 case V2DFmode:
6200 d = &spu_builtins[SPU_SHUFFLE_9];
6201 break;
6203 default:
6204 return NULL_TREE;
6207 gcc_assert (d);
6208 return d->fndecl;
6211 /* Count the number of instructions that require each pipe and use the
6212 counts to compute the resource-constrained Minimum Iteration Interval
6213 (MII) for the modulo scheduler. get_pipe () returns -2, -1, 0 or 1;
6214 -2 denotes an instruction that can go in either pipe0 or pipe1. */
6215 static int
6216 spu_sms_res_mii (struct ddg *g)
6218 int i;
6219 unsigned t[4] = {0, 0, 0, 0};
6221 for (i = 0; i < g->num_nodes; i++)
6223 rtx insn = g->nodes[i].insn;
6224 int p = get_pipe (insn) + 2;
6226 assert (p >= 0);
6227 assert (p < 4);
6229 t[p]++;
6230 if (dump_file && INSN_P (insn))
6231 fprintf (dump_file, "i%d %s %d %d\n",
6232 INSN_UID (insn),
6233 insn_data[INSN_CODE(insn)].name,
6234 p, t[p]);
6236 if (dump_file)
6237 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
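/* t[0] counts insns that can issue on either pipe, t[2] and t[3] count
   insns tied to pipe0 and pipe1 respectively; t[1] (insns get_pipe ()
   could not classify) does not enter the bound.  The result below is
   the larger of the total issue-slot work split across the two pipes,
   rounded up, and the work demanded of either single pipe.  */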
6239 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6243 void
6244 spu_init_expanders (void)
6246 /* The hard frame pointer register is only 128-bit aligned when
6247 frame_pointer_needed is true. We don't know that until we're
6248 expanding the prologue. */
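/* REGNO_POINTER_ALIGN is measured in bits, so the value 8 below records
   only byte alignment, the conservative assumption until the prologue
   decides whether the frame pointer gets 128-bit alignment.  */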
6249 if (cfun)
6250 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6253 static enum machine_mode
6254 spu_libgcc_cmp_return_mode (void)
6257 /* On the SPU, word_mode is TImode, so it is better to use SImode
6258 for compare return values. */
6259 return SImode;
6262 static enum machine_mode
6263 spu_libgcc_shift_count_mode (void)
6265 /* On the SPU, word_mode is TImode, so it is better to use SImode
6266 for shift counts. */
6267 return SImode;
6270 /* An early place to adjust some flags after GCC has finished processing
6271 them. */
6272 static void
6273 asm_file_start (void)
6275 /* Variable tracking should be run after all optimizations which
6276 change order of insns. It also needs a valid CFG. */
6277 spu_flag_var_tracking = flag_var_tracking;
6278 flag_var_tracking = 0;
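/* The saved value is presumably consumed later, once the SPU
   machine-dependent passes that reorder insns have run and variable
   tracking can be performed on a stable insn stream (an inference from
   the comment above, not verified here).  */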
6280 default_file_start ();
6283 /* Implement targetm.section_type_flags. */
6284 static unsigned int
6285 spu_section_type_flags (tree decl, const char *name, int reloc)
6287 /* .toe needs to have type @nobits. */
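/* Assumption: .toe is the SPU "table of effective addresses" section,
   which is filled in when the SPU image is embedded into a host
   executable, so it must carry no contents in the object file; hence
   @nobits, i.e. SECTION_BSS.  */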
6288 if (strcmp (name, ".toe") == 0)
6289 return SECTION_BSS;
6290 return default_section_type_flags (decl, name, reloc);