1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "tree-gimple.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "sbitmap.h"
57 #include "timevar.h"
58 #include "df.h"
60 /* Builtin types, data and prototypes. */
62 enum spu_builtin_type_index
64 SPU_BTI_END_OF_PARAMS,
66 /* We create new type nodes for these. */
67 SPU_BTI_V16QI,
68 SPU_BTI_V8HI,
69 SPU_BTI_V4SI,
70 SPU_BTI_V2DI,
71 SPU_BTI_V4SF,
72 SPU_BTI_V2DF,
73 SPU_BTI_UV16QI,
74 SPU_BTI_UV8HI,
75 SPU_BTI_UV4SI,
76 SPU_BTI_UV2DI,
78 /* A 16-byte type. (Implemented with V16QI_type_node) */
79 SPU_BTI_QUADWORD,
81 /* These all correspond to intSI_type_node */
82 SPU_BTI_7,
83 SPU_BTI_S7,
84 SPU_BTI_U7,
85 SPU_BTI_S10,
86 SPU_BTI_S10_4,
87 SPU_BTI_U14,
88 SPU_BTI_16,
89 SPU_BTI_S16,
90 SPU_BTI_S16_2,
91 SPU_BTI_U16,
92 SPU_BTI_U16_2,
93 SPU_BTI_U18,
95 /* These correspond to the standard types */
96 SPU_BTI_INTQI,
97 SPU_BTI_INTHI,
98 SPU_BTI_INTSI,
99 SPU_BTI_INTDI,
101 SPU_BTI_UINTQI,
102 SPU_BTI_UINTHI,
103 SPU_BTI_UINTSI,
104 SPU_BTI_UINTDI,
106 SPU_BTI_FLOAT,
107 SPU_BTI_DOUBLE,
109 SPU_BTI_VOID,
110 SPU_BTI_PTR,
112 SPU_BTI_MAX
115 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
116 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
117 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
118 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
119 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
120 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
121 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
122 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
123 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
124 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
126 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
128 struct spu_builtin_range
130 int low, high;
133 struct spu_address_space
135 const char *name;
136 rtx (*to_generic_insn) (rtx, rtx);
137 rtx (*from_generic_insn) (rtx, rtx);
140 static struct spu_address_space spu_address_spaces[] = {
141 {"generic", NULL, NULL },
142 {"__ea", gen_from_ea, gen_to_ea },
143 {NULL, NULL, NULL},
146 static struct spu_builtin_range spu_builtin_range[] = {
147 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
148 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
149 {0ll, 0x7fll}, /* SPU_BTI_U7 */
150 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
151 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
152 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
153 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
154 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
155 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
156 {0ll, 0xffffll}, /* SPU_BTI_U16 */
157 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
158 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
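/* Illustrative sketch (not part of the original file): how a builtin's
   immediate argument could be checked against the range table above.
   The helper name and the SPU_BTI_7-relative indexing are assumptions
   made only for this example.  */
#if 0
static int
example_immediate_in_range (int bti, HOST_WIDE_INT v)
{
  /* Entries start at SPU_BTI_7, matching the comments in the table.  */
  struct spu_builtin_range *r = &spu_builtin_range[bti - SPU_BTI_7];
  return v >= r->low && v <= r->high;
}
#endif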
162 /* Target specific attribute specifications. */
163 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
165 /* Prototypes and external defs. */
166 static void spu_init_builtins (void);
167 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
168 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
169 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
170 static rtx get_pic_reg (void);
171 static int need_to_save_reg (int regno, int saving);
172 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
173 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
174 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
175 rtx scratch);
176 static void emit_nop_for_insn (rtx insn);
177 static bool insn_clobbers_hbr (rtx insn);
178 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
179 int distance, sbitmap blocks);
180 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
181 enum machine_mode dmode);
182 static rtx get_branch_target (rtx branch);
183 static void spu_machine_dependent_reorg (void);
184 static int spu_sched_issue_rate (void);
185 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
186 int can_issue_more);
187 static int get_pipe (rtx insn);
188 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
189 static void spu_sched_init_global (FILE *, int, int);
190 static void spu_sched_init (FILE *, int, int);
191 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
192 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
193 int flags,
194 unsigned char *no_add_attrs);
195 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
196 int flags,
197 unsigned char *no_add_attrs);
198 static int spu_naked_function_p (tree func);
199 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
200 const_tree type, unsigned char named);
201 static tree spu_build_builtin_va_list (void);
202 static void spu_va_start (tree, rtx);
203 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
204 tree * post_p);
205 static int store_with_one_insn_p (rtx mem);
206 static int mem_is_padded_component_ref (rtx x);
207 static int reg_aligned_for_addr (rtx x, int aligned);
208 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
209 static void spu_asm_globalize_label (FILE * file, const char *name);
210 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
211 int *total);
212 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
213 static void spu_init_libfuncs (void);
214 static bool spu_return_in_memory (const_tree type, const_tree fntype);
215 static void fix_range (const char *);
216 static void spu_encode_section_info (tree, rtx, int);
217 static tree spu_builtin_mul_widen_even (tree);
218 static tree spu_builtin_mul_widen_odd (tree);
219 static tree spu_builtin_mask_for_load (void);
220 static int spu_builtin_vectorization_cost (bool);
221 static bool spu_vector_alignment_reachable (const_tree, bool);
222 static int spu_sms_res_mii (struct ddg *g);
223 static void asm_file_start (void);
224 static unsigned int spu_section_type_flags (tree, const char *, int);
226 extern const char *reg_names[];
227 rtx spu_compare_op0, spu_compare_op1, spu_expect_op0, spu_expect_op1;
229 /* Which instruction set architecture to use. */
230 int spu_arch;
231 /* Which cpu are we tuning for. */
232 int spu_tune;
234 /* The hardware requires 8 insns between a hint and the branch it
235 affects. This variable describes how many rtl instructions the
236 compiler needs to see before inserting a hint, and then the compiler
237 will insert enough nops to make it at least 8 insns. The default is
238 for the compiler to allow up to 2 nops to be emitted. The nops are
239 inserted in pairs, so we round down. */
240 int spu_hint_dist = (8*4) - (2*4);
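/* Worked example (illustrative only, mirroring the -mmax-nops handling
   in spu_override_options further down): with N nops allowed, the hint
   must be preceded by 8*4 - N*4 bytes of real instructions, so the
   default N == 2 gives 24 bytes (6 instructions).  */
#if 0
static int
example_hint_dist (int max_nops)
{
  int dist = 8 * 4 - max_nops * 4;  /* e.g. 2 nops -> 24 bytes */
  return dist < 0 ? 0 : dist;       /* clamped as spu_override_options does */
}
#endif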
242 /* Determines whether we run variable tracking in machine dependent
243 reorganization. */
244 static int spu_flag_var_tracking;
246 enum spu_immediate {
247 SPU_NONE,
248 SPU_IL,
249 SPU_ILA,
250 SPU_ILH,
251 SPU_ILHU,
252 SPU_ORI,
253 SPU_ORHI,
254 SPU_ORBI,
255 SPU_IOHL
257 enum immediate_class
259 IC_POOL, /* constant pool */
260 IC_IL1, /* one il* instruction */
261 IC_IL2, /* both ilhu and iohl instructions */
262 IC_IL1s, /* one il* instruction */
263 IC_IL2s, /* both ilhu and iohl instructions */
264 IC_FSMBI, /* the fsmbi instruction */
265 IC_CPAT, /* one of the c*d instructions */
266 IC_FSMBI2 /* fsmbi plus 1 other instruction */
269 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
270 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
271 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
272 static enum immediate_class classify_immediate (rtx op,
273 enum machine_mode mode);
275 static enum machine_mode spu_unwind_word_mode (void);
277 static enum machine_mode
278 spu_libgcc_cmp_return_mode (void);
280 static enum machine_mode
281 spu_libgcc_shift_count_mode (void);
284 /* TARGET overrides. */
286 static enum machine_mode spu_ea_pointer_mode (int);
287 #undef TARGET_ADDR_SPACE_POINTER_MODE
288 #define TARGET_ADDR_SPACE_POINTER_MODE spu_ea_pointer_mode
290 static const char *spu_addr_space_name (int);
291 #undef TARGET_ADDR_SPACE_NAME
292 #define TARGET_ADDR_SPACE_NAME spu_addr_space_name
294 static unsigned char spu_addr_space_number (const tree);
295 #undef TARGET_ADDR_SPACE_NUMBER
296 #define TARGET_ADDR_SPACE_NUMBER spu_addr_space_number
298 static rtx (* spu_addr_space_conversion_rtl (int, int)) (rtx, rtx);
299 #undef TARGET_ADDR_SPACE_CONVERSION_RTL
300 #define TARGET_ADDR_SPACE_CONVERSION_RTL spu_addr_space_conversion_rtl
302 static bool spu_valid_pointer_mode (enum machine_mode mode);
303 #undef TARGET_VALID_POINTER_MODE
304 #define TARGET_VALID_POINTER_MODE spu_valid_pointer_mode
306 static bool spu_valid_addr_space (const_tree);
307 #undef TARGET_VALID_ADDR_SPACE
308 #define TARGET_VALID_ADDR_SPACE spu_valid_addr_space
310 #undef TARGET_INIT_BUILTINS
311 #define TARGET_INIT_BUILTINS spu_init_builtins
313 #undef TARGET_EXPAND_BUILTIN
314 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
316 #undef TARGET_UNWIND_WORD_MODE
317 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
319 #undef TARGET_ASM_ALIGNED_DI_OP
320 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
322 #undef TARGET_RTX_COSTS
323 #define TARGET_RTX_COSTS spu_rtx_costs
325 #undef TARGET_ADDRESS_COST
326 #define TARGET_ADDRESS_COST hook_int_rtx_0
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
331 #undef TARGET_SCHED_INIT_GLOBAL
332 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
334 #undef TARGET_SCHED_INIT
335 #define TARGET_SCHED_INIT spu_sched_init
337 #undef TARGET_SCHED_VARIABLE_ISSUE
338 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
340 #undef TARGET_SCHED_REORDER
341 #define TARGET_SCHED_REORDER spu_sched_reorder
343 #undef TARGET_SCHED_REORDER2
344 #define TARGET_SCHED_REORDER2 spu_sched_reorder
346 #undef TARGET_SCHED_ADJUST_COST
347 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
349 const struct attribute_spec spu_attribute_table[];
350 #undef TARGET_ATTRIBUTE_TABLE
351 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
353 #undef TARGET_ASM_INTEGER
354 #define TARGET_ASM_INTEGER spu_assemble_integer
356 #undef TARGET_SCALAR_MODE_SUPPORTED_P
357 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
359 #undef TARGET_VECTOR_MODE_SUPPORTED_P
360 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
362 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
363 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
365 #undef TARGET_ASM_GLOBALIZE_LABEL
366 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
368 #undef TARGET_PASS_BY_REFERENCE
369 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
371 #undef TARGET_MUST_PASS_IN_STACK
372 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
374 #undef TARGET_BUILD_BUILTIN_VA_LIST
375 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
377 #undef TARGET_EXPAND_BUILTIN_VA_START
378 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
380 #undef TARGET_SETUP_INCOMING_VARARGS
381 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
383 #undef TARGET_MACHINE_DEPENDENT_REORG
384 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
386 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
387 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
389 #undef TARGET_DEFAULT_TARGET_FLAGS
390 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
392 #undef TARGET_INIT_LIBFUNCS
393 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
395 #undef TARGET_RETURN_IN_MEMORY
396 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
398 #undef TARGET_ENCODE_SECTION_INFO
399 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
401 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
402 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
404 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
405 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
407 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
408 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
410 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
411 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
413 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
414 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
416 #undef TARGET_LIBGCC_CMP_RETURN_MODE
417 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
419 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
420 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
422 #undef TARGET_SCHED_SMS_RES_MII
423 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
425 #undef TARGET_ASM_FILE_START
426 #define TARGET_ASM_FILE_START asm_file_start
428 #undef TARGET_SECTION_TYPE_FLAGS
429 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
431 struct gcc_target targetm = TARGET_INITIALIZER;
433 void
434 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
436 /* Override some of the default param values. With so many registers
437 larger values are better for these params. */
438 MAX_PENDING_LIST_LENGTH = 128;
440 /* With so many registers this is better on by default. */
441 flag_rename_registers = 1;
444 /* Sometimes certain combinations of command options do not make sense
445 on a particular target machine. You can define a macro
446 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
447 executed once just after all the command options have been parsed. */
448 void
449 spu_override_options (void)
451 /* Small loops will be unpeeled at -O3. For SPU it is more important
452 to keep code small by default. */
453 if (!flag_unroll_loops && !flag_peel_loops
454 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
455 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
457 flag_omit_frame_pointer = 1;
459 /* Functions must be 8 byte aligned so we correctly handle dual issue */
460 if (align_functions < 8)
461 align_functions = 8;
463 spu_hint_dist = 8*4 - spu_max_nops*4;
464 if (spu_hint_dist < 0)
465 spu_hint_dist = 0;
467 if (spu_fixed_range_string)
468 fix_range (spu_fixed_range_string);
470 /* Determine processor architectural level. */
471 if (spu_arch_string)
473 if (strcmp (&spu_arch_string[0], "cell") == 0)
474 spu_arch = PROCESSOR_CELL;
475 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
476 spu_arch = PROCESSOR_CELLEDP;
477 else
478 error ("Unknown architecture '%s'", &spu_arch_string[0]);
481 /* Determine processor to tune for. */
482 if (spu_tune_string)
484 if (strcmp (&spu_tune_string[0], "cell") == 0)
485 spu_tune = PROCESSOR_CELL;
486 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
487 spu_tune = PROCESSOR_CELLEDP;
488 else
489 error ("Unknown architecture '%s'", &spu_tune_string[0]);
492 /* Change defaults according to the processor architecture. */
493 if (spu_arch == PROCESSOR_CELLEDP)
495 /* If no command line option has been otherwise specified, change
496 the default to -mno-safe-hints on celledp -- only the original
497 Cell/B.E. processors require this workaround. */
498 if (!(target_flags_explicit & MASK_SAFE_HINTS))
499 target_flags &= ~MASK_SAFE_HINTS;
502 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
505 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
506 struct attribute_spec.handler. */
508 /* Table of machine attributes. */
509 const struct attribute_spec spu_attribute_table[] =
511 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
512 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
513 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
514 { NULL, 0, 0, false, false, false, NULL }
517 /* True if MODE is valid for the target. By "valid", we mean able to
518 be manipulated in non-trivial ways. In particular, this means all
519 the arithmetic is supported. */
520 static bool
521 spu_scalar_mode_supported_p (enum machine_mode mode)
523 switch (mode)
525 case QImode:
526 case HImode:
527 case SImode:
528 case SFmode:
529 case DImode:
530 case TImode:
531 case DFmode:
532 return true;
534 default:
535 return false;
539 /* Similarly for vector modes. "Supported" here is less strict. At
540 least some operations are supported; need to check optabs or builtins
541 for further details. */
542 static bool
543 spu_vector_mode_supported_p (enum machine_mode mode)
545 switch (mode)
547 case V16QImode:
548 case V8HImode:
549 case V4SImode:
550 case V2DImode:
551 case V4SFmode:
552 case V2DFmode:
553 return true;
555 default:
556 return false;
560 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
561 least significant bytes of the outer mode. This function returns
562 TRUE for the SUBREGs where this is correct. */
563 int
564 valid_subreg (rtx op)
566 enum machine_mode om = GET_MODE (op);
567 enum machine_mode im = GET_MODE (SUBREG_REG (op));
568 return om != VOIDmode && im != VOIDmode
569 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
570 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
571 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
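/* Illustrative examples of the rule above (sizes in bytes, not from the
   original file):
     (subreg:SI (reg:HI))    4 over 2,  both <= 4    -> valid
     (subreg:TI (reg:V4SI))  16 over 16, same size   -> valid
     (subreg:TI (reg:DI))    16 over 8               -> rejected; the DI
   value sits in the preferred (leftmost) slot of the register, not in
   the low bytes a paradoxical SUBREG would lead GCC to assume.  */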
574 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
575 and adjust the start offset. */
576 static rtx
577 adjust_operand (rtx op, HOST_WIDE_INT * start)
579 enum machine_mode mode;
580 int op_size;
581 /* Strip any paradoxical SUBREG. */
582 if (GET_CODE (op) == SUBREG
583 && (GET_MODE_BITSIZE (GET_MODE (op))
584 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
586 if (start)
587 *start -=
588 GET_MODE_BITSIZE (GET_MODE (op)) -
589 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
590 op = SUBREG_REG (op);
592 /* If it is smaller than SI, widen it to SI (a SUBREG is added below). */
593 op_size = GET_MODE_BITSIZE (GET_MODE (op));
594 if (op_size < 32)
596 if (start)
597 *start += 32 - op_size;
598 op_size = 32;
600 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
601 mode = mode_for_size (op_size, MODE_INT, 0);
602 if (mode != GET_MODE (op))
603 op = gen_rtx_SUBREG (mode, op, 0);
604 return op;
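/* Worked example (illustrative): for op = (subreg:TI (reg:HI r)) and
   *start == 120, the paradoxical SUBREG is stripped and *start becomes
   120 - (128 - 16) = 8; the HImode register is then widened, giving
   *start == 8 + (32 - 16) = 24 and op = (subreg:SI (reg:HI r) 0).  */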
607 void
608 spu_expand_extv (rtx ops[], int unsignedp)
610 HOST_WIDE_INT width = INTVAL (ops[2]);
611 HOST_WIDE_INT start = INTVAL (ops[3]);
612 HOST_WIDE_INT src_size, dst_size;
613 enum machine_mode src_mode, dst_mode;
614 rtx dst = ops[0], src = ops[1];
615 rtx s;
617 dst = adjust_operand (ops[0], 0);
618 dst_mode = GET_MODE (dst);
619 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
621 src = adjust_operand (src, &start);
622 src_mode = GET_MODE (src);
623 src_size = GET_MODE_BITSIZE (GET_MODE (src));
625 if (start > 0)
627 s = gen_reg_rtx (src_mode);
628 switch (src_mode)
630 case SImode:
631 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
632 break;
633 case DImode:
634 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
635 break;
636 case TImode:
637 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
638 break;
639 default:
640 abort ();
642 src = s;
645 if (width < src_size)
647 rtx pat;
648 int icode;
649 switch (src_mode)
651 case SImode:
652 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
653 break;
654 case DImode:
655 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
656 break;
657 case TImode:
658 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
659 break;
660 default:
661 abort ();
663 s = gen_reg_rtx (src_mode);
664 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
665 emit_insn (pat);
666 src = s;
669 convert_move (dst, src, unsignedp);
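/* Worked example (illustrative): extracting an unsigned 8-bit field at
   bit 16 of an SImode source goes through the two steps above:
     gen_ashlsi3 (s, src, 16)           left shift discards bits 0..15
     CODE_FOR_lshrsi3 (d, s, 32 - 8)    right shift right-justifies and
                                        zero extends the field
   A signed extraction would use CODE_FOR_ashrsi3 for the second step.  */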
672 void
673 spu_expand_insv (rtx ops[])
675 HOST_WIDE_INT width = INTVAL (ops[1]);
676 HOST_WIDE_INT start = INTVAL (ops[2]);
677 HOST_WIDE_INT maskbits;
678 enum machine_mode dst_mode, src_mode;
679 rtx dst = ops[0], src = ops[3];
680 int dst_size, src_size;
681 rtx mask;
682 rtx shift_reg;
683 int shift;
686 if (GET_CODE (ops[0]) == MEM)
687 dst = gen_reg_rtx (TImode);
688 else
689 dst = adjust_operand (dst, &start);
690 dst_mode = GET_MODE (dst);
691 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
693 if (CONSTANT_P (src))
695 enum machine_mode m =
696 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
697 src = force_reg (m, convert_to_mode (m, src, 0));
699 src = adjust_operand (src, 0);
700 src_mode = GET_MODE (src);
701 src_size = GET_MODE_BITSIZE (GET_MODE (src));
703 mask = gen_reg_rtx (dst_mode);
704 shift_reg = gen_reg_rtx (dst_mode);
705 shift = dst_size - start - width;
707 /* It's not safe to use subreg here because the compiler assumes
708 that the SUBREG_REG is right justified in the SUBREG. */
709 convert_move (shift_reg, src, 1);
711 if (shift > 0)
713 switch (dst_mode)
715 case SImode:
716 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
717 break;
718 case DImode:
719 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
720 break;
721 case TImode:
722 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
723 break;
724 default:
725 abort ();
728 else if (shift < 0)
729 abort ();
731 switch (dst_size)
733 case 32:
734 maskbits = (-1ll << (32 - width - start));
735 if (start)
736 maskbits += (1ll << (32 - start));
737 emit_move_insn (mask, GEN_INT (maskbits));
738 break;
739 case 64:
740 maskbits = (-1ll << (64 - width - start));
741 if (start)
742 maskbits += (1ll << (64 - start));
743 emit_move_insn (mask, GEN_INT (maskbits));
744 break;
745 case 128:
747 unsigned char arr[16];
748 int i = start / 8;
749 memset (arr, 0, sizeof (arr));
750 arr[i] = 0xff >> (start & 7);
751 for (i++; i <= (start + width - 1) / 8; i++)
752 arr[i] = 0xff;
753 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
754 emit_move_insn (mask, array_to_constant (TImode, arr));
756 break;
757 default:
758 abort ();
760 if (GET_CODE (ops[0]) == MEM)
762 rtx aligned = gen_reg_rtx (SImode);
763 rtx low = gen_reg_rtx (SImode);
764 rtx addr = gen_reg_rtx (SImode);
765 rtx rotl = gen_reg_rtx (SImode);
766 rtx mask0 = gen_reg_rtx (TImode);
767 rtx mem;
769 emit_move_insn (addr, XEXP (ops[0], 0));
770 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
771 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
772 emit_insn (gen_negsi2 (rotl, low));
773 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
774 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
775 mem = change_address (ops[0], TImode, aligned);
776 set_mem_alias_set (mem, 0);
777 emit_move_insn (dst, mem);
778 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
779 emit_move_insn (mem, dst);
780 if (start + width > MEM_ALIGN (ops[0]))
782 rtx shl = gen_reg_rtx (SImode);
783 rtx mask1 = gen_reg_rtx (TImode);
784 rtx dst1 = gen_reg_rtx (TImode);
785 rtx mem1;
786 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
787 emit_insn (gen_shlqby_ti (mask1, mask, shl));
788 mem1 = adjust_address (mem, TImode, 16);
789 set_mem_alias_set (mem1, 0);
790 emit_move_insn (dst1, mem1);
791 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
792 emit_move_insn (mem1, dst1);
795 else
796 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
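/* Worked example of the mask computation above (illustrative): for a
   32-bit destination with start == 8 and width == 8,
     maskbits  = -1ll << (32 - 8 - 8)   ...ffffffffffff0000
     maskbits += 1ll << (32 - 8)        -> 0x00ff0000
   i.e. exactly the 8 destination bits counted from the MSB, which is the
   control input selb needs to merge the shifted source into the
   destination.  */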
800 int
801 spu_expand_block_move (rtx ops[])
803 HOST_WIDE_INT bytes, align, offset;
804 rtx src, dst, sreg, dreg, target;
805 int i;
806 if (GET_CODE (ops[2]) != CONST_INT
807 || GET_CODE (ops[3]) != CONST_INT
808 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
809 return 0;
811 bytes = INTVAL (ops[2]);
812 align = INTVAL (ops[3]);
814 if (bytes <= 0)
815 return 1;
817 dst = ops[0];
818 src = ops[1];
820 if (align == 16)
822 for (offset = 0; offset + 16 <= bytes; offset += 16)
824 dst = adjust_address (ops[0], V16QImode, offset);
825 src = adjust_address (ops[1], V16QImode, offset);
826 emit_move_insn (dst, src);
828 if (offset < bytes)
830 rtx mask;
831 unsigned char arr[16] = { 0 };
832 for (i = 0; i < bytes - offset; i++)
833 arr[i] = 0xff;
834 dst = adjust_address (ops[0], V16QImode, offset);
835 src = adjust_address (ops[1], V16QImode, offset);
836 mask = gen_reg_rtx (V16QImode);
837 sreg = gen_reg_rtx (V16QImode);
838 dreg = gen_reg_rtx (V16QImode);
839 target = gen_reg_rtx (V16QImode);
840 emit_move_insn (mask, array_to_constant (V16QImode, arr));
841 emit_move_insn (dreg, dst);
842 emit_move_insn (sreg, src);
843 emit_insn (gen_selb (target, dreg, sreg, mask));
844 emit_move_insn (dst, target);
846 return 1;
848 return 0;
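/* Worked example (illustrative): copying 20 bytes with 16-byte aligned
   operands does one V16QImode move for bytes 0..15, then for the 4-byte
   tail builds the mask ff ff ff ff 00 ... 00 and uses selb to merge the
   first 4 source bytes into the destination quadword, leaving the other
   12 bytes of that quadword untouched.  */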
851 enum spu_comp_code
852 { SPU_EQ, SPU_GT, SPU_GTU };
854 int spu_comp_icode[12][3] = {
855 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
856 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
857 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
858 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
859 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
860 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
861 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
862 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
863 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
864 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
865 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
866 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
869 /* Generate a compare for CODE. Return a brand-new rtx that represents
870 the result of the compare. GCC can figure this out too if we don't
871 provide all variations of compares, but since GCC always wants to use
872 WORD_MODE, we can generate better code in most cases if we do it
873 ourselves. */
874 void
875 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
877 int reverse_compare = 0;
878 int reverse_test = 0;
879 rtx compare_result, eq_result;
880 rtx comp_rtx, eq_rtx;
881 rtx target = operands[0];
882 enum machine_mode comp_mode;
883 enum machine_mode op_mode;
884 enum spu_comp_code scode, eq_code, ior_code;
885 int index;
886 int eq_test = 0;
888 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
889 and so on, to keep the constant in operand 1. */
890 if (GET_CODE (spu_compare_op1) == CONST_INT)
892 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
893 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
894 switch (code)
896 case GE:
897 spu_compare_op1 = GEN_INT (val);
898 code = GT;
899 break;
900 case LT:
901 spu_compare_op1 = GEN_INT (val);
902 code = LE;
903 break;
904 case GEU:
905 spu_compare_op1 = GEN_INT (val);
906 code = GTU;
907 break;
908 case LTU:
909 spu_compare_op1 = GEN_INT (val);
910 code = LEU;
911 break;
912 default:
913 break;
917 comp_mode = SImode;
918 op_mode = GET_MODE (spu_compare_op0);
920 switch (code)
922 case GE:
923 scode = SPU_GT;
924 if (HONOR_NANS (op_mode))
926 reverse_compare = 0;
927 reverse_test = 0;
928 eq_test = 1;
929 eq_code = SPU_EQ;
931 else
933 reverse_compare = 1;
934 reverse_test = 1;
936 break;
937 case LE:
938 scode = SPU_GT;
939 if (HONOR_NANS (op_mode))
941 reverse_compare = 1;
942 reverse_test = 0;
943 eq_test = 1;
944 eq_code = SPU_EQ;
946 else
948 reverse_compare = 0;
949 reverse_test = 1;
951 break;
952 case LT:
953 reverse_compare = 1;
954 reverse_test = 0;
955 scode = SPU_GT;
956 break;
957 case GEU:
958 reverse_compare = 1;
959 reverse_test = 1;
960 scode = SPU_GTU;
961 break;
962 case LEU:
963 reverse_compare = 0;
964 reverse_test = 1;
965 scode = SPU_GTU;
966 break;
967 case LTU:
968 reverse_compare = 1;
969 reverse_test = 0;
970 scode = SPU_GTU;
971 break;
972 case NE:
973 reverse_compare = 0;
974 reverse_test = 1;
975 scode = SPU_EQ;
976 break;
978 case EQ:
979 scode = SPU_EQ;
980 break;
981 case GT:
982 scode = SPU_GT;
983 break;
984 case GTU:
985 scode = SPU_GTU;
986 break;
987 default:
988 scode = SPU_EQ;
989 break;
992 switch (op_mode)
994 case QImode:
995 index = 0;
996 comp_mode = QImode;
997 break;
998 case HImode:
999 index = 1;
1000 comp_mode = HImode;
1001 break;
1002 case SImode:
1003 index = 2;
1004 break;
1005 case DImode:
1006 index = 3;
1007 break;
1008 case TImode:
1009 index = 4;
1010 break;
1011 case SFmode:
1012 index = 5;
1013 break;
1014 case DFmode:
1015 index = 6;
1016 break;
1017 case V16QImode:
1018 index = 7;
1019 comp_mode = op_mode;
1020 break;
1021 case V8HImode:
1022 index = 8;
1023 comp_mode = op_mode;
1024 break;
1025 case V4SImode:
1026 index = 9;
1027 comp_mode = op_mode;
1028 break;
1029 case V4SFmode:
1030 index = 10;
1031 comp_mode = V4SImode;
1032 break;
1033 case V2DFmode:
1034 index = 11;
1035 comp_mode = V2DImode;
1036 break;
1037 case V2DImode:
1038 default:
1039 abort ();
1042 if (GET_MODE (spu_compare_op1) == DFmode
1043 && (scode != SPU_GT && scode != SPU_EQ))
1044 abort ();
1046 if (is_set == 0 && spu_compare_op1 == const0_rtx
1047 && (GET_MODE (spu_compare_op0) == SImode
1048 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
1050 /* Don't need to set a register with the result when we are
1051 comparing against zero and branching. */
1052 reverse_test = !reverse_test;
1053 compare_result = spu_compare_op0;
1055 else
1057 compare_result = gen_reg_rtx (comp_mode);
1059 if (reverse_compare)
1061 rtx t = spu_compare_op1;
1062 spu_compare_op1 = spu_compare_op0;
1063 spu_compare_op0 = t;
1066 if (spu_comp_icode[index][scode] == 0)
1067 abort ();
1069 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1070 (spu_compare_op0, op_mode))
1071 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
1072 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1073 (spu_compare_op1, op_mode))
1074 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
1075 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1076 spu_compare_op0,
1077 spu_compare_op1);
1078 if (comp_rtx == 0)
1079 abort ();
1080 emit_insn (comp_rtx);
1082 if (eq_test)
1084 eq_result = gen_reg_rtx (comp_mode);
1085 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1086 spu_compare_op0,
1087 spu_compare_op1);
1088 if (eq_rtx == 0)
1089 abort ();
1090 emit_insn (eq_rtx);
1091 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1092 gcc_assert (ior_code != CODE_FOR_nothing);
1093 emit_insn (GEN_FCN (ior_code)
1094 (compare_result, compare_result, eq_result));
1098 if (is_set == 0)
1100 rtx bcomp;
1101 rtx loc_ref;
1102 rtx jump_pat;
1104 /* We don't have branch on QI compare insns, so we convert the
1105 QI compare result to a HI result. */
1106 if (comp_mode == QImode)
1108 rtx old_res = compare_result;
1109 compare_result = gen_reg_rtx (HImode);
1110 comp_mode = HImode;
1111 emit_insn (gen_extendqihi2 (compare_result, old_res));
1114 if (reverse_test)
1115 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1116 else
1117 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1119 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1120 jump_pat = gen_rtx_SET (VOIDmode, pc_rtx,
1121 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1122 loc_ref, pc_rtx));
1124 if (flag_schedule_insns_after_reload && TARGET_BRANCH_HINTS
1125 && spu_expect_op0 && comp_mode == Pmode
1126 && spu_expect_op0 == spu_compare_op0)
1128 rtx then_reg = gen_reg_rtx (Pmode);
1129 rtx else_reg = gen_reg_rtx (Pmode);
1130 rtx expect_cmp = gen_reg_rtx (Pmode);
1131 rtx hint_target = gen_reg_rtx (Pmode);
1132 rtx branch_label = gen_label_rtx ();
1133 rtx branch_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1134 rtx then_label = gen_label_rtx ();
1135 rtx then_ref = gen_rtx_LABEL_REF (VOIDmode, then_label);
1136 rtx else_label = gen_label_rtx ();
1137 rtx else_ref = gen_rtx_LABEL_REF (VOIDmode, else_label);
1138 rtvec v;
1140 emit_move_insn (then_reg, then_ref);
1141 emit_move_insn (else_reg, else_ref);
1142 emit_insn (gen_clgt_si (expect_cmp, spu_expect_op1, const0_rtx));
1143 emit_insn (gen_selb (hint_target, then_reg, else_reg, expect_cmp));
1144 emit_insn (gen_hbr (branch_ref, hint_target));
1146 LABEL_NUSES (branch_label)++;
1147 LABEL_PRESERVE_P (branch_label) = 1;
1148 LABEL_NUSES (then_label)++;
1149 LABEL_PRESERVE_P (then_label) = 1;
1150 LABEL_NUSES (else_label)++;
1151 LABEL_PRESERVE_P (else_label) = 1;
1153 /* We delete the labels to make sure they don't get used for
1154 anything else. The machine reorg phase will move them to
1155 the correct place. We don't try to reuse existing labels
1156 because we move these around later. */
1157 delete_insn (emit_label (branch_label));
1158 delete_insn (emit_label (then_label));
1159 delete_insn (emit_label (else_label));
1161 v = rtvec_alloc (5);
1162 RTVEC_ELT (v, 0) = jump_pat;
1163 RTVEC_ELT (v, 1) = gen_rtx_USE (VOIDmode, branch_ref);
1164 RTVEC_ELT (v, 2) = gen_rtx_USE (VOIDmode, then_ref);
1165 RTVEC_ELT (v, 3) = gen_rtx_USE (VOIDmode, else_ref);
1166 RTVEC_ELT (v, 4) = gen_rtx_CLOBBER (VOIDmode,
1167 gen_rtx_REG (SImode,
1168 HBR_REGNUM));
1169 jump_pat = gen_rtx_PARALLEL (VOIDmode, v);
1172 emit_jump_insn (jump_pat);
1174 else if (is_set == 2)
1176 int compare_size = GET_MODE_BITSIZE (comp_mode);
1177 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1178 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1179 rtx select_mask;
1180 rtx op_t = operands[2];
1181 rtx op_f = operands[3];
1183 /* The result of the comparison can be SI, HI or QI mode. Create a
1184 mask based on that result. */
1185 if (target_size > compare_size)
1187 select_mask = gen_reg_rtx (mode);
1188 emit_insn (gen_extend_compare (select_mask, compare_result));
1190 else if (target_size < compare_size)
1191 select_mask =
1192 gen_rtx_SUBREG (mode, compare_result,
1193 (compare_size - target_size) / BITS_PER_UNIT);
1194 else if (comp_mode != mode)
1195 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1196 else
1197 select_mask = compare_result;
1199 if (GET_MODE (target) != GET_MODE (op_t)
1200 || GET_MODE (target) != GET_MODE (op_f))
1201 abort ();
1203 if (reverse_test)
1204 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1205 else
1206 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1208 else
1210 if (reverse_test)
1211 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1212 gen_rtx_NOT (comp_mode, compare_result)));
1213 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1214 emit_insn (gen_extendhisi2 (target, compare_result));
1215 else if (GET_MODE (target) == SImode
1216 && GET_MODE (compare_result) == QImode)
1217 emit_insn (gen_extend_compare (target, compare_result));
1218 else
1219 emit_move_insn (target, compare_result);
1221 spu_expect_op0 = spu_expect_op1 = 0;
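/* Illustrative summary of the code above (not from the original file):
   the SPU only has eq/gt/gtu compares, so for an integer X >= Y the GE
   case sets reverse_compare and reverse_test, computes Y > X with the
   operands swapped, and branches when that result is zero.  With
   HONOR_NANS, GE is instead built as (X > Y) | (X == Y) through the
   eq_test path, since reversing a floating point compare would give the
   wrong answer for NaNs.  */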
1224 HOST_WIDE_INT
1225 const_double_to_hwint (rtx x)
1227 HOST_WIDE_INT val;
1228 REAL_VALUE_TYPE rv;
1229 if (GET_MODE (x) == SFmode)
1231 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1232 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1234 else if (GET_MODE (x) == DFmode)
1236 long l[2];
1237 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1238 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1239 val = l[0];
1240 val = (val << 32) | (l[1] & 0xffffffff);
1242 else
1243 abort ();
1244 return val;
1247 rtx
1248 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1250 long tv[2];
1251 REAL_VALUE_TYPE rv;
1252 gcc_assert (mode == SFmode || mode == DFmode);
1254 if (mode == SFmode)
1255 tv[0] = (v << 32) >> 32;
1256 else if (mode == DFmode)
1258 tv[1] = (v << 32) >> 32;
1259 tv[0] = v >> 32;
1261 real_from_target (&rv, tv, mode);
1262 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
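/* Illustrative round trip (not from the original file): for the SFmode
   constant 1.0f, const_double_to_hwint returns the IEEE single bit
   pattern 0x3f800000, and hwint_to_const_double (SFmode, 0x3f800000)
   rebuilds the equivalent CONST_DOUBLE via real_from_target.  */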
1265 void
1266 print_operand_address (FILE * file, register rtx addr)
1268 rtx reg;
1269 rtx offset;
1271 if (GET_CODE (addr) == AND
1272 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1273 && INTVAL (XEXP (addr, 1)) == -16)
1274 addr = XEXP (addr, 0);
1276 switch (GET_CODE (addr))
1278 case REG:
1279 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1280 break;
1282 case PLUS:
1283 reg = XEXP (addr, 0);
1284 offset = XEXP (addr, 1);
1285 if (GET_CODE (offset) == REG)
1287 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1288 reg_names[REGNO (offset)]);
1290 else if (GET_CODE (offset) == CONST_INT)
1292 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1293 INTVAL (offset), reg_names[REGNO (reg)]);
1295 else
1296 abort ();
1297 break;
1299 case CONST:
1300 case LABEL_REF:
1301 case SYMBOL_REF:
1302 case CONST_INT:
1303 output_addr_const (file, addr);
1304 break;
1306 default:
1307 debug_rtx (addr);
1308 abort ();
1312 void
1313 print_operand (FILE * file, rtx x, int code)
1315 enum machine_mode mode = GET_MODE (x);
1316 HOST_WIDE_INT val;
1317 unsigned char arr[16];
1318 int xcode = GET_CODE (x);
1319 int i, info;
1320 if (GET_MODE (x) == VOIDmode)
1321 switch (code)
1323 case 'L': /* 128 bits, signed */
1324 case 'm': /* 128 bits, signed */
1325 case 'T': /* 128 bits, signed */
1326 case 't': /* 128 bits, signed */
1327 mode = TImode;
1328 break;
1329 case 'K': /* 64 bits, signed */
1330 case 'k': /* 64 bits, signed */
1331 case 'D': /* 64 bits, signed */
1332 case 'd': /* 64 bits, signed */
1333 mode = DImode;
1334 break;
1335 case 'J': /* 32 bits, signed */
1336 case 'j': /* 32 bits, signed */
1337 case 's': /* 32 bits, signed */
1338 case 'S': /* 32 bits, signed */
1339 mode = SImode;
1340 break;
1342 switch (code)
1345 case 'j': /* 32 bits, signed */
1346 case 'k': /* 64 bits, signed */
1347 case 'm': /* 128 bits, signed */
1348 if (xcode == CONST_INT
1349 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1351 gcc_assert (logical_immediate_p (x, mode));
1352 constant_to_array (mode, x, arr);
1353 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1354 val = trunc_int_for_mode (val, SImode);
1355 switch (which_logical_immediate (val))
1357 case SPU_ORI:
1358 break;
1359 case SPU_ORHI:
1360 fprintf (file, "h");
1361 break;
1362 case SPU_ORBI:
1363 fprintf (file, "b");
1364 break;
1365 default:
1366 gcc_unreachable();
1369 else
1370 gcc_unreachable();
1371 return;
1373 case 'J': /* 32 bits, signed */
1374 case 'K': /* 64 bits, signed */
1375 case 'L': /* 128 bits, signed */
1376 if (xcode == CONST_INT
1377 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1379 gcc_assert (logical_immediate_p (x, mode)
1380 || iohl_immediate_p (x, mode));
1381 constant_to_array (mode, x, arr);
1382 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1383 val = trunc_int_for_mode (val, SImode);
1384 switch (which_logical_immediate (val))
1386 case SPU_ORI:
1387 case SPU_IOHL:
1388 break;
1389 case SPU_ORHI:
1390 val = trunc_int_for_mode (val, HImode);
1391 break;
1392 case SPU_ORBI:
1393 val = trunc_int_for_mode (val, QImode);
1394 break;
1395 default:
1396 gcc_unreachable();
1398 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1400 else
1401 gcc_unreachable();
1402 return;
1404 case 't': /* 128 bits, signed */
1405 case 'd': /* 64 bits, signed */
1406 case 's': /* 32 bits, signed */
1407 if (CONSTANT_P (x))
1409 enum immediate_class c = classify_immediate (x, mode);
1410 switch (c)
1412 case IC_IL1:
1413 constant_to_array (mode, x, arr);
1414 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1415 val = trunc_int_for_mode (val, SImode);
1416 switch (which_immediate_load (val))
1418 case SPU_IL:
1419 break;
1420 case SPU_ILA:
1421 fprintf (file, "a");
1422 break;
1423 case SPU_ILH:
1424 fprintf (file, "h");
1425 break;
1426 case SPU_ILHU:
1427 fprintf (file, "hu");
1428 break;
1429 default:
1430 gcc_unreachable ();
1432 break;
1433 case IC_CPAT:
1434 constant_to_array (mode, x, arr);
1435 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1436 if (info == 1)
1437 fprintf (file, "b");
1438 else if (info == 2)
1439 fprintf (file, "h");
1440 else if (info == 4)
1441 fprintf (file, "w");
1442 else if (info == 8)
1443 fprintf (file, "d");
1444 break;
1445 case IC_IL1s:
1446 if (xcode == CONST_VECTOR)
1448 x = CONST_VECTOR_ELT (x, 0);
1449 xcode = GET_CODE (x);
1451 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1452 fprintf (file, "a");
1453 else if (xcode == HIGH)
1454 fprintf (file, "hu");
1455 break;
1456 case IC_FSMBI:
1457 case IC_FSMBI2:
1458 case IC_IL2:
1459 case IC_IL2s:
1460 case IC_POOL:
1461 abort ();
1464 else
1465 gcc_unreachable ();
1466 return;
1468 case 'T': /* 128 bits, signed */
1469 case 'D': /* 64 bits, signed */
1470 case 'S': /* 32 bits, signed */
1471 if (CONSTANT_P (x))
1473 enum immediate_class c = classify_immediate (x, mode);
1474 switch (c)
1476 case IC_IL1:
1477 constant_to_array (mode, x, arr);
1478 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1479 val = trunc_int_for_mode (val, SImode);
1480 switch (which_immediate_load (val))
1482 case SPU_IL:
1483 case SPU_ILA:
1484 break;
1485 case SPU_ILH:
1486 case SPU_ILHU:
1487 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1488 break;
1489 default:
1490 gcc_unreachable ();
1492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1493 break;
1494 case IC_FSMBI:
1495 constant_to_array (mode, x, arr);
1496 val = 0;
1497 for (i = 0; i < 16; i++)
1499 val <<= 1;
1500 val |= arr[i] & 1;
1502 print_operand (file, GEN_INT (val), 0);
1503 break;
1504 case IC_CPAT:
1505 constant_to_array (mode, x, arr);
1506 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1507 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1508 break;
1509 case IC_IL1s:
1510 if (xcode == HIGH)
1511 x = XEXP (x, 0);
1512 if (GET_CODE (x) == CONST_VECTOR)
1513 x = CONST_VECTOR_ELT (x, 0);
1514 output_addr_const (file, x);
1515 if (xcode == HIGH)
1516 fprintf (file, "@h");
1517 break;
1518 case IC_IL2:
1519 case IC_IL2s:
1520 case IC_FSMBI2:
1521 case IC_POOL:
1522 abort ();
1525 else
1526 gcc_unreachable ();
1527 return;
1529 case 'C':
1530 if (xcode == CONST_INT)
1532 /* Only the 4 least significant bits are relevant for generating
1533 control word instructions. */
1534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1535 return;
1537 break;
1539 case 'M': /* print code for c*d */
1540 if (GET_CODE (x) == CONST_INT)
1541 switch (INTVAL (x))
1543 case 1:
1544 fprintf (file, "b");
1545 break;
1546 case 2:
1547 fprintf (file, "h");
1548 break;
1549 case 4:
1550 fprintf (file, "w");
1551 break;
1552 case 8:
1553 fprintf (file, "d");
1554 break;
1555 default:
1556 gcc_unreachable();
1558 else
1559 gcc_unreachable();
1560 return;
1562 case 'N': /* Negate the operand */
1563 if (xcode == CONST_INT)
1564 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1565 else if (xcode == CONST_VECTOR)
1566 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1567 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1568 return;
1570 case 'I': /* enable/disable interrupts */
1571 if (xcode == CONST_INT)
1572 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1573 return;
1575 case 'b': /* branch modifiers */
1576 if (xcode == REG)
1577 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1578 else if (COMPARISON_P (x))
1579 fprintf (file, "%s", xcode == NE ? "n" : "");
1580 return;
1582 case 'i': /* indirect call */
1583 if (xcode == MEM)
1585 if (GET_CODE (XEXP (x, 0)) == REG)
1586 /* Used in indirect function calls. */
1587 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1588 else
1589 output_address (XEXP (x, 0));
1591 return;
1593 case 'p': /* load/store */
1594 if (xcode == MEM)
1596 x = XEXP (x, 0);
1597 xcode = GET_CODE (x);
1599 if (xcode == AND)
1601 x = XEXP (x, 0);
1602 xcode = GET_CODE (x);
1604 if (xcode == REG)
1605 fprintf (file, "d");
1606 else if (xcode == CONST_INT)
1607 fprintf (file, "a");
1608 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1609 fprintf (file, "r");
1610 else if (xcode == PLUS || xcode == LO_SUM)
1612 if (GET_CODE (XEXP (x, 1)) == REG)
1613 fprintf (file, "x");
1614 else
1615 fprintf (file, "d");
1617 return;
1619 case 'e':
1620 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1621 val &= 0x7;
1622 output_addr_const (file, GEN_INT (val));
1623 return;
1625 case 'f':
1626 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1627 val &= 0x1f;
1628 output_addr_const (file, GEN_INT (val));
1629 return;
1631 case 'g':
1632 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1633 val &= 0x3f;
1634 output_addr_const (file, GEN_INT (val));
1635 return;
1637 case 'h':
1638 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1639 val = (val >> 3) & 0x1f;
1640 output_addr_const (file, GEN_INT (val));
1641 return;
1643 case 'E':
1644 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1645 val = -val;
1646 val &= 0x7;
1647 output_addr_const (file, GEN_INT (val));
1648 return;
1650 case 'F':
1651 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1652 val = -val;
1653 val &= 0x1f;
1654 output_addr_const (file, GEN_INT (val));
1655 return;
1657 case 'G':
1658 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1659 val = -val;
1660 val &= 0x3f;
1661 output_addr_const (file, GEN_INT (val));
1662 return;
1664 case 'H':
1665 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1666 val = -(val & -8ll);
1667 val = (val >> 3) & 0x1f;
1668 output_addr_const (file, GEN_INT (val));
1669 return;
1671 case 'v':
1672 case 'w':
1673 constant_to_array (mode, x, arr);
1674 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1675 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1676 return;
1678 case 0:
1679 if (xcode == REG)
1680 fprintf (file, "%s", reg_names[REGNO (x)]);
1681 else if (xcode == MEM)
1682 output_address (XEXP (x, 0));
1683 else if (xcode == CONST_VECTOR)
1684 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1685 else
1686 output_addr_const (file, x);
1687 return;
1689 /* unused letters
1690 o qr u yz
1691 AB OPQR UVWXYZ */
1692 default:
1693 output_operand_lossage ("invalid %%xn code");
1695 gcc_unreachable ();
1698 extern char call_used_regs[];
1700 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1701 caller saved register. For leaf functions it is more efficient to
1702 use a volatile register because we won't need to save and restore the
1703 pic register. This routine is only valid after register allocation
1704 is completed, so we can pick an unused register. */
1705 static rtx
1706 get_pic_reg (void)
1708 rtx pic_reg = pic_offset_table_rtx;
1709 if (!reload_completed && !reload_in_progress)
1710 abort ();
1711 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1712 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1713 return pic_reg;
1716 /* Split constant addresses to handle cases that are too large.
1717 Add in the pic register when in PIC mode.
1718 Split immediates that require more than 1 instruction. */
1719 int
1720 spu_split_immediate (rtx * ops)
1722 enum machine_mode mode = GET_MODE (ops[0]);
1723 enum immediate_class c = classify_immediate (ops[1], mode);
1725 switch (c)
1727 case IC_IL2:
1729 unsigned char arrhi[16];
1730 unsigned char arrlo[16];
1731 rtx to, temp, hi, lo;
1732 int i;
1733 enum machine_mode imode = mode;
1734 /* We need to do reals as ints because the constant used in the
1735 IOR might not be a legitimate real constant. */
1736 imode = int_mode_for_mode (mode);
1737 constant_to_array (mode, ops[1], arrhi);
1738 if (imode != mode)
1739 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1740 else
1741 to = ops[0];
1742 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1743 for (i = 0; i < 16; i += 4)
1745 arrlo[i + 2] = arrhi[i + 2];
1746 arrlo[i + 3] = arrhi[i + 3];
1747 arrlo[i + 0] = arrlo[i + 1] = 0;
1748 arrhi[i + 2] = arrhi[i + 3] = 0;
1750 hi = array_to_constant (imode, arrhi);
1751 lo = array_to_constant (imode, arrlo);
1752 emit_move_insn (temp, hi);
1753 emit_insn (gen_rtx_SET
1754 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1755 return 1;
1757 case IC_FSMBI2:
1759 unsigned char arr_fsmbi[16];
1760 unsigned char arr_andbi[16];
1761 rtx to, reg_fsmbi, reg_and;
1762 int i;
1763 enum machine_mode imode = mode;
1764 /* We need to do reals as ints because the constant used in the
1765 * AND might not be a legitimate real constant. */
1766 imode = int_mode_for_mode (mode);
1767 constant_to_array (mode, ops[1], arr_fsmbi);
1768 if (imode != mode)
1769 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1770 else
1771 to = ops[0];
1772 for (i = 0; i < 16; i++)
1773 if (arr_fsmbi[i] != 0)
1775 arr_andbi[0] = arr_fsmbi[i];
1776 arr_fsmbi[i] = 0xff;
1778 for (i = 1; i < 16; i++)
1779 arr_andbi[i] = arr_andbi[0];
1780 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1781 reg_and = array_to_constant (imode, arr_andbi);
1782 emit_move_insn (to, reg_fsmbi);
1783 emit_insn (gen_rtx_SET
1784 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1785 return 1;
1787 case IC_POOL:
1788 if (reload_in_progress || reload_completed)
1790 rtx mem = force_const_mem (mode, ops[1]);
1791 if (TARGET_LARGE_MEM)
1793 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1794 emit_move_insn (addr, XEXP (mem, 0));
1795 mem = replace_equiv_address (mem, addr);
1797 emit_move_insn (ops[0], mem);
1798 return 1;
1800 break;
1801 case IC_IL1s:
1802 case IC_IL2s:
1803 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1805 if (c == IC_IL2s)
1807 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1808 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1810 else if (flag_pic)
1811 emit_insn (gen_pic (ops[0], ops[1]));
1812 if (flag_pic)
1814 rtx pic_reg = get_pic_reg ();
1815 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1816 current_function_uses_pic_offset_table = 1;
1818 return flag_pic || c == IC_IL2s;
1820 break;
1821 case IC_IL1:
1822 case IC_FSMBI:
1823 case IC_CPAT:
1824 break;
1826 return 0;
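/* Worked example for the IC_IL2 case above (illustrative): the SImode
   constant 0x12345678 is split per 4-byte group into
     hi = 0x12340000   loaded with an ilhu-class move
     lo = 0x00005678   merged in with the IOR, which becomes iohl
   so the value is materialized in two instructions instead of going to
   the constant pool.  */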
1829 /* SAVING is TRUE when we are generating the actual load and store
1830 instructions for REGNO. When determining the size of the stack
1831 needed for saving registers, we must allocate enough space for the
1832 worst case, because we don't always have the information early enough
1833 to not allocate it. But we can at least eliminate the actual loads
1834 and stores during the prologue/epilogue. */
1835 static int
1836 need_to_save_reg (int regno, int saving)
1838 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1839 return 1;
1840 if (flag_pic
1841 && regno == PIC_OFFSET_TABLE_REGNUM
1842 && (!saving || current_function_uses_pic_offset_table)
1843 && (!saving
1844 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1845 return 1;
1846 return 0;
1849 /* This function is only correct starting with local register
1850 allocation */
1851 int
1852 spu_saved_regs_size (void)
1854 int reg_save_size = 0;
1855 int regno;
1857 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1858 if (need_to_save_reg (regno, 0))
1859 reg_save_size += 0x10;
1860 return reg_save_size;
1863 static rtx
1864 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1866 rtx reg = gen_rtx_REG (V4SImode, regno);
1867 rtx mem =
1868 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1869 return emit_insn (gen_movv4si (mem, reg));
1872 static rtx
1873 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1875 rtx reg = gen_rtx_REG (V4SImode, regno);
1876 rtx mem =
1877 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1878 return emit_insn (gen_movv4si (reg, mem));
1881 /* This happens after reload, so we need to expand it. */
1882 static rtx
1883 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1885 rtx insn;
1886 if (satisfies_constraint_K (GEN_INT (imm)))
1888 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1890 else
1892 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1893 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1894 if (REGNO (src) == REGNO (scratch))
1895 abort ();
1897 return insn;
1900 /* Return nonzero if this function is known to have a null epilogue. */
1902 int
1903 direct_return (void)
1905 if (reload_completed)
1907 if (cfun->static_chain_decl == 0
1908 && (spu_saved_regs_size ()
1909 + get_frame_size ()
1910 + current_function_outgoing_args_size
1911 + current_function_pretend_args_size == 0)
1912 && current_function_is_leaf)
1913 return 1;
1915 return 0;
1919 The stack frame looks like this:
1920 +-------------+
1921 | incoming |
1922 AP | args |
1923 +-------------+
1924 | $lr save |
1925 +-------------+
1926 prev SP | back chain |
1927 +-------------+
1928 | var args |
1929 | reg save | current_function_pretend_args_size bytes
1930 +-------------+
1931 | ... |
1932 | saved regs | spu_saved_regs_size() bytes
1933 +-------------+
1934 | ... |
1935 FP | vars | get_frame_size() bytes
1936 +-------------+
1937 | ... |
1938 | outgoing |
1939 | args | current_function_outgoing_args_size bytes
1940 +-------------+
1941 | $lr of next |
1942 | frame |
1943 +-------------+
1944 SP | back chain |
1945 +-------------+
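/* Worked example (illustrative): for a non-leaf function with 32 bytes
   of locals, two call-saved registers live and no varargs, the prologue
   below computes
     total_size = 32 + 2*16 + outgoing args + 0 + STACK_POINTER_OFFSET
   where STACK_POINTER_OFFSET covers the back chain and $lr slots shown
   at the bottom of the diagram.  */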
1948 void
1949 spu_expand_prologue (void)
1951 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1952 HOST_WIDE_INT total_size;
1953 HOST_WIDE_INT saved_regs_size;
1954 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1955 rtx scratch_reg_0, scratch_reg_1;
1956 rtx insn, real;
1958 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1959 the "toplevel" insn chain. */
1960 emit_note (NOTE_INSN_DELETED);
1962 if (flag_pic && optimize == 0)
1963 current_function_uses_pic_offset_table = 1;
1965 if (spu_naked_function_p (current_function_decl))
1966 return;
1968 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1969 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1971 saved_regs_size = spu_saved_regs_size ();
1972 total_size = size + saved_regs_size
1973 + current_function_outgoing_args_size
1974 + current_function_pretend_args_size;
1976 if (!current_function_is_leaf
1977 || current_function_calls_alloca || total_size > 0)
1978 total_size += STACK_POINTER_OFFSET;
1980 /* Save this first because code after this might use the link
1981 register as a scratch register. */
1982 if (!current_function_is_leaf)
1984 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1985 RTX_FRAME_RELATED_P (insn) = 1;
1988 if (total_size > 0)
1990 offset = -current_function_pretend_args_size;
1991 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1992 if (need_to_save_reg (regno, 1))
1994 offset -= 16;
1995 insn = frame_emit_store (regno, sp_reg, offset);
1996 RTX_FRAME_RELATED_P (insn) = 1;
2000 if (flag_pic && current_function_uses_pic_offset_table)
2002 rtx pic_reg = get_pic_reg ();
2003 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2004 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2007 if (total_size > 0)
2009 if (flag_stack_check)
2011 /* We compare against total_size-1 because
2012 ($sp >= total_size) <=> ($sp > total_size-1) */
2013 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2014 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2015 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2016 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2018 emit_move_insn (scratch_v4si, size_v4si);
2019 size_v4si = scratch_v4si;
2021 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2022 emit_insn (gen_vec_extractv4si
2023 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2024 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2027 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2028 the value of the previous $sp because we save it as the back
2029 chain. */
2030 if (total_size <= 2000)
2032 /* In this case we save the back chain first. */
2033 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2034 insn =
2035 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2037 else if (satisfies_constraint_K (GEN_INT (-total_size)))
2039 insn = emit_move_insn (scratch_reg_0, sp_reg);
2040 insn =
2041 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
2043 else
2045 insn = emit_move_insn (scratch_reg_0, sp_reg);
2046 insn =
2047 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2049 RTX_FRAME_RELATED_P (insn) = 1;
2050 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2051 REG_NOTES (insn) =
2052 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
2054 if (total_size > 2000)
2056 /* Save the back chain ptr */
2057 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2060 if (frame_pointer_needed)
2062 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2063 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2064 + current_function_outgoing_args_size;
2065 /* Set the new frame_pointer */
2066 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2067 RTX_FRAME_RELATED_P (insn) = 1;
2068 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2069 REG_NOTES (insn) =
2070 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2071 real, REG_NOTES (insn));
2072 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2076 emit_note (NOTE_INSN_DELETED);
2079 void
2080 spu_expand_epilogue (bool sibcall_p)
2082 int size = get_frame_size (), offset, regno;
2083 HOST_WIDE_INT saved_regs_size, total_size;
2084 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2085 rtx jump, scratch_reg_0;
2087 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2088 the "toplevel" insn chain. */
2089 emit_note (NOTE_INSN_DELETED);
2091 if (spu_naked_function_p (current_function_decl))
2092 return;
2094 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2096 saved_regs_size = spu_saved_regs_size ();
2097 total_size = size + saved_regs_size
2098 + current_function_outgoing_args_size
2099 + current_function_pretend_args_size;
2101 if (!current_function_is_leaf
2102 || current_function_calls_alloca || total_size > 0)
2103 total_size += STACK_POINTER_OFFSET;
2105 if (total_size > 0)
2107 if (current_function_calls_alloca)
2108 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2109 else
2110 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2113 if (saved_regs_size > 0)
2115 offset = -current_function_pretend_args_size;
2116 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2117 if (need_to_save_reg (regno, 1))
2119 offset -= 0x10;
2120 frame_emit_load (regno, sp_reg, offset);
2125 if (!current_function_is_leaf)
2126 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2128 if (!sibcall_p)
2130 emit_insn (gen_rtx_USE
2131 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
2132 jump = emit_jump_insn (gen__return ());
2133 emit_barrier_after (jump);
2136 emit_note (NOTE_INSN_DELETED);
2140 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2142 if (count != 0)
2143 return 0;
2144 /* This is inefficient because it ends up copying to a save-register
2145 which then gets saved even though $lr has already been saved. But
2146 it does generate better code for leaf functions and we don't need
2147 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2148 used for __builtin_return_address anyway, so maybe we don't care if
2149 it's inefficient. */
2150 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2154 /* Given VAL, generate a constant appropriate for MODE.
2155 If MODE is a vector mode, every element will be VAL.
2156 For TImode, VAL will be zero extended to 128 bits. */
2158 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2160 rtx inner;
2161 rtvec v;
2162 int units, i;
2164 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2165 || GET_MODE_CLASS (mode) == MODE_FLOAT
2166 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2167 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2169 if (GET_MODE_CLASS (mode) == MODE_INT)
2170 return immed_double_const (val, 0, mode);
2172 /* val is the bit representation of the float */
2173 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2174 return hwint_to_const_double (mode, val);
2176 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2177 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2178 else
2179 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2181 units = GET_MODE_NUNITS (mode);
2183 v = rtvec_alloc (units);
2185 for (i = 0; i < units; ++i)
2186 RTVEC_ELT (v, i) = inner;
2188 return gen_rtx_CONST_VECTOR (mode, v);
2191 /* Create a MODE vector constant from 4 ints. */
2193 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2195 unsigned char arr[16];
2196 arr[0] = (a >> 24) & 0xff;
2197 arr[1] = (a >> 16) & 0xff;
2198 arr[2] = (a >> 8) & 0xff;
2199 arr[3] = (a >> 0) & 0xff;
2200 arr[4] = (b >> 24) & 0xff;
2201 arr[5] = (b >> 16) & 0xff;
2202 arr[6] = (b >> 8) & 0xff;
2203 arr[7] = (b >> 0) & 0xff;
2204 arr[8] = (c >> 24) & 0xff;
2205 arr[9] = (c >> 16) & 0xff;
2206 arr[10] = (c >> 8) & 0xff;
2207 arr[11] = (c >> 0) & 0xff;
2208 arr[12] = (d >> 24) & 0xff;
2209 arr[13] = (d >> 16) & 0xff;
2210 arr[14] = (d >> 8) & 0xff;
2211 arr[15] = (d >> 0) & 0xff;
2212 return array_to_constant(mode, arr);
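/* Usage sketch (values are illustrative only): spu_const_from_ints
   (V4SImode, 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f) packs each
   int big-endian, so arr[] holds the bytes 00 01 02 03 04 ... 0f and the
   resulting V4SImode constant has elements 0x00010203, 0x04050607,
   0x08090a0b and 0x0c0d0e0f.  */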
2215 /* branch hint stuff */
2217 /* An array of these is used to propagate hints to predecessor blocks. */
2218 struct spu_bb_info
2220 rtx prop_jump; /* propagated from another block */
2221 int bb_index; /* the original block. */
2223 static struct spu_bb_info *spu_bb_info;
2225 #define STOP_HINT_P(INSN) \
2226 (GET_CODE(INSN) == CALL_INSN \
2227 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2228 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2230 /* 1 when RTX is a hinted branch or its target. We keep track of
2231 what has been hinted so the safe-hint code can test it easily. */
2232 #define HINTED_P(RTX) \
2233 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2235 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2236 #define SCHED_ON_EVEN_P(RTX) \
2237 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2239 /* Emit a nop for INSN such that the two will dual issue. This assumes
2240 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2241 We check for TImode to handle a MULTI1 insn which has dual issued its
2242 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2243 ADDR_VEC insns. */
2244 static void
2245 emit_nop_for_insn (rtx insn)
2247 int p;
2248 rtx new_insn;
2249 p = get_pipe (insn);
2250 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2251 new_insn = emit_insn_after (gen_lnop (), insn);
2252 else if (p == 1 && GET_MODE (insn) == TImode)
2254 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2255 PUT_MODE (new_insn, TImode);
2256 PUT_MODE (insn, VOIDmode);
2258 else
2259 new_insn = emit_insn_after (gen_lnop (), insn);
2260 recog_memoized (new_insn);
2263 /* Insert nops in basic blocks to meet dual issue alignment
2264 requirements. Also make sure hbrp and hint instructions are at least
2265 one cycle apart, possibly inserting a nop. */
2266 static void
2267 pad_bb(void)
2269 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2270 int length;
2271 int addr;
2273 /* This sets up INSN_ADDRESSES. */
2274 shorten_branches (get_insns ());
2276 /* Keep track of length added by nops. */
2277 length = 0;
2279 prev_insn = 0;
2280 insn = get_insns ();
2281 if (!active_insn_p (insn))
2282 insn = next_active_insn (insn);
2283 for (; insn; insn = next_insn)
2285 next_insn = next_active_insn (insn);
2286 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2287 || INSN_CODE (insn) == CODE_FOR_hbr)
2289 if (hbr_insn)
2291 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2292 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2293 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2294 || (a1 - a0 == 4))
2296 prev_insn = emit_insn_before (gen_lnop (), insn);
2297 PUT_MODE (prev_insn, GET_MODE (insn));
2298 PUT_MODE (insn, TImode);
2299 length += 4;
2302 hbr_insn = insn;
2304 if (INSN_CODE (insn) == CODE_FOR_blockage)
2306 if (GET_MODE (insn) == TImode)
2307 PUT_MODE (next_insn, TImode);
2308 insn = next_insn;
2309 next_insn = next_active_insn (insn);
2311 addr = INSN_ADDRESSES (INSN_UID (insn));
2312 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2314 if (((addr + length) & 7) != 0)
2316 emit_nop_for_insn (prev_insn);
2317 length += 4;
2320 else if (GET_MODE (insn) == TImode
2321 && ((next_insn && GET_MODE (next_insn) != TImode)
2322 || get_attr_type (insn) == TYPE_MULTI0)
2323 && ((addr + length) & 7) != 0)
2325 /* prev_insn will always be set because the first insn is
2326 always 8-byte aligned. */
2327 emit_nop_for_insn (prev_insn);
2328 length += 4;
2330 prev_insn = insn;
2335 /* Routines for branch hints. */
2337 static void
2338 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2339 int distance, sbitmap blocks)
2341 rtx branch_label = 0;
2342 rtx hint;
2343 rtx insn;
2344 rtx table;
2346 if (before == 0 || branch == 0 || target == 0)
2347 return;
2349 /* While scheduling we require hints to be no further than 600 bytes,
2350 so we need to enforce that here too. */
2351 if (distance > 600)
2352 return;
2355 /* If BEFORE is a basic block note, emit the hint after the note. */
2355 if (NOTE_INSN_BASIC_BLOCK_P (before))
2356 before = NEXT_INSN (before);
2358 if (INSN_CODE (branch) == CODE_FOR_expect_then
2359 || INSN_CODE (branch) == CODE_FOR_expect_else)
2361 HINTED_P (branch) = 1;
2362 hint = PREV_INSN (before);
2364 else
2366 branch_label = gen_label_rtx ();
2367 LABEL_NUSES (branch_label)++;
2368 LABEL_PRESERVE_P (branch_label) = 1;
2369 insn = emit_label_before (branch_label, branch);
2370 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2371 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2373 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2374 recog_memoized (hint);
2375 HINTED_P (branch) = 1;
2378 if (GET_CODE (target) == LABEL_REF)
2379 HINTED_P (XEXP (target, 0)) = 1;
2380 else if (tablejump_p (branch, 0, &table))
2382 rtvec vec;
2383 int j;
2384 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2385 vec = XVEC (PATTERN (table), 0);
2386 else
2387 vec = XVEC (PATTERN (table), 1);
2388 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2389 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2392 if (distance >= 588)
2394 /* Make sure the hint isn't scheduled any earlier than this point,
2395 which could make it too far for the branch offset to fit. */
2396 recog_memoized (emit_insn_before (gen_blockage (), hint));
2398 else if (distance <= 8 * 4)
2400 /* To guarantee at least 8 insns between the hint and branch we
2401 insert nops. */
2402 int d;
2403 for (d = distance; d < 8 * 4; d += 4)
2405 insn =
2406 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2407 recog_memoized (insn);
2410 /* Make sure any nops inserted aren't scheduled before the hint. */
2411 recog_memoized (emit_insn_after (gen_blockage (), hint));
2413 /* Make sure any nops inserted aren't scheduled after the call. */
2414 if (CALL_P (branch) && distance < 8 * 4)
2415 recog_memoized (emit_insn_before (gen_blockage (), branch));
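/* Illustrative arithmetic for the nop padding above (hypothetical
   distance): with distance == 12 bytes the loop runs for d = 12, 16, 20,
   24 and 28, emitting 5 nops, so 8 * 4 bytes end up separating the hint
   from the branch as required.  */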
2419 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2420 the rtx for the branch target. */
2421 static rtx
2422 get_branch_target (rtx branch)
2424 if (GET_CODE (branch) == JUMP_INSN)
2426 rtx set, src;
2428 /* Return statements */
2429 if (GET_CODE (PATTERN (branch)) == RETURN)
2430 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2432 /* jump table */
2433 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2434 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2435 return 0;
2437 set = single_set (branch);
2438 src = SET_SRC (set);
2439 if (GET_CODE (SET_DEST (set)) != PC)
2440 abort ();
2442 if (GET_CODE (src) == IF_THEN_ELSE)
2444 rtx lab = 0;
2445 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2447 if (INSN_CODE (branch) == CODE_FOR_expect_then)
2448 return XEXP (src, 1);
2449 if (INSN_CODE (branch) == CODE_FOR_expect_else)
2450 return XEXP (src, 2);
2452 if (note)
2454 /* If the more probable case is not a fall through, then
2455 try a branch hint. */
2456 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2457 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2458 && GET_CODE (XEXP (src, 1)) != PC)
2459 lab = XEXP (src, 1);
2460 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2461 && GET_CODE (XEXP (src, 2)) != PC)
2462 lab = XEXP (src, 2);
2464 if (lab)
2466 if (GET_CODE (lab) == RETURN)
2467 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2468 return lab;
2470 return 0;
2473 return src;
2475 else if (GET_CODE (branch) == CALL_INSN)
2477 rtx call;
2478 /* All of our call patterns are in a PARALLEL and the CALL is
2479 the first pattern in the PARALLEL. */
2480 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2481 abort ();
2482 call = XVECEXP (PATTERN (branch), 0, 0);
2483 if (GET_CODE (call) == SET)
2484 call = SET_SRC (call);
2485 if (GET_CODE (call) != CALL)
2486 abort ();
2487 return XEXP (XEXP (call, 0), 0);
2489 return 0;
2492 /* The special $hbr register is used to prevent the insn scheduler from
2493 moving hbr insns across instructions which invalidate them. It
2494 should only be used in a clobber, and this function searches for
2495 insns which clobber it. */
2496 static bool
2497 insn_clobbers_hbr (rtx insn)
2499 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2500 return 1;
2501 if (INSN_P (insn)
2502 && GET_CODE (PATTERN (insn)) == PARALLEL)
2504 rtx parallel = PATTERN (insn);
2505 rtx clobber;
2506 int j;
2507 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2509 clobber = XVECEXP (parallel, 0, j);
2510 if (GET_CODE (clobber) == CLOBBER
2511 && GET_CODE (XEXP (clobber, 0)) == REG
2512 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2513 return 1;
2516 return 0;
2519 /* Search up to 32 insns starting at FIRST:
2520 - at any kind of hinted branch, just return
2521 - at any unconditional branch in the first 15 insns, just return
2522 - at a call or indirect branch, after the first 15 insns, force it to
2523 an even address and return
2524 - at any unconditional branch, after the first 15 insns, force it to
2525 an even address.
2526 At the end of the search, insert an hbrp within 4 insns of FIRST,
2527 and an hbrp within 16 instructions of FIRST.
2529 static void
2530 insert_hbrp_for_ilb_runout (rtx first)
2532 rtx insn, before_4 = 0, before_16 = 0;
2533 int addr = 0, length, first_addr = -1;
2534 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2535 int insert_lnop_after = 0;
2536 for (insn = first; insn; insn = NEXT_INSN (insn))
2537 if (INSN_P (insn))
2539 if (first_addr == -1)
2540 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2541 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2542 length = get_attr_length (insn);
2544 if (before_4 == 0 && addr + length >= 4 * 4)
2545 before_4 = insn;
2546 /* We test for 14 instructions because the first hbrp will add
2547 up to 2 instructions. */
2548 if (before_16 == 0 && addr + length >= 14 * 4)
2549 before_16 = insn;
2551 if (INSN_CODE (insn) == CODE_FOR_hbr)
2553 /* Make sure an hbrp is at least 2 cycles away from a hint.
2554 Insert an lnop after the hbrp when necessary. */
2555 if (before_4 == 0 && addr > 0)
2557 before_4 = insn;
2558 insert_lnop_after |= 1;
2560 else if (before_4 && addr <= 4 * 4)
2561 insert_lnop_after |= 1;
2562 if (before_16 == 0 && addr > 10 * 4)
2564 before_16 = insn;
2565 insert_lnop_after |= 2;
2567 else if (before_16 && addr <= 14 * 4)
2568 insert_lnop_after |= 2;
2571 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2573 if (addr < hbrp_addr0)
2574 hbrp_addr0 = addr;
2575 else if (addr < hbrp_addr1)
2576 hbrp_addr1 = addr;
2579 if (CALL_P (insn) || JUMP_P (insn))
2581 if (HINTED_P (insn))
2582 return;
2584 /* Any branch after the first 15 insns should be on an even
2585 address to avoid a special case branch. There might be
2586 some nops and/or hbrps inserted, so we test after 10
2587 insns. */
2588 if (addr > 10 * 4)
2589 SCHED_ON_EVEN_P (insn) = 1;
2592 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2593 return;
2596 if (addr + length >= 32 * 4)
2598 gcc_assert (before_4 && before_16);
2599 if (hbrp_addr0 > 4 * 4)
2601 insn =
2602 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2603 recog_memoized (insn);
2604 INSN_ADDRESSES_NEW (insn,
2605 INSN_ADDRESSES (INSN_UID (before_4)));
2606 PUT_MODE (insn, GET_MODE (before_4));
2607 PUT_MODE (before_4, TImode);
2608 if (insert_lnop_after & 1)
2610 insn = emit_insn_before (gen_lnop (), before_4);
2611 recog_memoized (insn);
2612 INSN_ADDRESSES_NEW (insn,
2613 INSN_ADDRESSES (INSN_UID (before_4)));
2614 PUT_MODE (insn, TImode);
2617 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2618 && hbrp_addr1 > 16 * 4)
2620 insn =
2621 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2622 recog_memoized (insn);
2623 INSN_ADDRESSES_NEW (insn,
2624 INSN_ADDRESSES (INSN_UID (before_16)));
2625 PUT_MODE (insn, GET_MODE (before_16));
2626 PUT_MODE (before_16, TImode);
2627 if (insert_lnop_after & 2)
2629 insn = emit_insn_before (gen_lnop (), before_16);
2630 recog_memoized (insn);
2631 INSN_ADDRESSES_NEW (insn,
2632 INSN_ADDRESSES (INSN_UID
2633 (before_16)));
2634 PUT_MODE (insn, TImode);
2637 return;
2640 else if (BARRIER_P (insn))
2641 return;
2645 /* The SPU might hang when it executes 48 inline instructions after a
2646 hinted branch jumps to its hinted target. The beginning of a
2647 function and the return from a call might have been hinted, and must
2648 be handled as well. To prevent a hang we insert 2 hbrps. The first
2649 should be within 6 insns of the branch target. The second should be
2650 within 22 insns of the branch target. When determining if hbrps are
2651 necessary, we look for only 32 inline instructions, because up to
2652 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2653 new hbrps, we insert them within 4 and 16 insns of the target. */
2654 static void
2655 insert_hbrp (void)
2657 rtx insn;
2658 if (TARGET_SAFE_HINTS)
2660 shorten_branches (get_insns ());
2661 /* Insert hbrp at beginning of function */
2662 insn = next_active_insn (get_insns ());
2663 if (insn)
2664 insert_hbrp_for_ilb_runout (insn);
2665 /* Insert hbrp after hinted targets. */
2666 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2667 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2668 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2672 static int in_spu_reorg;
2674 /* Insert branch hints. There are no branch optimizations after this
2675 pass, so it's safe to set our branch hints now. */
2676 static void
2677 spu_machine_dependent_reorg (void)
2679 sbitmap blocks;
2680 basic_block bb;
2681 rtx branch, insn;
2682 rtx branch_target = 0;
2683 int branch_addr = 0, insn_addr, required_dist = 0;
2684 int i;
2685 unsigned int j;
2687 if (!TARGET_BRANCH_HINTS || optimize == 0)
2689 /* We still do it for unoptimized code because an external
2690 function might have hinted a call or return. */
2691 insert_hbrp ();
2692 pad_bb ();
2693 return;
2696 blocks = sbitmap_alloc (last_basic_block);
2697 sbitmap_zero (blocks);
2699 in_spu_reorg = 1;
2700 compute_bb_for_insn ();
2702 compact_blocks ();
2704 spu_bb_info =
2705 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2706 sizeof (struct spu_bb_info));
2708 /* We need exact insn addresses and lengths. */
2709 shorten_branches (get_insns ());
2711 for (i = n_basic_blocks - 1; i >= 0; i--)
2713 bb = BASIC_BLOCK (i);
2714 branch = 0;
2715 if (spu_bb_info[i].prop_jump)
2717 branch = spu_bb_info[i].prop_jump;
2718 branch_target = get_branch_target (branch);
2719 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2720 required_dist = spu_hint_dist;
2722 /* Search from end of a block to beginning. In this loop, find
2723 jumps which need a branch hint and emit the hint only when:
2724 - it's an indirect branch and we're at the insn which sets
2725 the register
2726 - we're at an insn that will invalidate the hint. e.g., a
2727 call, another hint insn, inline asm that clobbers $hbr, and
2728 some inlined operations (divmodsi4). Don't consider jumps
2729 because they are only at the end of a block and are
2730 considered when we are deciding whether to propagate
2731 - we're getting too far away from the branch. The hbr insns
2732 only have a signed 10 bit offset
2733 We go back as far as possible so the branch will be considered
2734 for propagation when we get to the beginning of the block. */
2735 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2737 if (INSN_P (insn))
2739 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2740 if (branch
2741 && ((GET_CODE (branch_target) == REG
2742 && set_of (branch_target, insn) != NULL_RTX)
2743 || insn_clobbers_hbr (insn)
2744 || branch_addr - insn_addr > 600))
2746 rtx next = NEXT_INSN (insn);
2747 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2748 if (insn != BB_END (bb)
2749 && branch_addr - next_addr >= required_dist)
2751 if (dump_file)
2752 fprintf (dump_file,
2753 "hint for %i in block %i before %i\n",
2754 INSN_UID (branch), bb->index,
2755 INSN_UID (next));
2756 spu_emit_branch_hint (next, branch, branch_target,
2757 branch_addr - next_addr, blocks);
2759 branch = 0;
2762 /* JUMP_P will only be true at the end of a block. When
2763 branch is already set it means we've previously decided
2764 to propagate a hint for that branch into this block. */
2765 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2767 branch = 0;
2768 if ((branch_target = get_branch_target (insn)))
2770 branch = insn;
2771 branch_addr = insn_addr;
2772 required_dist = spu_hint_dist;
2773 if (INSN_CODE (branch) == CODE_FOR_expect_then
2774 || INSN_CODE (branch) == CODE_FOR_expect_else)
2775 required_dist = 0;
2779 if (insn == BB_HEAD (bb))
2780 break;
2783 if (branch)
2785 /* If we haven't emitted a hint for this branch yet, it might
2786 be profitable to emit it in one of the predecessor blocks,
2787 especially for loops. */
2788 rtx bbend;
2789 basic_block prev = 0, prop = 0, prev2 = 0;
2790 int loop_exit = 0, simple_loop = 0;
2791 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2793 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2794 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2795 prev = EDGE_PRED (bb, j)->src;
2796 else
2797 prev2 = EDGE_PRED (bb, j)->src;
2799 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2800 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2801 loop_exit = 1;
2802 else if (EDGE_SUCC (bb, j)->dest == bb)
2803 simple_loop = 1;
2805 /* If this branch is a loop exit then propagate to previous
2806 fallthru block. This catches the cases when it is a simple
2807 loop or when there is an initial branch into the loop. */
2808 if (prev && (loop_exit || simple_loop)
2809 && prev->loop_depth <= bb->loop_depth)
2810 prop = prev;
2812 /* If there is only one adjacent predecessor, don't propagate
2813 outside this loop. This loop_depth test isn't perfect, but
2814 I'm not sure the loop_father member is valid at this point. */
2815 else if (prev && single_pred_p (bb)
2816 && prev->loop_depth == bb->loop_depth)
2817 prop = prev;
2819 /* If this is the JOIN block of a simple IF-THEN then
2820 propagate the hint to the HEADER block. */
2821 else if (prev && prev2
2822 && EDGE_COUNT (bb->preds) == 2
2823 && EDGE_COUNT (prev->preds) == 1
2824 && EDGE_PRED (prev, 0)->src == prev2
2825 && prev2->loop_depth == bb->loop_depth
2826 && GET_CODE (branch_target) != REG)
2827 prop = prev;
2829 /* Don't propagate when:
2830 - this is a simple loop and the hint would be too far
2831 - this is not a simple loop and there are 16 insns in
2832 this block already
2833 - the predecessor block ends in a branch that will be
2834 hinted
2835 - the predecessor block ends in an insn that invalidates
2836 the hint */
2837 if (prop
2838 && prop->index >= 0
2839 && (bbend = BB_END (prop))
2840 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2841 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2842 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2844 if (dump_file)
2845 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2846 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2847 bb->index, prop->index, bb->loop_depth,
2848 INSN_UID (branch), loop_exit, simple_loop,
2849 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2851 spu_bb_info[prop->index].prop_jump = branch;
2852 spu_bb_info[prop->index].bb_index = i;
2854 else if (branch_addr - next_addr >= required_dist)
2856 if (dump_file)
2857 fprintf (dump_file, "hint for %i in block %i before %i\n",
2858 INSN_UID (branch), bb->index,
2859 INSN_UID (NEXT_INSN (insn)));
2860 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2861 branch_addr - next_addr, blocks);
2863 branch = 0;
2866 free (spu_bb_info);
2868 if (!sbitmap_empty_p (blocks))
2869 find_many_sub_basic_blocks (blocks);
2871 /* We have to schedule to make sure alignment is ok. */
2872 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2874 /* The hints need to be scheduled, so call it again. */
2875 schedule_insns ();
2877 insert_hbrp ();
2879 pad_bb ();
2881 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2882 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2884 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2885 between its branch label and the branch. We don't move the
2886 label because GCC expects it at the beginning of the block. */
2887 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2888 rtx label_ref = XVECEXP (unspec, 0, 0);
2889 rtx label = XEXP (label_ref, 0);
2890 rtx branch;
2891 int offset = 0;
2892 for (branch = NEXT_INSN (label);
2893 !JUMP_P (branch) && !CALL_P (branch);
2894 branch = NEXT_INSN (branch))
2895 if (NONJUMP_INSN_P (branch))
2896 offset += get_attr_length (branch);
2897 if (offset > 0)
2898 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2900 else if (JUMP_P (insn) && (INSN_CODE (insn) == CODE_FOR_expect_then
2901 || INSN_CODE (insn) == CODE_FOR_expect_else))
2903 /* __builtin_expect with a non-constant second argument
2904 generates patterns which contain labels that need to be
2905 relocated. These are generated in spu_emit_branch_or_set. */
2906 rtx set0 = XVECEXP (PATTERN (insn), 0, 0);
2907 rtx use1 = XVECEXP (PATTERN (insn), 0, 1);
2908 rtx use2 = XVECEXP (PATTERN (insn), 0, 2);
2909 rtx use3 = XVECEXP (PATTERN (insn), 0, 3);
2910 rtx label0 = XEXP (XEXP (set0, 1), 1);
2911 rtx label1 = XEXP (XEXP (use1, 0), 0);
2912 rtx label2 = XEXP (XEXP (use2, 0), 0);
2913 rtx label3 = XEXP (XEXP (use3, 0), 0);
2914 if (GET_CODE (label0) == PC)
2915 label0 = XEXP (XEXP (set0, 1), 2);
2916 remove_insn (label1);
2917 add_insn_before (label1, insn, 0);
2918 if (GET_CODE (XEXP (XEXP (set0, 1), 0)) == NE)
2920 remove_insn (label2);
2921 add_insn_after (label2, insn, 0);
2922 remove_insn (label3);
2923 add_insn_after (label3, XEXP (label0, 0), 0);
2925 else
2927 remove_insn (label2);
2928 add_insn_after (label2, XEXP (label0, 0), 0);
2929 remove_insn (label3);
2930 add_insn_after (label3, insn, 0);
2934 if (spu_flag_var_tracking)
2936 df_analyze ();
2937 timevar_push (TV_VAR_TRACKING);
2938 variable_tracking_main ();
2939 timevar_pop (TV_VAR_TRACKING);
2940 df_finish_pass (false);
2943 free_bb_for_insn ();
2945 in_spu_reorg = 0;
2949 /* Insn scheduling routines, primarily for dual issue. */
2950 static int
2951 spu_sched_issue_rate (void)
2953 return 2;
2956 static int
2957 uses_ls_unit(rtx insn)
2959 rtx set = single_set (insn);
2960 if (set != 0
2961 && (GET_CODE (SET_DEST (set)) == MEM
2962 || GET_CODE (SET_SRC (set)) == MEM))
2963 return 1;
2964 return 0;
2967 static int
2968 get_pipe (rtx insn)
2970 enum attr_type t;
2971 /* Handle inline asm */
2972 if (INSN_CODE (insn) == -1)
2973 return -1;
2974 t = get_attr_type (insn);
2975 switch (t)
2977 case TYPE_CONVERT:
2978 return -2;
2979 case TYPE_MULTI0:
2980 return -1;
2982 case TYPE_FX2:
2983 case TYPE_FX3:
2984 case TYPE_SPR:
2985 case TYPE_NOP:
2986 case TYPE_FXB:
2987 case TYPE_FPD:
2988 case TYPE_FP6:
2989 case TYPE_FP7:
2990 return 0;
2992 case TYPE_LNOP:
2993 case TYPE_SHUF:
2994 case TYPE_LOAD:
2995 case TYPE_STORE:
2996 case TYPE_BR:
2997 case TYPE_MULTI1:
2998 case TYPE_HBR:
2999 case TYPE_IPREFETCH:
3000 return 1;
3001 default:
3002 abort ();
3007 /* haifa-sched.c has a static variable that keeps track of the current
3008 cycle. It is passed to spu_sched_reorder, and we record it here for
3009 use by spu_sched_variable_issue. It won't be accurate if the
3010 scheduler updates its clock_var between the two calls. */
3011 static int clock_var;
3013 /* This is used to keep track of insn alignment. Set to 0 at the
3014 beginning of each block and increased by the "length" attr of each
3015 insn scheduled. */
3016 static int spu_sched_length;
3018 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3019 ready list appropriately in spu_sched_reorder(). */
3020 static int pipe0_clock;
3021 static int pipe1_clock;
3023 static int prev_clock_var;
3025 static int prev_priority;
3027 /* The SPU needs to load the next ilb sometime during the execution of
3028 the previous ilb. There is a potential conflict if every cycle has a
3029 load or store. To avoid the conflict we make sure the load/store
3030 unit is free for at least one cycle during the execution of insns in
3031 the previous ilb. */
3032 static int spu_ls_first;
3033 static int prev_ls_clock;
3035 static void
3036 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3037 int max_ready ATTRIBUTE_UNUSED)
3039 spu_sched_length = 0;
3042 static void
3043 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3044 int max_ready ATTRIBUTE_UNUSED)
3046 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3048 /* When any block might be at least 8-byte aligned, assume they
3049 will all be at least 8-byte aligned to make sure dual issue
3050 works out correctly. */
3051 spu_sched_length = 0;
3053 spu_ls_first = INT_MAX;
3054 clock_var = -1;
3055 prev_ls_clock = -1;
3056 pipe0_clock = -1;
3057 pipe1_clock = -1;
3058 prev_clock_var = -1;
3059 prev_priority = -1;
3062 static int
3063 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3064 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3066 int len;
3067 int p;
3068 if (GET_CODE (PATTERN (insn)) == USE
3069 || GET_CODE (PATTERN (insn)) == CLOBBER
3070 || (len = get_attr_length (insn)) == 0)
3071 return more;
3073 spu_sched_length += len;
3075 /* Reset on inline asm */
3076 if (INSN_CODE (insn) == -1)
3078 spu_ls_first = INT_MAX;
3079 pipe0_clock = -1;
3080 pipe1_clock = -1;
3081 return 0;
3083 p = get_pipe (insn);
3084 if (p == 0)
3085 pipe0_clock = clock_var;
3086 else
3087 pipe1_clock = clock_var;
3089 if (in_spu_reorg)
3091 if (clock_var - prev_ls_clock > 1
3092 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3093 spu_ls_first = INT_MAX;
3094 if (uses_ls_unit (insn))
3096 if (spu_ls_first == INT_MAX)
3097 spu_ls_first = spu_sched_length;
3098 prev_ls_clock = clock_var;
3101 /* The scheduler hasn't inserted the nop, but we will later on.
3102 Include those nops in spu_sched_length. */
3103 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3104 spu_sched_length += 4;
3105 prev_clock_var = clock_var;
3107 /* more is -1 when called from spu_sched_reorder for new insns
3108 that don't have INSN_PRIORITY */
3109 if (more >= 0)
3110 prev_priority = INSN_PRIORITY (insn);
3113 /* Always try issuing more insns. spu_sched_reorder will decide
3114 when the cycle should be advanced. */
3115 return 1;
3118 /* This function is called for both TARGET_SCHED_REORDER and
3119 TARGET_SCHED_REORDER2. */
3120 static int
3121 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3122 rtx *ready, int *nreadyp, int clock)
3124 int i, nready = *nreadyp;
3125 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3126 rtx insn;
3128 clock_var = clock;
3130 if (nready <= 0 || pipe1_clock >= clock)
3131 return 0;
3133 /* Find any rtl insns that don't generate assembly insns and schedule
3134 them first. */
3135 for (i = nready - 1; i >= 0; i--)
3137 insn = ready[i];
3138 if (INSN_CODE (insn) == -1
3139 || INSN_CODE (insn) == CODE_FOR_blockage
3140 || INSN_CODE (insn) == CODE_FOR__spu_convert)
3142 ready[i] = ready[nready - 1];
3143 ready[nready - 1] = insn;
3144 return 1;
3148 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3149 for (i = 0; i < nready; i++)
3150 if (INSN_CODE (ready[i]) != -1)
3152 insn = ready[i];
3153 switch (get_attr_type (insn))
3155 default:
3156 case TYPE_MULTI0:
3157 case TYPE_CONVERT:
3158 case TYPE_FX2:
3159 case TYPE_FX3:
3160 case TYPE_SPR:
3161 case TYPE_NOP:
3162 case TYPE_FXB:
3163 case TYPE_FPD:
3164 case TYPE_FP6:
3165 case TYPE_FP7:
3166 pipe_0 = i;
3167 break;
3168 case TYPE_LOAD:
3169 case TYPE_STORE:
3170 pipe_ls = i;
3171 case TYPE_LNOP:
3172 case TYPE_SHUF:
3173 case TYPE_BR:
3174 case TYPE_MULTI1:
3175 case TYPE_HBR:
3176 pipe_1 = i;
3177 break;
3178 case TYPE_IPREFETCH:
3179 pipe_hbrp = i;
3180 break;
3184 /* In the first scheduling phase, schedule loads and stores together
3185 to increase the chance they will get merged during postreload CSE. */
3186 if (!reload_completed && pipe_ls >= 0)
3188 insn = ready[pipe_ls];
3189 ready[pipe_ls] = ready[nready - 1];
3190 ready[nready - 1] = insn;
3191 return 1;
3194 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3195 if (pipe_hbrp >= 0)
3196 pipe_1 = pipe_hbrp;
3198 /* When we have loads/stores in every cycle of the last 15 insns and
3199 we are about to schedule another load/store, emit an hbrp insn
3200 instead. */
3201 if (in_spu_reorg
3202 && spu_sched_length - spu_ls_first >= 4 * 15
3203 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3205 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3206 recog_memoized (insn);
3207 if (pipe0_clock < clock)
3208 PUT_MODE (insn, TImode);
3209 spu_sched_variable_issue (file, verbose, insn, -1);
3210 return 0;
3213 /* In general, we want to emit nops to increase dual issue, but dual
3214 issue isn't faster when one of the insns could be scheduled later
3215 without affecting the critical path. We look at INSN_PRIORITY to
3216 make a good guess, but it isn't perfect, so -mdual-nops=n can be
3217 used to adjust it. */
3218 if (in_spu_reorg && spu_dual_nops < 10)
3220 /* When we are at an even address and we are not issuing nops to
3221 improve scheduling then we need to advance the cycle. */
3222 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3223 && (spu_dual_nops == 0
3224 || (pipe_1 != -1
3225 && prev_priority >
3226 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3227 return 0;
3229 /* When at an odd address, schedule the highest priority insn
3230 without considering pipeline. */
3231 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3232 && (spu_dual_nops == 0
3233 || (prev_priority >
3234 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3235 return 1;
3239 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3240 pipe0 insn in the ready list, schedule it. */
3241 if (pipe0_clock < clock && pipe_0 >= 0)
3242 schedule_i = pipe_0;
3244 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3245 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3246 else
3247 schedule_i = pipe_1;
3249 if (schedule_i > -1)
3251 insn = ready[schedule_i];
3252 ready[schedule_i] = ready[nready - 1];
3253 ready[nready - 1] = insn;
3254 return 1;
3256 return 0;
3259 /* INSN is dependent on DEP_INSN. */
3260 static int
3261 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3263 rtx set;
3265 /* The blockage pattern is used to prevent instructions from being
3266 moved across it and has no cost. */
3267 if (INSN_CODE (insn) == CODE_FOR_blockage
3268 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3269 return 0;
3271 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3272 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3273 return 0;
3275 /* Make sure hbrps are spread out. */
3276 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3277 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3278 return 8;
3280 /* Make sure hints and hbrps are 2 cycles apart. */
3281 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3282 || INSN_CODE (insn) == CODE_FOR_hbr)
3283 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3284 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3285 return 2;
3287 /* An hbrp has no real dependency on other insns. */
3288 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3289 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3290 return 0;
3292 /* Assuming that it is unlikely an argument register will be used in
3293 the first cycle of the called function, we reduce the cost for
3294 slightly better scheduling of dep_insn. When not hinted, the
3295 mispredicted branch would hide the cost as well. */
3296 if (CALL_P (insn))
3298 rtx target = get_branch_target (insn);
3299 if (GET_CODE (target) != REG || !set_of (target, insn))
3300 return cost - 2;
3301 return cost;
3304 /* And when returning from a function, let's assume the return values
3305 are completed sooner too. */
3306 if (CALL_P (dep_insn))
3307 return cost - 2;
3309 /* Make sure an instruction that loads from the back chain is scheduled
3310 away from the return instruction so a hint is more likely to get
3311 issued. */
3312 if (INSN_CODE (insn) == CODE_FOR__return
3313 && (set = single_set (dep_insn))
3314 && GET_CODE (SET_DEST (set)) == REG
3315 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3316 return 20;
3318 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3319 scheduler makes every insn in a block anti-dependent on the final
3320 jump_insn. We adjust here so higher cost insns will get scheduled
3321 earlier. */
3322 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3323 return insn_cost (dep_insn) - 3;
3325 return cost;
3328 /* Create a CONST_DOUBLE from a string. */
3329 struct rtx_def *
3330 spu_float_const (const char *string, enum machine_mode mode)
3332 REAL_VALUE_TYPE value;
3333 value = REAL_VALUE_ATOF (string, mode);
3334 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3338 spu_constant_address_p (rtx x)
3340 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3341 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3342 || GET_CODE (x) == HIGH);
3345 static enum spu_immediate
3346 which_immediate_load (HOST_WIDE_INT val)
3348 gcc_assert (val == trunc_int_for_mode (val, SImode));
3350 if (val >= -0x8000 && val <= 0x7fff)
3351 return SPU_IL;
3352 if (val >= 0 && val <= 0x3ffff)
3353 return SPU_ILA;
3354 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3355 return SPU_ILH;
3356 if ((val & 0xffff) == 0)
3357 return SPU_ILHU;
3359 return SPU_NONE;
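/* Examples (illustrative values): 0x1234 is loadable with il (SPU_IL),
   0x12345 with ila (SPU_ILA, 18-bit unsigned), 0x00120012 with ilh
   (SPU_ILH, identical halfwords) and 0x12340000 with ilhu (SPU_ILHU, low
   halfword zero); 0x12345678 matches none of these and yields
   SPU_NONE.  */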
3362 /* Return true when OP can be loaded by one of the il instructions, or
3363 when the epilogue has not yet been generated and OP can be loaded using ilhu and iohl. */
3365 immediate_load_p (rtx op, enum machine_mode mode)
3367 if (CONSTANT_P (op))
3369 enum immediate_class c = classify_immediate (op, mode);
3370 return c == IC_IL1 || c == IC_IL1s
3371 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3373 return 0;
3376 /* Return true if the first SIZE bytes of arr form a constant that can be
3377 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3378 represent the size and offset of the instruction to use. */
3379 static int
3380 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3382 int cpat, run, i, start;
3383 cpat = 1;
3384 run = 0;
3385 start = -1;
3386 for (i = 0; i < size && cpat; i++)
3387 if (arr[i] != i+16)
3389 if (!run)
3391 start = i;
3392 if (arr[i] == 3)
3393 run = 1;
3394 else if (arr[i] == 2 && arr[i+1] == 3)
3395 run = 2;
3396 else if (arr[i] == 0)
3398 while (arr[i+run] == run && i+run < 16)
3399 run++;
3400 if (run != 4 && run != 8)
3401 cpat = 0;
3403 else
3404 cpat = 0;
3405 if ((i & (run-1)) != 0)
3406 cpat = 0;
3407 i += run;
3409 else
3410 cpat = 0;
3412 if (cpat && (run || size < 16))
3414 if (run == 0)
3415 run = 1;
3416 if (prun)
3417 *prun = run;
3418 if (pstart)
3419 *pstart = start == -1 ? 16-run : start;
3420 return 1;
3422 return 0;
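/* Worked example (hypothetical shuffle pattern): the cwd-style pattern
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,...,0x1f } differs
   from the identity (i+16) only in bytes 4..7, which hold the run
   0,1,2,3, so cpat_info returns 1 with *prun == 4 and *pstart == 4.  */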
3425 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3426 it into a register. MODE is only valid when OP is a CONST_INT. */
3427 static enum immediate_class
3428 classify_immediate (rtx op, enum machine_mode mode)
3430 HOST_WIDE_INT val;
3431 unsigned char arr[16];
3432 int i, j, repeated, fsmbi, repeat;
3434 gcc_assert (CONSTANT_P (op));
3436 if (GET_MODE (op) != VOIDmode)
3437 mode = GET_MODE (op);
3439 /* A V4SI const_vector with all identical symbols is ok. */
3440 if (!flag_pic
3441 && mode == V4SImode
3442 && GET_CODE (op) == CONST_VECTOR
3443 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3445 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3446 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3447 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3448 op = CONST_VECTOR_ELT (op, 0);
3450 switch (GET_CODE (op))
3452 case SYMBOL_REF:
3453 case LABEL_REF:
3454 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3456 case CONST:
3457 /* We can never know if the resulting address fits in 18 bits and can be
3458 loaded with ila. For now, assume the address will not overflow if
3459 the displacement is "small" (fits 'K' constraint). */
3460 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3462 rtx sym = XEXP (XEXP (op, 0), 0);
3463 rtx cst = XEXP (XEXP (op, 0), 1);
3465 if (GET_CODE (sym) == SYMBOL_REF
3466 && GET_CODE (cst) == CONST_INT
3467 && satisfies_constraint_K (cst))
3468 return IC_IL1s;
3470 return IC_IL2s;
3472 case HIGH:
3473 return IC_IL1s;
3475 case CONST_VECTOR:
3476 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3477 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3478 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3479 return IC_POOL;
3480 /* Fall through. */
3482 case CONST_INT:
3483 case CONST_DOUBLE:
3484 constant_to_array (mode, op, arr);
3486 /* Check that each 4-byte slot is identical. */
3487 repeated = 1;
3488 for (i = 4; i < 16; i += 4)
3489 for (j = 0; j < 4; j++)
3490 if (arr[j] != arr[i + j])
3491 repeated = 0;
3493 if (repeated)
3495 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3496 val = trunc_int_for_mode (val, SImode);
3498 if (which_immediate_load (val) != SPU_NONE)
3499 return IC_IL1;
3502 /* Any mode of 2 bytes or smaller can be loaded with an il
3503 instruction. */
3504 gcc_assert (GET_MODE_SIZE (mode) > 2);
3506 fsmbi = 1;
3507 repeat = 0;
3508 for (i = 0; i < 16 && fsmbi; i++)
3509 if (arr[i] != 0 && repeat == 0)
3510 repeat = arr[i];
3511 else if (arr[i] != 0 && arr[i] != repeat)
3512 fsmbi = 0;
3513 if (fsmbi)
3514 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3516 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3517 return IC_CPAT;
3519 if (repeated)
3520 return IC_IL2;
3522 return IC_POOL;
3523 default:
3524 break;
3526 gcc_unreachable ();
3529 static enum spu_immediate
3530 which_logical_immediate (HOST_WIDE_INT val)
3532 gcc_assert (val == trunc_int_for_mode (val, SImode));
3534 if (val >= -0x200 && val <= 0x1ff)
3535 return SPU_ORI;
3536 if (val >= 0 && val <= 0xffff)
3537 return SPU_IOHL;
3538 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3540 val = trunc_int_for_mode (val, HImode);
3541 if (val >= -0x200 && val <= 0x1ff)
3542 return SPU_ORHI;
3543 if ((val & 0xff) == ((val >> 8) & 0xff))
3545 val = trunc_int_for_mode (val, QImode);
3546 if (val >= -0x200 && val <= 0x1ff)
3547 return SPU_ORBI;
3550 return SPU_NONE;
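/* Examples (illustrative values): -0x100 fits ori (SPU_ORI), 0x1234 fits
   iohl (SPU_IOHL), 0x00050005 repeats a halfword that fits orhi
   (SPU_ORHI), and 0x03030303 repeats a byte that fits orbi
   (SPU_ORBI).  */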
3553 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3554 CONST_DOUBLEs. */
3555 static int
3556 const_vector_immediate_p (rtx x)
3558 int i;
3559 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3560 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3561 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3562 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3563 return 0;
3564 return 1;
3568 logical_immediate_p (rtx op, enum machine_mode mode)
3570 HOST_WIDE_INT val;
3571 unsigned char arr[16];
3572 int i, j;
3574 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3575 || GET_CODE (op) == CONST_VECTOR);
3577 if (GET_CODE (op) == CONST_VECTOR
3578 && !const_vector_immediate_p (op))
3579 return 0;
3581 if (GET_MODE (op) != VOIDmode)
3582 mode = GET_MODE (op);
3584 constant_to_array (mode, op, arr);
3586 /* Check that bytes are repeated. */
3587 for (i = 4; i < 16; i += 4)
3588 for (j = 0; j < 4; j++)
3589 if (arr[j] != arr[i + j])
3590 return 0;
3592 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3593 val = trunc_int_for_mode (val, SImode);
3595 i = which_logical_immediate (val);
3596 return i != SPU_NONE && i != SPU_IOHL;
3600 iohl_immediate_p (rtx op, enum machine_mode mode)
3602 HOST_WIDE_INT val;
3603 unsigned char arr[16];
3604 int i, j;
3606 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3607 || GET_CODE (op) == CONST_VECTOR);
3609 if (GET_CODE (op) == CONST_VECTOR
3610 && !const_vector_immediate_p (op))
3611 return 0;
3613 if (GET_MODE (op) != VOIDmode)
3614 mode = GET_MODE (op);
3616 constant_to_array (mode, op, arr);
3618 /* Check that bytes are repeated. */
3619 for (i = 4; i < 16; i += 4)
3620 for (j = 0; j < 4; j++)
3621 if (arr[j] != arr[i + j])
3622 return 0;
3624 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3625 val = trunc_int_for_mode (val, SImode);
3627 return val >= 0 && val <= 0xffff;
3631 arith_immediate_p (rtx op, enum machine_mode mode,
3632 HOST_WIDE_INT low, HOST_WIDE_INT high)
3634 HOST_WIDE_INT val;
3635 unsigned char arr[16];
3636 int bytes, i, j;
3638 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3639 || GET_CODE (op) == CONST_VECTOR);
3641 if (GET_CODE (op) == CONST_VECTOR
3642 && !const_vector_immediate_p (op))
3643 return 0;
3645 if (GET_MODE (op) != VOIDmode)
3646 mode = GET_MODE (op);
3648 constant_to_array (mode, op, arr);
3650 if (VECTOR_MODE_P (mode))
3651 mode = GET_MODE_INNER (mode);
3653 bytes = GET_MODE_SIZE (mode);
3654 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3656 /* Check that bytes are repeated. */
3657 for (i = bytes; i < 16; i += bytes)
3658 for (j = 0; j < bytes; j++)
3659 if (arr[j] != arr[i + j])
3660 return 0;
3662 val = arr[0];
3663 for (j = 1; j < bytes; j++)
3664 val = (val << 8) | arr[j];
3666 val = trunc_int_for_mode (val, mode);
3668 return val >= low && val <= high;
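/* Illustrative call: for a V8HImode constant whose elements are all
   0x0012, the byte pattern repeats with period 2 and val becomes 0x12,
   so arith_immediate_p (op, V8HImode, -0x200, 0x1ff) returns true while
   arith_immediate_p (op, V8HImode, -16, 15) returns false.  */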
3671 /* TRUE when op is an immediate and an exact power of 2, and given that
3672 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3673 all entries must be the same. */
3674 bool
3675 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3677 enum machine_mode int_mode;
3678 HOST_WIDE_INT val;
3679 unsigned char arr[16];
3680 int bytes, i, j;
3682 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3683 || GET_CODE (op) == CONST_VECTOR);
3685 if (GET_CODE (op) == CONST_VECTOR
3686 && !const_vector_immediate_p (op))
3687 return 0;
3689 if (GET_MODE (op) != VOIDmode)
3690 mode = GET_MODE (op);
3692 constant_to_array (mode, op, arr);
3694 if (VECTOR_MODE_P (mode))
3695 mode = GET_MODE_INNER (mode);
3697 bytes = GET_MODE_SIZE (mode);
3698 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3700 /* Check that bytes are repeated. */
3701 for (i = bytes; i < 16; i += bytes)
3702 for (j = 0; j < bytes; j++)
3703 if (arr[j] != arr[i + j])
3704 return 0;
3706 val = arr[0];
3707 for (j = 1; j < bytes; j++)
3708 val = (val << 8) | arr[j];
3710 val = trunc_int_for_mode (val, int_mode);
3712 /* Currently, we only handle SFmode */
3713 gcc_assert (mode == SFmode);
3714 if (mode == SFmode)
3716 int exp = (val >> 23) - 127;
3717 return val > 0 && (val & 0x007fffff) == 0
3718 && exp >= low && exp <= high;
3720 return FALSE;
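/* Worked example: 8.0f in SFmode has the bit pattern 0x41000000; its
   mantissa bits are zero and (0x41000000 >> 23) - 127 == 3, so
   exp2_immediate_p returns true whenever LOW <= 3 <= HIGH.  */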
3723 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3725 static int
3726 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3728 rtx x = *px;
3729 tree decl;
3731 return (GET_CODE (x) == SYMBOL_REF
3732 && (decl = SYMBOL_REF_DECL (x)) != 0
3733 && TREE_CODE (decl) == VAR_DECL
3734 && TYPE_ADDR_SPACE (strip_array_types (TREE_TYPE (decl))));
3737 /* We accept:
3738 - any 32-bit constant (SImode, SFmode)
3739 - any constant that can be generated with fsmbi (any mode)
3740 - a 64-bit constant where the high and low bits are identical
3741 (DImode, DFmode)
3742 - a 128-bit constant where the four 32-bit words match. */
3744 spu_legitimate_constant_p (rtx x)
3746 if (GET_CODE (x) == HIGH)
3747 x = XEXP (x, 0);
3749 /* Reject any __ea qualified reference. These can't appear in
3750 instructions and must be forced to the constant pool. */
3751 if (for_each_rtx (&x, ea_symbol_ref, 0))
3752 return 0;
3754 if (GET_CODE (x) == CONST_VECTOR)
3756 /* V4SI with all identical symbols is valid. */
3757 if (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3758 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3759 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST)
3760 return (!flag_pic
3761 && GET_MODE (x) == V4SImode
3762 && CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3763 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3764 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3));
3766 if (!const_vector_immediate_p (x))
3767 return 0;
3769 return 1;
3772 /* Valid addresses are:
3773 - symbol_ref, label_ref, const
3774 - reg
3775 - reg + const, where const is 16 byte aligned
3776 - reg + reg, alignment doesn't matter
3777 The alignment matters in the reg+const case because lqd and stqd
3778 ignore the 4 least significant bits of the const.
3780 Addresses are handled in 4 phases.
3781 1) from the beginning of rtl expansion until the split0 pass. Any
3782 address is acceptable.
3783 2) The split0 pass. It is responsible for making every load and store
3784 valid. It calls legitimate_address with FOR_SPLIT set to 1. This
3785 is where non-16-byte aligned loads/stores are split into multiple
3786 instructions to extract or insert just the part we care about.
3787 3) From the split0 pass to the beginning of reload. During this
3788 phase the constant part of an address must be 16 byte aligned, and
3789 we don't allow any loads/stores of less than 4 bytes. We also
3790 allow a mask of -16 to be part of the address as an optimization.
3791 4) From reload until the end. Reload can change the modes of loads
3792 and stores to something smaller than 4 bytes, which we need to allow
3793 now, and it also adjusts the address to match. So in this phase we
3794 allow that special case. Still allow addresses with a mask of -16.
3796 FOR_SPLIT is only set to 1 for phase 2, otherwise it is 0. */
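/* Illustrative cases under the phases above (register operands assumed
   suitably aligned): (plus reg (const_int 32)) stays valid after the
   split0 pass because 32 is a multiple of 16; (plus reg (const_int 2))
   is rejected then for an SImode access, but is accepted during reload
   for an HImode access since (2 & 15) == 4 - 2; (plus reg reg) is
   accepted in every phase because no constant alignment is involved.  */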
3798 spu_legitimate_address (enum machine_mode mode, rtx x, int reg_ok_strict,
3799 int for_split)
3801 int aligned = (split0_completed || for_split)
3802 && !reload_in_progress && !reload_completed;
3803 int const_aligned = split0_completed || for_split;
3804 if (GET_MODE_SIZE (mode) >= 16)
3805 aligned = 0;
3806 else if (aligned && GET_MODE_SIZE (mode) < 4)
3807 return 0;
3808 if (split0_completed
3809 && (GET_CODE (x) == AND
3810 && GET_CODE (XEXP (x, 1)) == CONST_INT
3811 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16
3812 && !CONSTANT_P (XEXP (x, 0))))
3813 x = XEXP (x, 0);
3814 switch (GET_CODE (x))
3816 case LABEL_REF:
3817 return !TARGET_LARGE_MEM && !aligned;
3819 case SYMBOL_REF:
3820 /* Keep __ea references until reload so that spu_expand_mov
3821 can see them in MEMs. */
3822 if (ea_symbol_ref (&x, 0))
3823 return !reload_in_progress && !reload_completed;
3824 return !TARGET_LARGE_MEM && (!aligned || ALIGNED_SYMBOL_REF_P (x));
3826 case CONST:
3827 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3829 rtx sym = XEXP (XEXP (x, 0), 0);
3830 rtx cst = XEXP (XEXP (x, 0), 1);
3832 /* Accept any symbol_ref + constant, assuming it does not
3833 wrap around the local store addressability limit. */
3834 if (ea_symbol_ref (&sym, 0))
3835 return 0;
3837 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3839 /* Check for alignment if required. */
3840 if (!aligned)
3841 return 1;
3842 if ((INTVAL (cst) & 15) == 0 && ALIGNED_SYMBOL_REF_P (sym))
3843 return 1;
3846 return 0;
3848 case CONST_INT:
3849 /* We don't test alignment here. For an absolute address we
3850 assume the user knows what they are doing. */
3851 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3853 case SUBREG:
3854 x = XEXP (x, 0);
3855 if (GET_CODE (x) != REG)
3856 return 0;
3858 case REG:
3859 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict)
3860 && reg_aligned_for_addr (x, 0);
3862 case PLUS:
3863 case LO_SUM:
3865 rtx op0 = XEXP (x, 0);
3866 rtx op1 = XEXP (x, 1);
3867 if (GET_CODE (op0) == SUBREG)
3868 op0 = XEXP (op0, 0);
3869 if (GET_CODE (op1) == SUBREG)
3870 op1 = XEXP (op1, 0);
3871 if (GET_CODE (op0) == REG
3872 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3873 && GET_CODE (op1) == CONST_INT
3874 && INTVAL (op1) >= -0x2000
3875 && INTVAL (op1) <= 0x1fff
3876 && reg_aligned_for_addr (op0, 0)
3877 && (!const_aligned
3878 || (INTVAL (op1) & 15) == 0
3879 || ((reload_in_progress || reload_completed)
3880 && GET_MODE_SIZE (mode) < 4
3881 && (INTVAL (op1) & 15) == 4 - GET_MODE_SIZE (mode))
3882 /* Some passes create a fake register for testing valid
3883 * addresses; be more lenient when we see those. ivopts
3884 * and reload do it. */
3885 || REGNO (op0) == LAST_VIRTUAL_REGISTER + 1
3886 || REGNO (op0) == LAST_VIRTUAL_REGISTER + 2))
3887 return 1;
3888 if (GET_CODE (op0) == REG
3889 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3890 && reg_aligned_for_addr (op0, 0)
3891 && GET_CODE (op1) == REG
3892 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)
3893 && reg_aligned_for_addr (op1, 0))
3894 return 1;
3896 break;
3898 default:
3899 break;
3901 return 0;
3904 /* When the address is reg + const_int, force the const_int into a
3905 register. */
3907 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3908 enum machine_mode mode)
3910 rtx op0, op1;
3911 /* Make sure both operands are registers. */
3912 if (GET_CODE (x) == PLUS)
3914 op0 = XEXP (x, 0);
3915 op1 = XEXP (x, 1);
3916 if (ALIGNED_SYMBOL_REF_P (op0))
3918 op0 = force_reg (Pmode, op0);
3919 mark_reg_pointer (op0, 128);
3921 else if (GET_CODE (op0) != REG)
3922 op0 = force_reg (Pmode, op0);
3923 if (ALIGNED_SYMBOL_REF_P (op1))
3925 op1 = force_reg (Pmode, op1);
3926 mark_reg_pointer (op1, 128);
3928 else if (GET_CODE (op1) != REG)
3929 op1 = force_reg (Pmode, op1);
3930 x = gen_rtx_PLUS (Pmode, op0, op1);
3931 if (spu_legitimate_address (mode, x, 0, 0))
3932 return x;
3934 return NULL_RTX;
3937 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3938 struct attribute_spec.handler. */
3939 static tree
3940 spu_handle_fndecl_attribute (tree * node,
3941 tree name,
3942 tree args ATTRIBUTE_UNUSED,
3943 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3945 if (TREE_CODE (*node) != FUNCTION_DECL)
3947 warning (0, "`%s' attribute only applies to functions",
3948 IDENTIFIER_POINTER (name));
3949 *no_add_attrs = true;
3952 return NULL_TREE;
3955 /* Handle the "vector" attribute. */
3956 static tree
3957 spu_handle_vector_attribute (tree * node, tree name,
3958 tree args ATTRIBUTE_UNUSED,
3959 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3961 tree type = *node, result = NULL_TREE;
3962 enum machine_mode mode;
3963 int unsigned_p;
3965 while (POINTER_TYPE_P (type)
3966 || TREE_CODE (type) == FUNCTION_TYPE
3967 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3968 type = TREE_TYPE (type);
3970 mode = TYPE_MODE (type);
3972 unsigned_p = TYPE_UNSIGNED (type);
3973 switch (mode)
3975 case DImode:
3976 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3977 break;
3978 case SImode:
3979 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3980 break;
3981 case HImode:
3982 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3983 break;
3984 case QImode:
3985 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3986 break;
3987 case SFmode:
3988 result = V4SF_type_node;
3989 break;
3990 case DFmode:
3991 result = V2DF_type_node;
3992 break;
3993 default:
3994 break;
3997 /* Propagate qualifiers attached to the element type
3998 onto the vector type. */
3999 if (result && result != type && TYPE_QUALS (type))
4000 result = build_qualified_type (result, TYPE_QUALS (type));
4002 *no_add_attrs = true; /* No need to hang on to the attribute. */
4004 if (!result)
4005 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
4006 else
4007 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
4009 return NULL_TREE;
4012 /* Return nonzero if FUNC is a naked function. */
4013 static int
4014 spu_naked_function_p (tree func)
4016 tree a;
4018 if (TREE_CODE (func) != FUNCTION_DECL)
4019 abort ();
4021 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
4022 return a != NULL_TREE;
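/* Return the byte offset to apply when eliminating register FROM in
   favor of register TO.  The offset is built from the saved register
   size, the local frame size, the outgoing argument area, and
   STACK_POINTER_OFFSET, depending on which pair of registers is being
   eliminated.  */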
4026 spu_initial_elimination_offset (int from, int to)
4028 int saved_regs_size = spu_saved_regs_size ();
4029 int sp_offset = 0;
4030 if (!current_function_is_leaf || current_function_outgoing_args_size
4031 || get_frame_size () || saved_regs_size)
4032 sp_offset = STACK_POINTER_OFFSET;
4033 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4034 return (sp_offset + current_function_outgoing_args_size);
4035 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4036 return 0;
4037 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4038 return sp_offset + current_function_outgoing_args_size
4039 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
4040 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4041 return get_frame_size () + saved_regs_size + sp_offset;
4042 return 0;
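/* Compute where a value of TYPE is returned.  Scalars come back in
   register FIRST_RETURN_REGNUM; small aggregates are left justified
   across consecutive registers.  For example (UNITS_PER_WORD is 16 on
   SPU), a 20-byte struct that is small enough to come back in registers
   is returned as a PARALLEL of one TImode register holding bytes 0-15
   and an SImode piece of the next register holding bytes 16-19.  */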
4046 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
4048 enum machine_mode mode = TYPE_MODE (type);
4049 int byte_size = ((mode == BLKmode)
4050 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4052 /* Make sure small structs are left justified in a register. */
4053 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4054 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4056 enum machine_mode smode;
4057 rtvec v;
4058 int i;
4059 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4060 int n = byte_size / UNITS_PER_WORD;
4061 v = rtvec_alloc (nregs);
4062 for (i = 0; i < n; i++)
4064 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4065 gen_rtx_REG (TImode,
4066 FIRST_RETURN_REGNUM
4067 + i),
4068 GEN_INT (UNITS_PER_WORD * i));
4069 byte_size -= UNITS_PER_WORD;
4072 if (n < nregs)
4074 if (byte_size < 4)
4075 byte_size = 4;
4076 smode =
4077 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4078 RTVEC_ELT (v, n) =
4079 gen_rtx_EXPR_LIST (VOIDmode,
4080 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4081 GEN_INT (UNITS_PER_WORD * n));
4083 return gen_rtx_PARALLEL (mode, v);
4085 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
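/* Decide how the next argument is passed.  CUM counts the 16-byte
   register slots already used; an argument that still fits entirely in
   registers gets whole slots starting at FIRST_ARG_REGNUM + CUM, with
   small aggregates left justified, and anything else goes on the
   stack.  */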
4089 spu_function_arg (CUMULATIVE_ARGS cum,
4090 enum machine_mode mode,
4091 tree type, int named ATTRIBUTE_UNUSED)
4093 int byte_size;
4095 if (cum >= MAX_REGISTER_ARGS)
4096 return 0;
4098 byte_size = ((mode == BLKmode)
4099 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4101 /* The ABI does not allow a parameter to be passed partly in a
4102 register and partly on the stack. */
4103 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4104 return 0;
4106 /* Make sure small structs are left justified in a register. */
4107 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4108 && byte_size < UNITS_PER_WORD && byte_size > 0)
4110 enum machine_mode smode;
4111 rtx gr_reg;
4112 if (byte_size < 4)
4113 byte_size = 4;
4114 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4115 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4116 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4117 const0_rtx);
4118 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4120 else
4121 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4124 /* Variable sized types are passed by reference. */
4125 static bool
4126 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4127 enum machine_mode mode ATTRIBUTE_UNUSED,
4128 const_tree type, bool named ATTRIBUTE_UNUSED)
4130 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4134 /* Var args. */
4136 /* Create and return the va_list datatype.
4138 On SPU, va_list is an array type equivalent to
4140 typedef struct __va_list_tag
4142 void *__args __attribute__((__aligned(16)));
4143 void *__skip __attribute__((__aligned(16)));
4145 } va_list[1];
4147 where __args points to the arg that will be returned by the next
4148 va_arg(), and __skip points to the previous stack frame such that
4149 when __args == __skip we should advance __args by 32 bytes. */
4150 static tree
4151 spu_build_builtin_va_list (void)
4153 tree f_args, f_skip, record, type_decl;
4154 bool owp;
4156 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4158 type_decl =
4159 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4161 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4162 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4164 DECL_FIELD_CONTEXT (f_args) = record;
4165 DECL_ALIGN (f_args) = 128;
4166 DECL_USER_ALIGN (f_args) = 1;
4168 DECL_FIELD_CONTEXT (f_skip) = record;
4169 DECL_ALIGN (f_skip) = 128;
4170 DECL_USER_ALIGN (f_skip) = 1;
4172 TREE_CHAIN (record) = type_decl;
4173 TYPE_NAME (record) = type_decl;
4174 TYPE_FIELDS (record) = f_args;
4175 TREE_CHAIN (f_args) = f_skip;
4177 /* We know this is being padded and we want it to be. It is an
4178 internal type, so hide the warnings from the user. */
4179 owp = warn_padded;
4180 warn_padded = false;
4182 layout_type (record);
4184 warn_padded = owp;
4186 /* The correct type is an array type of one element. */
4187 return build_array_type (record, build_index_type (size_zero_node));
4190 /* Implement va_start by filling the va_list structure VALIST.
4191 NEXTARG points to the first anonymous stack argument.
4193 The following global variables are used to initialize
4194 the va_list structure:
4196 current_function_args_info:
4197 the CUMULATIVE_ARGS for this function
4199 current_function_arg_offset_rtx:
4200 holds the offset of the first anonymous stack argument
4201 (relative to the virtual arg pointer). */
4203 static void
4204 spu_va_start (tree valist, rtx nextarg)
4206 tree f_args, f_skip;
4207 tree args, skip, t;
4209 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4210 f_skip = TREE_CHAIN (f_args);
4212 valist = build_va_arg_indirect_ref (valist);
4213 args =
4214 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4215 skip =
4216 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4218 /* Find the __args area. */
4219 t = make_tree (TREE_TYPE (args), nextarg);
4220 if (current_function_pretend_args_size > 0)
4221 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4222 size_int (-STACK_POINTER_OFFSET));
4223 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
4224 TREE_SIDE_EFFECTS (t) = 1;
4225 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4227 /* Find the __skip area. */
4228 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4229 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4230 size_int (current_function_pretend_args_size
4231 - STACK_POINTER_OFFSET));
4232 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
4233 TREE_SIDE_EFFECTS (t) = 1;
4234 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 /* Gimplify va_arg by updating the va_list structure
4238 VALIST as required to retrieve an argument of type
4239 TYPE, and returning that argument.
4241 ret = va_arg(VALIST, TYPE);
4243 generates code equivalent to:
4245 paddedsize = (sizeof(TYPE) + 15) & -16;
4246 if (VALIST.__args + paddedsize > VALIST.__skip
4247 && VALIST.__args <= VALIST.__skip)
4248 addr = VALIST.__skip + 32;
4249 else
4250 addr = VALIST.__args;
4251 VALIST.__args = addr + paddedsize;
4252 ret = *(TYPE *)addr;
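/* For example, with a 4-byte int argument paddedsize is 16, so each
   va_arg () step advances __args by one full 16-byte slot, matching the
   one-slot-per-argument register passing convention above.  */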
4254 static tree
4255 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
4256 tree * post_p ATTRIBUTE_UNUSED)
4258 tree f_args, f_skip;
4259 tree args, skip;
4260 HOST_WIDE_INT size, rsize;
4261 tree paddedsize, addr, tmp;
4262 bool pass_by_reference_p;
4264 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4265 f_skip = TREE_CHAIN (f_args);
4267 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4268 args =
4269 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4270 skip =
4271 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4273 addr = create_tmp_var (ptr_type_node, "va_arg");
4274 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4276 /* if an object is dynamically sized, a pointer to it is passed
4277 instead of the object itself. */
4278 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4279 false);
4280 if (pass_by_reference_p)
4281 type = build_pointer_type (type);
4282 size = int_size_in_bytes (type);
4283 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4285 /* build conditional expression to calculate addr. The expression
4286 will be gimplified later. */
4287 paddedsize = size_int (rsize);
4288 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
4289 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4290 build2 (GT_EXPR, boolean_type_node, tmp, skip),
4291 build2 (LE_EXPR, boolean_type_node, args, skip));
4293 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4294 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
4295 size_int (32)), args);
4297 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
4298 gimplify_and_add (tmp, pre_p);
4300 /* update VALIST.__args */
4301 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4302 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
4303 gimplify_and_add (tmp, pre_p);
4305 addr = fold_convert (build_pointer_type (type), addr);
4307 if (pass_by_reference_p)
4308 addr = build_va_arg_indirect_ref (addr);
4310 return build_va_arg_indirect_ref (addr);
4313 /* Save parameter registers starting with the register that corresponds
4314 to the first unnamed parameter. If the first unnamed parameter is
4315 in the stack then save no registers. Set pretend_args_size to the
4316 amount of space needed to save the registers. */
4317 void
4318 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4319 tree type, int *pretend_size, int no_rtl)
4321 if (!no_rtl)
4323 rtx tmp;
4324 int regno;
4325 int offset;
4326 int ncum = *cum;
4328 /* cum currently points to the last named argument; we want to
4329 start at the next argument. */
4330 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4332 offset = -STACK_POINTER_OFFSET;
4333 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4335 tmp = gen_frame_mem (V4SImode,
4336 plus_constant (virtual_incoming_args_rtx,
4337 offset));
4338 emit_move_insn (tmp,
4339 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4340 offset += 16;
4342 *pretend_size = offset + STACK_POINTER_OFFSET;
4346 void
4347 spu_conditional_register_usage (void)
4349 if (flag_pic)
4351 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4352 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4356 /* This is called any time we inspect the alignment of a register for
4357 addresses. */
4358 static int
4359 reg_aligned_for_addr (rtx x, int aligned)
4361 int regno =
4362 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4363 if (!aligned)
4364 return 1;
4365 return REGNO_POINTER_ALIGN (regno) >= 128;
4368 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4369 into its SYMBOL_REF_FLAGS. */
4370 static void
4371 spu_encode_section_info (tree decl, rtx rtl, int first)
4373 default_encode_section_info (decl, rtl, first);
4375 /* If a variable has a forced alignment to < 16 bytes, mark it with
4376 SYMBOL_FLAG_ALIGN1. */
4377 if (TREE_CODE (decl) == VAR_DECL
4378 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4379 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4382 /* Return TRUE if we are certain the mem refers to a complete object
4383 which is both 16-byte aligned and padded to a 16-byte boundary. This
4384 would make it safe to store with a single instruction.
4385 We guarantee the alignment and padding for static objects by aligning
4386 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4387 FIXME: We currently cannot guarantee this for objects on the stack
4388 because assign_parm_setup_stack calls assign_stack_local with the
4389 alignment of the parameter mode and in that case the alignment never
4390 gets adjusted by LOCAL_ALIGNMENT. */
4391 static int
4392 store_with_one_insn_p (rtx mem)
4394 enum machine_mode mode = GET_MODE (mem);
4395 rtx addr = XEXP (mem, 0);
4396 if (mode == BLKmode)
4397 return 0;
4398 if (GET_MODE_SIZE (mode) >= 16)
4399 return 1;
4400 /* Only static objects. */
4401 if (GET_CODE (addr) == SYMBOL_REF)
4403 /* We use the associated declaration to make sure the access is
4404 referring to the whole object.
4405 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4406 whether both are necessary. Will there be cases where one exists
4407 and the other does not? Will there be cases where both exist but
4408 have different types? */
4409 tree decl = MEM_EXPR (mem);
4410 if (decl
4411 && TREE_CODE (decl) == VAR_DECL
4412 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4413 return 1;
4414 decl = SYMBOL_REF_DECL (addr);
4415 if (decl
4416 && TREE_CODE (decl) == VAR_DECL
4417 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4418 return 1;
4420 return 0;
4423 /* Return 1 when the address is not valid for a simple load and store as
4424 required by the '_mov*' patterns. We could make this less strict
4425 for loads, but we prefer MEMs to look the same so they are more
4426 likely to be merged. */
4427 static int
4428 address_needs_split (rtx mem)
4430 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4431 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4432 || !(store_with_one_insn_p (mem)
4433 || mem_is_padded_component_ref (mem))))
4434 return 1;
4436 return 0;
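/* Support for __ea (PPU effective address) accesses through a software
   cache.  EAmode is the mode of an __ea pointer: DImode for the 64-bit
   address model, SImode otherwise.  cache_fetch and cache_fetch_dirty
   hold the libfunc symbols of the cache helpers, and ea_alias_set is
   the alias set used for the rewritten accesses.  */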
4439 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
4441 rtx cache_fetch;
4442 rtx cache_fetch_dirty;
4443 int ea_alias_set = -1;
4445 /* MEM is known to be an __ea qualified memory access. Emit a call to
4446 fetch the PPU memory into local store, and return its address in
4447 local store. */
4449 static void
4450 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4452 if (is_store)
4454 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4455 if (!cache_fetch_dirty)
4456 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4457 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4458 2, ea_addr, EAmode, ndirty, SImode);
4460 else
4462 if (!cache_fetch)
4463 cache_fetch = init_one_libfunc ("__cache_fetch");
4464 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4465 1, ea_addr, EAmode);
4469 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4470 dirty bit marking, inline.
4472 The cache control data structure is an array of
4474 struct __cache_tag_array
4476 unsigned int tag_lo[4];
4477 unsigned int tag_hi[4];
4478 void *data_pointer[4];
4479 int reserved[4];
4480 vector unsigned short dirty_bits[4];
4481 } */
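/* Assuming 4-byte local-store pointers, each entry above is exactly 128
   bytes (16 + 16 + 16 + 16 + 64), matching the 128-byte cache line
   implied by the "& -128" tag mask below, so ea_addr & (tag_array_size
   - 128) indexes the control entry directly while ea_addr & 127 is the
   offset within the line.  */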
4483 static void
4484 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4486 rtx ea_addr_si;
4487 HOST_WIDE_INT v;
4488 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4489 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4490 rtx index_mask = gen_reg_rtx (SImode);
4491 rtx tag_arr = gen_reg_rtx (Pmode);
4492 rtx splat_mask = gen_reg_rtx (TImode);
4493 rtx splat = gen_reg_rtx (V4SImode);
4494 rtx splat_hi = NULL_RTX;
4495 rtx tag_index = gen_reg_rtx (Pmode);
4496 rtx block_off = gen_reg_rtx (SImode);
4497 rtx tag_addr = gen_reg_rtx (Pmode);
4498 rtx tag = gen_reg_rtx (V4SImode);
4499 rtx cache_tag = gen_reg_rtx (V4SImode);
4500 rtx cache_tag_hi = NULL_RTX;
4501 rtx cache_ptrs = gen_reg_rtx (TImode);
4502 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4503 rtx tag_equal = gen_reg_rtx (V4SImode);
4504 rtx tag_equal_hi = NULL_RTX;
4505 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4506 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4507 rtx eq_index = gen_reg_rtx (SImode);
4508 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4510 if (spu_ea_model != 32)
4512 splat_hi = gen_reg_rtx (V4SImode);
4513 cache_tag_hi = gen_reg_rtx (V4SImode);
4514 tag_equal_hi = gen_reg_rtx (V4SImode);
4517 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4518 emit_move_insn (tag_arr, tag_arr_sym);
4519 v = 0x0001020300010203LL;
4520 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4521 ea_addr_si = ea_addr;
4522 if (spu_ea_model != 32)
4523 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4525 /* tag_index = ea_addr & (tag_array_size - 128) */
4526 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4528 /* splat ea_addr to all 4 slots. */
4529 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4530 /* Similarly for high 32 bits of ea_addr. */
4531 if (spu_ea_model != 32)
4532 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4534 /* block_off = ea_addr & 127 */
4535 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4537 /* tag_addr = tag_arr + tag_index */
4538 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4540 /* Read cache tags. */
4541 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4542 if (spu_ea_model != 32)
4543 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4544 plus_constant (tag_addr, 16)));
4546 /* tag = ea_addr & -128 */
4547 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4549 /* Read all four cache data pointers. */
4550 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4551 plus_constant (tag_addr, 32)));
4553 /* Compare tags. */
4554 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4555 if (spu_ea_model != 32)
4557 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4558 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4561 /* At most one of the tags compare equal, so tag_equal has one
4562 32-bit slot set to all 1's, with the other slots all zero.
4563 gbb picks off the low bit of each byte in the 128-bit register,
4564 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4565 we have a hit. */
4566 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4567 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4569 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4570 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4572 /* That allows us to rotate the corresponding cache data pointer into
4573 slot 0 (rotating by eq_index mod 16 bytes). */
4574 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4575 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4577 /* Add block offset to form final data address. */
4578 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4580 /* Check that we did hit. */
4581 hit_label = gen_label_rtx ();
4582 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4583 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4584 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4585 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4586 hit_ref, pc_rtx)));
4587 /* Say that this branch is very likely to happen. */
4588 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4589 REG_NOTES (insn)
4590 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
4592 ea_load_store (mem, is_store, ea_addr, data_addr);
4593 cont_label = gen_label_rtx ();
4594 emit_jump_insn (gen_jump (cont_label));
4595 emit_barrier ();
4597 emit_label (hit_label);
4599 if (is_store)
4601 HOST_WIDE_INT v_hi;
4602 rtx dirty_bits = gen_reg_rtx (TImode);
4603 rtx dirty_off = gen_reg_rtx (SImode);
4604 rtx dirty_128 = gen_reg_rtx (TImode);
4605 rtx neg_block_off = gen_reg_rtx (SImode);
4607 /* Set up mask with one dirty bit per byte of the mem we are
4608 writing, starting from top bit. */
4609 v_hi = v = -1;
4610 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4611 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4613 v_hi = v;
4614 v = 0;
4616 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4618 /* Form index into cache dirty_bits. eq_index is one of
4619 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4620 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4621 offset to each of the four dirty_bits elements. */
4622 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4624 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4626 /* Rotate bit mask to proper bit. */
4627 emit_insn (gen_negsi2 (neg_block_off, block_off));
4628 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4629 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4631 /* Or in the new dirty bits. */
4632 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4634 /* Store. */
4635 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4638 emit_label (cont_label);
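/* Rewrite an __ea MEM so it goes through the software cache: compute
   the corresponding local-store address (by library call when compiling
   for size or without optimization, inline otherwise) and return a copy
   of MEM addressed through it, placed in its own alias set.  */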
4641 static rtx
4642 expand_ea_mem (rtx mem, bool is_store)
4644 rtx ea_addr;
4645 rtx data_addr = gen_reg_rtx (Pmode);
4647 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4648 if (optimize_size || optimize == 0)
4649 ea_load_store (mem, is_store, ea_addr, data_addr);
4650 else
4651 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4653 mem = change_address (mem, VOIDmode, data_addr);
4655 if (ea_alias_set == -1)
4656 ea_alias_set = new_alias_set ();
4657 set_mem_alias_set (mem, 0);
4658 set_mem_alias_set (mem, ea_alias_set);
4659 return mem;
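/* Expand a move in MODE.  Handles integer subregs of a different-sized
   inner mode, forces one operand into a register when neither is one,
   splits immediates during reload, rewrites __ea memory operands, and
   sign-extends large SImode constants; returns nonzero when the move
   was emitted here, zero to let the normal move pattern proceed.  */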
4663 spu_expand_mov (rtx * ops, enum machine_mode mode)
4665 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4666 abort ();
4668 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4670 rtx from = SUBREG_REG (ops[1]);
4671 enum machine_mode imode = GET_MODE (from);
4673 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4674 && GET_MODE_CLASS (imode) == MODE_INT
4675 && subreg_lowpart_p (ops[1]));
4677 if (GET_MODE_SIZE (imode) < 4)
4679 from = gen_rtx_SUBREG (SImode, from, 0);
4680 imode = SImode;
4683 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4685 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4686 emit_insn (GEN_FCN (icode) (ops[0], from));
4688 else
4689 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4690 return 1;
4693 /* At least one of the operands needs to be a register. */
4694 if ((reload_in_progress | reload_completed) == 0
4695 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4697 rtx temp = force_reg (mode, ops[1]);
4698 emit_move_insn (ops[0], temp);
4699 return 1;
4701 if (reload_in_progress || reload_completed)
4703 if (CONSTANT_P (ops[1]))
4704 return spu_split_immediate (ops);
4705 return 0;
4707 else
4709 if (MEM_P (ops[0]))
4711 if (MEM_ADDR_SPACE (ops[0]))
4712 ops[0] = expand_ea_mem (ops[0], true);
4714 else if (MEM_P (ops[1]))
4716 if (MEM_ADDR_SPACE (ops[1]))
4717 ops[1] = expand_ea_mem (ops[1], false);
4719 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4720 extend them. */
4721 if (GET_CODE (ops[1]) == CONST_INT)
4723 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4724 if (val != INTVAL (ops[1]))
4726 emit_move_insn (ops[0], GEN_INT (val));
4727 return 1;
4731 return 0;
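/* Split a load ops[0] = ops[1] whose address cannot be handled by a
   single lqd/lqx: load the containing quadword in TImode, rotate the
   wanted bytes into place (by register or by immediate amount), and
   extract the result with spu_convert.  Returns nonzero when a
   replacement sequence was emitted here, zero if the original load is
   fine as is.  */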
4735 spu_split_load (rtx * ops)
4737 enum machine_mode mode = GET_MODE (ops[0]);
4738 rtx addr, load, rot, mem, p0, p1;
4739 int rot_amt;
4741 addr = XEXP (ops[1], 0);
4742 gcc_assert (GET_CODE (addr) != AND);
4744 if (!address_needs_split (ops[1]))
4746 addr = XEXP (ops[1], 0);
4747 if (spu_legitimate_address (mode, addr, 0, 1))
4748 return 0;
4749 ops[1] = change_address (ops[1], VOIDmode, force_reg (Pmode, addr));
4750 emit_move_insn (ops[0], ops[1]);
4751 return 1;
4754 rot = 0;
4755 rot_amt = 0;
4757 if (MEM_ALIGN (ops[1]) >= 128)
4758 /* Address is already aligned; simply perform a TImode load. */;
4759 else if (GET_CODE (addr) == PLUS)
4761 /* 8 cases:
4762 aligned reg + aligned reg => lqx
4763 aligned reg + unaligned reg => lqx, rotqby
4764 aligned reg + aligned const => lqd
4765 aligned reg + unaligned const => lqd, rotqbyi
4766 unaligned reg + aligned reg => lqx, rotqby
4767 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4768 unaligned reg + aligned const => lqd, rotqby
4769 unaligned reg + unaligned const -> not allowed by legitimate address
4771 p0 = XEXP (addr, 0);
4772 p1 = XEXP (addr, 1);
4773 if (!reg_aligned_for_addr (p0, 1))
4775 if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1))
4777 rot = gen_reg_rtx (SImode);
4778 emit_insn (gen_addsi3 (rot, p0, p1));
4780 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4782 if (INTVAL (p1) > 0
4783 && INTVAL (p1) * BITS_PER_UNIT < REG_ALIGN (p0))
4785 rot = gen_reg_rtx (SImode);
4786 emit_insn (gen_addsi3 (rot, p0, p1));
4787 addr = p0;
4789 else
4791 rtx x = gen_reg_rtx (SImode);
4792 emit_move_insn (x, p1);
4793 if (!spu_arith_operand (p1, SImode))
4794 p1 = x;
4795 rot = gen_reg_rtx (SImode);
4796 emit_insn (gen_addsi3 (rot, p0, p1));
4797 addr = gen_rtx_PLUS (Pmode, p0, x);
4800 else
4801 rot = p0;
4803 else
4805 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4807 rot_amt = INTVAL (p1) & 15;
4808 if (INTVAL (p1) & -16)
4810 p1 = GEN_INT (INTVAL (p1) & -16);
4811 addr = gen_rtx_PLUS (SImode, p0, p1);
4813 else
4814 addr = p0;
4816 else if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1))
4817 rot = p1;
4820 else if (GET_CODE (addr) == REG)
4822 if (!reg_aligned_for_addr (addr, 1))
4823 rot = addr;
4825 else if (GET_CODE (addr) == CONST)
4827 if (GET_CODE (XEXP (addr, 0)) == PLUS
4828 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4829 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4831 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4832 if (rot_amt & -16)
4833 addr = gen_rtx_CONST (Pmode,
4834 gen_rtx_PLUS (Pmode,
4835 XEXP (XEXP (addr, 0), 0),
4836 GEN_INT (rot_amt & -16)));
4837 else
4838 addr = XEXP (XEXP (addr, 0), 0);
4840 else
4842 rot = gen_reg_rtx (Pmode);
4843 emit_move_insn (rot, addr);
4846 else if (GET_CODE (addr) == CONST_INT)
4848 rot_amt = INTVAL (addr);
4849 addr = GEN_INT (rot_amt & -16);
4851 else if (!ALIGNED_SYMBOL_REF_P (addr))
4853 rot = gen_reg_rtx (Pmode);
4854 emit_move_insn (rot, addr);
4857 if (GET_MODE_SIZE (mode) < 4)
4858 rot_amt += GET_MODE_SIZE (mode) - 4;
4860 rot_amt &= 15;
4862 if (rot && rot_amt)
4864 rtx x = gen_reg_rtx (SImode);
4865 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4866 rot = x;
4867 rot_amt = 0;
4870 /* If the source is properly aligned, we don't need to split this insn into
4871 a TImode load plus a _spu_convert. However, we want to perform the split
4872 anyway when optimizing to make the MEMs look the same as those used for
4873 stores so they are more easily merged. When *not* optimizing, that will
4874 not happen anyway, so we prefer to avoid generating the _spu_convert. */
4875 if (!rot && !rot_amt && !optimize)
4876 return 0;
4878 load = gen_reg_rtx (TImode);
4880 mem = change_address (ops[1], TImode, copy_rtx (addr));
4882 emit_insn (gen_movti (load, mem));
4884 if (rot)
4885 emit_insn (gen_rotqby_ti (load, load, rot));
4886 else if (rot_amt)
4887 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4889 emit_insn (gen_spu_convert (ops[0], load));
4890 return 1;
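/* Split a store ops[0] = ops[1] that cannot be done directly.  When the
   MEM is not known to cover a whole aligned quadword, the existing
   quadword is loaded, the new value is merged in via a cpat mask and
   shufb, and the result is stored back; otherwise the value is just
   converted/shifted into position and stored.  Returns nonzero when a
   replacement sequence was emitted here.  */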
4894 spu_split_store (rtx * ops)
4896 enum machine_mode mode = GET_MODE (ops[0]);
4897 rtx reg;
4898 rtx addr, p0, p1, p1_lo, smem;
4899 int aform;
4900 int scalar;
4902 if (!address_needs_split (ops[0]))
4904 addr = XEXP (ops[0], 0);
4905 if (spu_legitimate_address (mode, addr, 0, 1))
4906 return 0;
4907 ops[0] = change_address (ops[0], VOIDmode, force_reg (Pmode, addr));
4908 emit_move_insn (ops[0], ops[1]);
4909 return 1;
4912 addr = XEXP (ops[0], 0);
4913 gcc_assert (GET_CODE (addr) != AND);
4915 if (GET_CODE (addr) == PLUS)
4917 /* 8 cases:
4918 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4919 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4920 aligned reg + aligned const => lqd, c?d, shuf, stqx
4921 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4922 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4923 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4924 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4925 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4927 aform = 0;
4928 p0 = XEXP (addr, 0);
4929 p1 = p1_lo = XEXP (addr, 1);
4930 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4932 p1_lo = GEN_INT (INTVAL (p1) & 15);
4933 if (reg_aligned_for_addr (p0, 1))
4935 p1 = GEN_INT (INTVAL (p1) & -16);
4936 if (p1 == const0_rtx)
4937 addr = p0;
4938 else
4939 addr = gen_rtx_PLUS (SImode, p0, p1);
4941 else
4943 rtx x = gen_reg_rtx (SImode);
4944 emit_move_insn (x, p1);
4945 addr = gen_rtx_PLUS (SImode, p0, x);
4949 else if (GET_CODE (addr) == REG)
4951 aform = 0;
4952 p0 = addr;
4953 p1 = p1_lo = const0_rtx;
4955 else
4957 aform = 1;
4958 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4959 p1 = 0; /* aform doesn't use p1 */
4960 p1_lo = addr;
4961 if (ALIGNED_SYMBOL_REF_P (addr))
4962 p1_lo = const0_rtx;
4963 else if (GET_CODE (addr) == CONST
4964 && GET_CODE (XEXP (addr, 0)) == PLUS
4965 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4966 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4968 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4969 if ((v & -16) != 0)
4970 addr = gen_rtx_CONST (Pmode,
4971 gen_rtx_PLUS (Pmode,
4972 XEXP (XEXP (addr, 0), 0),
4973 GEN_INT (v & -16)));
4974 else
4975 addr = XEXP (XEXP (addr, 0), 0);
4976 p1_lo = GEN_INT (v & 15);
4978 else if (GET_CODE (addr) == CONST_INT)
4980 p1_lo = GEN_INT (INTVAL (addr) & 15);
4981 addr = GEN_INT (INTVAL (addr) & -16);
4983 else
4985 p1_lo = gen_reg_rtx (SImode);
4986 emit_move_insn (p1_lo, addr);
4990 reg = gen_reg_rtx (TImode);
4992 scalar = store_with_one_insn_p (ops[0]);
4993 if (!scalar)
4995 /* We could copy the flags from the ops[0] MEM to lmem here,
4996 but we don't, because we want this load to be optimized away if
4997 possible, and copying the flags would prevent that in certain
4998 cases, e.g. the volatile flag. */
5000 rtx pat = gen_reg_rtx (TImode);
5001 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5002 set_mem_alias_set (lmem, 0);
5003 emit_insn (gen_movti (reg, lmem));
5005 if (!p0 || reg_aligned_for_addr (p0, 1))
5006 p0 = stack_pointer_rtx;
5007 if (!p1_lo)
5008 p1_lo = const0_rtx;
5010 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5011 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5013 else
5015 if (GET_CODE (ops[1]) == REG)
5016 emit_insn (gen_spu_convert (reg, ops[1]));
5017 else if (GET_CODE (ops[1]) == SUBREG)
5018 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5019 else
5020 abort ();
5023 if (GET_MODE_SIZE (mode) < 4 && scalar)
5024 emit_insn (gen_ashlti3
5025 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5027 smem = change_address (ops[0], TImode, copy_rtx (addr));
5028 /* We can't use the previous alias set because the memory has changed
5029 size and can potentially overlap objects of other types. */
5030 set_mem_alias_set (smem, 0);
5032 emit_insn (gen_movti (smem, reg));
5033 return 1;
5036 /* Return TRUE if X is MEM which is a struct member reference
5037 and the member can safely be loaded and stored with a single
5038 instruction because it is padded. */
5039 static int
5040 mem_is_padded_component_ref (rtx x)
5042 tree t = MEM_EXPR (x);
5043 tree r;
5044 if (!t || TREE_CODE (t) != COMPONENT_REF)
5045 return 0;
5046 t = TREE_OPERAND (t, 1);
5047 if (!t || TREE_CODE (t) != FIELD_DECL
5048 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5049 return 0;
5050 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5051 r = DECL_FIELD_CONTEXT (t);
5052 if (!r || TREE_CODE (r) != RECORD_TYPE)
5053 return 0;
5054 /* Make sure they are the same mode */
5055 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5056 return 0;
5057 /* If there are no following fields, then the field alignment ensures
5058 the structure is padded to that alignment, which means this field is
5059 padded too. */
5060 if (TREE_CHAIN (t) == 0)
5061 return 1;
5062 /* If the following field is also aligned then this field will be
5063 padded. */
5064 t = TREE_CHAIN (t);
5065 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5066 return 1;
5067 return 0;
5070 /* Parse the -mfixed-range= option string. */
5071 static void
5072 fix_range (const char *const_str)
5074 int i, first, last;
5075 char *str, *dash, *comma;
5077 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5078 REG2 are either register names or register numbers. The effect
5079 of this option is to mark the registers in the range from REG1 to
5080 REG2 as ``fixed'' so they won't be used by the compiler. */
5082 i = strlen (const_str);
5083 str = (char *) alloca (i + 1);
5084 memcpy (str, const_str, i + 1);
5086 while (1)
5088 dash = strchr (str, '-');
5089 if (!dash)
5091 warning (0, "value of -mfixed-range must have form REG1-REG2");
5092 return;
5094 *dash = '\0';
5095 comma = strchr (dash + 1, ',');
5096 if (comma)
5097 *comma = '\0';
5099 first = decode_reg_name (str);
5100 if (first < 0)
5102 warning (0, "unknown register name: %s", str);
5103 return;
5106 last = decode_reg_name (dash + 1);
5107 if (last < 0)
5109 warning (0, "unknown register name: %s", dash + 1);
5110 return;
5113 *dash = '-';
5115 if (first > last)
5117 warning (0, "%s-%s is an empty range", str, dash + 1);
5118 return;
5121 for (i = first; i <= last; ++i)
5122 fixed_regs[i] = call_used_regs[i] = 1;
5124 if (!comma)
5125 break;
5127 *comma = ',';
5128 str = comma + 1;
5132 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5133 can be generated using the fsmbi instruction. */
5135 fsmbi_const_p (rtx x)
5137 if (CONSTANT_P (x))
5139 /* We can always choose TImode for CONST_INT because the high bits
5140 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5141 enum immediate_class c = classify_immediate (x, TImode);
5142 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5144 return 0;
5147 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5148 can be generated using the cbd, chd, cwd or cdd instruction. */
5150 cpat_const_p (rtx x, enum machine_mode mode)
5152 if (CONSTANT_P (x))
5154 enum immediate_class c = classify_immediate (x, mode);
5155 return c == IC_CPAT;
5157 return 0;
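/* Build the TImode shuffle-control constant that a cbd/chd/cwd/cdd
   instruction would produce for the given size (ops[3]) and offset
   (ops[2], plus a constant or quadword-aligned register base ops[1]):
   every control byte selects the old quadword except the bytes at the
   target offset, which select the scalar value from its preferred
   slot.  */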
5161 gen_cpat_const (rtx * ops)
5163 unsigned char dst[16];
5164 int i, offset, shift, isize;
5165 if (GET_CODE (ops[3]) != CONST_INT
5166 || GET_CODE (ops[2]) != CONST_INT
5167 || (GET_CODE (ops[1]) != CONST_INT
5168 && GET_CODE (ops[1]) != REG))
5169 return 0;
5170 if (GET_CODE (ops[1]) == REG
5171 && (!REG_POINTER (ops[1])
5172 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5173 return 0;
5175 for (i = 0; i < 16; i++)
5176 dst[i] = i + 16;
5177 isize = INTVAL (ops[3]);
5178 if (isize == 1)
5179 shift = 3;
5180 else if (isize == 2)
5181 shift = 2;
5182 else
5183 shift = 0;
5184 offset = (INTVAL (ops[2]) +
5185 (GET_CODE (ops[1]) ==
5186 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5187 for (i = 0; i < isize; i++)
5188 dst[offset + i] = i + shift;
5189 return array_to_constant (TImode, dst);
5192 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5193 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5194 than 16 bytes, the value is repeated across the rest of the array. */
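/* For example, the SImode constant 0x01020304 becomes
   { 1, 2, 3, 4, 1, 2, 3, 4, ... } and a QImode 0x12 fills all 16 bytes
   with 0x12, since sub-quadword values are splatted.  */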
5195 void
5196 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5198 HOST_WIDE_INT val;
5199 int i, j, first;
5201 memset (arr, 0, 16);
5202 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5203 if (GET_CODE (x) == CONST_INT
5204 || (GET_CODE (x) == CONST_DOUBLE
5205 && (mode == SFmode || mode == DFmode)))
5207 gcc_assert (mode != VOIDmode && mode != BLKmode);
5209 if (GET_CODE (x) == CONST_DOUBLE)
5210 val = const_double_to_hwint (x);
5211 else
5212 val = INTVAL (x);
5213 first = GET_MODE_SIZE (mode) - 1;
5214 for (i = first; i >= 0; i--)
5216 arr[i] = val & 0xff;
5217 val >>= 8;
5219 /* Splat the constant across the whole array. */
5220 for (j = 0, i = first + 1; i < 16; i++)
5222 arr[i] = arr[j];
5223 j = (j == first) ? 0 : j + 1;
5226 else if (GET_CODE (x) == CONST_DOUBLE)
5228 val = CONST_DOUBLE_LOW (x);
5229 for (i = 15; i >= 8; i--)
5231 arr[i] = val & 0xff;
5232 val >>= 8;
5234 val = CONST_DOUBLE_HIGH (x);
5235 for (i = 7; i >= 0; i--)
5237 arr[i] = val & 0xff;
5238 val >>= 8;
5241 else if (GET_CODE (x) == CONST_VECTOR)
5243 int units;
5244 rtx elt;
5245 mode = GET_MODE_INNER (mode);
5246 units = CONST_VECTOR_NUNITS (x);
5247 for (i = 0; i < units; i++)
5249 elt = CONST_VECTOR_ELT (x, i);
5250 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5252 if (GET_CODE (elt) == CONST_DOUBLE)
5253 val = const_double_to_hwint (elt);
5254 else
5255 val = INTVAL (elt);
5256 first = GET_MODE_SIZE (mode) - 1;
5257 if (first + i * GET_MODE_SIZE (mode) > 16)
5258 abort ();
5259 for (j = first; j >= 0; j--)
5261 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5262 val >>= 8;
5267 else
5268 gcc_unreachable();
5271 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5272 smaller than 16 bytes, use the bytes that would represent that value
5273 in a register, e.g., for QImode return the value of arr[3]. */
5275 array_to_constant (enum machine_mode mode, unsigned char arr[16])
5277 enum machine_mode inner_mode;
5278 rtvec v;
5279 int units, size, i, j, k;
5280 HOST_WIDE_INT val;
5282 if (GET_MODE_CLASS (mode) == MODE_INT
5283 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5285 j = GET_MODE_SIZE (mode);
5286 i = j < 4 ? 4 - j : 0;
5287 for (val = 0; i < j; i++)
5288 val = (val << 8) | arr[i];
5289 val = trunc_int_for_mode (val, mode);
5290 return GEN_INT (val);
5293 if (mode == TImode)
5295 HOST_WIDE_INT high;
5296 for (i = high = 0; i < 8; i++)
5297 high = (high << 8) | arr[i];
5298 for (i = 8, val = 0; i < 16; i++)
5299 val = (val << 8) | arr[i];
5300 return immed_double_const (val, high, TImode);
5302 if (mode == SFmode)
5304 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5305 val = trunc_int_for_mode (val, SImode);
5306 return hwint_to_const_double (SFmode, val);
5308 if (mode == DFmode)
5310 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5311 val <<= 32;
5312 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
5313 return hwint_to_const_double (DFmode, val);
5316 if (!VECTOR_MODE_P (mode))
5317 abort ();
5319 units = GET_MODE_NUNITS (mode);
5320 size = GET_MODE_UNIT_SIZE (mode);
5321 inner_mode = GET_MODE_INNER (mode);
5322 v = rtvec_alloc (units);
5324 for (k = i = 0; i < units; ++i)
5326 val = 0;
5327 for (j = 0; j < size; j++, k++)
5328 val = (val << 8) | arr[k];
5330 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5331 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5332 else
5333 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5335 if (k > 16)
5336 abort ();
5338 return gen_rtx_CONST_VECTOR (mode, v);
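/* Diagnose a run-time relocation against X.  Depending on
   TARGET_WARN_RELOC / TARGET_ERROR_RELOC this is a warning or an error,
   and we try to name the offending decl and attach a plausible source
   location to it.  */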
5341 static void
5342 reloc_diagnostic (rtx x)
5344 tree loc_decl, decl = 0;
5345 const char *msg;
5346 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5347 return;
5349 if (GET_CODE (x) == SYMBOL_REF)
5350 decl = SYMBOL_REF_DECL (x);
5351 else if (GET_CODE (x) == CONST
5352 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5353 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5355 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5356 if (decl && !DECL_P (decl))
5357 decl = 0;
5359 /* We use last_assemble_variable_decl to get line information. It's
5360 not always going to be right and might not even be close, but will
5361 be right for the more common cases. */
5362 if (!last_assemble_variable_decl || in_section == ctors_section)
5363 loc_decl = decl;
5364 else
5365 loc_decl = last_assemble_variable_decl;
5367 /* The decl could be a string constant. */
5368 if (decl && DECL_P (decl))
5369 msg = "%Jcreating run-time relocation for %qD";
5370 else
5371 msg = "creating run-time relocation";
5373 if (TARGET_WARN_RELOC)
5374 warning (0, msg, loc_decl, decl);
5375 else
5376 error (msg, loc_decl, decl);
5379 /* Hook into assemble_integer so we can generate an error for run-time
5380 relocations. The SPU ABI disallows them. */
5381 static bool
5382 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5384 /* By default run-time relocations aren't supported, but we allow them
5385 in case users support them in their own run-time loader, and we provide
5386 a warning for those users who don't. */
5387 if ((GET_CODE (x) == SYMBOL_REF)
5388 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5389 reloc_diagnostic (x);
5391 return default_assemble_integer (x, size, aligned_p);
5394 static void
5395 spu_asm_globalize_label (FILE * file, const char *name)
5397 fputs ("\t.global\t", file);
5398 assemble_name (file, name);
5399 fputs ("\n", file);
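/* Compute RTX costs.  The default is 2 insns; float modes, large
   constants, multiplies, divides and shifts get their own estimates
   below, and multiword integer operations are scaled quadratically by
   the number of SImode words they touch.  */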
5402 static bool
5403 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
5405 enum machine_mode mode = GET_MODE (x);
5406 int cost = COSTS_N_INSNS (2);
5408 /* Folding to a CONST_VECTOR will use extra space but there might
5409 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5410 only if it allows us to fold away multiple insns. Changing the cost
5411 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5412 because this cost will only be compared against a single insn.
5413 if (code == CONST_VECTOR)
5414 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5417 /* Use defaults for float operations. Not accurate but good enough. */
5418 if (mode == DFmode)
5420 *total = COSTS_N_INSNS (13);
5421 return true;
5423 if (mode == SFmode)
5425 *total = COSTS_N_INSNS (6);
5426 return true;
5428 switch (code)
5430 case CONST_INT:
5431 if (satisfies_constraint_K (x))
5432 *total = 0;
5433 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5434 *total = COSTS_N_INSNS (1);
5435 else
5436 *total = COSTS_N_INSNS (3);
5437 return true;
5439 case CONST:
5440 *total = COSTS_N_INSNS (3);
5441 return true;
5443 case LABEL_REF:
5444 case SYMBOL_REF:
5445 *total = COSTS_N_INSNS (0);
5446 return true;
5448 case CONST_DOUBLE:
5449 *total = COSTS_N_INSNS (5);
5450 return true;
5452 case FLOAT_EXTEND:
5453 case FLOAT_TRUNCATE:
5454 case FLOAT:
5455 case UNSIGNED_FLOAT:
5456 case FIX:
5457 case UNSIGNED_FIX:
5458 *total = COSTS_N_INSNS (7);
5459 return true;
5461 case PLUS:
5462 if (mode == TImode)
5464 *total = COSTS_N_INSNS (9);
5465 return true;
5467 break;
5469 case MULT:
5470 cost =
5471 GET_CODE (XEXP (x, 0)) ==
5472 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5473 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5475 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5477 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5478 cost = COSTS_N_INSNS (14);
5479 if ((val & 0xffff) == 0)
5480 cost = COSTS_N_INSNS (9);
5481 else if (val > 0 && val < 0x10000)
5482 cost = COSTS_N_INSNS (11);
5485 *total = cost;
5486 return true;
5487 case DIV:
5488 case UDIV:
5489 case MOD:
5490 case UMOD:
5491 *total = COSTS_N_INSNS (20);
5492 return true;
5493 case ROTATE:
5494 case ROTATERT:
5495 case ASHIFT:
5496 case ASHIFTRT:
5497 case LSHIFTRT:
5498 *total = COSTS_N_INSNS (4);
5499 return true;
5500 case UNSPEC:
5501 if (XINT (x, 1) == UNSPEC_CONVERT)
5502 *total = COSTS_N_INSNS (0);
5503 else
5504 *total = COSTS_N_INSNS (4);
5505 return true;
5507 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5508 if (GET_MODE_CLASS (mode) == MODE_INT
5509 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5510 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5511 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5512 *total = cost;
5513 return true;
5516 static enum machine_mode
5517 spu_unwind_word_mode (void)
5519 return SImode;
5522 /* Decide whether we can make a sibling call to a function. DECL is the
5523 declaration of the function being targeted by the call and EXP is the
5524 CALL_EXPR representing the call. */
5525 static bool
5526 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5528 return decl && !TARGET_LARGE_MEM;
5531 /* We need to correctly update the back chain pointer and the Available
5532 Stack Size (which is in the second slot of the sp register). */
5533 void
5534 spu_allocate_stack (rtx op0, rtx op1)
5536 HOST_WIDE_INT v;
5537 rtx chain = gen_reg_rtx (V4SImode);
5538 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5539 rtx sp = gen_reg_rtx (V4SImode);
5540 rtx splatted = gen_reg_rtx (V4SImode);
5541 rtx pat = gen_reg_rtx (TImode);
5543 /* copy the back chain so we can save it back again. */
5544 emit_move_insn (chain, stack_bot);
5546 op1 = force_reg (SImode, op1);
5548 v = 0x1020300010203ll;
5549 emit_move_insn (pat, immed_double_const (v, v, TImode));
5550 emit_insn (gen_shufb (splatted, op1, op1, pat));
5552 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5553 emit_insn (gen_subv4si3 (sp, sp, splatted));
5555 if (flag_stack_check)
5557 rtx avail = gen_reg_rtx(SImode);
5558 rtx result = gen_reg_rtx(SImode);
5559 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5560 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5561 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5564 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5566 emit_move_insn (stack_bot, chain);
5568 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5571 void
5572 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5574 static unsigned char arr[16] =
5575 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5576 rtx temp = gen_reg_rtx (SImode);
5577 rtx temp2 = gen_reg_rtx (SImode);
5578 rtx temp3 = gen_reg_rtx (V4SImode);
5579 rtx temp4 = gen_reg_rtx (V4SImode);
5580 rtx pat = gen_reg_rtx (TImode);
5581 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5583 /* Restore the backchain from the first word, sp from the second. */
5584 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5585 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5587 emit_move_insn (pat, array_to_constant (TImode, arr));
5589 /* Compute Available Stack Size for sp */
5590 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5591 emit_insn (gen_shufb (temp3, temp, temp, pat));
5593 /* Compute Available Stack Size for back chain */
5594 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5595 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5596 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5598 emit_insn (gen_addv4si3 (sp, sp, temp3));
5599 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5602 static void
5603 spu_init_libfuncs (void)
5605 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5606 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5607 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5608 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5609 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5610 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5611 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5612 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5613 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5614 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5615 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5617 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5618 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5620 set_optab_libfunc (smul_optab, TImode, "__multi3");
5621 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5622 set_optab_libfunc (smod_optab, TImode, "__modti3");
5623 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5624 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5625 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5628 /* Make a subreg, stripping any existing subreg. We could possibly just
5629 call simplify_subreg, but in this case we know what we want. */
5631 spu_gen_subreg (enum machine_mode mode, rtx x)
5633 if (GET_CODE (x) == SUBREG)
5634 x = SUBREG_REG (x);
5635 if (GET_MODE (x) == mode)
5636 return x;
5637 return gen_rtx_SUBREG (mode, x, 0);
5640 static bool
5641 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5643 return (TYPE_MODE (type) == BLKmode
5644 && ((type) == 0
5645 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5646 || int_size_in_bytes (type) >
5647 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5650 /* Create the built-in types and functions */
5652 enum spu_function_code
5654 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5655 #include "spu-builtins.def"
5656 #undef DEF_BUILTIN
5657 NUM_SPU_BUILTINS
5660 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5662 struct spu_builtin_description spu_builtins[] = {
5663 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5664 {fcode, icode, name, type, params, NULL_TREE},
5665 #include "spu-builtins.def"
5666 #undef DEF_BUILTIN
5669 static void
5670 spu_init_builtins (void)
5672 struct spu_builtin_description *d;
5673 unsigned int i;
5675 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5676 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5677 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5678 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5679 V4SF_type_node = build_vector_type (float_type_node, 4);
5680 V2DF_type_node = build_vector_type (double_type_node, 2);
5682 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5683 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5684 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5685 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5687 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5689 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5690 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5691 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5692 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5693 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5694 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5695 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5696 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5697 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5698 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5699 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5700 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5702 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5703 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5704 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5705 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5706 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5707 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5708 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5709 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5711 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5712 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5714 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5716 spu_builtin_types[SPU_BTI_PTR] =
5717 build_pointer_type (build_qualified_type
5718 (void_type_node,
5719 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5721 /* For each builtin we build a new prototype. The tree code will make
5722 sure nodes are shared. */
5723 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5725 tree p;
5726 char name[64]; /* add_builtin_function will make a copy. */
5727 int parm;
5729 if (d->name == 0)
5730 continue;
5732 /* find last parm */
5733 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5737 p = void_list_node;
5738 while (parm > 1)
5739 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5741 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5743 sprintf (name, "__builtin_%s", d->name);
5744 d->fndecl =
5745 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5746 NULL, NULL_TREE);
5747 if (d->fcode == SPU_MASK_FOR_LOAD)
5748 TREE_READONLY (d->fndecl) = 1;
5752 void
5753 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5755 static unsigned char arr[16] =
5756 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5758 rtx temp = gen_reg_rtx (Pmode);
5759 rtx temp2 = gen_reg_rtx (V4SImode);
5760 rtx temp3 = gen_reg_rtx (V4SImode);
5761 rtx pat = gen_reg_rtx (TImode);
5762 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5764 emit_move_insn (pat, array_to_constant (TImode, arr));
5766 /* Restore the sp. */
5767 emit_move_insn (temp, op1);
5768 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5770 /* Compute available stack size for sp. */
5771 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5772 emit_insn (gen_shufb (temp3, temp, temp, pat));
5774 emit_insn (gen_addv4si3 (sp, sp, temp3));
5775 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5779 spu_safe_dma (HOST_WIDE_INT channel)
5781 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
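/* Expand spu_splats: replicate scalar ops[1] into every element of the
   vector ops[0].  Constants are expanded directly to a CONST_VECTOR;
   otherwise a mode-specific shufb pattern broadcasts the value from its
   preferred slot.  */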
5784 void
5785 spu_builtin_splats (rtx ops[])
5787 enum machine_mode mode = GET_MODE (ops[0]);
5788 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5790 unsigned char arr[16];
5791 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5792 emit_move_insn (ops[0], array_to_constant (mode, arr));
5794 else
5796 rtx reg = gen_reg_rtx (TImode);
5797 rtx shuf;
5798 if (GET_CODE (ops[1]) != REG
5799 && GET_CODE (ops[1]) != SUBREG)
5800 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5801 switch (mode)
5803 case V2DImode:
5804 case V2DFmode:
5805 shuf =
5806 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5807 TImode);
5808 break;
5809 case V4SImode:
5810 case V4SFmode:
5811 shuf =
5812 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5813 TImode);
5814 break;
5815 case V8HImode:
5816 shuf =
5817 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5818 TImode);
5819 break;
5820 case V16QImode:
5821 shuf =
5822 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5823 TImode);
5824 break;
5825 default:
5826 abort ();
5828 emit_move_insn (reg, shuf);
5829 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
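/* Expand spu_extract: ops[0] = element ops[2] of vector ops[1].  A
   constant index maps directly onto the vec_extract patterns; a
   variable index is turned into a byte rotate (rotqby) that brings the
   element into the preferred slot before converting.  */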
5833 void
5834 spu_builtin_extract (rtx ops[])
5836 enum machine_mode mode;
5837 rtx rot, from, tmp;
5839 mode = GET_MODE (ops[1]);
5841 if (GET_CODE (ops[2]) == CONST_INT)
5843 switch (mode)
5845 case V16QImode:
5846 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5847 break;
5848 case V8HImode:
5849 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5850 break;
5851 case V4SFmode:
5852 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5853 break;
5854 case V4SImode:
5855 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5856 break;
5857 case V2DImode:
5858 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5859 break;
5860 case V2DFmode:
5861 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5862 break;
5863 default:
5864 abort ();
5866 return;
5869 from = spu_gen_subreg (TImode, ops[1]);
5870 rot = gen_reg_rtx (TImode);
5871 tmp = gen_reg_rtx (SImode);
5873 switch (mode)
5875 case V16QImode:
5876 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5877 break;
5878 case V8HImode:
5879 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5880 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5881 break;
5882 case V4SFmode:
5883 case V4SImode:
5884 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5885 break;
5886 case V2DImode:
5887 case V2DFmode:
5888 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5889 break;
5890 default:
5891 abort ();
5893 emit_insn (gen_rotqby_ti (rot, from, tmp));
5895 emit_insn (gen_spu_convert (ops[0], rot));
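/* Expand spu_insert: replace one element of a vector with a scalar.
   A cpat mask built for the element's byte offset and size drives a
   shufb that merges the scalar into the vector.  */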
5898 void
5899 spu_builtin_insert (rtx ops[])
5901 enum machine_mode mode = GET_MODE (ops[0]);
5902 enum machine_mode imode = GET_MODE_INNER (mode);
5903 rtx mask = gen_reg_rtx (TImode);
5904 rtx offset;
5906 if (GET_CODE (ops[3]) == CONST_INT)
5907 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5908 else
5910 offset = gen_reg_rtx (SImode);
5911 emit_insn (gen_mulsi3
5912 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5914 emit_insn (gen_cpat
5915 (mask, stack_pointer_rtx, offset,
5916 GEN_INT (GET_MODE_SIZE (imode))));
5917 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
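/* Expand spu_promote: place scalar ops[1] into element ops[2] of the
   result vector ops[0]; the other element values are unspecified.  The
   value is rotated from the preferred slot to the requested element
   position with rotqby.  */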
5920 void
5921 spu_builtin_promote (rtx ops[])
5923 enum machine_mode mode, imode;
5924 rtx rot, from, offset;
5925 HOST_WIDE_INT pos;
5927 mode = GET_MODE (ops[0]);
5928 imode = GET_MODE_INNER (mode);
5930 from = gen_reg_rtx (TImode);
5931 rot = spu_gen_subreg (TImode, ops[0]);
5933 emit_insn (gen_spu_convert (from, ops[1]));
5935 if (GET_CODE (ops[2]) == CONST_INT)
5937 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5938 if (GET_MODE_SIZE (imode) < 4)
5939 pos += 4 - GET_MODE_SIZE (imode);
5940 offset = GEN_INT (pos & 15);
5942 else
5944 offset = gen_reg_rtx (SImode);
5945 switch (mode)
5947 case V16QImode:
5948 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5949 break;
5950 case V8HImode:
5951 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5952 emit_insn (gen_addsi3 (offset, offset, offset));
5953 break;
5954 case V4SFmode:
5955 case V4SImode:
5956 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5957 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5958 break;
5959 case V2DImode:
5960 case V2DFmode:
5961 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5962 break;
5963 default:
5964 abort ();
5967 emit_insn (gen_rotqby_ti (rot, from, offset));
5970 void
5971 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5973 rtx shuf = gen_reg_rtx (V4SImode);
5974 rtx insn = gen_reg_rtx (V4SImode);
5975 rtx shufc;
5976 rtx insnc;
5977 rtx mem;
5979 fnaddr = force_reg (SImode, fnaddr);
5980 cxt = force_reg (SImode, cxt);
5982 if (TARGET_LARGE_MEM)
5984 rtx rotl = gen_reg_rtx (V4SImode);
5985 rtx mask = gen_reg_rtx (V4SImode);
5986 rtx bi = gen_reg_rtx (SImode);
5987 unsigned char shufa[16] = {
5988 2, 3, 0, 1, 18, 19, 16, 17,
5989 0, 1, 2, 3, 16, 17, 18, 19
5991 unsigned char insna[16] = {
5992 0x41, 0, 0, 79,
5993 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5994 0x60, 0x80, 0, 79,
5995 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5998 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5999 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6001 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6002 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6003 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6004 emit_insn (gen_selb (insn, insnc, rotl, mask));
6006 mem = memory_address (Pmode, tramp);
6007 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
6009 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6010 mem = memory_address (Pmode, plus_constant (tramp, 16));
6011 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
6013 else
6015 rtx scxt = gen_reg_rtx (SImode);
6016 rtx sfnaddr = gen_reg_rtx (SImode);
6017 unsigned char insna[16] = {
6018 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6019 0x30, 0, 0, 0,
6020 0, 0, 0, 0,
6021 0, 0, 0, 0
6024 shufc = gen_reg_rtx (TImode);
6025 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6027 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6028 fits 18 bits and the last 4 are zeros. This will be true if
6029 the stack pointer is initialized to 0x3fff0 at program start,
6030 otherwise the ila instruction will be garbage. */
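/* A sketch of the intended result, assuming the bytes in insna above
   decode as the usual SPU RI18/RI16 instruction forms (the authoritative
   encoding is insna itself, not this note):
       ila   $STATIC_CHAIN_REGNUM, cxt    (cxt << 7 fills the I18 field)
       bra   fnaddr                       (fnaddr << 5 fills the I16 field)  */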
6032 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6033 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6034 emit_insn (gen_cpat
6035 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6036 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6037 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6039 mem = memory_address (Pmode, tramp);
6040 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
6043 emit_insn (gen_sync ());
6046 void
6047 spu_expand_sign_extend (rtx ops[])
6049 unsigned char arr[16];
6050 rtx pat = gen_reg_rtx (TImode);
6051 rtx sign, c;
6052 int i, last;
6053 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6054 if (GET_MODE (ops[1]) == QImode)
6056 sign = gen_reg_rtx (HImode);
6057 emit_insn (gen_extendqihi2 (sign, ops[1]));
6058 for (i = 0; i < 16; i++)
6059 arr[i] = 0x12;
6060 arr[last] = 0x13;
6062 else
6064 for (i = 0; i < 16; i++)
6065 arr[i] = 0x10;
6066 switch (GET_MODE (ops[1]))
6068 case HImode:
6069 sign = gen_reg_rtx (SImode);
6070 emit_insn (gen_extendhisi2 (sign, ops[1]));
6071 arr[last] = 0x03;
6072 arr[last - 1] = 0x02;
6073 break;
6074 case SImode:
6075 sign = gen_reg_rtx (SImode);
6076 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6077 for (i = 0; i < 4; i++)
6078 arr[last - i] = 3 - i;
6079 break;
6080 case DImode:
6081 sign = gen_reg_rtx (SImode);
6082 c = gen_reg_rtx (SImode);
6083 emit_insn (gen_spu_convert (c, ops[1]));
6084 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6085 for (i = 0; i < 8; i++)
6086 arr[last - i] = 7 - i;
6087 break;
6088 default:
6089 abort ();
6092 emit_move_insn (pat, array_to_constant (TImode, arr));
6093 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6096 /* Expand vector initialization.  If there are any constant parts,
6097 load the constant parts first, then load any non-constant parts. */
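/* For example (an illustration, not taken from the code below):
   initializing a V4SImode vector with (1, x, 2, y), where x and y are
   registers, first emits the constant vector (1, 1, 2, 1) -- empty slots
   are filled with the first constant so a splat may be used -- and then
   uses spu_builtin_insert to put x in slot 1 and y in slot 3.  */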
6098 void
6099 spu_expand_vector_init (rtx target, rtx vals)
6101 enum machine_mode mode = GET_MODE (target);
6102 int n_elts = GET_MODE_NUNITS (mode);
6103 int n_var = 0;
6104 bool all_same = true;
6105 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6106 int i;
6108 first = XVECEXP (vals, 0, 0);
6109 for (i = 0; i < n_elts; ++i)
6111 x = XVECEXP (vals, 0, i);
6112 if (!(CONST_INT_P (x)
6113 || GET_CODE (x) == CONST_DOUBLE
6114 || GET_CODE (x) == CONST_FIXED))
6115 ++n_var;
6116 else
6118 if (first_constant == NULL_RTX)
6119 first_constant = x;
6121 if (i > 0 && !rtx_equal_p (x, first))
6122 all_same = false;
6125 /* If all elements are the same, use splats to replicate that element. */
6126 if (all_same)
6128 if (!CONSTANT_P (first)
6129 && !register_operand (first, GET_MODE (x)))
6130 first = force_reg (GET_MODE (first), first);
6131 emit_insn (gen_spu_splats (target, first));
6132 return;
6135 /* load constant parts */
6136 if (n_var != n_elts)
6138 if (n_var == 0)
6140 emit_move_insn (target,
6141 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6143 else
6145 rtx constant_parts_rtx = copy_rtx (vals);
6147 gcc_assert (first_constant != NULL_RTX);
6148 /* Fill empty slots with the first constant; this increases
6149 our chance of using splats in the recursive call below. */
6150 for (i = 0; i < n_elts; ++i)
6152 x = XVECEXP (constant_parts_rtx, 0, i);
6153 if (!(CONST_INT_P (x)
6154 || GET_CODE (x) == CONST_DOUBLE
6155 || GET_CODE (x) == CONST_FIXED))
6156 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6159 spu_expand_vector_init (target, constant_parts_rtx);
6163 /* load variable parts */
6164 if (n_var != 0)
6166 rtx insert_operands[4];
6168 insert_operands[0] = target;
6169 insert_operands[2] = target;
6170 for (i = 0; i < n_elts; ++i)
6172 x = XVECEXP (vals, 0, i);
6173 if (!(CONST_INT_P (x)
6174 || GET_CODE (x) == CONST_DOUBLE
6175 || GET_CODE (x) == CONST_FIXED))
6177 if (!register_operand (x, GET_MODE (x)))
6178 x = force_reg (GET_MODE (x), x);
6179 insert_operands[1] = x;
6180 insert_operands[3] = GEN_INT (i);
6181 spu_builtin_insert (insert_operands);
6187 /* Return the insn code of the vector compare instruction for the given
6188 CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available. */
6190 static int
6191 get_vec_cmp_insn (enum rtx_code code,
6192 enum machine_mode dest_mode,
6193 enum machine_mode op_mode)
6196 switch (code)
6198 case EQ:
6199 if (dest_mode == V16QImode && op_mode == V16QImode)
6200 return CODE_FOR_ceq_v16qi;
6201 if (dest_mode == V8HImode && op_mode == V8HImode)
6202 return CODE_FOR_ceq_v8hi;
6203 if (dest_mode == V4SImode && op_mode == V4SImode)
6204 return CODE_FOR_ceq_v4si;
6205 if (dest_mode == V4SImode && op_mode == V4SFmode)
6206 return CODE_FOR_ceq_v4sf;
6207 if (dest_mode == V2DImode && op_mode == V2DFmode)
6208 return CODE_FOR_ceq_v2df;
6209 break;
6210 case GT:
6211 if (dest_mode == V16QImode && op_mode == V16QImode)
6212 return CODE_FOR_cgt_v16qi;
6213 if (dest_mode == V8HImode && op_mode == V8HImode)
6214 return CODE_FOR_cgt_v8hi;
6215 if (dest_mode == V4SImode && op_mode == V4SImode)
6216 return CODE_FOR_cgt_v4si;
6217 if (dest_mode == V4SImode && op_mode == V4SFmode)
6218 return CODE_FOR_cgt_v4sf;
6219 if (dest_mode == V2DImode && op_mode == V2DFmode)
6220 return CODE_FOR_cgt_v2df;
6221 break;
6222 case GTU:
6223 if (dest_mode == V16QImode && op_mode == V16QImode)
6224 return CODE_FOR_clgt_v16qi;
6225 if (dest_mode == V8HImode && op_mode == V8HImode)
6226 return CODE_FOR_clgt_v8hi;
6227 if (dest_mode == V4SImode && op_mode == V4SImode)
6228 return CODE_FOR_clgt_v4si;
6229 break;
6230 default:
6231 break;
6233 return -1;
6236 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6237 DMODE is expected destination mode. This is a recursive function. */
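/* When no direct instruction exists for the comparison (see
   get_vec_cmp_insn above), the code below synthesizes it: LT/LTU become
   GT/GTU with the operands swapped, NE becomes ~(A == B), and
   GE/GEU/LE/LEU become (GT/GTU/LT/LTU) | (A == B).  */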
6239 static rtx
6240 spu_emit_vector_compare (enum rtx_code rcode,
6241 rtx op0, rtx op1,
6242 enum machine_mode dmode)
6244 int vec_cmp_insn;
6245 rtx mask;
6246 enum machine_mode dest_mode;
6247 enum machine_mode op_mode = GET_MODE (op1);
6249 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6251 /* Single precision floating point vector compare instructions use a
6252 V4SImode destination; double precision ones use V2DImode.  The result
6253 is moved to the requested destination mode later. */
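/* For example, when the caller asks for a V4SFmode result, the compare
   itself is emitted with a V4SImode mask, and the dmode != dest_mode
   paths below convert the mask afterwards.  */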
6254 if (dmode == V4SFmode)
6255 dest_mode = V4SImode;
6256 else if (dmode == V2DFmode)
6257 dest_mode = V2DImode;
6258 else
6259 dest_mode = dmode;
6261 mask = gen_reg_rtx (dest_mode);
6262 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6264 if (vec_cmp_insn == -1)
6266 bool swap_operands = false;
6267 bool try_again = false;
6268 switch (rcode)
6270 case LT:
6271 rcode = GT;
6272 swap_operands = true;
6273 try_again = true;
6274 break;
6275 case LTU:
6276 rcode = GTU;
6277 swap_operands = true;
6278 try_again = true;
6279 break;
6280 case NE:
6281 /* Treat A != B as ~(A==B). */
6283 enum insn_code nor_code;
6284 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6285 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
6286 gcc_assert (nor_code != CODE_FOR_nothing);
6287 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6288 if (dmode != dest_mode)
6290 rtx temp = gen_reg_rtx (dest_mode);
6291 convert_move (temp, mask, 0);
6292 return temp;
6294 return mask;
6296 break;
6297 case GE:
6298 case GEU:
6299 case LE:
6300 case LEU:
6301 /* Try GT/GTU/LT/LTU OR EQ */
6303 rtx c_rtx, eq_rtx;
6304 enum insn_code ior_code;
6305 enum rtx_code new_code;
6307 switch (rcode)
6309 case GE: new_code = GT; break;
6310 case GEU: new_code = GTU; break;
6311 case LE: new_code = LT; break;
6312 case LEU: new_code = LTU; break;
6313 default:
6314 gcc_unreachable ();
6317 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6318 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6320 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
6321 gcc_assert (ior_code != CODE_FOR_nothing);
6322 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6323 if (dmode != dest_mode)
6325 rtx temp = gen_reg_rtx (dest_mode);
6326 convert_move (temp, mask, 0);
6327 return temp;
6329 return mask;
6331 break;
6332 default:
6333 gcc_unreachable ();
6336 /* You only get two chances. */
6337 if (try_again)
6338 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6340 gcc_assert (vec_cmp_insn != -1);
6342 if (swap_operands)
6344 rtx tmp;
6345 tmp = op0;
6346 op0 = op1;
6347 op1 = tmp;
6351 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6352 if (dmode != dest_mode)
6354 rtx temp = gen_reg_rtx (dest_mode);
6355 convert_move (temp, mask, 0);
6356 return temp;
6358 return mask;
6362 /* Emit vector conditional expression.
6363 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6364 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
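/* In effect this computes DEST = (CC_OP0 <COND> CC_OP1) ? OP1 : OP2:
   the comparison produces a per-element bit mask, and selb picks bits
   from OP1 where the mask is set and from OP2 elsewhere.  */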
6366 int
6367 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6368 rtx cond, rtx cc_op0, rtx cc_op1)
6370 enum machine_mode dest_mode = GET_MODE (dest);
6371 enum rtx_code rcode = GET_CODE (cond);
6372 rtx mask;
6374 /* Get the vector mask for the given relational operations. */
6375 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6377 emit_insn(gen_selb (dest, op2, op1, mask));
6379 return 1;
6382 static rtx
6383 spu_force_reg (enum machine_mode mode, rtx op)
6385 rtx x, r;
6386 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6388 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6389 || GET_MODE (op) == BLKmode)
6390 return force_reg (mode, convert_to_mode (mode, op, 0));
6391 abort ();
6394 r = force_reg (GET_MODE (op), op);
6395 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6397 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6398 if (x)
6399 return x;
6402 x = gen_reg_rtx (mode);
6403 emit_insn (gen_spu_convert (x, r));
6404 return x;
6407 static void
6408 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6410 HOST_WIDE_INT v = 0;
6411 int lsbits;
6412 /* Check the range of immediate operands. */
6413 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6415 int range = p - SPU_BTI_7;
6417 if (!CONSTANT_P (op))
6418 error ("%s expects an integer literal in the range [%d, %d].",
6419 d->name,
6420 spu_builtin_range[range].low, spu_builtin_range[range].high);
6422 if (GET_CODE (op) == CONST
6423 && (GET_CODE (XEXP (op, 0)) == PLUS
6424 || GET_CODE (XEXP (op, 0)) == MINUS))
6426 v = INTVAL (XEXP (XEXP (op, 0), 1));
6427 op = XEXP (XEXP (op, 0), 0);
6429 else if (GET_CODE (op) == CONST_INT)
6430 v = INTVAL (op);
6431 else if (GET_CODE (op) == CONST_VECTOR
6432 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6433 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6435 /* The default for v is 0 which is valid in every range. */
6436 if (v < spu_builtin_range[range].low
6437 || v > spu_builtin_range[range].high)
6438 error ("%s expects an integer literal in the range [%d, %d]. ("
6439 HOST_WIDE_INT_PRINT_DEC ")",
6440 d->name,
6441 spu_builtin_range[range].low, spu_builtin_range[range].high,
6442 v);
6444 switch (p)
6446 case SPU_BTI_S10_4:
6447 lsbits = 4;
6448 break;
6449 case SPU_BTI_U16_2:
6450 /* This is only used in lqa and stqa.  Even though the insns
6451 encode 16 bits of the address (all but the 2 least
6452 significant), only 14 bits are used because the address is
6453 masked to be 16-byte aligned. */
6454 lsbits = 4;
6455 break;
6456 case SPU_BTI_S16_2:
6457 /* This is used for lqr and stqr. */
6458 lsbits = 2;
6459 break;
6460 default:
6461 lsbits = 0;
6464 if (GET_CODE (op) == LABEL_REF
6465 || (GET_CODE (op) == SYMBOL_REF
6466 && SYMBOL_REF_FUNCTION_P (op))
6467 || (v & ((1 << lsbits) - 1)) != 0)
6468 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6469 d->name);
6474 static void
6475 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6476 rtx target, rtx ops[])
6478 enum insn_code icode = d->icode;
6479 int i = 0, a;
6481 /* Expand the arguments into rtl. */
6483 if (d->parm[0] != SPU_BTI_VOID)
6484 ops[i++] = target;
6486 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6488 tree arg = CALL_EXPR_ARG (exp, a);
6489 if (arg == 0)
6490 abort ();
6491 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6495 static rtx
6496 spu_expand_builtin_1 (struct spu_builtin_description *d,
6497 tree exp, rtx target)
6499 rtx pat;
6500 rtx ops[8];
6501 enum insn_code icode = d->icode;
6502 enum machine_mode mode, tmode;
6503 int i, p;
6504 tree return_type;
6506 /* Set up ops[] with values from arglist. */
6507 expand_builtin_args (d, exp, target, ops);
6509 /* Handle the target operand which must be operand 0. */
6510 i = 0;
6511 if (d->parm[0] != SPU_BTI_VOID)
6514 /* We prefer the mode specified for the match_operand; otherwise
6515 use the mode from the builtin function prototype. */
6516 tmode = insn_data[d->icode].operand[0].mode;
6517 if (tmode == VOIDmode)
6518 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6520 /* Try to use target, because not using it can lead to extra copies,
6521 and when all of the registers are in use those extra copies lead
6522 to extra spills. */
6523 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6524 ops[0] = target;
6525 else
6526 target = ops[0] = gen_reg_rtx (tmode);
6528 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6529 abort ();
6531 i++;
6534 if (d->fcode == SPU_MASK_FOR_LOAD)
6536 enum machine_mode mode = insn_data[icode].operand[1].mode;
6537 tree arg;
6538 rtx addr, op, pat;
6540 /* get addr */
6541 arg = CALL_EXPR_ARG (exp, 0);
6542 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6543 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6544 addr = memory_address (mode, op);
6546 /* negate addr */
6547 op = gen_reg_rtx (GET_MODE (addr));
6548 emit_insn (gen_rtx_SET (VOIDmode, op,
6549 gen_rtx_NEG (GET_MODE (addr), addr)));
6550 op = gen_rtx_MEM (mode, op);
6552 pat = GEN_FCN (icode) (target, op);
6553 if (!pat)
6554 return 0;
6555 emit_insn (pat);
6556 return target;
6559 /* Ignore align_hint, but still expand its args in case they have
6560 side effects. */
6561 if (icode == CODE_FOR_spu_align_hint)
6562 return 0;
6564 /* Handle the rest of the operands. */
6565 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
6567 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6568 mode = insn_data[d->icode].operand[i].mode;
6569 else
6570 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6572 /* mode can be VOIDmode here for labels */
6574 /* For specific intrinsics with an immediate operand, e.g.,
6575 si_ai(), we sometimes need to convert the scalar argument to a
6576 vector argument by splatting the scalar. */
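/* For instance (an illustration, not tied to a particular builtin), a
   scalar constant 10 passed where a V4SImode operand is expected becomes
   the constant vector (10, 10, 10, 10) via spu_const, and a scalar
   register is broadcast with gen_spu_splats below.  */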
6577 if (VECTOR_MODE_P (mode)
6578 && (GET_CODE (ops[i]) == CONST_INT
6579 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6580 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6582 if (GET_CODE (ops[i]) == CONST_INT)
6583 ops[i] = spu_const (mode, INTVAL (ops[i]));
6584 else
6586 rtx reg = gen_reg_rtx (mode);
6587 enum machine_mode imode = GET_MODE_INNER (mode);
6588 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6589 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6590 if (imode != GET_MODE (ops[i]))
6591 ops[i] = convert_to_mode (imode, ops[i],
6592 TYPE_UNSIGNED (spu_builtin_types
6593 [d->parm[i]]));
6594 emit_insn (gen_spu_splats (reg, ops[i]));
6595 ops[i] = reg;
6599 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6601 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6602 ops[i] = spu_force_reg (mode, ops[i]);
6605 switch (insn_data[icode].n_operands)
6607 case 0:
6608 pat = GEN_FCN (icode) (0);
6609 break;
6610 case 1:
6611 pat = GEN_FCN (icode) (ops[0]);
6612 break;
6613 case 2:
6614 pat = GEN_FCN (icode) (ops[0], ops[1]);
6615 break;
6616 case 3:
6617 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6618 break;
6619 case 4:
6620 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6621 break;
6622 case 5:
6623 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6624 break;
6625 case 6:
6626 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6627 break;
6628 default:
6629 abort ();
6632 if (!pat)
6633 abort ();
6635 if (d->type == B_CALL || d->type == B_BISLED)
6636 emit_call_insn (pat);
6637 else if (d->type == B_JUMP)
6639 emit_jump_insn (pat);
6640 emit_barrier ();
6642 else
6643 emit_insn (pat);
6645 return_type = spu_builtin_types[d->parm[0]];
6646 if (d->parm[0] != SPU_BTI_VOID
6647 && GET_MODE (target) != TYPE_MODE (return_type))
6649 /* target is the return value.  It should always have the mode of
6650 the builtin function prototype. */
6651 target = spu_force_reg (TYPE_MODE (return_type), target);
6654 return target;
6657 rtx
6658 spu_expand_builtin (tree exp,
6659 rtx target,
6660 rtx subtarget ATTRIBUTE_UNUSED,
6661 enum machine_mode mode ATTRIBUTE_UNUSED,
6662 int ignore ATTRIBUTE_UNUSED)
6664 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6665 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6666 struct spu_builtin_description *d;
6668 if (fcode < NUM_SPU_BUILTINS)
6670 d = &spu_builtins[fcode];
6672 return spu_expand_builtin_1 (d, exp, target);
6674 abort ();
6677 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6678 static tree
6679 spu_builtin_mul_widen_even (tree type)
6681 switch (TYPE_MODE (type))
6683 case V8HImode:
6684 if (TYPE_UNSIGNED (type))
6685 return spu_builtins[SPU_MULE_0].fndecl;
6686 else
6687 return spu_builtins[SPU_MULE_1].fndecl;
6688 break;
6689 default:
6690 return NULL_TREE;
6694 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6695 static tree
6696 spu_builtin_mul_widen_odd (tree type)
6698 switch (TYPE_MODE (type))
6700 case V8HImode:
6701 if (TYPE_UNSIGNED (type))
6702 return spu_builtins[SPU_MULO_1].fndecl;
6703 else
6704 return spu_builtins[SPU_MULO_0].fndecl;
6705 break;
6706 default:
6707 return NULL_TREE;
6711 /* Implement targetm.vectorize.builtin_mask_for_load. */
6712 static tree
6713 spu_builtin_mask_for_load (void)
6715 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6716 gcc_assert (d);
6717 return d->fndecl;
6720 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6721 static int
6722 spu_builtin_vectorization_cost (bool runtime_test)
6724 /* If the branch of the runtime test is taken, i.e. the vectorized
6725 version is skipped, this incurs a misprediction cost (because the
6726 vectorized version is expected to be the fall-through).  So we subtract
6727 the latency of a mispredicted branch from the costs that are incurred
6728 when the vectorized version is executed. */
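/* The value 19 below presumably approximates the SPU branch miss
   penalty in cycles; returning its negation credits the runtime test
   with that saving.  */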
6729 if (runtime_test)
6730 return -19;
6731 else
6732 return 0;
6735 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6736 after applying N iterations.  This routine does not determine
6737 how many iterations are required to reach the desired alignment. */
6739 static bool
6740 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6742 if (is_packed)
6743 return false;
6745 /* All other types are naturally aligned. */
6746 return true;
6749 static enum machine_mode
6750 spu_ea_pointer_mode (int addrspace)
6752 switch (addrspace)
6754 case 0:
6755 return ptr_mode;
6756 case 1:
6757 return (spu_ea_model == 64 ? DImode : ptr_mode);
6758 default:
6759 gcc_unreachable ();
6763 static bool
6764 spu_valid_pointer_mode (enum machine_mode mode)
6766 return (mode == ptr_mode || mode == Pmode || mode == spu_ea_pointer_mode (1));
6769 /* Count the total number of instructions in each pipe and return the
6770 maximum, which is used as the Minimum Iteration Interval (MII)
6771 in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
6772 Instructions with a pipe value of -2 can go in pipe0 or pipe1. */
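/* A worked example of the formula used below: with t = {4, 0, 3, 1}
   (four dual-pipe insns, three pipe0 insns, one pipe1 insn) the result
   is MAX ((4 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (4, 3) = 4.  */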
6773 static int
6774 spu_sms_res_mii (struct ddg *g)
6776 int i;
6777 unsigned t[4] = {0, 0, 0, 0};
6779 for (i = 0; i < g->num_nodes; i++)
6781 rtx insn = g->nodes[i].insn;
6782 int p = get_pipe (insn) + 2;
6784 assert (p >= 0);
6785 assert (p < 4);
6787 t[p]++;
6788 if (dump_file && INSN_P (insn))
6789 fprintf (dump_file, "i%d %s %d %d\n",
6790 INSN_UID (insn),
6791 insn_data[INSN_CODE(insn)].name,
6792 p, t[p]);
6794 if (dump_file)
6795 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6797 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6801 void
6802 spu_init_expanders (void)
6804 if (cfun)
6806 rtx r0, r1;
6807 /* The hard frame pointer register is only 128-bit aligned when
6808 frame_pointer_needed is true.  We don't know that until we're
6809 expanding the prologue. */
6810 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6812 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6813 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want to
6814 handle those cases specially, so we reserve those two registers
6815 here by generating them. */
6816 r0 = gen_reg_rtx (SImode);
6817 r1 = gen_reg_rtx (SImode);
6818 mark_reg_pointer (r0, 128);
6819 mark_reg_pointer (r1, 128);
6820 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6821 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6825 static enum machine_mode
6826 spu_libgcc_cmp_return_mode (void)
6829 /* For SPU, word_mode is TImode, so it is better to use SImode
6830 for compare returns. */
6831 return SImode;
6834 static enum machine_mode
6835 spu_libgcc_shift_count_mode (void)
6837 /* For SPU, word_mode is TImode, so it is better to use SImode
6838 for shift counts. */
6839 return SImode;
6842 /* An early place to adjust some flags after GCC has finished processing
6843 them. */
6844 static void
6845 asm_file_start (void)
6847 /* Variable tracking should be run after all optimizations which
6848 change the order of insns.  It also needs a valid CFG. */
6849 spu_flag_var_tracking = flag_var_tracking;
6850 flag_var_tracking = 0;
6852 default_file_start ();
6855 /* Implement targetm.section_type_flags. */
6856 static unsigned int
6857 spu_section_type_flags (tree decl, const char *name, int reloc)
6859 /* .toe needs to have type @nobits. */
6860 if (strcmp (name, ".toe") == 0)
6861 return SECTION_BSS;
6862 if (strcmp (name, "._ea") == 0)
6863 return SECTION_WRITE | SECTION_DEBUG;
6864 return default_section_type_flags (decl, name, reloc);
6867 const char *
6868 spu_addr_space_name (int addrspace)
6870 gcc_assert (addrspace > 0 && addrspace <= 1);
6871 return (spu_address_spaces [addrspace].name);
6874 static
6875 rtx (* spu_addr_space_conversion_rtl (int from, int to)) (rtx, rtx)
6877 gcc_assert ((from == 0 && to == 1) || (from == 1 && to == 0));
6879 if (to == 0)
6880 return spu_address_spaces[1].to_generic_insn;
6881 else if (to == 1)
6882 return spu_address_spaces[1].from_generic_insn;
6884 return 0;
6887 static
6888 bool spu_valid_addr_space (const_tree value)
6890 int i;
6891 if (!value)
6892 return false;
6894 for (i = 0; spu_address_spaces[i].name; i++)
6895 if (strcmp (IDENTIFIER_POINTER (value), spu_address_spaces[i].name) == 0)
6896 return true;
6897 return false;
6900 static
6901 unsigned char spu_addr_space_number (tree ident)
6903 int i;
6904 if (!ident)
6905 return 0;
6907 for (i = 0; spu_address_spaces[i].name; i++)
6908 if (strcmp (IDENTIFIER_POINTER (ident), spu_address_spaces[i].name) == 0)
6909 return i;
6911 gcc_unreachable ();
6914 /* Generate a constant or register which contains 2^SCALE. We assume
6915 the result is valid for MODE. Currently, MODE must be V4SFmode and
6916 SCALE must be SImode. */
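/* A worked example for the constant path below: for SCALE = 3,
   exp = 127 + 3 = 130, and the per-word bytes become
   { exp >> 1, (exp << 7) & 0xff, 0, 0 } = { 0x41, 0x00, 0x00, 0x00 },
   i.e. the single-precision pattern 0x41000000 = 8.0f = 2^3.  */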
6917 rtx
6918 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6920 gcc_assert (mode == V4SFmode);
6921 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6922 if (GET_CODE (scale) != CONST_INT)
6924 /* unsigned int exp = (127 + scale) << 23;
6925 __vector float m = (__vector float) spu_splats (exp); */
6926 rtx reg = force_reg (SImode, scale);
6927 rtx exp = gen_reg_rtx (SImode);
6928 rtx mul = gen_reg_rtx (mode);
6929 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6930 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6931 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6932 return mul;
6934 else
6936 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6937 unsigned char arr[16];
6938 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6939 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6940 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6941 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6942 return array_to_constant (mode, arr);
6946 #include "gt-spu.h"