/* The Blackfin code generation auxiliary output file.
   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Analog Devices.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "tree.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "input.h"
#include "target.h"
#include "target-def.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "optabs.h"
#include "ggc.h"
#include "integrate.h"
#include "cgraph.h"
#include "langhooks.h"
#include "bfin-protos.h"
#include "tm-preds.h"
#include "tm-constrs.h"
#include "gt-bfin.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "timevar.h"
#include "df.h"
#include "sel-sched.h"
#include "opts.h"

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
struct GTY(()) machine_function
{
  /* Set if we are notified by the doloop pass that a hardware loop
     was created.  */
  int has_hardware_loops;

  /* Set if we create a memcpy pattern that uses loop registers.  */
  int has_loopreg_clobber;
};

/* RTX for the condition code flag register and the RETS register.  */
extern GTY(()) rtx bfin_cc_rtx;
extern GTY(()) rtx bfin_rets_rtx;
rtx bfin_cc_rtx, bfin_rets_rtx;

int max_arg_registers = 0;

/* Arrays used when emitting register names.  */
const char *short_reg_names[] = SHORT_REGISTER_NAMES;
const char *high_reg_names[] = HIGH_REGISTER_NAMES;
const char *dregs_pair_names[] = DREGS_PAIR_NAMES;
const char *byte_reg_names[] = BYTE_REGISTER_NAMES;

static int arg_regs[] = FUNCTION_ARG_REGISTERS;
static int ret_regs[] = FUNCTION_RETURN_REGISTERS;

int splitting_for_sched, splitting_loops;

static void
bfin_globalize_label (FILE *stream, const char *name)
{
  fputs (".global ", stream);
  assemble_name (stream, name);
  fputc (';', stream);
  fputc ('\n', stream);
}

static void
output_file_start (void)
{
  FILE *file = asm_out_file;
  int i;

  fprintf (file, ".file \"%s\";\n", input_filename);

  for (i = 0; arg_regs[i] >= 0; i++)
    ;
  max_arg_registers = i;	/* how many arg regs are used  */
}

/* Examine machine-dependent attributes of function type FUNTYPE and return
   its kind.  See the definition of E_FUNKIND.  */

static e_funkind
funkind (const_tree funtype)
{
  tree attrs = TYPE_ATTRIBUTES (funtype);
  if (lookup_attribute ("interrupt_handler", attrs))
    return INTERRUPT_HANDLER;
  else if (lookup_attribute ("exception_handler", attrs))
    return EXCPT_HANDLER;
  else if (lookup_attribute ("nmi_handler", attrs))
    return NMI_HANDLER;
  else
    return SUBROUTINE;
}
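/* Illustrative sketch (not code from this port): these kinds come from
   function attributes in user source, e.g.

     void my_isr (void) __attribute__ ((interrupt_handler));

   For `my_isr', funkind (TREE_TYPE (decl)) returns INTERRUPT_HANDLER,
   while an undecorated function yields SUBROUTINE.  The name `my_isr'
   is hypothetical.  */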
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec;
      rtx tmp;

      if (TARGET_ID_SHARED_LIBRARY)
	unspec = UNSPEC_MOVE_PIC;
      else if (GET_CODE (addr) == SYMBOL_REF
	       && SYMBOL_REF_FUNCTION_P (addr))
	unspec = UNSPEC_FUNCDESC_GOT17M4;
      else
	unspec = UNSPEC_MOVE_FDPIC;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
      new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

      emit_move_insn (reg, new_rtx);
      if (picreg == pic_offset_table_rtx)
	crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (XEXP (addr, 0) == picreg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
				     base == reg ? NULL_RTX : reg,
				     picreg);

      if (GET_CODE (addr) == CONST_INT)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  addr = force_reg (Pmode, addr);
	}

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
	{
	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
	  addr = XEXP (addr, 1);
	}

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}
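/* A hedged sketch of the RTL this produces for a symbol under
   -mid-shared-library: the address becomes a GOT load of the form

     (set (reg) (mem (plus (reg picreg)
                           (unspec [(symbol_ref "sym")] UNSPEC_MOVE_PIC))))

   which print_operand later renders with an "@GOT" suffix.  "sym" is a
   placeholder symbol name.  */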
/* Stack frame layout. */

/* For a given REGNO, determine whether it must be saved in the function
   prologue.  IS_INTHANDLER specifies whether we're generating a normal
   prologue or an interrupt/exception one.  */
static bool
must_save_p (bool is_inthandler, unsigned regno)
{
  if (D_REGNO_P (regno))
    {
      bool is_eh_return_reg = false;
      if (crtl->calls_eh_return)
	{
	  unsigned j;
	  for (j = 0; ; j++)
	    {
	      unsigned test = EH_RETURN_DATA_REGNO (j);
	      if (test == INVALID_REGNUM)
		break;
	      if (test == regno)
		is_eh_return_reg = true;
	    }
	}

      return (is_eh_return_reg
	      || (df_regs_ever_live_p (regno)
		  && !fixed_regs[regno]
		  && (is_inthandler || !call_used_regs[regno])));
    }
  else if (P_REGNO_P (regno))
    {
      return ((df_regs_ever_live_p (regno)
	       && !fixed_regs[regno]
	       && (is_inthandler || !call_used_regs[regno]))
	      || (is_inthandler
		  && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
		  && regno == REG_P5)
	      || (!TARGET_FDPIC
		  && regno == PIC_OFFSET_TABLE_REGNUM
		  && (crtl->uses_pic_offset_table
		      || (TARGET_ID_SHARED_LIBRARY
			  && !current_function_is_leaf))));
    }
  else
    return ((is_inthandler || !call_used_regs[regno])
	    && (df_regs_ever_live_p (regno)
		|| (!leaf_function_p () && call_used_regs[regno])));
}

/* Compute the number of DREGS to save with a push_multiple operation.
   This could include registers that aren't modified in the function,
   since push_multiple only takes a range of registers.
   If IS_INTHANDLER, then everything that is live must be saved, even
   if normally call-clobbered.
   If CONSECUTIVE, return the number of registers we can save in one
   instruction with a push/pop multiple instruction.  */

static int
n_dregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_R7 + 1; i-- != REG_R0;)
    {
      if (must_save_p (is_inthandler, i))
	count++;
      else if (consecutive)
	return count;
    }
  return count;
}

/* Like n_dregs_to_save, but compute the number of PREGS to save.  */

static int
n_pregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_P5 + 1; i-- != REG_P0;)
    if (must_save_p (is_inthandler, i))
      count++;
    else if (consecutive)
      return count;
  return count;
}
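/* Worked example (hypothetical register usage): if must_save_p holds for
   R7, R6 and R4 but not R5, then n_dregs_to_save (..., false) is 3, while
   n_dregs_to_save (..., true) is 2 -- the downward scan from R7 stops at
   the first unsaved register, since push multiple needs a contiguous
   range ending at R7.  */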
/* Determine if we are going to save the frame pointer in the prologue.  */

static bool
must_save_fp_p (void)
{
  return df_regs_ever_live_p (REG_FP);
}

/* Determine if we are going to save the RETS register.  */
static bool
must_save_rets_p (void)
{
  return df_regs_ever_live_p (REG_RETS);
}

static bool
stack_frame_needed_p (void)
{
  /* EH return puts a new return address into the frame using an
     address relative to the frame pointer.  */
  if (crtl->calls_eh_return)
    return true;
  return frame_pointer_needed;
}

/* Emit code to save registers in the prologue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
{
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int dregno, pregno;
  int total_consec = ndregs_consec + npregs_consec;
  int i, d_to_save;

  if (saveall || is_inthandler)
    {
      rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));

      RTX_FRAME_RELATED_P (insn) = 1;
      for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
	if (! current_function_is_leaf
	    || cfun->machine->has_hardware_loops
	    || cfun->machine->has_loopreg_clobber
	    || (ENABLE_WA_05000257
		&& (dregno == REG_LC0 || dregno == REG_LC1)))
	  {
	    insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (total_consec != 0)
    {
      rtx insn;
      rtx val = GEN_INT (-total_consec * 4);
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2));

      XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val),
					    UNSPEC_PUSH_MULTIPLE);
      XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg,
							gen_rtx_PLUS (Pmode,
								      spreg,
								      val));
      RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1;
      d_to_save = ndregs_consec;
      dregno = REG_R7 + 1 - ndregs_consec;
      pregno = REG_P5 + 1 - npregs_consec;
      for (i = 0; i < total_consec; i++)
	{
	  rtx memref = gen_rtx_MEM (word_mode,
				    gen_rtx_PLUS (Pmode, spreg,
						  GEN_INT (- i * 4 - 4)));
	  rtx subpat;
	  if (d_to_save > 0)
	    {
	      subpat = gen_rtx_SET (VOIDmode, memref,
				    gen_rtx_REG (word_mode, dregno++));
	      d_to_save--;
	    }
	  else
	    {
	      subpat = gen_rtx_SET (VOIDmode, memref,
				    gen_rtx_REG (word_mode, pregno++));
	    }
	  XVECEXP (pat, 0, i + 1) = subpat;
	  RTX_FRAME_RELATED_P (subpat) = 1;
	}
      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  for (dregno = REG_R0; ndregs != ndregs_consec; dregno++)
    {
      if (must_save_p (is_inthandler, dregno))
	{
	  rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  ndregs--;
	}
    }
  for (pregno = REG_P0; npregs != npregs_consec; pregno++)
    {
      if (must_save_p (is_inthandler, pregno))
	{
	  rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  npregs--;
	}
    }
  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (saveall
	|| (is_inthandler
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      {
	rtx insn;
	if (i == REG_A0 || i == REG_A1)
	  insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1),
				 gen_rtx_REG (PDImode, i));
	else
	  insn = emit_move_insn (predec, gen_rtx_REG (SImode, i));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to restore registers in the epilogue.  SAVEALL is nonzero if we
   must restore all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
{
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int total_consec = ndregs_consec + npregs_consec;
  int i, regno;
  rtx insn;

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  for (i = REG_CC - 1; i > REG_P7; i--)
    if (saveall
	|| (is_inthandler
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      {
	if (i == REG_A0 || i == REG_A1)
	  {
	    rtx mem = gen_rtx_MEM (PDImode, postinc1);
	    MEM_VOLATILE_P (mem) = 1;
	    emit_move_insn (gen_rtx_REG (PDImode, i), mem);
	  }
	else
	  emit_move_insn (gen_rtx_REG (SImode, i), postinc);
      }

  regno = REG_P5 - npregs_consec;
  for (; npregs != npregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
	{
	  emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
	  npregs--;
	}
    }
  regno = REG_R7 - ndregs_consec;
  for (; ndregs != ndregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
	{
	  emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
	  ndregs--;
	}
    }

  if (total_consec != 0)
    {
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1));
      XVECEXP (pat, 0, 0)
	= gen_rtx_SET (VOIDmode, spreg,
		       gen_rtx_PLUS (Pmode, spreg,
				     GEN_INT (total_consec * 4)));

      if (npregs_consec > 0)
	regno = REG_P5 + 1;
      else
	regno = REG_R7 + 1;

      for (i = 0; i < total_consec; i++)
	{
	  rtx addr = (i > 0
		      ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4))
		      : spreg);
	  rtx memref = gen_rtx_MEM (word_mode, addr);

	  regno--;
	  XVECEXP (pat, 0, i + 1)
	    = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref);

	  if (npregs_consec > 0)
	    {
	      if (--npregs_consec == 0)
		regno = REG_R7 + 1;
	    }
	}

      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (saveall || is_inthandler)
    {
      for (regno = REG_LB1; regno >= REG_LT0; regno--)
	if (! current_function_is_leaf
	    || cfun->machine->has_hardware_loops
	    || cfun->machine->has_loopreg_clobber
	    || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
	  emit_move_insn (gen_rtx_REG (SImode, regno), postinc);

      emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
    }
}

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.

   Blackfin specific:
   - The VDSP C compiler manual (our ABI) says that a variable args function
     should save the R0, R1 and R2 registers in the stack.
   - The caller will always leave space on the stack for the
     arguments that are passed in registers, so we don't have
     to leave any extra space.
   - Now the va_start pointer can access all arguments from the stack.  */

static void
setup_incoming_varargs (cumulative_args_t cum,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type ATTRIBUTE_UNUSED, int *pretend_size,
			int no_rtl)
{
  rtx mem;
  int i;

  if (no_rtl)
    return;

  /* The move for named arguments will be generated automatically by the
     compiler.  We need to generate the move rtx for the unnamed arguments
     if they are in the first 3 words.  We assume at least 1 named argument
     exists, so we never generate [ARGP] = R0 here.  */

  for (i = get_cumulative_args (cum)->words + 1; i < max_arg_registers; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (arg_pointer_rtx, (i * UNITS_PER_WORD)));
      emit_move_insn (mem, gen_rtx_REG (Pmode, i));
    }

  *pretend_size = 0;
}
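/* Illustrative sketch (hypothetical function, not from this file): given

     int f (int a, ...);

   and a call f (1, 2, 3), `a' is named and arrives in R0; the loop above
   stores the remaining argument registers R1 and R2 into the
   caller-allocated slots at [ARGP + 4] and [ARGP + 8], so the va_arg
   machinery can walk every argument in memory.  */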
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
bfin_frame_pointer_required (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));

  if (fkind != SUBROUTINE)
    return true;

  /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
     so we have to override it for non-leaf functions.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf)
    return true;

  return false;
}

/* Return the number of registers pushed during the prologue.  */

static int
n_regs_saved_by_prologue (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  bool is_inthandler = fkind != SUBROUTINE;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE
	      || (is_inthandler && !current_function_is_leaf));
  int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false);
  int n = ndregs + npregs;
  int i;

  if (all || stack_frame_needed_p ())
    n += 2;
  else
    {
      if (must_save_fp_p ())
	n++;
      if (must_save_rets_p ())
	n++;
    }

  if (fkind != SUBROUTINE || all)
    {
      /* Increment once for ASTAT.  */
      n++;
      if (! current_function_is_leaf
	  || cfun->machine->has_hardware_loops
	  || cfun->machine->has_loopreg_clobber)
	n += 6;
    }

  if (fkind != SUBROUTINE)
    {
      /* RETE/X/N.  */
      if (lookup_attribute ("nesting", attrs))
	n++;
    }

  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (all
	|| (fkind != SUBROUTINE
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      n += i == REG_A0 || i == REG_A1 ? 2 : 1;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All other eliminations are valid.  */

static bool
bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
}

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
bfin_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset = 0;

  if (from == ARG_POINTER_REGNUM)
    offset = n_regs_saved_by_prologue () * 4;

  if (to == STACK_POINTER_REGNUM)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
	offset += crtl->outgoing_args_size;
      else if (crtl->outgoing_args_size)
	offset += FIXED_STACK_AREA;

      offset += get_frame_size ();
    }

  return offset;
}
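/* Worked example with assumed numbers: if the prologue saves 3 registers,
   the eliminated ARG_POINTER sits 3 * 4 = 12 bytes above the stack
   pointer's post-save position.  Eliminating to the STACK_POINTER
   additionally skips the outgoing argument area (at least
   FIXED_STACK_AREA bytes whenever any outgoing arguments exist) plus
   get_frame_size () bytes of locals.  */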
/* Emit code to load a constant CONSTANT into register REG, setting
   RTX_FRAME_RELATED_P on all insns we generate if RELATED is true.
   Make sure that the insns we generate need not be split.  */

static void
frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related)
{
  rtx insn;
  rtx cst = GEN_INT (constant);

  if (constant >= -32768 && constant < 65536)
    insn = emit_move_insn (reg, cst);
  else
    {
      /* We don't call split_load_immediate here, since dwarf2out.c can get
	 confused about some of the more clever sequences it can generate.  */
      insn = emit_insn (gen_movsi_high (reg, cst));
      if (related)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_movsi_low (reg, reg, cst));
    }
  if (related)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Generate efficient code to add a value to a P register.
   Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero.
   EPILOGUE_P is zero if this function is called for the prologue;
   otherwise it's nonzero, and negative if this is for a sibcall
   epilogue.  */

static void
add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p)
{
  if (value == 0)
    return;

  /* Choose whether to use a sequence using a temporary register, or
     a sequence with multiple adds.  We can add a signed 7-bit value
     in one instruction.  */
  if (value > 120 || value < -120)
    {
      rtx tmpreg;
      rtx tmpreg2;
      rtx insn;

      tmpreg2 = NULL_RTX;

      /* For a prologue or normal epilogue, P1 can be safely used
	 as the temporary register.  For a sibcall epilogue, we try to find
	 a call-used P register, which will be restored in the epilogue.
	 If we cannot find such a P register, we have to use one I register
	 to help us.  */

      if (epilogue_p >= 0)
	tmpreg = gen_rtx_REG (SImode, REG_P1);
      else
	{
	  int i;
	  for (i = REG_P0; i <= REG_P5; i++)
	    if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
		|| (!TARGET_FDPIC
		    && i == PIC_OFFSET_TABLE_REGNUM
		    && (crtl->uses_pic_offset_table
			|| (TARGET_ID_SHARED_LIBRARY
			    && ! current_function_is_leaf))))
	      break;
	  if (i <= REG_P5)
	    tmpreg = gen_rtx_REG (SImode, i);
	  else
	    {
	      tmpreg = gen_rtx_REG (SImode, REG_P1);
	      tmpreg2 = gen_rtx_REG (SImode, REG_I0);
	      emit_move_insn (tmpreg2, tmpreg);
	    }
	}

      if (frame)
	frame_related_constant_load (tmpreg, value, TRUE);
      else
	insn = emit_move_insn (tmpreg, GEN_INT (value));

      insn = emit_insn (gen_addsi3 (reg, reg, tmpreg));
      if (frame)
	RTX_FRAME_RELATED_P (insn) = 1;

      if (tmpreg2 != NULL_RTX)
	emit_move_insn (tmpreg, tmpreg2);
    }
  else
    do
      {
	int size = value;
	rtx insn;

	if (size > 60)
	  size = 60;
	else if (size < -60)
	  /* We could use -62, but that would leave the stack unaligned, so
	     it's no good.  */
	  size = -60;

	insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
	if (frame)
	  RTX_FRAME_RELATED_P (insn) = 1;
	value -= size;
      }
    while (value != 0);
}
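/* Worked examples of the two strategies (register choices assumed free):
   add_to_reg (reg, 100, 0, 0) emits "reg += 60; reg += 40", staying within
   the signed 7-bit immediate range, while add_to_reg (reg, 200, 0, 0)
   loads 200 into the P1 temporary and emits a single three-operand add.  */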
/* Generate a LINK insn for a frame sized FRAME_SIZE.  If this constant
   is too large, generate a sequence of insns that has the same effect.
   SPREG contains (reg:SI REG_SP).  */

static void
emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size)
{
  HOST_WIDE_INT link_size = frame_size;
  rtx insn;
  int i;

  if (link_size > 262140)
    link_size = 262140;

  /* Use a LINK insn with as big a constant as possible, then subtract
     any remaining size from the SP.  */
  insn = emit_insn (gen_link (GEN_INT (-8 - link_size)));
  RTX_FRAME_RELATED_P (insn) = 1;

  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
    {
      rtx set = XVECEXP (PATTERN (insn), 0, i);
      gcc_assert (GET_CODE (set) == SET);
      RTX_FRAME_RELATED_P (set) = 1;
    }

  frame_size -= link_size;

  if (frame_size > 0)
    {
      /* Must use a call-clobbered PREG that isn't the static chain.  */
      rtx tmpreg = gen_rtx_REG (Pmode, REG_P1);

      frame_related_constant_load (tmpreg, -frame_size, TRUE);
      insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Return the number of bytes we must reserve for outgoing arguments
   in the current function's stack frame.  */

static HOST_WIDE_INT
arg_area_size (void)
{
  if (crtl->outgoing_args_size)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
	return crtl->outgoing_args_size;
      else
	return FIXED_STACK_AREA;
    }
  return 0;
}

/* Save RETS and FP, and allocate a stack frame.  ALL is true if the
   function must save all its registers (true only for certain interrupt
   handlers).  */

static void
do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all)
{
  frame_size += arg_area_size ();

  if (all
      || stack_frame_needed_p ()
      || (must_save_rets_p () && must_save_fp_p ()))
    emit_link_insn (spreg, frame_size);
  else
    {
      if (must_save_rets_p ())
	{
	  rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
					    gen_rtx_PRE_DEC (Pmode, spreg)),
			       bfin_rets_rtx);
	  rtx insn = emit_insn (pat);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      if (must_save_fp_p ())
	{
	  rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
					    gen_rtx_PRE_DEC (Pmode, spreg)),
			       gen_rtx_REG (Pmode, REG_FP));
	  rtx insn = emit_insn (pat);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      add_to_reg (spreg, -frame_size, 1, 0);
    }
}

/* Like do_link, but used for epilogues to deallocate the stack frame.
   EPILOGUE_P is zero if this function is called for the prologue;
   otherwise it's nonzero, and negative if this is for a sibcall
   epilogue.  */

static void
do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
{
  frame_size += arg_area_size ();

  if (stack_frame_needed_p ())
    emit_insn (gen_unlink ());
  else
    {
      rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));

      add_to_reg (spreg, frame_size, 0, epilogue_p);
      if (all || must_save_fp_p ())
	{
	  rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
	  emit_move_insn (fpreg, postinc);
	  emit_use (fpreg);
	}
      if (all || must_save_rets_p ())
	{
	  emit_move_insn (bfin_rets_rtx, postinc);
	  emit_use (bfin_rets_rtx);
	}
    }
}
/* Generate a prologue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler prologues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  rtx insn;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  tree kspisusp = lookup_attribute ("kspisusp", attrs);

  if (kspisusp)
    {
      insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* We need space on the stack in case we need to save the argument
     registers.  */
  if (fkind == EXCPT_HANDLER)
    {
      insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save everything here.  */
  if (!current_function_is_leaf)
    all = true;
  expand_prologue_reg_save (spreg, all, true);

  if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
    {
      rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
      rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
      emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
      emit_insn (gen_movsi_high (p5reg, chipid));
      emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
      emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
    }

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      insn = emit_move_insn (predec, srcreg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  do_link (spreg, frame_size, all);

  if (fkind == EXCPT_HANDLER)
    {
      rtx r0reg = gen_rtx_REG (SImode, REG_R0);
      rtx r1reg = gen_rtx_REG (SImode, REG_R1);
      rtx r2reg = gen_rtx_REG (SImode, REG_R2);

      emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT));
      emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26)));
      emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26)));
      emit_move_insn (r1reg, spreg);
      emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP));
      emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8)));
    }
}

/* Generate an epilogue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler epilogues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all)
{
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  do_unlink (spreg, get_frame_size (), all, 1);

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      emit_move_insn (srcreg, postinc);
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save (and restore) everything here.  */
  if (!current_function_is_leaf)
    all = true;

  expand_epilogue_reg_restore (spreg, all, true);

  /* Deallocate any space we left on the stack in case we needed to save the
     argument registers.  */
  if (fkind == EXCPT_HANDLER)
    emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
}

/* Used while emitting the prologue to generate code to load the correct value
   into the PIC register, which is passed in DEST.  */

static rtx
bfin_load_pic_reg (rtx dest)
{
  struct cgraph_local_info *i = NULL;
  rtx addr;

  i = cgraph_local_info (current_function_decl);

  /* Functions local to the translation unit don't need to reload the
     pic reg, since the caller always passes a usable one.  */
  if (i && i->local)
    return pic_offset_table_rtx;

  if (global_options_set.x_bfin_library_id)
    addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4);
  else
    addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					 UNSPEC_LIBRARY_OFFSET));
  emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr)));
  return dest;
}
/* Generate RTL for the prologue of the current function.  */

void
bfin_expand_prologue (void)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  rtx pic_reg_loaded = NULL_RTX;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_prologue (spreg, fkind, all);
      return;
    }

  if (crtl->limit_stack
      || (TARGET_STACK_CHECK_L1
	  && !DECL_NO_LIMIT_STACK (current_function_decl)))
    {
      HOST_WIDE_INT offset
	= bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
					   STACK_POINTER_REGNUM);
      rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
      rtx tmp = gen_rtx_REG (Pmode, REG_R3);
      rtx p2reg = gen_rtx_REG (Pmode, REG_P2);

      emit_move_insn (tmp, p2reg);
      if (!lim)
	{
	  emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode));
	  emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg));
	  lim = p2reg;
	}
      if (GET_CODE (lim) == SYMBOL_REF)
	{
	  if (TARGET_ID_SHARED_LIBRARY)
	    {
	      rtx p1reg = gen_rtx_REG (Pmode, REG_P1);
	      rtx val;
	      pic_reg_loaded = bfin_load_pic_reg (p2reg);
	      val = legitimize_pic_address (stack_limit_rtx, p1reg,
					    pic_reg_loaded);
	      emit_move_insn (p1reg, val);
	      frame_related_constant_load (p2reg, offset, FALSE);
	      emit_insn (gen_addsi3 (p2reg, p2reg, p1reg));
	      lim = p2reg;
	    }
	  else
	    {
	      rtx limit = plus_constant (lim, offset);
	      emit_move_insn (p2reg, limit);
	      lim = p2reg;
	    }
	}
      else
	{
	  if (lim != p2reg)
	    emit_move_insn (p2reg, lim);
	  add_to_reg (p2reg, offset, 0, 0);
	  lim = p2reg;
	}
      emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim));
      emit_insn (gen_trapifcc ());
      emit_move_insn (p2reg, tmp);
    }
  expand_prologue_reg_save (spreg, all, false);

  do_link (spreg, frame_size, all);

  if (TARGET_ID_SHARED_LIBRARY
      && !TARGET_SEP_DATA
      && (crtl->uses_pic_offset_table
	  || !current_function_is_leaf))
    bfin_load_pic_reg (pic_offset_table_rtx);
}

/* Generate RTL for the epilogue of the current function.  NEED_RETURN is
   zero if this is for a sibcall.  EH_RETURN is nonzero if we're expanding an
   eh_return pattern.  SIBCALL_P is true if this is a sibcall epilogue,
   false otherwise.  */

void
bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p)
{
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  int e = sibcall_p ? -1 : 1;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_epilogue (spreg, fkind, all);
      return;
    }

  do_unlink (spreg, get_frame_size (), all, e);

  expand_epilogue_reg_restore (spreg, all, false);

  /* Omit the return insn if this is for a sibcall.  */
  if (! need_return)
    return;

  if (eh_return)
    emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
}
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

int
bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
			   unsigned int new_reg)
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */

  if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE
      && !df_regs_ever_live_p (new_reg))
    return 0;

  return 1;
}

/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  */
static void
bfin_extra_live_on_entry (bitmap regs)
{
  if (TARGET_FDPIC)
    bitmap_set_bit (regs, FDPIC_REGNO);
}

/* Return the value of the return address for the frame COUNT steps up
   from the current frame, after the prologue.
   We punt for everything but the current frame by returning const0_rtx.  */
rtx
bfin_return_addr_rtx (int count)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RETS);
}

static rtx
bfin_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    return XVECEXP (XEXP (x, 1), 0, 0);

  return orig_x;
}

/* This predicate is used to compute the length of a load/store insn.
   OP is a MEM rtx; we return nonzero if its addressing mode requires a
   32-bit instruction.  */

int
effective_address_32bit_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT offset;

  mode = GET_MODE (op);
  op = XEXP (op, 0);

  if (GET_CODE (op) != PLUS)
    {
      gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC
		  || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC);
      return 0;
    }

  if (GET_CODE (XEXP (op, 1)) == UNSPEC)
    return 1;

  offset = INTVAL (XEXP (op, 1));

  /* All byte loads use a 16-bit offset.  */
  if (GET_MODE_SIZE (mode) == 1)
    return 1;

  if (GET_MODE_SIZE (mode) == 4)
    {
      /* Frame pointer relative loads can use a negative offset, all others
	 are restricted to a small positive one.  */
      if (XEXP (op, 0) == frame_pointer_rtx)
	return offset < -128 || offset > 60;
      return offset < 0 || offset > 60;
    }

  /* Must be HImode now.  */
  return offset < 0 || offset > 30;
}
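/* Worked examples under the rules above (addresses are hypothetical):
   an SImode access [FP + -100] fits the short form (FP-relative offsets
   from -128 to 60 do not force the 32-bit encoding), while [P0 + 64]
   needs a 32-bit insn because non-FP offsets must lie in [0, 60]; for
   HImode the short range shrinks to [0, 30].  */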
/* Returns true if X is a memory reference using an I register.  */
bool
bfin_dsp_memref_p (rtx x)
{
  if (! MEM_P (x))
    return false;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC
      || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC)
    x = XEXP (x, 0);
  return IREG_P (x);
}

/* Return cost of the memory address ADDR.
   All addressing modes are equally cheap on the Blackfin.  */

static int
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return 1;
}

/* Subroutine of print_operand; used to print a memory reference X to FILE.  */

void
print_address_operand (FILE *file, rtx x)
{
  switch (GET_CODE (x))
    {
    case PLUS:
      output_address (XEXP (x, 0));
      fprintf (file, "+");
      output_address (XEXP (x, 1));
      break;

    case PRE_DEC:
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      break;
    case POST_INC:
      output_address (XEXP (x, 0));
      fprintf (file, "++");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--");
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      print_operand (file, x, 0);
      break;
    }
}
/* Adding intp DImode support by Tony
 * -- Q: (low word)
 * -- R: (high word)
 */

void
print_operand (FILE *file, rtx x, char code)
{
  enum machine_mode mode;

  if (code == '!')
    {
      if (GET_MODE (current_output_insn) == SImode)
	fprintf (file, " ||");
      else
	fprintf (file, ";");
      return;
    }

  mode = GET_MODE (x);

  switch (code)
    {
    case 'j':
      switch (GET_CODE (x))
	{
	case EQ:
	  fprintf (file, "e");
	  break;
	case NE:
	  fprintf (file, "ne");
	  break;
	case GT:
	  fprintf (file, "g");
	  break;
	case LT:
	  fprintf (file, "l");
	  break;
	case GE:
	  fprintf (file, "ge");
	  break;
	case LE:
	  fprintf (file, "le");
	  break;
	case GTU:
	  fprintf (file, "g");
	  break;
	case LTU:
	  fprintf (file, "l");
	  break;
	case GEU:
	  fprintf (file, "ge");
	  break;
	case LEU:
	  fprintf (file, "le");
	  break;
	default:
	  output_operand_lossage ("invalid %%j value");
	}
      break;

    case 'J':	/* reverse logic */
      switch (GET_CODE (x))
	{
	case EQ:
	  fprintf (file, "ne");
	  break;
	case NE:
	  fprintf (file, "e");
	  break;
	case GT:
	  fprintf (file, "le");
	  break;
	case LT:
	  fprintf (file, "ge");
	  break;
	case GE:
	  fprintf (file, "l");
	  break;
	case LE:
	  fprintf (file, "g");
	  break;
	case GTU:
	  fprintf (file, "le");
	  break;
	case LTU:
	  fprintf (file, "ge");
	  break;
	case GEU:
	  fprintf (file, "l");
	  break;
	case LEU:
	  fprintf (file, "g");
	  break;
	default:
	  output_operand_lossage ("invalid %%J value");
	}
      break;

    default:
      switch (GET_CODE (x))
	{
	case REG:
	  if (code == 'h')
	    {
	      if (REGNO (x) < 32)
		fprintf (file, "%s", short_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'd')
	    {
	      if (REGNO (x) < 32)
		fprintf (file, "%s", high_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'w')
	    {
	      if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
		fprintf (file, "%s.w", reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'x')
	    {
	      if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
		fprintf (file, "%s.x", reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'v')
	    {
	      if (REGNO (x) == REG_A0)
		fprintf (file, "AV0");
	      else if (REGNO (x) == REG_A1)
		fprintf (file, "AV1");
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'D')
	    {
	      if (D_REGNO_P (REGNO (x)))
		fprintf (file, "%s", dregs_pair_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'H')
	    {
	      if ((mode == DImode || mode == DFmode) && REG_P (x))
		fprintf (file, "%s", reg_names[REGNO (x) + 1]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'T')
	    {
	      if (D_REGNO_P (REGNO (x)))
		fprintf (file, "%s", byte_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else
	    fprintf (file, "%s", reg_names[REGNO (x)]);
	  break;

	case MEM:
	  fputc ('[', file);
	  x = XEXP (x, 0);
	  print_address_operand (file, x);
	  fputc (']', file);
	  break;

	case CONST_INT:
	  if (code == 'M')
	    {
	      switch (INTVAL (x))
		{
		case MACFLAG_NONE:
		  break;
		case MACFLAG_FU:
		  fputs ("(FU)", file);
		  break;
		case MACFLAG_T:
		  fputs ("(T)", file);
		  break;
		case MACFLAG_TFU:
		  fputs ("(TFU)", file);
		  break;
		case MACFLAG_W32:
		  fputs ("(W32)", file);
		  break;
		case MACFLAG_IS:
		  fputs ("(IS)", file);
		  break;
		case MACFLAG_IU:
		  fputs ("(IU)", file);
		  break;
		case MACFLAG_IH:
		  fputs ("(IH)", file);
		  break;
		case MACFLAG_M:
		  fputs ("(M)", file);
		  break;
		case MACFLAG_IS_M:
		  fputs ("(IS,M)", file);
		  break;
		case MACFLAG_ISS2:
		  fputs ("(ISS2)", file);
		  break;
		case MACFLAG_S2RND:
		  fputs ("(S2RND)", file);
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    }
	  else if (code == 'b')
	    {
	      if (INTVAL (x) == 0)
		fputs ("+=", file);
	      else if (INTVAL (x) == 1)
		fputs ("-=", file);
	      else
		gcc_unreachable ();
	      break;
	    }
	  /* Moves to half registers with d or h modifiers always use unsigned
	     constants.  */
	  else if (code == 'd')
	    x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
	  else if (code == 'h')
	    x = GEN_INT (INTVAL (x) & 0xffff);
	  else if (code == 'N')
	    x = GEN_INT (-INTVAL (x));
	  else if (code == 'X')
	    x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
	  else if (code == 'Y')
	    x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x)));
	  else if (code == 'Z')
	    /* Used for LINK insns.  */
	    x = GEN_INT (-8 - INTVAL (x));

	  /* fall through */

	case SYMBOL_REF:
	  output_addr_const (file, x);
	  break;

	case CONST_DOUBLE:
	  output_operand_lossage ("invalid const_double operand");
	  break;

	case UNSPEC:
	  switch (XINT (x, 1))
	    {
	    case UNSPEC_MOVE_PIC:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@GOT");
	      break;

	    case UNSPEC_MOVE_FDPIC:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@GOT17M4");
	      break;

	    case UNSPEC_FUNCDESC_GOT17M4:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@FUNCDESC_GOT17M4");
	      break;

	    case UNSPEC_LIBRARY_OFFSET:
	      fprintf (file, "_current_shared_library_p5_offset_");
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  output_addr_const (file, x);
	}
    }
}
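/* Usage sketch for the codes above (a made-up template fragment, not one
   taken from bfin.md): with operand 1 being (eq (reg) (const_int 0)),
   "%j1" prints "e" and "%J1" prints the reversed condition "ne", while
   "%h0" and "%d0" select the low-half and high-half names of a register
   operand 0.  */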
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
   The VDSP C Compiler manual (our ABI) says that the
   first 3 words of arguments will use R0, R1 and R2.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
		      rtx libname ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cum;

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */

  cum->nregs = max_arg_registers;
  cum->arg_regs = arg_regs;

  cum->call_cookie = CALL_NORMAL;
  /* Check for a shortcall or longcall attribute.  */
  if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_SHORT;
  else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_LONG;

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
bfin_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int count, bytes, words;

  bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  cum->words += words;
  cum->nregs -= words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->arg_regs = NULL;
    }
  else
    {
      for (count = 1; count <= words; count++)
	cum->arg_regs++;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
bfin_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
		   const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);

  if (mode == VOIDmode)
    /* Compute operand 2 of the call insn.  */
    return GEN_INT (cum->call_cookie);

  if (bytes == -1)
    return NULL_RTX;

  if (cum->nregs)
    return gen_rtx_REG (mode, *(cum->arg_regs));

  return NULL_RTX;
}

/* For an arg passed partly in registers and partly in memory,
   this is the number of bytes passed in registers.
   For args passed entirely in registers or entirely in memory, zero.

   Refer to the VDSP C Compiler manual (our ABI): the first 3 words are
   in registers.  So, if an argument is larger than the registers
   available, it will span the registers and the stack.  */

static int
bfin_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
			tree type ATTRIBUTE_UNUSED,
			bool named ATTRIBUTE_UNUSED)
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int bytes_left = get_cumulative_args (cum)->nregs * UNITS_PER_WORD;

  if (bytes == -1)
    return 0;

  if (bytes_left == 0)
    return 0;
  if (bytes > bytes_left)
    return bytes_left;
  return 0;
}
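/* Worked example (hypothetical argument list): after two int arguments
   have consumed R0 and R1, nregs is 1 and bytes_left is 4; an 8-byte
   argument then returns 4 here, so its first word travels in R2 and the
   second word goes on the stack.  */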
/* Variable sized types are passed by reference.  */

static bool
bfin_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   TARGET_RETURN_IN_MEMORY.  */

static bool
bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size = int_size_in_bytes (type);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Register in which the address to store a structure value
   is passed to a function.  */
static rtx
bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		       int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, REG_P0);
}

/* Return true when a register may be used to pass function parameters.  */

bool
function_arg_regno_p (int n)
{
  int i;
  for (i = 0; arg_regs[i] != -1; i++)
    if (n == arg_regs[i])
      return true;
  return false;
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
			      tree exp ATTRIBUTE_UNUSED)
{
  struct cgraph_local_info *this_func, *called_func;
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  if (fkind != SUBROUTINE)
    return false;
  if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA)
    return true;

  /* When compiling for ID shared libraries, we can't sibcall a local
     function from a non-local function, because the local function thinks
     it does not need to reload P5 in the prologue, but the sibcall will
     pop P5 in the sibcall epilogue, and we end up with the wrong value
     in P5.  */

  if (!decl)
    /* Not enough information.  */
    return false;

  this_func = cgraph_local_info (current_function_decl);
  called_func = cgraph_local_info (decl);
  if (!called_func)
    return false;
  return !called_func->local || this_func->local;
}
/* Write a template for a trampoline to F.  */

static void
bfin_asm_trampoline_template (FILE *f)
{
  if (TARGET_FDPIC)
    {
      fprintf (f, "\t.dd\t0x00000000\n");	/* 0 */
      fprintf (f, "\t.dd\t0x00000000\n");	/* 0 */
      fprintf (f, "\t.dd\t0x0000e109\n");	/* p1.l = fn low */
      fprintf (f, "\t.dd\t0x0000e149\n");	/* p1.h = fn high */
      fprintf (f, "\t.dd\t0x0000e10a\n");	/* p2.l = sc low */
      fprintf (f, "\t.dd\t0x0000e14a\n");	/* p2.h = sc high */
      fprintf (f, "\t.dw\t0xac4b\n");		/* p3 = [p1 + 4] */
      fprintf (f, "\t.dw\t0x9149\n");		/* p1 = [p1] */
      fprintf (f, "\t.dw\t0x0051\n");		/* jump (p1) */
    }
  else
    {
      fprintf (f, "\t.dd\t0x0000e109\n");	/* p1.l = fn low */
      fprintf (f, "\t.dd\t0x0000e149\n");	/* p1.h = fn high */
      fprintf (f, "\t.dd\t0x0000e10a\n");	/* p2.l = sc low */
      fprintf (f, "\t.dd\t0x0000e14a\n");	/* p2.h = sc high */
      fprintf (f, "\t.dw\t0x0051\n");		/* jump (p1) */
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline at
   M_TRAMP.  FNDECL is the target function.  CHAIN_VALUE is an RTX for
   the static chain value for the function.  */

static void
bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
  rtx t2 = copy_to_reg (chain_value);
  rtx mem;
  int i = 0;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx a = force_reg (Pmode, plus_constant (XEXP (m_tramp, 0), 8));
      mem = adjust_address (m_tramp, Pmode, 0);
      emit_move_insn (mem, a);
      i = 8;
    }

  mem = adjust_address (m_tramp, HImode, i + 2);
  emit_move_insn (mem, gen_lowpart (HImode, t1));
  emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
  mem = adjust_address (m_tramp, HImode, i + 6);
  emit_move_insn (mem, gen_lowpart (HImode, t1));

  mem = adjust_address (m_tramp, HImode, i + 10);
  emit_move_insn (mem, gen_lowpart (HImode, t2));
  emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
  mem = adjust_address (m_tramp, HImode, i + 14);
  emit_move_insn (mem, gen_lowpart (HImode, t2));
}
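/* Sketch of the resulting layout, derived from the offsets above (FDPIC
   case): word 0 receives the address of the code at byte 8, and the
   halfword stores patch bytes 10/14 with the low/high halves of the
   target function address and bytes 18/22 with the halves of the static
   chain.  Without FDPIC the same stores land at bytes 2/6 and 10/14.  */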
/* Emit insns to move operands[1] into operands[0].  */

void
emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);

  gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed));
  if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
    operands[1] = force_reg (SImode, operands[1]);
  else
    operands[1] = legitimize_pic_address (operands[1], temp,
					  TARGET_FDPIC ? OUR_FDPIC_REG
					  : pic_offset_table_rtx);
}

/* Expand a move operation in mode MODE.  The operands are in OPERANDS.
   Returns true if no further code must be generated, false if the caller
   should generate an insn to move OPERANDS[1] to OPERANDS[0].  */

bool
expand_move (rtx *operands, enum machine_mode mode)
{
  rtx op = operands[1];
  if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC)
      && SYMBOLIC_CONST (op))
    emit_pic_move (operands, mode);
  else if (mode == SImode && GET_CODE (op) == CONST
	   && GET_CODE (XEXP (op, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
	   && !targetm.legitimate_constant_p (mode, op))
    {
      rtx dest = operands[0];
      rtx op0, op1;
      gcc_assert (!reload_in_progress && !reload_completed);
      op = XEXP (op, 0);
      op0 = force_reg (mode, XEXP (op, 0));
      op1 = XEXP (op, 1);
      if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode))
	op1 = force_reg (mode, op1);
      if (GET_CODE (dest) == MEM)
	dest = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (dest, op0, op1));
      if (dest == operands[0])
	return true;
      operands[1] = dest;
    }
  /* Don't generate memory->memory or constant->memory moves; go through a
     register.  */
  else if ((reload_in_progress | reload_completed) == 0
	   && GET_CODE (operands[0]) == MEM
	   && GET_CODE (operands[1]) != REG)
    operands[1] = force_reg (mode, operands[1]);
  return false;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}

bool
bfin_longcall_p (rtx op, int call_cookie)
{
  gcc_assert (GET_CODE (op) == SYMBOL_REF);
  if (SYMBOL_REF_WEAK (op))
    return 1;
  if (call_cookie & CALL_SHORT)
    return 0;
  if (call_cookie & CALL_LONG)
    return 1;
  if (TARGET_LONG_CALLS)
    return 1;
  return 0;
}
2007 /* Expand a call instruction. FNADDR is the call target, RETVAL the return value.
2008 COOKIE is a CONST_INT holding the call_cookie prepared init_cumulative_args.
2009 SIBCALL is nonzero if this is a sibling call. */
2011 void
2012 bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall)
2014 rtx use = NULL, call;
2015 rtx callee = XEXP (fnaddr, 0);
2016 int nelts = 3;
2017 rtx pat;
2018 rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
2019 rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
2020 int n;
2022 /* In an untyped call, we can get NULL for operand 2. */
2023 if (cookie == NULL_RTX)
2024 cookie = const0_rtx;
2026 /* Static functions and indirect calls don't need the pic register. */
2027 if (!TARGET_FDPIC && flag_pic
2028 && GET_CODE (callee) == SYMBOL_REF
2029 && !SYMBOL_REF_LOCAL_P (callee))
2030 use_reg (&use, pic_offset_table_rtx);
2032 if (TARGET_FDPIC)
2034 int caller_in_sram, callee_in_sram;
2036 /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */
2037 caller_in_sram = callee_in_sram = 0;
2039 if (lookup_attribute ("l1_text",
2040 DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2041 caller_in_sram = 1;
2042 else if (lookup_attribute ("l2",
2043 DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2044 caller_in_sram = 2;
2046 if (GET_CODE (callee) == SYMBOL_REF
2047 && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee)))
2049 if (lookup_attribute
2050 ("l1_text",
2051 DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2052 callee_in_sram = 1;
2053 else if (lookup_attribute
2054 ("l2",
2055 DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2056 callee_in_sram = 2;
2059 if (GET_CODE (callee) != SYMBOL_REF
2060 || bfin_longcall_p (callee, INTVAL (cookie))
2061 || (GET_CODE (callee) == SYMBOL_REF
2062 && !SYMBOL_REF_LOCAL_P (callee)
2063 && TARGET_INLINE_PLT)
2064 || caller_in_sram != callee_in_sram
2065 || (caller_in_sram && callee_in_sram
2066 && (GET_CODE (callee) != SYMBOL_REF
2067 || !SYMBOL_REF_LOCAL_P (callee))))
2069 rtx addr = callee;
2070 if (! address_operand (addr, Pmode))
2071 addr = force_reg (Pmode, addr);
2073 fnaddr = gen_reg_rtx (SImode);
2074 emit_insn (gen_load_funcdescsi (fnaddr, addr));
2075 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
2077 picreg = gen_reg_rtx (SImode);
2078 emit_insn (gen_load_funcdescsi (picreg,
2079 plus_constant (addr, 4)));
2082 nelts++;
2084 else if ((!register_no_elim_operand (callee, Pmode)
2085 && GET_CODE (callee) != SYMBOL_REF)
2086 || (GET_CODE (callee) == SYMBOL_REF
2087 && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY)
2088 || bfin_longcall_p (callee, INTVAL (cookie)))))
2090 callee = copy_to_mode_reg (Pmode, callee);
2091 fnaddr = gen_rtx_MEM (Pmode, callee);
2093 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
2095 if (retval)
2096 call = gen_rtx_SET (VOIDmode, retval, call);
2098 pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts));
2099 n = 0;
2100 XVECEXP (pat, 0, n++) = call;
2101 if (TARGET_FDPIC)
2102 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
2103 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
2104 if (sibcall)
2105 XVECEXP (pat, 0, n++) = ret_rtx;
2106 else
2107 XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
2108 call = emit_call_insn (pat);
2109 if (use)
2110 CALL_INSN_FUNCTION_USAGE (call) = use;
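/* Annotation, not part of the original file: a rough picture of the
   PARALLEL built above, matching the call patterns in bfin.md:

     (parallel [(set (reg RETVAL) (call (mem FNADDR) CALLARG1))
                (use (reg PICREG))         ;; TARGET_FDPIC only
                (use (const_int COOKIE))
                (clobber (reg:SI REG_RETS))])

   For sibling calls the clobber is replaced by (return), and the outer
   SET is omitted when there is no return value.  */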
2113 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
2115 int
2116 hard_regno_mode_ok (int regno, enum machine_mode mode)
2118 /* Allow only dregs to store a value of mode HImode or QImode. */
2119 enum reg_class rclass = REGNO_REG_CLASS (regno);
2121 if (mode == CCmode)
2122 return 0;
2124 if (mode == V2HImode)
2125 return D_REGNO_P (regno);
2126 if (rclass == CCREGS)
2127 return mode == BImode;
2128 if (mode == PDImode || mode == V2PDImode)
2129 return regno == REG_A0 || regno == REG_A1;
2131 /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
2132 up with a bad register class (such as ALL_REGS) for DImode. */
2133 if (mode == DImode)
2134 return regno < REG_M3;
2136 if (mode == SImode
2137 && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno))
2138 return 1;
2140 return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno);
2143 /* Implements target hook vector_mode_supported_p. */
2145 static bool
2146 bfin_vector_mode_supported_p (enum machine_mode mode)
2148 return mode == V2HImode;
2151 /* Return the cost of moving data from a register in class CLASS1 to
2152 one in class CLASS2. A cost of 2 is the default. */
2154 int
2155 bfin_register_move_cost (enum machine_mode mode,
2156 enum reg_class class1, enum reg_class class2)
2158 /* These need secondary reloads, so they're more expensive. */
2159 if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
2160 || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
2161 return 4;
2163 /* If optimizing for size, always prefer reg-reg over reg-memory moves. */
2164 if (optimize_size)
2165 return 2;
2167 if (GET_MODE_CLASS (mode) == MODE_INT)
2169 /* Discourage trying to use the accumulators. */
2170 if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0)
2171 || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1)
2172 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0)
2173 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1))
2174 return 20;
2176 return 2;
2179 /* Return the cost of moving data of mode M between a
2180 register and memory. A value of 2 is the default; this cost is
2181 relative to those in `REGISTER_MOVE_COST'.
2183 ??? In theory L1 memory has single-cycle latency. We should add a switch
2184 that tells the compiler whether we expect to use only L1 memory for the
2185 program; it'll make the costs more accurate. */
2187 int
2188 bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2189 enum reg_class rclass,
2190 int in ATTRIBUTE_UNUSED)
2192 /* Make memory accesses slightly more expensive than any register-register
2193 move. Also, penalize non-DP registers, since they need secondary
2194 reloads to load and store. */
2195 if (! reg_class_subset_p (rclass, DPREGS))
2196 return 10;
2198 return 8;
2201 /* Inform reload about cases where moving X with a mode MODE to a register in
2202 RCLASS requires an extra scratch register. Return the class needed for the
2203 scratch register. */
2205 static reg_class_t
2206 bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
2207 enum machine_mode mode, secondary_reload_info *sri)
2209 /* If we have HImode or QImode, we can only use DREGS as secondary registers;
2210 in most other cases we can also use PREGS. */
2211 enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS;
2212 enum reg_class x_class = NO_REGS;
2213 enum rtx_code code = GET_CODE (x);
2214 enum reg_class rclass = (enum reg_class) rclass_i;
2216 if (code == SUBREG)
2217 x = SUBREG_REG (x), code = GET_CODE (x);
2218 if (REG_P (x))
2220 int regno = REGNO (x);
2221 if (regno >= FIRST_PSEUDO_REGISTER)
2222 regno = reg_renumber[regno];
2224 if (regno == -1)
2225 code = MEM;
2226 else
2227 x_class = REGNO_REG_CLASS (regno);
2230 /* We can be asked to reload (plus (FP) (large_constant)) into a DREG.
2231 This happens as a side effect of register elimination, and we need
2232 a scratch register to do it. */
2233 if (fp_plus_const_operand (x, mode))
2235 rtx op2 = XEXP (x, 1);
2236 int large_constant_p = ! satisfies_constraint_Ks7 (op2);
2238 if (rclass == PREGS || rclass == PREGS_CLOBBERED)
2239 return NO_REGS;
2240 /* If destination is a DREG, we can do this without a scratch register
2241 if the constant is valid for an add instruction. */
2242 if ((rclass == DREGS || rclass == DPREGS)
2243 && ! large_constant_p)
2244 return NO_REGS;
2245 /* Reloading to anything other than a DREG? Use a PREG scratch
2246 register. */
2247 sri->icode = CODE_FOR_reload_insi;
2248 return NO_REGS;
2251 /* Data can usually be moved freely between registers of most classes.
2252 AREGS are an exception; they can only move to or from another register
2253 in AREGS or one in DREGS. They can also be assigned the constant 0. */
2254 if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
2255 return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
2256 || rclass == ODD_AREGS
2257 ? NO_REGS : DREGS);
2259 if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
2261 if (code == MEM)
2263 sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi;
2264 return NO_REGS;
2267 if (x != const0_rtx && x_class != DREGS)
2269 return DREGS;
2271 else
2272 return NO_REGS;
2275 /* CCREGS can only be moved from/to DREGS. */
2276 if (rclass == CCREGS && x_class != DREGS)
2277 return DREGS;
2278 if (x_class == CCREGS && rclass != DREGS)
2279 return DREGS;
2281 /* All registers other than AREGS can load arbitrary constants. The only
2282 case that remains is MEM. */
2283 if (code == MEM)
2284 if (! reg_class_subset_p (rclass, default_class))
2285 return default_class;
2287 return NO_REGS;
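/* Illustrative annotation, not part of the original file: a sketch of the
   fp_plus_const_operand case above, assuming register elimination has
   produced

     (set (reg:SI Dn) (plus:SI (reg:SI FP) (const_int 4096)))

   Since 4096 does not satisfy the Ks7 constraint, no add-immediate can do
   this directly; sri->icode is set to the reload_insi pattern, which uses
   a P-register scratch to form FP + 4096 before moving it into Dn.  */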
2290 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
2292 static bool
2293 bfin_class_likely_spilled_p (reg_class_t rclass)
2295 switch (rclass)
2297 case PREGS_CLOBBERED:
2298 case PROLOGUE_REGS:
2299 case P0REGS:
2300 case D0REGS:
2301 case D1REGS:
2302 case D2REGS:
2303 case CCREGS:
2304 return true;
2306 default:
2307 break;
2310 return false;
2313 static struct machine_function *
2314 bfin_init_machine_status (void)
2316 return ggc_alloc_cleared_machine_function ();
2319 /* Implement the TARGET_OPTION_OVERRIDE hook. */
2321 static void
2322 bfin_option_override (void)
2324 /* If processor type is not specified, enable all workarounds. */
2325 if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
2327 int i;
2329 for (i = 0; bfin_cpus[i].name != NULL; i++)
2330 bfin_workarounds |= bfin_cpus[i].workarounds;
2332 bfin_si_revision = 0xffff;
2335 if (bfin_csync_anomaly == 1)
2336 bfin_workarounds |= WA_SPECULATIVE_SYNCS;
2337 else if (bfin_csync_anomaly == 0)
2338 bfin_workarounds &= ~WA_SPECULATIVE_SYNCS;
2340 if (bfin_specld_anomaly == 1)
2341 bfin_workarounds |= WA_SPECULATIVE_LOADS;
2342 else if (bfin_specld_anomaly == 0)
2343 bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
2345 if (TARGET_OMIT_LEAF_FRAME_POINTER)
2346 flag_omit_frame_pointer = 1;
2348 #ifdef SUBTARGET_FDPIC_NOT_SUPPORTED
2349 if (TARGET_FDPIC)
2350 error ("-mfdpic is not supported, please use a bfin-linux-uclibc target");
2351 #endif
2353 /* Library identification */
2354 if (global_options_set.x_bfin_library_id && ! TARGET_ID_SHARED_LIBRARY)
2355 error ("-mshared-library-id= specified without -mid-shared-library");
2357 if (stack_limit_rtx && TARGET_FDPIC)
2359 warning (0, "-fstack-limit- options are ignored with -mfdpic; use -mstack-check-l1");
2360 stack_limit_rtx = NULL_RTX;
2363 if (stack_limit_rtx && TARGET_STACK_CHECK_L1)
2364 error ("can%'t use multiple stack checking methods together");
2366 if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC)
2367 error ("ID shared libraries and FD-PIC mode can%'t be used together");
2369 /* Don't allow the user to specify -mid-shared-library and -msep-data
2370 together, as it makes little sense from a user's point of view... */
2371 if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
2372 error ("cannot specify both -msep-data and -mid-shared-library");
2373 /* ... internally, however, it's nearly the same. */
2374 if (TARGET_SEP_DATA)
2375 target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY;
2377 if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0)
2378 flag_pic = 1;
2380 /* There is no single unaligned SI op for PIC code. Sometimes we
2381 need to use ".4byte" and sometimes we need to use ".picptr".
2382 See bfin_assemble_integer for details. */
2383 if (TARGET_FDPIC)
2384 targetm.asm_out.unaligned_op.si = 0;
2386 /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries,
2387 since we don't support it and it'll just break. */
2388 if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
2389 flag_pic = 0;
2391 if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
2392 error ("-mmulticore can only be used with BF561");
2394 if (TARGET_COREA && !TARGET_MULTICORE)
2395 error ("-mcorea should be used with -mmulticore");
2397 if (TARGET_COREB && !TARGET_MULTICORE)
2398 error ("-mcoreb should be used with -mmulticore");
2400 if (TARGET_COREA && TARGET_COREB)
2401 error ("-mcorea and -mcoreb can%'t be used together");
2403 flag_schedule_insns = 0;
2405 init_machine_status = bfin_init_machine_status;
2408 /* Return the destination address of BRANCH.
2409 We need to use this instead of get_attr_length, because the
2410 cbranch_with_nops pattern conservatively sets its length to 6, and
2411 we still prefer to use shorter sequences. */
2413 static int
2414 branch_dest (rtx branch)
2416 rtx dest;
2417 int dest_uid;
2418 rtx pat = PATTERN (branch);
2419 if (GET_CODE (pat) == PARALLEL)
2420 pat = XVECEXP (pat, 0, 0);
2421 dest = SET_SRC (pat);
2422 if (GET_CODE (dest) == IF_THEN_ELSE)
2423 dest = XEXP (dest, 1);
2424 dest = XEXP (dest, 0);
2425 dest_uid = INSN_UID (dest);
2426 return INSN_ADDRESSES (dest_uid);
2429 /* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates
2430 it's a branch that's predicted taken. */
2432 static int
2433 cbranch_predicted_taken_p (rtx insn)
2435 rtx x = find_reg_note (insn, REG_BR_PROB, 0);
2437 if (x)
2439 int pred_val = INTVAL (XEXP (x, 0));
2441 return pred_val >= REG_BR_PROB_BASE / 2;
2444 return 0;
2447 /* Templates for use by asm_conditional_branch. */
2449 static const char *ccbranch_templates[][3] = {
2450 { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" },
2451 { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" },
2452 { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" },
2453 { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" },
2456 /* Output INSN, which is a conditional branch instruction with operands
2457 OPERANDS.
2459 We deal with the various forms of conditional branches that can be generated
2460 by bfin_reorg to prevent the hardware from doing speculative loads, by
2461 - emitting a sufficient number of nops, if N_NOPS is nonzero, or
2462 - always emitting the branch as predicted taken, if PREDICT_TAKEN is true.
2463 Either of these is only necessary if the branch is short, otherwise the
2464 template we use ends in an unconditional jump which flushes the pipeline
2465 anyway. */
2467 void
2468 asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken)
2470 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2471 /* Note: the offset for instructions like if cc jmp; jump.[sl] offset
2472 is measured from the start of the if cc rather than from the jump, so
2473 the range for jump.s is (-4094, 4096) instead of (-4096, 4094).
2474 */
2475 int len = (offset >= -1024 && offset <= 1022 ? 0
2476 : offset >= -4094 && offset <= 4096 ? 1
2477 : 2);
2478 int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn);
2479 int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT);
2480 output_asm_insn (ccbranch_templates[idx][len], operands);
2481 gcc_assert (n_nops == 0 || !bp);
2482 if (len == 0)
2483 while (n_nops-- > 0)
2484 output_asm_insn ("nop;", NULL);
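/* Annotation, not in the original file: examples of the length selection
   above, taking the first template row and measuring the offset from the
   branch insn:

     offset 500    -> len 0: "if !cc jump %3;"
     offset 3000   -> len 1: "if cc jump 4 (bp); jump.s %3;"
     offset 100000 -> len 2: "if cc jump 6 (bp); jump.l %3;"
*/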
2487 /* Emit rtl for a comparison operation CMP in mode MODE. Operands have been
2488 stored in bfin_compare_op0 and bfin_compare_op1 already. */
2490 rtx
2491 bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED)
2493 enum rtx_code code1, code2;
2494 rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
2495 rtx tem = bfin_cc_rtx;
2496 enum rtx_code code = GET_CODE (cmp);
2498 /* If we have a BImode input, then we already have a compare result, and
2499 do not need to emit another comparison. */
2500 if (GET_MODE (op0) == BImode)
2502 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
2503 tem = op0, code2 = code;
2505 else
2507 switch (code) {
2508 /* bfin has these conditions */
2509 case EQ:
2510 case LT:
2511 case LE:
2512 case LEU:
2513 case LTU:
2514 code1 = code;
2515 code2 = NE;
2516 break;
2517 default:
2518 code1 = reverse_condition (code);
2519 code2 = EQ;
2520 break;
2522 emit_insn (gen_rtx_SET (VOIDmode, tem,
2523 gen_rtx_fmt_ee (code1, BImode, op0, op1)));
2526 return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode));
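/* Annotation, not part of the original file: e.g. for (ge:SI a b), which
   the hardware cannot test directly, the code above emits CC = a < b and
   returns (eq CC 0); for a supported code such as (lt:SI a b) it emits
   CC = a < b and returns (ne CC 0).  */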
2529 /* Return nonzero iff C has exactly one bit set when it is interpreted
2530 as a 32-bit constant. */
2532 int
2533 log2constp (unsigned HOST_WIDE_INT c)
2535 c &= 0xFFFFFFFF;
2536 return c != 0 && (c & (c-1)) == 0;
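/* Annotation, not in the original file: c & (c-1) clears the lowest set
   bit, so the test above is true exactly for the 32-bit powers of two;
   e.g. 0x80 & 0x7f == 0, while 0x90 & 0x8f == 0x80 != 0.  */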
2539 /* Returns the number of consecutive least significant zeros in the binary
2540 representation of *V.
2541 We modify *V to contain the original value arithmetically shifted right by
2542 the number of zeros. */
2544 static int
2545 shiftr_zero (HOST_WIDE_INT *v)
2547 unsigned HOST_WIDE_INT tmp = *v;
2548 unsigned HOST_WIDE_INT sgn;
2549 int n = 0;
2551 if (tmp == 0)
2552 return 0;
2554 sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1));
2555 while ((tmp & 0x1) == 0 && n <= 32)
2557 tmp = (tmp >> 1) | sgn;
2558 n++;
2560 *v = tmp;
2561 return n;
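/* Annotation, not part of the original file: e.g. *v == 0x12340000 makes
   shiftr_zero return 16 and leaves *v == 0x1234; OR-ing SGN back in on
   every step makes the shift arithmetic, so negative values keep their
   sign bit.  */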
2564 /* After reload, split the load of an immediate constant. OPERANDS are the
2565 operands of the movsi_insn pattern which we are splitting. We return
2566 nonzero if we emitted a sequence to load the constant, zero if we emitted
2567 nothing because we want to use the splitter's default sequence. */
2569 int
2570 split_load_immediate (rtx operands[])
2572 HOST_WIDE_INT val = INTVAL (operands[1]);
2573 HOST_WIDE_INT tmp;
2574 HOST_WIDE_INT shifted = val;
2575 HOST_WIDE_INT shifted_compl = ~val;
2576 int num_zero = shiftr_zero (&shifted);
2577 int num_compl_zero = shiftr_zero (&shifted_compl);
2578 unsigned int regno = REGNO (operands[0]);
2580 /* This case takes care of single-bit set/clear constants, which we could
2581 also implement with BITSET/BITCLR. */
2582 if (num_zero
2583 && shifted >= -32768 && shifted < 65536
2584 && (D_REGNO_P (regno)
2585 || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2)))
2587 emit_insn (gen_movsi (operands[0], GEN_INT (shifted)));
2588 emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero)));
2589 return 1;
2592 tmp = val & 0xFFFF;
2593 tmp |= -(tmp & 0x8000);
2595 /* If high word has one bit set or clear, try to use a bit operation. */
2596 if (D_REGNO_P (regno))
2598 if (log2constp (val & 0xFFFF0000))
2600 emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF)));
2601 emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000)));
2602 return 1;
2604 else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0)
2606 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2607 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF)));
2611 if (D_REGNO_P (regno))
2613 if (tmp >= -64 && tmp <= 63)
2615 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2616 emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536)));
2617 return 1;
2620 if ((val & 0xFFFF0000) == 0)
2622 emit_insn (gen_movsi (operands[0], const0_rtx));
2623 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2624 return 1;
2627 if ((val & 0xFFFF0000) == 0xFFFF0000)
2629 emit_insn (gen_movsi (operands[0], constm1_rtx));
2630 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2631 return 1;
2635 /* Need DREGs for the remaining case. */
2636 if (regno > REG_R7)
2637 return 0;
2639 if (optimize_size
2640 && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
2642 /* If optimizing for size, generate a sequence that has more instructions
2643 but is shorter. */
2644 emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl)));
2645 emit_insn (gen_ashlsi3 (operands[0], operands[0],
2646 GEN_INT (num_compl_zero)));
2647 emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
2648 return 1;
2650 return 0;
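/* Illustrative annotation, not part of the original file: informal
   Blackfin assembly for sequences the splitter above can emit into a
   D register:

     0x00020000 -> R0 = 1;       R0 <<= 17;        (shifted-constant path)
     0x00045678 -> R0 = 0x5678;  BITSET (R0, 18);  (high-word IOR path)
     0x12340005 -> R0 = 5;       R0.H = 0x1234;    (movstricthi_high path)
*/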
2653 /* Return true if VALUE is a legitimate constant offset for a memory
2654 operand of mode MODE. Return false if not. */
2656 static bool
2657 bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value)
2659 unsigned HOST_WIDE_INT v = value > 0 ? value : -value;
2660 int sz = GET_MODE_SIZE (mode);
2661 int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2;
2662 /* The usual offsettable_memref machinery doesn't work so well for this
2663 port, so we deal with the problem here. */
2664 if (value > 0 && sz == 8)
2665 v += 4;
2666 return (v & ~(0x7fff << shift)) == 0;
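/* Annotation, not in the original file: examples of the check above.  For
   SImode (shift == 2) the offset must be a multiple of 4 with magnitude
   at most 0x1fffc: 65532 passes, 65534 fails (not word-aligned), 0x20000
   fails (too large).  The DImode bias of 4 keeps the second SImode half
   of a double-word access addressable at offset + 4.  */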
2669 static bool
2670 bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode,
2671 enum rtx_code outer_code)
2673 if (strict)
2674 return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH);
2675 else
2676 return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
2679 /* Recognize an RTL expression that is a valid memory address for an
2680 instruction. The MODE argument is the machine mode for the MEM expression
2681 that wants to use this address.
2683 Blackfin addressing modes are as follows:
2685 [preg]
2686 [preg + imm16]
2688 B [ Preg + uimm15 ]
2689 W [ Preg + uimm16m2 ]
2690 [ Preg + uimm17m4 ]
2692 [preg++]
2693 [preg--]
2694 [--sp]
2695 */
2697 static bool
2698 bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
2700 switch (GET_CODE (x)) {
2701 case REG:
2702 if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM))
2703 return true;
2704 break;
2705 case PLUS:
2706 if (REG_P (XEXP (x, 0))
2707 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS)
2708 && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode)
2709 || (GET_CODE (XEXP (x, 1)) == CONST_INT
2710 && bfin_valid_add (mode, INTVAL (XEXP (x, 1))))))
2711 return true;
2712 break;
2713 case POST_INC:
2714 case POST_DEC:
2715 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2716 && REG_P (XEXP (x, 0))
2717 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC))
2718 return true;
2719 case PRE_DEC:
2720 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2721 && XEXP (x, 0) == stack_pointer_rtx
2722 && REG_P (XEXP (x, 0))
2723 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC))
2724 return true;
2725 break;
2726 default:
2727 break;
2729 return false;
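/* Annotation, not part of the original file: for SImode the code above
   accepts, e.g., [P0], [P0 + 0x1fffc], [P0++], [P0--] and [--sp], and
   rejects [P0 + P1] (no reg+reg form here) and [P0 + 2] (offset not a
   multiple of the access size).  */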
2732 /* Decide whether we can force certain constants to memory. If we
2733 decide we can't, the caller should be able to cope with it in
2734 another way. */
2736 static bool
2737 bfin_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED,
2738 rtx x ATTRIBUTE_UNUSED)
2740 /* We have only one class of non-legitimate constants, and our movsi
2741 expander knows how to handle them. Dropping these constants into the
2742 data section would only shift the problem - we'd still get relocs
2743 outside the object, in the data section rather than the text section. */
2744 return true;
2747 /* Ensure that for any constant of the form symbol + offset, the offset
2748 remains within the object. Any other constants are ok.
2749 This ensures that flat binaries never have to deal with relocations
2750 crossing section boundaries. */
2752 static bool
2753 bfin_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2755 rtx sym;
2756 HOST_WIDE_INT offset;
2758 if (GET_CODE (x) != CONST)
2759 return true;
2761 x = XEXP (x, 0);
2762 gcc_assert (GET_CODE (x) == PLUS);
2764 sym = XEXP (x, 0);
2765 x = XEXP (x, 1);
2766 if (GET_CODE (sym) != SYMBOL_REF
2767 || GET_CODE (x) != CONST_INT)
2768 return true;
2769 offset = INTVAL (x);
2771 if (SYMBOL_REF_DECL (sym) == 0)
2772 return true;
2773 if (offset < 0
2774 || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym))))
2775 return false;
2777 return true;
2780 static bool
2781 bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int *total, bool speed)
2783 enum rtx_code code = (enum rtx_code) code_i;
2784 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
2785 int cost2 = COSTS_N_INSNS (1);
2786 rtx op0, op1;
2788 switch (code)
2790 case CONST_INT:
2791 if (outer_code == SET || outer_code == PLUS)
2792 *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
2793 else if (outer_code == AND)
2794 *total = log2constp (~INTVAL (x)) ? 0 : cost2;
2795 else if (outer_code == LE || outer_code == LT || outer_code == EQ)
2796 *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2;
2797 else if (outer_code == LEU || outer_code == LTU)
2798 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2;
2799 else if (outer_code == MULT)
2800 *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2;
2801 else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2))
2802 *total = 0;
2803 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
2804 || outer_code == LSHIFTRT)
2805 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2;
2806 else if (outer_code == IOR || outer_code == XOR)
2807 *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2;
2808 else
2809 *total = cost2;
2810 return true;
2812 case CONST:
2813 case LABEL_REF:
2814 case SYMBOL_REF:
2815 case CONST_DOUBLE:
2816 *total = COSTS_N_INSNS (2);
2817 return true;
2819 case PLUS:
2820 op0 = XEXP (x, 0);
2821 op1 = XEXP (x, 1);
2822 if (GET_MODE (x) == SImode)
2824 if (GET_CODE (op0) == MULT
2825 && GET_CODE (XEXP (op0, 1)) == CONST_INT)
2827 HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
2828 if (val == 2 || val == 4)
2830 *total = cost2;
2831 *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
2832 *total += rtx_cost (op1, outer_code, speed);
2833 return true;
2836 *total = cost2;
2837 if (GET_CODE (op0) != REG
2838 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2839 *total += rtx_cost (op0, SET, speed);
2840 #if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
2841 towards creating too many induction variables. */
2842 if (!reg_or_7bit_operand (op1, SImode))
2843 *total += rtx_cost (op1, SET, speed);
2844 #endif
2846 else if (GET_MODE (x) == DImode)
2848 *total = 6 * cost2;
2849 if (GET_CODE (op1) != CONST_INT
2850 || !satisfies_constraint_Ks7 (op1))
2851 *total += rtx_cost (op1, PLUS, speed);
2852 if (GET_CODE (op0) != REG
2853 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2854 *total += rtx_cost (op0, PLUS, speed);
2856 return true;
2858 case MINUS:
2859 if (GET_MODE (x) == DImode)
2860 *total = 6 * cost2;
2861 else
2862 *total = cost2;
2863 return true;
2865 case ASHIFT:
2866 case ASHIFTRT:
2867 case LSHIFTRT:
2868 if (GET_MODE (x) == DImode)
2869 *total = 6 * cost2;
2870 else
2871 *total = cost2;
2873 op0 = XEXP (x, 0);
2874 op1 = XEXP (x, 1);
2875 if (GET_CODE (op0) != REG
2876 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2877 *total += rtx_cost (op0, code, speed);
2879 return true;
2881 case IOR:
2882 case AND:
2883 case XOR:
2884 op0 = XEXP (x, 0);
2885 op1 = XEXP (x, 1);
2887 /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */
2888 if (code == IOR)
2890 if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
2891 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
2892 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
2893 || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
2895 *total = cost2;
2896 return true;
2900 if (GET_CODE (op0) != REG
2901 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2902 *total += rtx_cost (op0, code, speed);
2904 if (GET_MODE (x) == DImode)
2906 *total = 2 * cost2;
2907 return true;
2909 *total = cost2;
2910 if (GET_MODE (x) != SImode)
2911 return true;
2913 if (code == AND)
2915 if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
2916 *total += rtx_cost (XEXP (x, 1), code, speed);
2918 else
2920 if (! regorlog2_operand (XEXP (x, 1), SImode))
2921 *total += rtx_cost (XEXP (x, 1), code, speed);
2924 return true;
2926 case ZERO_EXTRACT:
2927 case SIGN_EXTRACT:
2928 if (outer_code == SET
2929 && XEXP (x, 1) == const1_rtx
2930 && GET_CODE (XEXP (x, 2)) == CONST_INT)
2932 *total = 2 * cost2;
2933 return true;
2935 /* fall through */
2937 case SIGN_EXTEND:
2938 case ZERO_EXTEND:
2939 *total = cost2;
2940 return true;
2942 case MULT:
2944 op0 = XEXP (x, 0);
2945 op1 = XEXP (x, 1);
2946 if (GET_CODE (op0) == GET_CODE (op1)
2947 && (GET_CODE (op0) == ZERO_EXTEND
2948 || GET_CODE (op0) == SIGN_EXTEND))
2950 *total = COSTS_N_INSNS (1);
2951 op0 = XEXP (op0, 0);
2952 op1 = XEXP (op1, 0);
2954 else if (!speed)
2955 *total = COSTS_N_INSNS (1);
2956 else
2957 *total = COSTS_N_INSNS (3);
2959 if (GET_CODE (op0) != REG
2960 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2961 *total += rtx_cost (op0, MULT, speed);
2962 if (GET_CODE (op1) != REG
2963 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
2964 *total += rtx_cost (op1, MULT, speed);
2966 return true;
2968 case UDIV:
2969 case UMOD:
2970 *total = COSTS_N_INSNS (32);
2971 return true;
2973 case VEC_CONCAT:
2974 case VEC_SELECT:
2975 if (outer_code == SET)
2976 *total = cost2;
2977 return true;
2979 default:
2980 return false;
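/* Annotation, not in the original file: e.g. under the CONST_INT rules
   above, (plus (reg) (const_int 5)) adds no extra cost because 5
   satisfies Ks7, while (plus (reg) (const_int 500)) pays COSTS_N_INSNS (1)
   for the constant; a constant multiplier of 2 or 4 is free, matching the
   scaled-address forms the hardware provides.  */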
2984 /* Used for communication between {push,pop}_multiple_operation (which
2985 we use not only as a predicate) and the corresponding output functions. */
2986 static int first_preg_to_save, first_dreg_to_save;
2987 static int n_regs_to_save;
2989 int
2990 push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2992 int lastdreg = 8, lastpreg = 6;
2993 int i, group;
2995 first_preg_to_save = lastpreg;
2996 first_dreg_to_save = lastdreg;
2997 for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++)
2999 rtx t = XVECEXP (op, 0, i);
3000 rtx src, dest;
3001 int regno;
3003 if (GET_CODE (t) != SET)
3004 return 0;
3006 src = SET_SRC (t);
3007 dest = SET_DEST (t);
3008 if (GET_CODE (dest) != MEM || ! REG_P (src))
3009 return 0;
3010 dest = XEXP (dest, 0);
3011 if (GET_CODE (dest) != PLUS
3012 || ! REG_P (XEXP (dest, 0))
3013 || REGNO (XEXP (dest, 0)) != REG_SP
3014 || GET_CODE (XEXP (dest, 1)) != CONST_INT
3015 || INTVAL (XEXP (dest, 1)) != -i * 4)
3016 return 0;
3018 regno = REGNO (src);
3019 if (group == 0)
3021 if (D_REGNO_P (regno))
3023 group = 1;
3024 first_dreg_to_save = lastdreg = regno - REG_R0;
3026 else if (regno >= REG_P0 && regno <= REG_P7)
3028 group = 2;
3029 first_preg_to_save = lastpreg = regno - REG_P0;
3031 else
3032 return 0;
3034 continue;
3037 if (group == 1)
3039 if (regno >= REG_P0 && regno <= REG_P7)
3041 group = 2;
3042 first_preg_to_save = lastpreg = regno - REG_P0;
3044 else if (regno != REG_R0 + lastdreg + 1)
3045 return 0;
3046 else
3047 lastdreg++;
3049 else if (group == 2)
3051 if (regno != REG_P0 + lastpreg + 1)
3052 return 0;
3053 lastpreg++;
3056 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3057 return 1;
3060 int
3061 pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3063 int lastdreg = 8, lastpreg = 6;
3064 int i, group;
3066 for (i = 1, group = 0; i < XVECLEN (op, 0); i++)
3068 rtx t = XVECEXP (op, 0, i);
3069 rtx src, dest;
3070 int regno;
3072 if (GET_CODE (t) != SET)
3073 return 0;
3075 src = SET_SRC (t);
3076 dest = SET_DEST (t);
3077 if (GET_CODE (src) != MEM || ! REG_P (dest))
3078 return 0;
3079 src = XEXP (src, 0);
3081 if (i == 1)
3083 if (! REG_P (src) || REGNO (src) != REG_SP)
3084 return 0;
3086 else if (GET_CODE (src) != PLUS
3087 || ! REG_P (XEXP (src, 0))
3088 || REGNO (XEXP (src, 0)) != REG_SP
3089 || GET_CODE (XEXP (src, 1)) != CONST_INT
3090 || INTVAL (XEXP (src, 1)) != (i - 1) * 4)
3091 return 0;
3093 regno = REGNO (dest);
3094 if (group == 0)
3096 if (regno == REG_R7)
3098 group = 1;
3099 lastdreg = 7;
3101 else if (regno != REG_P0 + lastpreg - 1)
3102 return 0;
3103 else
3104 lastpreg--;
3106 else if (group == 1)
3108 if (regno != REG_R0 + lastdreg - 1)
3109 return 0;
3110 else
3111 lastdreg--;
3114 first_dreg_to_save = lastdreg;
3115 first_preg_to_save = lastpreg;
3116 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3117 return 1;
3120 /* Emit assembly code for one multi-register push described by INSN, with
3121 operands in OPERANDS. */
3123 void
3124 output_push_multiple (rtx insn, rtx *operands)
3126 char buf[80];
3127 int ok;
3129 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3130 ok = push_multiple_operation (PATTERN (insn), VOIDmode);
3131 gcc_assert (ok);
3133 if (first_dreg_to_save == 8)
3134 sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
3135 else if (first_preg_to_save == 6)
3136 sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save);
3137 else
3138 sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n",
3139 first_dreg_to_save, first_preg_to_save);
3141 output_asm_insn (buf, operands);
3144 /* Emit assembly code for one multi-register pop described by INSN, with
3145 operands in OPERANDS. */
3147 void
3148 output_pop_multiple (rtx insn, rtx *operands)
3150 char buf[80];
3151 int ok;
3153 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3154 ok = pop_multiple_operation (PATTERN (insn), VOIDmode);
3155 gcc_assert (ok);
3157 if (first_dreg_to_save == 8)
3158 sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save);
3159 else if (first_preg_to_save == 6)
3160 sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save);
3161 else
3162 sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n",
3163 first_dreg_to_save, first_preg_to_save);
3165 output_asm_insn (buf, operands);
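/* Annotation, not part of the original file: examples of the strings the
   two output functions above produce:

     first_dreg_to_save 4, first_preg_to_save 6 (D registers only)
       -> "[--sp] = ( r7:4 );"        and  "( r7:4 ) = [sp++];"
     first_dreg_to_save 8, first_preg_to_save 3 (P registers only)
       -> "[--sp] = ( p5:3 );"        and  "( p5:3 ) = [sp++];"
     first_dreg_to_save 6, first_preg_to_save 4 (both)
       -> "[--sp] = ( r7:6, p5:4 );"  and  "( r7:6, p5:4 ) = [sp++];"
*/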
3168 /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */
3170 static void
3171 single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset)
3173 rtx scratch = gen_reg_rtx (mode);
3174 rtx srcmem, dstmem;
3176 srcmem = adjust_address_nv (src, mode, offset);
3177 dstmem = adjust_address_nv (dst, mode, offset);
3178 emit_move_insn (scratch, srcmem);
3179 emit_move_insn (dstmem, scratch);
3182 /* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with
3183 alignment ALIGN_EXP. Return true if successful, false if we should fall
3184 back on a different method. */
3186 bool
3187 bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
3189 rtx srcreg, destreg, countreg;
3190 HOST_WIDE_INT align = 0;
3191 unsigned HOST_WIDE_INT count = 0;
3193 if (GET_CODE (align_exp) == CONST_INT)
3194 align = INTVAL (align_exp);
3195 if (GET_CODE (count_exp) == CONST_INT)
3197 count = INTVAL (count_exp);
3198 #if 0
3199 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
3200 return false;
3201 #endif
3204 /* If optimizing for size, only do single copies inline. */
3205 if (optimize_size)
3207 if (count == 2 && align < 2)
3208 return false;
3209 if (count == 4 && align < 4)
3210 return false;
3211 if (count != 1 && count != 2 && count != 4)
3212 return false;
3214 if (align < 2 && count != 1)
3215 return false;
3217 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
3218 if (destreg != XEXP (dst, 0))
3219 dst = replace_equiv_address_nv (dst, destreg);
3220 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
3221 if (srcreg != XEXP (src, 0))
3222 src = replace_equiv_address_nv (src, srcreg);
3224 if (count != 0 && align >= 2)
3226 unsigned HOST_WIDE_INT offset = 0;
3228 if (align >= 4)
3230 if ((count & ~3) == 4)
3232 single_move_for_movmem (dst, src, SImode, offset);
3233 offset = 4;
3235 else if (count & ~3)
3237 HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1;
3238 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3240 emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
3241 cfun->machine->has_loopreg_clobber = true;
3243 if (count & 2)
3245 single_move_for_movmem (dst, src, HImode, offset);
3246 offset += 2;
3249 else
3251 if ((count & ~1) == 2)
3253 single_move_for_movmem (dst, src, HImode, offset);
3254 offset = 2;
3256 else if (count & ~1)
3258 HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1;
3259 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3261 emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
3262 cfun->machine->has_loopreg_clobber = true;
3265 if (count & 1)
3267 single_move_for_movmem (dst, src, QImode, offset);
3269 return true;
3271 return false;
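/* Illustrative annotation, not part of the original file: for a 23-byte
   copy with 4-byte alignment the expansion above emits a rep_movsi
   hardware loop covering the five aligned words (countreg holds
   (23 >> 2) - 1 == 4, the bias the pattern expects), then one HImode move
   and one QImode move for the 3-byte tail.  */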
3274 /* Compute the alignment for a local variable.
3275 TYPE is the data type, and ALIGN is the alignment that
3276 the object would ordinarily have. The value of this macro is used
3277 instead of that alignment to align the object. */
3279 unsigned
3280 bfin_local_alignment (tree type, unsigned align)
3282 /* Increasing the alignment of (relatively) big types allows the builtin
3283 memcpy to use 32-bit loads/stores. */
3284 if (TYPE_SIZE (type)
3285 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
3286 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8
3287 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32)
3288 return 32;
3289 return align;
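/* Annotation, not in the original file: e.g. a local char buf[12] (96 bits,
   larger than 8) is bumped to 32-bit alignment so a builtin memcpy to or
   from it can use word accesses; a single char keeps its natural
   alignment.  */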
3292 /* Implement TARGET_SCHED_ISSUE_RATE. */
3294 static int
3295 bfin_issue_rate (void)
3297 return 3;
3300 static int
3301 bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3303 enum attr_type dep_insn_type;
3304 int dep_insn_code_number;
3306 /* Anti and output dependencies have zero cost. */
3307 if (REG_NOTE_KIND (link) != 0)
3308 return 0;
3310 dep_insn_code_number = recog_memoized (dep_insn);
3312 /* If we can't recognize the insns, we can't really do anything. */
3313 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
3314 return cost;
3316 dep_insn_type = get_attr_type (dep_insn);
3318 if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD)
3320 rtx pat = PATTERN (dep_insn);
3321 rtx dest, src;
3323 if (GET_CODE (pat) == PARALLEL)
3324 pat = XVECEXP (pat, 0, 0);
3325 dest = SET_DEST (pat);
3326 src = SET_SRC (pat);
3327 if (! ADDRESS_REGNO_P (REGNO (dest))
3328 || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
3329 return cost;
3330 return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
3333 return cost;
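/* Annotation, not part of the original file: the bump above models the
   pointer-register hazard, e.g.

     P0 = [P1];     -- mcld feeding an address register
     R0 = [P0];     -- consumer: the dependence cost is raised by 3

   and by 4 when the producer is a move from a D register into a
   P register.  */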
3336 /* This function acts like NEXT_INSN, but is aware of three-insn bundles and
3337 skips all subsequent parallel instructions if INSN is the start of such
3338 a group. */
3339 static rtx
3340 find_next_insn_start (rtx insn)
3342 if (GET_MODE (insn) == SImode)
3344 while (GET_MODE (insn) != QImode)
3345 insn = NEXT_INSN (insn);
3347 return NEXT_INSN (insn);
3350 /* This function acts like PREV_INSN, but is aware of three-insn bundles and
3351 skips back to the first instruction of a bundle if INSN is inside
3352 such a group. */
3353 static rtx
3354 find_prev_insn_start (rtx insn)
3356 insn = PREV_INSN (insn);
3357 gcc_assert (GET_MODE (insn) != SImode);
3358 if (GET_MODE (insn) == QImode)
3360 while (GET_MODE (PREV_INSN (insn)) == SImode)
3361 insn = PREV_INSN (insn);
3363 return insn;
3366 /* Increment the counter for the number of loop instructions in the
3367 current function. */
3369 void
3370 bfin_hardware_loop (void)
3372 cfun->machine->has_hardware_loops++;
3375 /* Maximum loop nesting depth. */
3376 #define MAX_LOOP_DEPTH 2
3378 /* Maximum size of a loop. */
3379 #define MAX_LOOP_LENGTH 2042
3381 /* Maximum distance of the LSETUP instruction from the loop start. */
3382 #define MAX_LSETUP_DISTANCE 30
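/* Annotation, not in the original file: the hardware loops created below
   use the Blackfin LSETUP instruction,

     LSETUP (begin_label, end_label) LCn = Pm;

   where LCn is one of the loop counters LC0/LC1.  The limits above encode
   its constraints: two nesting levels, a loop body of at most
   MAX_LOOP_LENGTH bytes, and an LSETUP no more than MAX_LSETUP_DISTANCE
   bytes before the loop start.  */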
3384 /* We need to keep a vector of loops */
3385 typedef struct loop_info_d *loop_info;
3386 DEF_VEC_P (loop_info);
3387 DEF_VEC_ALLOC_P (loop_info,heap);
3389 /* Information about a loop we have found (or are in the process of
3390 finding). */
3391 struct GTY (()) loop_info_d
3393 /* loop number, for dumps */
3394 int loop_no;
3396 /* All edges that jump into and out of the loop. */
3397 VEC(edge,gc) *incoming;
3399 /* We can handle two cases: all incoming edges have the same destination
3400 block, or all incoming edges have the same source block. These two
3401 members are set to the common source or destination we found, or NULL
3402 if different blocks were found. If both are NULL the loop can't be
3403 optimized. */
3404 basic_block incoming_src;
3405 basic_block incoming_dest;
3407 /* First block in the loop. This is the one branched to by the loop_end
3408 insn. */
3409 basic_block head;
3411 /* Last block in the loop (the one with the loop_end insn). */
3412 basic_block tail;
3414 /* The successor block of the loop. This is the one the loop_end insn
3415 falls into. */
3416 basic_block successor;
3418 /* The last instruction in the tail. */
3419 rtx last_insn;
3421 /* The loop_end insn. */
3422 rtx loop_end;
3424 /* The iteration register. */
3425 rtx iter_reg;
3427 /* The new label placed at the beginning of the loop. */
3428 rtx start_label;
3430 /* The new label placed at the end of the loop. */
3431 rtx end_label;
3433 /* The length of the loop. */
3434 int length;
3436 /* The nesting depth of the loop. */
3437 int depth;
3439 /* Nonzero if we can't optimize this loop. */
3440 int bad;
3442 /* True if we have visited this loop. */
3443 int visited;
3445 /* True if this loop body clobbers any of LC0, LT0, or LB0. */
3446 int clobber_loop0;
3448 /* True if this loop body clobbers any of LC1, LT1, or LB1. */
3449 int clobber_loop1;
3451 /* Next loop in the graph. */
3452 struct loop_info_d *next;
3454 /* Immediate outer loop of this loop. */
3455 struct loop_info_d *outer;
3457 /* Vector of blocks only within the loop, including those within
3458 inner loops. */
3459 VEC (basic_block,heap) *blocks;
3461 /* Same information in a bitmap. */
3462 bitmap block_bitmap;
3464 /* Vector of inner loops within this loop */
3465 VEC (loop_info,heap) *loops;
3468 static void
3469 bfin_dump_loops (loop_info loops)
3471 loop_info loop;
3473 for (loop = loops; loop; loop = loop->next)
3475 loop_info i;
3476 basic_block b;
3477 unsigned ix;
3479 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
3480 if (loop->bad)
3481 fprintf (dump_file, "(bad) ");
3482 fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth);
3484 fprintf (dump_file, " blocks: [ ");
3485 FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, b)
3486 fprintf (dump_file, "%d ", b->index);
3487 fprintf (dump_file, "] ");
3489 fprintf (dump_file, " inner loops: [ ");
3490 FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, i)
3491 fprintf (dump_file, "%d ", i->loop_no);
3492 fprintf (dump_file, "]\n");
3494 fprintf (dump_file, "\n");
3497 /* Scan the blocks of LOOP (and its inferiors) looking for basic block
3498 BB. Return true if we find it. */
3500 static bool
3501 bfin_bb_in_loop (loop_info loop, basic_block bb)
3503 return bitmap_bit_p (loop->block_bitmap, bb->index);
3506 /* Scan the blocks of LOOP (and its inferiors) looking for uses of
3507 REG. Return true if we find any. Don't count the loop's loop_end
3508 insn if it matches LOOP_END. */
3510 static bool
3511 bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
3513 unsigned ix;
3514 basic_block bb;
3516 FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb)
3518 rtx insn;
3520 for (insn = BB_HEAD (bb);
3521 insn != NEXT_INSN (BB_END (bb));
3522 insn = NEXT_INSN (insn))
3524 if (!INSN_P (insn))
3525 continue;
3526 if (insn == loop_end)
3527 continue;
3528 if (reg_mentioned_p (reg, PATTERN (insn)))
3529 return true;
3532 return false;
3535 /* Estimate the length of INSN conservatively. */
3537 static int
3538 length_for_loop (rtx insn)
3540 int length = 0;
3541 if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
3543 if (ENABLE_WA_SPECULATIVE_SYNCS)
3544 length = 8;
3545 else if (ENABLE_WA_SPECULATIVE_LOADS)
3546 length = 6;
3548 else if (LABEL_P (insn))
3550 if (ENABLE_WA_SPECULATIVE_SYNCS)
3551 length = 4;
3554 if (NONDEBUG_INSN_P (insn))
3555 length += get_attr_length (insn);
3557 return length;
3560 /* Optimize LOOP. */
3562 static void
3563 bfin_optimize_loop (loop_info loop)
3565 basic_block bb;
3566 loop_info inner;
3567 rtx insn, last_insn;
3568 rtx loop_init, start_label, end_label;
3569 rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
3570 rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
3571 rtx lc_reg, lt_reg, lb_reg;
3572 rtx seq, seq_end;
3573 int length;
3574 unsigned ix;
3575 int inner_depth = 0;
3577 if (loop->visited)
3578 return;
3580 loop->visited = 1;
3582 if (loop->bad)
3584 if (dump_file)
3585 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
3586 goto bad_loop;
3589 /* Every loop contains in its list of inner loops every loop nested inside
3590 it, even if there are intermediate loops. This works because we're doing
3591 a depth-first search here and never visit a loop more than once. */
3592 FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, inner)
3594 bfin_optimize_loop (inner);
3596 if (!inner->bad && inner_depth < inner->depth)
3598 inner_depth = inner->depth;
3600 loop->clobber_loop0 |= inner->clobber_loop0;
3601 loop->clobber_loop1 |= inner->clobber_loop1;
3605 loop->depth = inner_depth + 1;
3606 if (loop->depth > MAX_LOOP_DEPTH)
3608 if (dump_file)
3609 fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
3610 goto bad_loop;
3613 /* Get the loop iteration register. */
3614 iter_reg = loop->iter_reg;
3616 if (!REG_P (iter_reg))
3618 if (dump_file)
3619 fprintf (dump_file, ";; loop %d iteration count not in a register\n",
3620 loop->loop_no);
3621 goto bad_loop;
3623 scratchreg = NULL_RTX;
3624 scratch_init = iter_reg;
3625 scratch_init_insn = NULL_RTX;
3626 if (!PREG_P (iter_reg) && loop->incoming_src)
3628 basic_block bb_in = loop->incoming_src;
3629 int i;
3630 for (i = REG_P0; i <= REG_P5; i++)
3631 if ((df_regs_ever_live_p (i)
3632 || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
3633 && call_used_regs[i]))
3634 && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
3636 scratchreg = gen_rtx_REG (SImode, i);
3637 break;
3639 for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
3640 insn = PREV_INSN (insn))
3642 rtx set;
3643 if (NOTE_P (insn) || BARRIER_P (insn))
3644 continue;
3645 set = single_set (insn);
3646 if (set && rtx_equal_p (SET_DEST (set), iter_reg))
3648 if (CONSTANT_P (SET_SRC (set)))
3650 scratch_init = SET_SRC (set);
3651 scratch_init_insn = insn;
3653 break;
3655 else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
3656 break;
3660 if (loop->incoming_src)
3662 /* Make sure the predecessor is before the loop start label, as required by
3663 the LSETUP instruction. */
3664 length = 0;
3665 insn = BB_END (loop->incoming_src);
3666 /* If we have to insert the LSETUP before a jump, count that jump in the
3667 length. */
3668 if (VEC_length (edge, loop->incoming) > 1
3669 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
3671 gcc_assert (JUMP_P (insn));
3672 insn = PREV_INSN (insn);
3675 for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
3676 length += length_for_loop (insn);
3678 if (!insn)
3680 if (dump_file)
3681 fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
3682 loop->loop_no);
3683 goto bad_loop;
3686 /* Account for the pop of a scratch register where necessary. */
3687 if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
3688 && ENABLE_WA_LOAD_LCREGS)
3689 length += 2;
3691 if (length > MAX_LSETUP_DISTANCE)
3693 if (dump_file)
3694 fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
3695 goto bad_loop;
3699 /* Check if start_label appears before loop_end and calculate the
3700 offset between them. We calculate the length of instructions
3701 conservatively. */
3702 length = 0;
3703 for (insn = loop->start_label;
3704 insn && insn != loop->loop_end;
3705 insn = NEXT_INSN (insn))
3706 length += length_for_loop (insn);
3708 if (!insn)
3710 if (dump_file)
3711 fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
3712 loop->loop_no);
3713 goto bad_loop;
3716 loop->length = length;
3717 if (loop->length > MAX_LOOP_LENGTH)
3719 if (dump_file)
3720 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3721 goto bad_loop;
3724 /* Scan all the blocks to make sure they don't use iter_reg. */
3725 if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
3727 if (dump_file)
3728 fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
3729 goto bad_loop;
3732 /* Scan all the insns to see if the loop body clobbers
3733 any hardware loop registers. */
3735 reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
3736 reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
3737 reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
3738 reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
3739 reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
3740 reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
3742 FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb)
3744 rtx insn;
3746 for (insn = BB_HEAD (bb);
3747 insn != NEXT_INSN (BB_END (bb));
3748 insn = NEXT_INSN (insn))
3750 if (!INSN_P (insn))
3751 continue;
3753 if (reg_set_p (reg_lc0, insn)
3754 || reg_set_p (reg_lt0, insn)
3755 || reg_set_p (reg_lb0, insn))
3756 loop->clobber_loop0 = 1;
3758 if (reg_set_p (reg_lc1, insn)
3759 || reg_set_p (reg_lt1, insn)
3760 || reg_set_p (reg_lb1, insn))
3761 loop->clobber_loop1 |= 1;
3765 if ((loop->clobber_loop0 && loop->clobber_loop1)
3766 || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
3768 loop->depth = MAX_LOOP_DEPTH + 1;
3769 if (dump_file)
3770 fprintf (dump_file, ";; loop %d no loop reg available\n",
3771 loop->loop_no);
3772 goto bad_loop;
3775 /* There should be an instruction before the loop_end instruction
3776 in the same basic block. And the instruction must not be
3777 - JUMP
3778 - CONDITIONAL BRANCH
3779 - CALL
3780 - CSYNC
3781 - SSYNC
3782 - Returns (RTS, RTN, etc.) */
3784 bb = loop->tail;
3785 last_insn = find_prev_insn_start (loop->loop_end);
3787 while (1)
3789 for (; last_insn != BB_HEAD (bb);
3790 last_insn = find_prev_insn_start (last_insn))
3791 if (NONDEBUG_INSN_P (last_insn))
3792 break;
3794 if (last_insn != BB_HEAD (bb))
3795 break;
3797 if (single_pred_p (bb)
3798 && single_pred_edge (bb)->flags & EDGE_FALLTHRU
3799 && single_pred (bb) != ENTRY_BLOCK_PTR)
3801 bb = single_pred (bb);
3802 last_insn = BB_END (bb);
3803 continue;
3805 else
3807 last_insn = NULL_RTX;
3808 break;
3812 if (!last_insn)
3814 if (dump_file)
3815 fprintf (dump_file, ";; loop %d has no last instruction\n",
3816 loop->loop_no);
3817 goto bad_loop;
3820 if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
3822 if (dump_file)
3823 fprintf (dump_file, ";; loop %d has bad last instruction\n",
3824 loop->loop_no);
3825 goto bad_loop;
3827 /* In all other cases, try to replace a bad last insn with a nop. */
3828 else if (JUMP_P (last_insn)
3829 || CALL_P (last_insn)
3830 || get_attr_type (last_insn) == TYPE_SYNC
3831 || get_attr_type (last_insn) == TYPE_CALL
3832 || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
3833 || recog_memoized (last_insn) == CODE_FOR_return_internal
3834 || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
3835 || asm_noperands (PATTERN (last_insn)) >= 0)
3837 if (loop->length + 2 > MAX_LOOP_LENGTH)
3839 if (dump_file)
3840 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3841 goto bad_loop;
3843 if (dump_file)
3844 fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
3845 loop->loop_no);
3847 last_insn = emit_insn_after (gen_forced_nop (), last_insn);
3850 loop->last_insn = last_insn;
3852 /* The loop is good for replacement. */
3853 start_label = loop->start_label;
3854 end_label = gen_label_rtx ();
3855 iter_reg = loop->iter_reg;
3857 if (loop->depth == 1 && !loop->clobber_loop1)
3859 lc_reg = reg_lc1;
3860 lt_reg = reg_lt1;
3861 lb_reg = reg_lb1;
3862 loop->clobber_loop1 = 1;
3864 else
3866 lc_reg = reg_lc0;
3867 lt_reg = reg_lt0;
3868 lb_reg = reg_lb0;
3869 loop->clobber_loop0 = 1;
3872 loop->end_label = end_label;
3874 /* Create a sequence containing the loop setup. */
3875 start_sequence ();
3877 /* LSETUP only accepts P registers. If we have one, we can use it,
3878 otherwise there are several ways of working around the problem.
3879 If we're not affected by anomaly 312, we can load the LC register
3880 from any iteration register, and use LSETUP without initialization.
3881 If we've found a P scratch register that's not live here, we can
3882 instead copy the iter_reg into that and use an initializing LSETUP.
3883 If all else fails, push and pop P0 and use it as a scratch. */
3884 if (P_REGNO_P (REGNO (iter_reg)))
3886 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3887 lb_reg, end_label,
3888 lc_reg, iter_reg);
3889 seq_end = emit_insn (loop_init);
3891 else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
3893 emit_insn (gen_movsi (lc_reg, iter_reg));
3894 loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
3895 lb_reg, end_label,
3896 lc_reg);
3897 seq_end = emit_insn (loop_init);
3899 else if (scratchreg != NULL_RTX)
3901 emit_insn (gen_movsi (scratchreg, scratch_init));
3902 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3903 lb_reg, end_label,
3904 lc_reg, scratchreg);
3905 seq_end = emit_insn (loop_init);
3906 if (scratch_init_insn != NULL_RTX)
3907 delete_insn (scratch_init_insn);
3909 else
3911 rtx p0reg = gen_rtx_REG (SImode, REG_P0);
3912 rtx push = gen_frame_mem (SImode,
3913 gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
3914 rtx pop = gen_frame_mem (SImode,
3915 gen_rtx_POST_INC (SImode, stack_pointer_rtx));
3916 emit_insn (gen_movsi (push, p0reg));
3917 emit_insn (gen_movsi (p0reg, scratch_init));
3918 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3919 lb_reg, end_label,
3920 lc_reg, p0reg);
3921 emit_insn (loop_init);
3922 seq_end = emit_insn (gen_movsi (p0reg, pop));
3923 if (scratch_init_insn != NULL_RTX)
3924 delete_insn (scratch_init_insn);
3927 if (dump_file)
3929 fprintf (dump_file, ";; replacing loop %d initializer with\n",
3930 loop->loop_no);
3931 print_rtl_single (dump_file, loop_init);
3932 fprintf (dump_file, ";; replacing loop %d terminator with\n",
3933 loop->loop_no);
3934 print_rtl_single (dump_file, loop->loop_end);
3937 /* If the loop isn't entered at the top, also create a jump to the entry
3938 point. */
3939 if (!loop->incoming_src && loop->head != loop->incoming_dest)
3941 rtx label = BB_HEAD (loop->incoming_dest);
3942 /* If we're jumping to the final basic block in the loop, and there's
3943 only one cheap instruction before the end (typically an increment of
3944 an induction variable), we can just emit a copy here instead of a
3945 jump. */
3946 if (loop->incoming_dest == loop->tail
3947 && next_real_insn (label) == last_insn
3948 && asm_noperands (last_insn) < 0
3949 && GET_CODE (PATTERN (last_insn)) == SET)
3951 seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
3953 else
3955 emit_jump_insn (gen_jump (label));
3956 seq_end = emit_barrier ();
3960 seq = get_insns ();
3961 end_sequence ();
3963 if (loop->incoming_src)
3965 rtx prev = BB_END (loop->incoming_src);
3966 if (VEC_length (edge, loop->incoming) > 1
3967 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
3969 gcc_assert (JUMP_P (prev));
3970 prev = PREV_INSN (prev);
3972 emit_insn_after (seq, prev);
3974 else
3976 basic_block new_bb;
3977 edge e;
3978 edge_iterator ei;
3980 #ifdef ENABLE_CHECKING
3981 if (loop->head != loop->incoming_dest)
3983 /* We aren't entering the loop at the top. Since we've established
3984 that the loop is entered only at one point, this means there
3985 can't be fallthru edges into the head. Any such fallthru edges
3986 would become invalid when we insert the new block, so verify
3987 that this does not in fact happen. */
3988 FOR_EACH_EDGE (e, ei, loop->head->preds)
3989 gcc_assert (!(e->flags & EDGE_FALLTHRU));
3991 #endif
3993 emit_insn_before (seq, BB_HEAD (loop->head));
3994 seq = emit_label_before (gen_label_rtx (), seq);
3996 new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
3997 FOR_EACH_EDGE (e, ei, loop->incoming)
3999 if (!(e->flags & EDGE_FALLTHRU)
4000 || e->dest != loop->head)
4001 redirect_edge_and_branch_force (e, new_bb);
4002 else
4003 redirect_edge_succ (e, new_bb);
4005 e = make_edge (new_bb, loop->head, 0);
4008 delete_insn (loop->loop_end);
4009 /* Insert the loop end label before the last instruction of the loop. */
4010 emit_label_before (loop->end_label, loop->last_insn);
4012 return;
4014 bad_loop:
4016 if (dump_file)
4017 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
4019 loop->bad = 1;
4021 if (DPREG_P (loop->iter_reg))
4023 /* If loop->iter_reg is a DREG or PREG, we can split it here
4024 without a scratch register. */
4025 rtx insn, test;
4027 emit_insn_before (gen_addsi3 (loop->iter_reg,
4028 loop->iter_reg,
4029 constm1_rtx),
4030 loop->loop_end);
4032 test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
4033 insn = emit_jump_insn_before (gen_cbranchsi4 (test,
4034 loop->iter_reg, const0_rtx,
4035 loop->start_label),
4036 loop->loop_end);
4038 JUMP_LABEL (insn) = loop->start_label;
4039 LABEL_NUSES (loop->start_label)++;
4040 delete_insn (loop->loop_end);
4044 /* Called from bfin_reorg_loops when a potential loop end is found. LOOP is
4045 a newly set up structure describing the loop; it is this function's
4046 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
4047 loop_end insn and its enclosing basic block. */
4049 static void
4050 bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn)
4052 unsigned dwork = 0;
4053 basic_block bb;
4054 VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20);
4056 loop->tail = tail_bb;
4057 loop->head = BRANCH_EDGE (tail_bb)->dest;
4058 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
4059 loop->loop_end = tail_insn;
4060 loop->last_insn = NULL_RTX;
4061 loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1));
4062 loop->depth = loop->length = 0;
4063 loop->visited = 0;
4064 loop->clobber_loop0 = loop->clobber_loop1 = 0;
4065 loop->outer = NULL;
4066 loop->loops = NULL;
4067 loop->incoming = VEC_alloc (edge, gc, 2);
4068 loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
4069 loop->end_label = NULL_RTX;
4070 loop->bad = 0;
4072 VEC_safe_push (basic_block, heap, works, loop->head);
4074 while (VEC_iterate (basic_block, works, dwork++, bb))
4076 edge e;
4077 edge_iterator ei;
4078 if (bb == EXIT_BLOCK_PTR)
4080 /* We've reached the exit block. The loop must be bad. */
4081 if (dump_file)
4082 fprintf (dump_file,
4083 ";; Loop is bad - reached exit block while scanning\n");
4084 loop->bad = 1;
4085 break;
4088 if (!bitmap_set_bit (loop->block_bitmap, bb->index))
4089 continue;
4091 /* We've not seen this block before. Add it to the loop's
4092 list and then add each successor to the work list. */
4094 VEC_safe_push (basic_block, heap, loop->blocks, bb);
4096 if (bb != tail_bb)
4098 FOR_EACH_EDGE (e, ei, bb->succs)
4100 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
4101 if (!REGNO_REG_SET_P (df_get_live_in (succ),
4102 REGNO (loop->iter_reg)))
4103 continue;
4104 if (!VEC_space (basic_block, works, 1))
4106 if (dwork)
4108 VEC_block_remove (basic_block, works, 0, dwork);
4109 dwork = 0;
4111 else
4112 VEC_reserve (basic_block, heap, works, 1);
4114 VEC_quick_push (basic_block, works, succ);
4119 /* Find the predecessor, and make sure nothing else jumps into this loop. */
4120 if (!loop->bad)
4122 int pass, retry;
4123 FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb)
4125 edge e;
4126 edge_iterator ei;
4127 FOR_EACH_EDGE (e, ei, bb->preds)
4129 basic_block pred = e->src;
4131 if (!bfin_bb_in_loop (loop, pred))
4133 if (dump_file)
4134 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
4135 loop->loop_no, pred->index,
4136 e->dest->index);
4137 VEC_safe_push (edge, gc, loop->incoming, e);
4142 for (pass = 0, retry = 1; retry && pass < 2; pass++)
4144 edge e;
4145 edge_iterator ei;
4146 bool first = true;
4147 retry = 0;
4149 FOR_EACH_EDGE (e, ei, loop->incoming)
4151 if (first)
4153 loop->incoming_src = e->src;
4154 loop->incoming_dest = e->dest;
4155 first = false;
4157 else
4159 if (e->dest != loop->incoming_dest)
4160 loop->incoming_dest = NULL;
4161 if (e->src != loop->incoming_src)
4162 loop->incoming_src = NULL;
4164 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
4166 if (pass == 0)
4168 if (dump_file)
4169 fprintf (dump_file,
4170 ";; retrying loop %d with forwarder blocks\n",
4171 loop->loop_no);
4172 retry = 1;
4173 break;
4175 loop->bad = 1;
4176 if (dump_file)
4177 fprintf (dump_file,
4178 ";; can't find suitable entry for loop %d\n",
4179 loop->loop_no);
4180 goto out;
4183 if (retry)
4185 retry = 0;
4186 FOR_EACH_EDGE (e, ei, loop->incoming)
4188 if (forwarder_block_p (e->src))
4190 edge e2;
4191 edge_iterator ei2;
4193 if (dump_file)
4194 fprintf (dump_file,
4195 ";; Adding forwarder block %d to loop %d and retrying\n",
4196 e->src->index, loop->loop_no);
4197 VEC_safe_push (basic_block, heap, loop->blocks, e->src);
4198 bitmap_set_bit (loop->block_bitmap, e->src->index);
4199 FOR_EACH_EDGE (e2, ei2, e->src->preds)
4200 VEC_safe_push (edge, gc, loop->incoming, e2);
4201 VEC_unordered_remove (edge, loop->incoming, ei.index);
4202 retry = 1;
4203 break;
4206 if (!retry)
4208 if (dump_file)
4209 fprintf (dump_file, ";; No forwarder blocks found\n");
4210 loop->bad = 1;
4216 out:
4217 VEC_free (basic_block, heap, works);
4220 /* Analyze the structure of the loops in the current function. Use STACK
4221 for bitmap allocations. Returns all the valid candidates for hardware
4222 loops found in this function. */
4223 static loop_info
4224 bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file)
4226 loop_info loops = NULL;
4227 loop_info loop;
4228 basic_block bb;
4229 bitmap tmp_bitmap;
4230 int nloops = 0;
4232 /* Find all the possible loop tails. This means searching for every
4233 loop_end instruction. For each one found, create a loop_info
4234 structure and add the head block to the work list. */
4235 FOR_EACH_BB (bb)
4237 rtx tail = BB_END (bb);
4239 while (GET_CODE (tail) == NOTE)
4240 tail = PREV_INSN (tail);
4242 bb->aux = NULL;
4244 if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
4246 rtx insn;
4247 /* A possible loop end.  */
4249 /* There's a degenerate case we can handle - an empty loop consisting
4250 of only a back branch. Handle that by deleting the branch. */
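/* Hedged example: a counted loop with an empty body compiles down to a
   single loop_end insn whose branch target's first real insn is the
   loop_end itself; removing that branch removes the loop.  */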
4251 insn = BB_HEAD (BRANCH_EDGE (bb)->dest);
4252 if (next_real_insn (insn) == tail)
4254 if (dump_file)
4256 fprintf (dump_file, ";; degenerate loop ending at\n");
4257 print_rtl_single (dump_file, tail);
4259 delete_insn_and_edges (tail);
4260 continue;
4263 loop = XNEW (struct loop_info_d);
4264 loop->next = loops;
4265 loops = loop;
4266 loop->loop_no = nloops++;
4267 loop->blocks = VEC_alloc (basic_block, heap, 20);
4268 loop->block_bitmap = BITMAP_ALLOC (stack);
4269 bb->aux = loop;
4271 if (dump_file)
4273 fprintf (dump_file, ";; potential loop %d ending at\n",
4274 loop->loop_no);
4275 print_rtl_single (dump_file, tail);
4278 bfin_discover_loop (loop, bb, tail);
4282 tmp_bitmap = BITMAP_ALLOC (stack);
4283 /* Compute loop nestings. */
4284 for (loop = loops; loop; loop = loop->next)
4286 loop_info other;
4287 if (loop->bad)
4288 continue;
4290 for (other = loop->next; other; other = other->next)
4292 if (other->bad)
4293 continue;
4295 bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap);
4296 if (bitmap_empty_p (tmp_bitmap))
4297 continue;
4298 if (bitmap_equal_p (tmp_bitmap, other->block_bitmap))
4300 other->outer = loop;
4301 VEC_safe_push (loop_info, heap, loop->loops, other);
4303 else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap))
4305 loop->outer = other;
4306 VEC_safe_push (loop_info, heap, other->loops, loop);
4308 else
4310 if (dump_file)
4311 fprintf (dump_file,
4312 ";; can't find suitable nesting for loops %d and %d\n",
4313 loop->loop_no, other->loop_no);
4314 loop->bad = other->bad = 1;
4318 BITMAP_FREE (tmp_bitmap);
4320 return loops;
4323 /* Free up the loop structures in LOOPS. */
4324 static void
4325 free_loops (loop_info loops)
4327 while (loops)
4329 loop_info loop = loops;
4330 loops = loop->next;
4331 VEC_free (loop_info, heap, loop->loops);
4332 VEC_free (basic_block, heap, loop->blocks);
4333 BITMAP_FREE (loop->block_bitmap);
4334 XDELETE (loop);
4338 #define BB_AUX_INDEX(BB) ((intptr_t)(BB)->aux)
4340 /* The taken-branch edge from the loop end can actually go forward. Since the
4341 Blackfin's LSETUP instruction requires that the loop end be after the loop
4342 start, try to reorder a loop's basic blocks when we find such a case. */
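/* For reference, a hedged sketch of the hardware loop being set up
   (Blackfin ISA syntax; operand details may vary):

       P1 = 10;
       LSETUP (.Lbegin, .Lend) LC0 = P1;
   .Lbegin:
       ... loop body ...
   .Lend:
       ... last insn of the body ...     (repeats until LC0 is zero)

   Both labels are encoded as forward offsets from the LSETUP insn,
   which is where the ordering requirement described above comes
   from.  */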
4343 static void
4344 bfin_reorder_loops (loop_info loops, FILE *dump_file)
4346 basic_block bb;
4347 loop_info loop;
4349 FOR_EACH_BB (bb)
4350 bb->aux = NULL;
4351 cfg_layout_initialize (0);
4353 for (loop = loops; loop; loop = loop->next)
4355 intptr_t index;
4356 basic_block bb;
4357 edge e;
4358 edge_iterator ei;
4360 if (loop->bad)
4361 continue;
4363 /* Recreate an index for basic blocks that represents their order. */
4364 for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0;
4365 bb != EXIT_BLOCK_PTR;
4366 bb = bb->next_bb, index++)
4367 bb->aux = (PTR) index;
4369 if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail))
4370 continue;
4372 FOR_EACH_EDGE (e, ei, loop->head->succs)
4374 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
4375 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
4377 basic_block start_bb = e->dest;
4378 basic_block start_prev_bb = start_bb->prev_bb;
4380 if (dump_file)
4381 fprintf (dump_file, ";; Moving block %d before block %d\n",
4382 loop->head->index, start_bb->index);
4383 loop->head->prev_bb->next_bb = loop->head->next_bb;
4384 loop->head->next_bb->prev_bb = loop->head->prev_bb;
4386 loop->head->prev_bb = start_prev_bb;
4387 loop->head->next_bb = start_bb;
4388 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
4389 break;
4392 loops = loops->next;
4395 FOR_EACH_BB (bb)
4397 if (bb->next_bb != EXIT_BLOCK_PTR)
4398 bb->aux = bb->next_bb;
4399 else
4400 bb->aux = NULL;
4402 cfg_layout_finalize ();
4403 df_analyze ();
4406 /* Run from machine_dependent_reorg, this pass looks for doloop_end insns
4407 and tries to rewrite the RTL of these loops so that proper Blackfin
4408 hardware loops are generated. */
4410 static void
4411 bfin_reorg_loops (FILE *dump_file)
4413 loop_info loops = NULL;
4414 loop_info loop;
4415 basic_block bb;
4416 bitmap_obstack stack;
4418 bitmap_obstack_initialize (&stack);
4420 if (dump_file)
4421 fprintf (dump_file, ";; Find loops, first pass\n\n");
4423 loops = bfin_discover_loops (&stack, dump_file);
4425 if (dump_file)
4426 bfin_dump_loops (loops);
4428 bfin_reorder_loops (loops, dump_file);
4429 free_loops (loops);
4431 if (dump_file)
4432 fprintf (dump_file, ";; Find loops, second pass\n\n");
4434 loops = bfin_discover_loops (&stack, dump_file);
4435 if (dump_file)
4437 fprintf (dump_file, ";; All loops found:\n\n");
4438 bfin_dump_loops (loops);
4441 /* Now apply the optimizations. */
4442 for (loop = loops; loop; loop = loop->next)
4443 bfin_optimize_loop (loop);
4445 if (dump_file)
4447 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
4448 bfin_dump_loops (loops);
4451 free_loops (loops);
4453 if (dump_file)
4454 print_rtl (dump_file, get_insns ());
4456 FOR_EACH_BB (bb)
4457 bb->aux = NULL;
4459 splitting_loops = 1;
4460 FOR_EACH_BB (bb)
4462 rtx insn = BB_END (bb);
4463 if (!JUMP_P (insn))
4464 continue;
4466 try_split (PATTERN (insn), insn, 1);
4468 splitting_loops = 0;
4471 /* Try to combine the insns found in SLOT into a bundle issued in
4472 parallel.  Returns true if we modified the insn chain, false otherwise. */
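/* A hedged sketch of the 64-bit issue packet being formed: slot 0
   holds a 32-bit DSP instruction, slots 1 and 2 hold 16-bit
   instructions, and the assembler output joins them with "||", e.g.

       A0 += R0.L * R1.L || R2 = [P0++] || NOP;

   Empty slots are padded with MNOP / NOP below unless we are
   optimizing for size.  */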
4473 static bool
4474 gen_one_bundle (rtx slot[3])
4476 gcc_assert (slot[1] != NULL_RTX);
4478 /* Don't add extra NOPs if optimizing for size. */
4479 if (optimize_size
4480 && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
4481 return false;
4483 /* Verify that we really can do the multi-issue. */
4484 if (slot[0])
4486 rtx t = NEXT_INSN (slot[0]);
4487 while (t != slot[1])
4489 if (GET_CODE (t) != NOTE
4490 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4491 return false;
4492 t = NEXT_INSN (t);
4495 if (slot[2])
4497 rtx t = NEXT_INSN (slot[1]);
4498 while (t != slot[2])
4500 if (GET_CODE (t) != NOTE
4501 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4502 return false;
4503 t = NEXT_INSN (t);
4507 if (slot[0] == NULL_RTX)
4509 slot[0] = emit_insn_before (gen_mnop (), slot[1]);
4510 df_insn_rescan (slot[0]);
4512 if (slot[2] == NULL_RTX)
4514 slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
4515 df_insn_rescan (slot[2]);
4518 /* Avoid line number information being printed inside one bundle. */
4519 if (INSN_LOCATOR (slot[1])
4520 && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0]))
4521 INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]);
4522 if (INSN_LOCATOR (slot[2])
4523 && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0]))
4524 INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]);
4526 /* Terminate them with "|| " instead of ";" in the output. */
4527 PUT_MODE (slot[0], SImode);
4528 PUT_MODE (slot[1], SImode);
4529 /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */
4530 PUT_MODE (slot[2], QImode);
4531 return true;
4534 /* Go through all insns, and use the information generated during scheduling
4535 to form bundles of instructions that can be issued
4536 simultaneously. */
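/* Note (hedged): the second scheduling pass marks the first insn of
   each issue group with TImode; the GET_MODE (next) == TImode test
   below relies on that marking to find bundle boundaries.  */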
4538 static void
4539 bfin_gen_bundles (void)
4541 basic_block bb;
4542 FOR_EACH_BB (bb)
4544 rtx insn, next;
4545 rtx slot[3];
4546 int n_filled = 0;
4548 slot[0] = slot[1] = slot[2] = NULL_RTX;
4549 for (insn = BB_HEAD (bb);; insn = next)
4551 int at_end;
4552 rtx delete_this = NULL_RTX;
4554 if (NONDEBUG_INSN_P (insn))
4556 enum attr_type type = get_attr_type (insn);
4558 if (type == TYPE_STALL)
4560 gcc_assert (n_filled == 0);
4561 delete_this = insn;
4563 else
4565 if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM)
4566 slot[0] = insn;
4567 else if (slot[1] == NULL_RTX)
4568 slot[1] = insn;
4569 else
4570 slot[2] = insn;
4571 n_filled++;
4575 next = NEXT_INSN (insn);
4576 while (next && insn != BB_END (bb)
4577 && !(INSN_P (next)
4578 && GET_CODE (PATTERN (next)) != USE
4579 && GET_CODE (PATTERN (next)) != CLOBBER))
4581 insn = next;
4582 next = NEXT_INSN (insn);
4585 /* BB_END can change due to emitting extra NOPs, so check here. */
4586 at_end = insn == BB_END (bb);
4587 if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
4589 if ((n_filled < 2
4590 || !gen_one_bundle (slot))
4591 && slot[0] != NULL_RTX)
4593 rtx pat = PATTERN (slot[0]);
4594 if (GET_CODE (pat) == SET
4595 && GET_CODE (SET_SRC (pat)) == UNSPEC
4596 && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
4598 SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
4599 INSN_CODE (slot[0]) = -1;
4600 df_insn_rescan (slot[0]);
4603 n_filled = 0;
4604 slot[0] = slot[1] = slot[2] = NULL_RTX;
4606 if (delete_this != NULL_RTX)
4607 delete_insn (delete_this);
4608 if (at_end)
4609 break;
4614 /* Ensure that no var tracking notes are emitted in the middle of a
4615 three-instruction bundle. */
4617 static void
4618 reorder_var_tracking_notes (void)
4620 basic_block bb;
4621 FOR_EACH_BB (bb)
4623 rtx insn, next;
4624 rtx queue = NULL_RTX;
4625 bool in_bundle = false;
4627 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
4629 next = NEXT_INSN (insn);
4631 if (INSN_P (insn))
4633 /* Emit queued up notes at the last instruction of a bundle. */
4634 if (GET_MODE (insn) == QImode)
4636 while (queue)
4638 rtx next_queue = PREV_INSN (queue);
4639 PREV_INSN (NEXT_INSN (insn)) = queue;
4640 NEXT_INSN (queue) = NEXT_INSN (insn);
4641 NEXT_INSN (insn) = queue;
4642 PREV_INSN (queue) = insn;
4643 queue = next_queue;
4645 in_bundle = false;
4647 else if (GET_MODE (insn) == SImode)
4648 in_bundle = true;
4650 else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
4652 if (in_bundle)
4654 rtx prev = PREV_INSN (insn);
4655 PREV_INSN (next) = prev;
4656 NEXT_INSN (prev) = next;
4658 PREV_INSN (insn) = queue;
4659 queue = insn;
4666 /* On some silicon revisions, functions shorter than a certain number of cycles
4667 can cause unpredictable behaviour. Work around this by adding NOPs as
4668 needed. */
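/* Hedged illustration using the cycle counts assumed below: a function
   beginning with a LINK insn (counted as 4 cycles) already covers the
   window and gets no padding, while a bare "rts;" leaf function has
   four NOPs emitted in front of the return.  */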
4669 static void
4670 workaround_rts_anomaly (void)
4672 rtx insn, first_insn = NULL_RTX;
4673 int cycles = 4;
4675 if (! ENABLE_WA_RETS)
4676 return;
4678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4680 rtx pat;
4682 if (BARRIER_P (insn))
4683 return;
4685 if (NOTE_P (insn) || LABEL_P (insn))
4686 continue;
4688 if (first_insn == NULL_RTX)
4689 first_insn = insn;
4690 pat = PATTERN (insn);
4691 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4692 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
4693 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
4694 continue;
4696 if (CALL_P (insn))
4697 return;
4699 if (JUMP_P (insn))
4701 if (recog_memoized (insn) == CODE_FOR_return_internal)
4702 break;
4704 /* Nothing to worry about for direct jumps. */
4705 if (!any_condjump_p (insn))
4706 return;
4707 if (cycles <= 1)
4708 return;
4709 cycles--;
4711 else if (INSN_P (insn))
4713 rtx pat = PATTERN (insn);
4714 int this_cycles = 1;
4716 if (GET_CODE (pat) == PARALLEL)
4718 if (push_multiple_operation (pat, VOIDmode)
4719 || pop_multiple_operation (pat, VOIDmode))
4720 this_cycles = n_regs_to_save;
4722 else
4724 int icode = recog_memoized (insn);
4726 if (icode == CODE_FOR_link)
4727 this_cycles = 4;
4728 else if (icode == CODE_FOR_unlink)
4729 this_cycles = 3;
4730 else if (icode == CODE_FOR_mulsi3)
4731 this_cycles = 5;
4733 if (this_cycles >= cycles)
4734 return;
4736 cycles -= this_cycles;
4739 while (cycles > 0)
4741 emit_insn_before (gen_nop (), first_insn);
4742 cycles--;
4746 /* Return an insn type for INSN that can be used by the caller for anomaly
4747 workarounds. This differs from plain get_attr_type in that it handles
4748 SEQUENCEs. */
4750 static enum attr_type
4751 type_for_anomaly (rtx insn)
4753 rtx pat = PATTERN (insn);
4754 if (GET_CODE (pat) == SEQUENCE)
4756 enum attr_type t;
4757 t = get_attr_type (XVECEXP (pat, 0, 1));
4758 if (t == TYPE_MCLD)
4759 return t;
4760 t = get_attr_type (XVECEXP (pat, 0, 2));
4761 if (t == TYPE_MCLD)
4762 return t;
4763 return TYPE_MCST;
4765 else
4766 return get_attr_type (insn);
4769 /* Return true iff the address found in MEM is based on the register
4770 NP_REG and optionally has a positive offset. */
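/* Hedged examples, with NP_REG being P2: loads such as [P2], [P2++]
   and [P2 + 4] count as harmless, while [P2 + -4] or a load based on
   any other register does not.  */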
4771 static bool
4772 harmless_null_pointer_p (rtx mem, int np_reg)
4774 mem = XEXP (mem, 0);
4775 if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC)
4776 mem = XEXP (mem, 0);
4777 if (REG_P (mem) && (int) REGNO (mem) == np_reg)
4778 return true;
4779 if (GET_CODE (mem) == PLUS
4780 && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg)
4782 mem = XEXP (mem, 1);
4783 if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0)
4784 return true;
4786 return false;
4789 /* Return nonzero if INSN contains any loads that may trap. */
4791 static bool
4792 trapping_loads_p (rtx insn, int np_reg, bool after_np_branch)
4794 rtx mem = SET_SRC (single_set (insn));
4796 if (!after_np_branch)
4797 np_reg = -1;
4798 return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg))
4799 && may_trap_p (mem));
4802 /* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of
4803 a three-insn bundle, see if one of them is a load and return that if so.
4804 Return NULL_RTX if the insn does not contain loads. */
4805 static rtx
4806 find_load (rtx insn)
4808 if (!NONDEBUG_INSN_P (insn))
4809 return NULL_RTX;
4810 if (get_attr_type (insn) == TYPE_MCLD)
4811 return insn;
4812 if (GET_MODE (insn) != SImode)
4813 return NULL_RTX;
4814 do {
4815 insn = NEXT_INSN (insn);
4816 if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode)
4817 && get_attr_type (insn) == TYPE_MCLD)
4818 return insn;
4819 } while (GET_MODE (insn) != QImode);
4820 return NULL_RTX;
4823 /* Determine whether PAT is an indirect call pattern. */
4824 static bool
4825 indirect_call_p (rtx pat)
4827 if (GET_CODE (pat) == PARALLEL)
4828 pat = XVECEXP (pat, 0, 0);
4829 if (GET_CODE (pat) == SET)
4830 pat = SET_SRC (pat);
4831 gcc_assert (GET_CODE (pat) == CALL);
4832 pat = XEXP (pat, 0);
4833 gcc_assert (GET_CODE (pat) == MEM);
4834 pat = XEXP (pat, 0);
4836 return REG_P (pat);
4839 /* During workaround_speculation, track whether we're in the shadow of a
4840 conditional branch that tests a P register for NULL. If so, we can omit
4841 emitting NOPs if we see a load from that P register, since a speculative
4842 access at address 0 isn't a problem, and the load is executed in all other
4843 cases anyway.
4844 Global for communication with note_np_check_stores through note_stores.  */
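/* The situation being tracked, as a hedged sketch:

       CC = P2 == 0;
       IF CC JUMP skip;
       R0 = [P2];      <- in the branch shadow; speculative execution
                          can only read address 0, and only in the
                          case where the branch is actually taken

   so the load needs no workaround NOPs.  */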
4846 int np_check_regno = -1;
4847 bool np_after_branch = false;
4849 /* Subroutine of workaround_speculation, called through note_stores. */
4850 static void
4851 note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
4852 void *data ATTRIBUTE_UNUSED)
4854 if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno))
4855 np_check_regno = -1;
4858 static void
4859 workaround_speculation (void)
4861 rtx insn, next;
4862 rtx last_condjump = NULL_RTX;
4863 int cycles_since_jump = INT_MAX;
4864 int delay_added = 0;
4866 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
4867 && ! ENABLE_WA_INDIRECT_CALLS)
4868 return;
4870 /* First pass: find predicted-false branches; if something after them
4871 needs nops, insert them or change the branch to predict true. */
4872 for (insn = get_insns (); insn; insn = next)
4874 rtx pat;
4875 int delay_needed = 0;
4877 next = find_next_insn_start (insn);
4879 if (NOTE_P (insn) || BARRIER_P (insn))
4880 continue;
4882 if (LABEL_P (insn))
4884 np_check_regno = -1;
4885 continue;
4888 pat = PATTERN (insn);
4889 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4890 || GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
4891 continue;
4893 if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0)
4895 np_check_regno = -1;
4896 continue;
4899 if (JUMP_P (insn))
4901 /* Is this a condjump based on a null pointer comparison we saw
4902 earlier? */
4903 if (np_check_regno != -1
4904 && recog_memoized (insn) == CODE_FOR_cbranchbi4)
4906 rtx op = XEXP (SET_SRC (PATTERN (insn)), 0);
4907 gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE);
4908 if (GET_CODE (op) == NE)
4909 np_after_branch = true;
4911 if (any_condjump_p (insn)
4912 && ! cbranch_predicted_taken_p (insn))
4914 last_condjump = insn;
4915 delay_added = 0;
4916 cycles_since_jump = 0;
4918 else
4919 cycles_since_jump = INT_MAX;
4921 else if (CALL_P (insn))
4923 np_check_regno = -1;
4924 if (cycles_since_jump < INT_MAX)
4925 cycles_since_jump++;
4926 if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
4928 delay_needed = 3;
4931 else if (NONDEBUG_INSN_P (insn))
4933 rtx load_insn = find_load (insn);
4934 enum attr_type type = type_for_anomaly (insn);
4936 if (cycles_since_jump < INT_MAX)
4937 cycles_since_jump++;
4939 /* Detect a comparison of a P register with zero. If we later
4940 see a condjump based on it, we have found a null pointer
4941 check. */
4942 if (recog_memoized (insn) == CODE_FOR_compare_eq)
4944 rtx src = SET_SRC (PATTERN (insn));
4945 if (REG_P (XEXP (src, 0))
4946 && P_REGNO_P (REGNO (XEXP (src, 0)))
4947 && XEXP (src, 1) == const0_rtx)
4949 np_check_regno = REGNO (XEXP (src, 0));
4950 np_after_branch = false;
4952 else
4953 np_check_regno = -1;
4956 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
4958 if (trapping_loads_p (load_insn, np_check_regno,
4959 np_after_branch))
4960 delay_needed = 4;
4962 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
4963 delay_needed = 3;
4965 /* See if we need to forget about a null pointer comparison
4966 we found earlier. */
4967 if (recog_memoized (insn) != CODE_FOR_compare_eq)
4969 note_stores (PATTERN (insn), note_np_check_stores, NULL);
4970 if (np_check_regno != -1)
4972 if (find_regno_note (insn, REG_INC, np_check_regno))
4973 np_check_regno = -1;
4979 if (delay_needed > cycles_since_jump
4980 && (delay_needed - cycles_since_jump) > delay_added)
4982 rtx pat1;
4983 int num_clobbers;
4984 rtx *op = recog_data.operand;
4986 delay_needed -= cycles_since_jump;
4988 extract_insn (last_condjump);
4989 if (optimize_size)
4991 pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
4992 op[3]);
4993 cycles_since_jump = INT_MAX;
4995 else
4997 /* Do not adjust cycles_since_jump in this case, so that
4998 we'll increase the number of NOPs for a subsequent insn
4999 if necessary. */
5000 pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
5001 GEN_INT (delay_needed));
5002 delay_added = delay_needed;
5004 PATTERN (last_condjump) = pat1;
5005 INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
5007 if (CALL_P (insn))
5009 cycles_since_jump = INT_MAX;
5010 delay_added = 0;
5014 /* Second pass: for predicted-true branches, see if anything at the
5015 branch destination needs extra nops. */
5016 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5018 int cycles_since_jump;
5019 if (JUMP_P (insn)
5020 && any_condjump_p (insn)
5021 && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
5022 || cbranch_predicted_taken_p (insn)))
5024 rtx target = JUMP_LABEL (insn);
5025 rtx label = target;
5026 rtx next_tgt;
5028 cycles_since_jump = 0;
5029 for (; target && cycles_since_jump < 3; target = next_tgt)
5031 rtx pat;
5033 next_tgt = find_next_insn_start (target);
5035 if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
5036 continue;
5038 pat = PATTERN (target);
5039 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5040 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5041 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5042 continue;
5044 if (NONDEBUG_INSN_P (target))
5046 rtx load_insn = find_load (target);
5047 enum attr_type type = type_for_anomaly (target);
5048 int delay_needed = 0;
5049 if (cycles_since_jump < INT_MAX)
5050 cycles_since_jump++;
5052 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
5054 if (trapping_loads_p (load_insn, -1, false))
5055 delay_needed = 2;
5057 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
5058 delay_needed = 2;
5060 if (delay_needed > cycles_since_jump)
5062 rtx prev = prev_real_insn (label);
5063 delay_needed -= cycles_since_jump;
5064 if (dump_file)
5065 fprintf (dump_file, "Adding %d nops after %d\n",
5066 delay_needed, INSN_UID (label));
5067 if (JUMP_P (prev)
5068 && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops)
5070 rtx x;
5071 HOST_WIDE_INT v;
5073 if (dump_file)
5074 fprintf (dump_file,
5075 "Reducing nops on insn %d.\n",
5076 INSN_UID (prev));
5077 x = PATTERN (prev);
5078 x = XVECEXP (x, 0, 1);
5079 v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed;
5080 XVECEXP (x, 0, 0) = GEN_INT (v);
5082 while (delay_needed-- > 0)
5083 emit_insn_after (gen_nop (), label);
5084 break;
5092 /* Called just before the final scheduling pass. If we need to insert NOPs
5093 later on to work around speculative loads, insert special placeholder
5094 insns that cause loads to be delayed for as many cycles as necessary
5095 (and possible). This reduces the number of NOPs we need to add.
5096 The dummy insns we generate are later removed by bfin_gen_bundles. */
5097 static void
5098 add_sched_insns_for_speculation (void)
5100 rtx insn;
5102 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
5103 && ! ENABLE_WA_INDIRECT_CALLS)
5104 return;
5106 /* First pass: find predicted-false branches, and insert a stall
5107 placeholder after each so the scheduler keeps loads away from them. */
5108 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5110 rtx pat;
5112 if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
5113 continue;
5115 pat = PATTERN (insn);
5116 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5117 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5118 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5119 continue;
5121 if (JUMP_P (insn))
5123 if (any_condjump_p (insn)
5124 && !cbranch_predicted_taken_p (insn))
5126 rtx n = next_real_insn (insn);
5127 emit_insn_before (gen_stall (GEN_INT (3)), n);
5132 /* Second pass: for predicted-true branches, add a short stall
5133 placeholder at the branch destination. */
5134 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5136 if (JUMP_P (insn)
5137 && any_condjump_p (insn)
5138 && (cbranch_predicted_taken_p (insn)))
5140 rtx target = JUMP_LABEL (insn);
5141 rtx next = next_real_insn (target);
5143 if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
5144 && get_attr_type (next) == TYPE_STALL)
5145 continue;
5146 emit_insn_before (gen_stall (GEN_INT (1)), next);
5151 /* We use the machine specific reorg pass for emitting CSYNC instructions
5152 after conditional branches as needed.
5154 The Blackfin is unusual in that a code sequence like
5155 if cc jump label
5156 r0 = (p0)
5157 may speculatively perform the load even if the condition isn't true. This
5158 happens for a branch that is predicted not taken, because the pipeline
5159 isn't flushed or stalled, so the early stages of the following instructions,
5160 which perform the memory reference, are allowed to execute before the
5161 jump condition is evaluated.
5162 Therefore, we must insert additional instructions in all places where this
5163 could lead to incorrect behavior. The manual recommends CSYNC, while
5164 VDSP seems to use NOPs (even though its corresponding compiler option is
5165 named CSYNC).
5167 When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
5168 When optimizing for size, we turn the branch into a predicted taken one.
5169 This may be slower due to mispredicts, but saves code size. */
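/* Concretely (hedged sketch), when optimizing for speed

       if cc jump .L1;
       r0 = [p0];

   is rewritten so that the output contains

       if cc jump .L1;
       nop; nop; nop;
       r0 = [p0];

   three NOPs plus the load's own slot giving the four cycles of delay
   computed below, while with -Os the branch is converted to its
   predicted-taken form instead.  */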
5171 static void
5172 bfin_reorg (void)
5174 /* We are freeing block_for_insn in the toplev to keep compatibility
5175 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5176 compute_bb_for_insn ();
5178 if (flag_schedule_insns_after_reload)
5180 splitting_for_sched = 1;
5181 split_all_insns ();
5182 splitting_for_sched = 0;
5184 add_sched_insns_for_speculation ();
5186 timevar_push (TV_SCHED2);
5187 if (flag_selective_scheduling2
5188 && !maybe_skip_selective_scheduling ())
5189 run_selective_scheduling ();
5190 else
5191 schedule_insns ();
5192 timevar_pop (TV_SCHED2);
5194 /* Examine the schedule and insert nops as necessary for 64-bit parallel
5195 instructions. */
5196 bfin_gen_bundles ();
5199 df_analyze ();
5201 /* Doloop optimization */
5202 if (cfun->machine->has_hardware_loops)
5203 bfin_reorg_loops (dump_file);
5205 workaround_speculation ();
5207 if (flag_var_tracking)
5209 timevar_push (TV_VAR_TRACKING);
5210 variable_tracking_main ();
5211 reorder_var_tracking_notes ();
5212 timevar_pop (TV_VAR_TRACKING);
5215 df_finish_pass (false);
5217 workaround_rts_anomaly ();
5220 /* Handle interrupt_handler, exception_handler and nmi_handler function
5221 attributes; arguments as in struct attribute_spec.handler. */
5223 static tree
5224 handle_int_attribute (tree *node, tree name,
5225 tree args ATTRIBUTE_UNUSED,
5226 int flags ATTRIBUTE_UNUSED,
5227 bool *no_add_attrs)
5229 tree x = *node;
5230 if (TREE_CODE (x) == FUNCTION_DECL)
5231 x = TREE_TYPE (x);
5233 if (TREE_CODE (x) != FUNCTION_TYPE)
5235 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5236 name);
5237 *no_add_attrs = true;
5239 else if (funkind (x) != SUBROUTINE)
5240 error ("multiple function type attributes specified");
5242 return NULL_TREE;
5245 /* Return 0 if the attributes for two types are incompatible, 1 if they
5246 are compatible, and 2 if they are nearly compatible (which causes a
5247 warning to be generated). */
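/* Hedged example: a pointer to a function declared with
   __attribute__ ((interrupt_handler)) and a pointer to an ordinary
   function compare as incompatible here (return value 0), since the
   two use different calling conventions.  */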
5249 static int
5250 bfin_comp_type_attributes (const_tree type1, const_tree type2)
5252 e_funkind kind1, kind2;
5254 if (TREE_CODE (type1) != FUNCTION_TYPE)
5255 return 1;
5257 kind1 = funkind (type1);
5258 kind2 = funkind (type2);
5260 if (kind1 != kind2)
5261 return 0;
5263 /* Check for mismatched modifiers */
5264 if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
5265 != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
5266 return 0;
5268 if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1))
5269 != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2)))
5270 return 0;
5272 if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1))
5273 != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2)))
5274 return 0;
5276 if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1))
5277 != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2)))
5278 return 0;
5280 return 1;
5283 /* Handle a "longcall" or "shortcall" attribute; arguments as in
5284 struct attribute_spec.handler. */
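/* Hedged usage example:

       void far_func (void) __attribute__ ((longcall));

   requests the longer, indirect call sequence for calls to far_func
   even without -mlong-calls; "shortcall" requests the direct sequence.
   Applying both to one declaration is diagnosed below.  */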
5286 static tree
5287 bfin_handle_longcall_attribute (tree *node, tree name,
5288 tree args ATTRIBUTE_UNUSED,
5289 int flags ATTRIBUTE_UNUSED,
5290 bool *no_add_attrs)
5292 if (TREE_CODE (*node) != FUNCTION_TYPE
5293 && TREE_CODE (*node) != FIELD_DECL
5294 && TREE_CODE (*node) != TYPE_DECL)
5296 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5297 name);
5298 *no_add_attrs = true;
5301 if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0
5302 && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node)))
5303 || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0
5304 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
5306 warning (OPT_Wattributes,
5307 "can%'t apply both longcall and shortcall attributes to the same function");
5308 *no_add_attrs = true;
5311 return NULL_TREE;
5314 /* Handle a "l1_text" attribute; arguments as in
5315 struct attribute_spec.handler. */
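/* Hedged usage example:

       void fast_handler (void) __attribute__ ((l1_text));

   places the function in the ".l1.text" section, i.e. in on-chip L1
   instruction memory.  */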
5317 static tree
5318 bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5319 int ARG_UNUSED (flags), bool *no_add_attrs)
5321 tree decl = *node;
5323 if (TREE_CODE (decl) != FUNCTION_DECL)
5325 error ("%qE attribute only applies to functions",
5326 name);
5327 *no_add_attrs = true;
5330 /* The decl may have already been given a section attribute
5331 from a previous declaration. Ensure they match. */
5332 else if (DECL_SECTION_NAME (decl) != NULL_TREE
5333 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5334 ".l1.text") != 0)
5336 error ("section of %q+D conflicts with previous declaration",
5337 decl);
5338 *no_add_attrs = true;
5340 else
5341 DECL_SECTION_NAME (decl) = build_string (9, ".l1.text");
5343 return NULL_TREE;
5346 /* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute;
5347 arguments as in struct attribute_spec.handler. */
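/* Hedged usage example:

       static int coeffs[64] __attribute__ ((l1_data_A));

   places the variable in ".l1.data.A"; plain "l1_data" selects
   ".l1.data" and "l1_data_B" selects ".l1.data.B".  As checked below,
   this is only accepted for variables with static storage.  */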
5349 static tree
5350 bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5351 int ARG_UNUSED (flags), bool *no_add_attrs)
5353 tree decl = *node;
5355 if (TREE_CODE (decl) != VAR_DECL)
5357 error ("%qE attribute only applies to variables",
5358 name);
5359 *no_add_attrs = true;
5361 else if (current_function_decl != NULL_TREE
5362 && !TREE_STATIC (decl))
5364 error ("%qE attribute cannot be specified for local variables",
5365 name);
5366 *no_add_attrs = true;
5368 else
5370 const char *section_name;
5372 if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0)
5373 section_name = ".l1.data";
5374 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0)
5375 section_name = ".l1.data.A";
5376 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0)
5377 section_name = ".l1.data.B";
5378 else
5379 gcc_unreachable ();
5381 /* The decl may have already been given a section attribute
5382 from a previous declaration. Ensure they match. */
5383 if (DECL_SECTION_NAME (decl) != NULL_TREE
5384 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5385 section_name) != 0)
5387 error ("section of %q+D conflicts with previous declaration",
5388 decl);
5389 *no_add_attrs = true;
5391 else
5392 DECL_SECTION_NAME (decl)
5393 = build_string (strlen (section_name) + 1, section_name);
5396 return NULL_TREE;
5399 /* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */
5401 static tree
5402 bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name),
5403 tree ARG_UNUSED (args), int ARG_UNUSED (flags),
5404 bool *no_add_attrs)
5406 tree decl = *node;
5408 if (TREE_CODE (decl) == FUNCTION_DECL)
5410 if (DECL_SECTION_NAME (decl) != NULL_TREE
5411 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5412 ".l2.text") != 0)
5414 error ("section of %q+D conflicts with previous declaration",
5415 decl);
5416 *no_add_attrs = true;
5418 else
5419 DECL_SECTION_NAME (decl) = build_string (9, ".l2.text");
5421 else if (TREE_CODE (decl) == VAR_DECL)
5423 if (DECL_SECTION_NAME (decl) != NULL_TREE
5424 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5425 ".l2.data") != 0)
5427 error ("section of %q+D conflicts with previous declaration",
5428 decl);
5429 *no_add_attrs = true;
5431 else
5432 DECL_SECTION_NAME (decl) = build_string (9, ".l2.data");
5435 return NULL_TREE;
5438 /* Table of valid machine attributes. */
5439 static const struct attribute_spec bfin_attribute_table[] =
5441 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
5442 affects_type_identity } */
5443 { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute,
5444 false },
5445 { "exception_handler", 0, 0, false, true, true, handle_int_attribute,
5446 false },
5447 { "nmi_handler", 0, 0, false, true, true, handle_int_attribute, false },
5448 { "nesting", 0, 0, false, true, true, NULL, false },
5449 { "kspisusp", 0, 0, false, true, true, NULL, false },
5450 { "saveall", 0, 0, false, true, true, NULL, false },
5451 { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute,
5452 false },
5453 { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute,
5454 false },
5455 { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute,
5456 false },
5457 { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute,
5458 false },
5459 { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute,
5460 false },
5461 { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute,
5462 false },
5463 { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute, false },
5464 { NULL, 0, 0, false, false, false, NULL, false }
5467 /* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to
5468 tell the assembler to generate pointers to function descriptors in
5469 some cases. */
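/* Hedged example: under -mfdpic, a static initializer such as

       void (*fp) (void) = foo;

   makes this hook emit

       .picptr funcdesc(foo)

   so that the word refers to foo's function descriptor rather than
   its code address.  */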
5471 static bool
5472 bfin_assemble_integer (rtx value, unsigned int size, int aligned_p)
5474 if (TARGET_FDPIC && size == UNITS_PER_WORD)
5476 if (GET_CODE (value) == SYMBOL_REF
5477 && SYMBOL_REF_FUNCTION_P (value))
5479 fputs ("\t.picptr\tfuncdesc(", asm_out_file);
5480 output_addr_const (asm_out_file, value);
5481 fputs (")\n", asm_out_file);
5482 return true;
5484 if (!aligned_p)
5486 /* We've set the unaligned SI op to NULL, so we always have to
5487 handle the unaligned case here. */
5488 assemble_integer_with_op ("\t.4byte\t", value);
5489 return true;
5492 return default_assemble_integer (value, size, aligned_p);
5495 /* Output the assembler code for a thunk function. THUNK_DECL is the
5496 declaration for the thunk function itself, FUNCTION is the decl for
5497 the target function. DELTA is an immediate constant offset to be
5498 added to THIS. If VCALL_OFFSET is nonzero, the word at
5499 *(*this + vcall_offset) should be added to THIS. */
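/* Hedged worked example: DELTA == 100 does not fit the signed 7-bit
   add-immediate used here, so the code below splits it into
   "R0 += 63; R0 += 37;".  Deltas outside [-128, 126] are instead
   materialized in R3 and added.  */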
5501 static void
5502 bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
5503 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
5504 HOST_WIDE_INT vcall_offset, tree function)
5506 rtx xops[3];
5507 /* The this parameter is passed as the first argument. */
5508 rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
5510 /* Adjust the this parameter by a fixed constant. */
5511 if (delta)
5513 xops[1] = this_rtx;
5514 if (delta >= -64 && delta <= 63)
5516 xops[0] = GEN_INT (delta);
5517 output_asm_insn ("%1 += %0;", xops);
5519 else if (delta >= -128 && delta < -64)
5521 xops[0] = GEN_INT (delta + 64);
5522 output_asm_insn ("%1 += -64; %1 += %0;", xops);
5524 else if (delta > 63 && delta <= 126)
5526 xops[0] = GEN_INT (delta - 63);
5527 output_asm_insn ("%1 += 63; %1 += %0;", xops);
5529 else
5531 xops[0] = GEN_INT (delta);
5532 output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops);
5536 /* Adjust the this parameter by a value stored in the vtable. */
5537 if (vcall_offset)
5539 rtx p2tmp = gen_rtx_REG (Pmode, REG_P2);
5540 rtx tmp = gen_rtx_REG (Pmode, REG_R3);
5542 xops[1] = tmp;
5543 xops[2] = p2tmp;
5544 output_asm_insn ("%2 = r0; %2 = [%2];", xops);
5546 /* Adjust the this parameter. */
5547 xops[0] = gen_rtx_MEM (Pmode, plus_constant (p2tmp, vcall_offset));
5548 if (!memory_operand (xops[0], Pmode))
5550 rtx tmp2 = gen_rtx_REG (Pmode, REG_P1);
5551 xops[0] = GEN_INT (vcall_offset);
5552 xops[1] = tmp2;
5553 output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
5554 xops[0] = gen_rtx_MEM (Pmode, p2tmp);
5556 xops[2] = this_rtx;
5557 output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
5560 xops[0] = XEXP (DECL_RTL (function), 0);
5561 if (1 || !flag_pic || (*targetm.binds_local_p) (function))
5562 output_asm_insn ("jump.l\t%P0", xops);
5565 /* Codes for all the Blackfin builtins. */
5566 enum bfin_builtins
5568 BFIN_BUILTIN_CSYNC,
5569 BFIN_BUILTIN_SSYNC,
5570 BFIN_BUILTIN_ONES,
5571 BFIN_BUILTIN_COMPOSE_2X16,
5572 BFIN_BUILTIN_EXTRACTLO,
5573 BFIN_BUILTIN_EXTRACTHI,
5575 BFIN_BUILTIN_SSADD_2X16,
5576 BFIN_BUILTIN_SSSUB_2X16,
5577 BFIN_BUILTIN_SSADDSUB_2X16,
5578 BFIN_BUILTIN_SSSUBADD_2X16,
5579 BFIN_BUILTIN_MULT_2X16,
5580 BFIN_BUILTIN_MULTR_2X16,
5581 BFIN_BUILTIN_NEG_2X16,
5582 BFIN_BUILTIN_ABS_2X16,
5583 BFIN_BUILTIN_MIN_2X16,
5584 BFIN_BUILTIN_MAX_2X16,
5586 BFIN_BUILTIN_SSADD_1X16,
5587 BFIN_BUILTIN_SSSUB_1X16,
5588 BFIN_BUILTIN_MULT_1X16,
5589 BFIN_BUILTIN_MULTR_1X16,
5590 BFIN_BUILTIN_NORM_1X16,
5591 BFIN_BUILTIN_NEG_1X16,
5592 BFIN_BUILTIN_ABS_1X16,
5593 BFIN_BUILTIN_MIN_1X16,
5594 BFIN_BUILTIN_MAX_1X16,
5596 BFIN_BUILTIN_SUM_2X16,
5597 BFIN_BUILTIN_DIFFHL_2X16,
5598 BFIN_BUILTIN_DIFFLH_2X16,
5600 BFIN_BUILTIN_SSADD_1X32,
5601 BFIN_BUILTIN_SSSUB_1X32,
5602 BFIN_BUILTIN_NORM_1X32,
5603 BFIN_BUILTIN_ROUND_1X32,
5604 BFIN_BUILTIN_NEG_1X32,
5605 BFIN_BUILTIN_ABS_1X32,
5606 BFIN_BUILTIN_MIN_1X32,
5607 BFIN_BUILTIN_MAX_1X32,
5608 BFIN_BUILTIN_MULT_1X32,
5609 BFIN_BUILTIN_MULT_1X32X32,
5610 BFIN_BUILTIN_MULT_1X32X32NS,
5612 BFIN_BUILTIN_MULHISILL,
5613 BFIN_BUILTIN_MULHISILH,
5614 BFIN_BUILTIN_MULHISIHL,
5615 BFIN_BUILTIN_MULHISIHH,
5617 BFIN_BUILTIN_LSHIFT_1X16,
5618 BFIN_BUILTIN_LSHIFT_2X16,
5619 BFIN_BUILTIN_SSASHIFT_1X16,
5620 BFIN_BUILTIN_SSASHIFT_2X16,
5621 BFIN_BUILTIN_SSASHIFT_1X32,
5623 BFIN_BUILTIN_CPLX_MUL_16,
5624 BFIN_BUILTIN_CPLX_MAC_16,
5625 BFIN_BUILTIN_CPLX_MSU_16,
5627 BFIN_BUILTIN_CPLX_MUL_16_S40,
5628 BFIN_BUILTIN_CPLX_MAC_16_S40,
5629 BFIN_BUILTIN_CPLX_MSU_16_S40,
5631 BFIN_BUILTIN_CPLX_SQU,
5633 BFIN_BUILTIN_LOADBYTES,
5635 BFIN_BUILTIN_MAX
5638 #define def_builtin(NAME, TYPE, CODE) \
5639 do { \
5640 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
5641 NULL, NULL_TREE); \
5642 } while (0)
5644 /* Set up all builtin functions for this target. */
5645 static void
5646 bfin_init_builtins (void)
5648 tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
5649 tree void_ftype_void
5650 = build_function_type_list (void_type_node, NULL_TREE);
5651 tree short_ftype_short
5652 = build_function_type_list (short_integer_type_node, short_integer_type_node,
5653 NULL_TREE);
5654 tree short_ftype_int_int
5655 = build_function_type_list (short_integer_type_node, integer_type_node,
5656 integer_type_node, NULL_TREE);
5657 tree int_ftype_int_int
5658 = build_function_type_list (integer_type_node, integer_type_node,
5659 integer_type_node, NULL_TREE);
5660 tree int_ftype_int
5661 = build_function_type_list (integer_type_node, integer_type_node,
5662 NULL_TREE);
5663 tree short_ftype_int
5664 = build_function_type_list (short_integer_type_node, integer_type_node,
5665 NULL_TREE);
5666 tree int_ftype_v2hi_v2hi
5667 = build_function_type_list (integer_type_node, V2HI_type_node,
5668 V2HI_type_node, NULL_TREE);
5669 tree v2hi_ftype_v2hi_v2hi
5670 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5671 V2HI_type_node, NULL_TREE);
5672 tree v2hi_ftype_v2hi_v2hi_v2hi
5673 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5674 V2HI_type_node, V2HI_type_node, NULL_TREE);
5675 tree v2hi_ftype_int_int
5676 = build_function_type_list (V2HI_type_node, integer_type_node,
5677 integer_type_node, NULL_TREE);
5678 tree v2hi_ftype_v2hi_int
5679 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5680 integer_type_node, NULL_TREE);
5681 tree int_ftype_short_short
5682 = build_function_type_list (integer_type_node, short_integer_type_node,
5683 short_integer_type_node, NULL_TREE);
5684 tree v2hi_ftype_v2hi
5685 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
5686 tree short_ftype_v2hi
5687 = build_function_type_list (short_integer_type_node, V2HI_type_node,
5688 NULL_TREE);
5689 tree int_ftype_pint
5690 = build_function_type_list (integer_type_node,
5691 build_pointer_type (integer_type_node),
5692 NULL_TREE);
5694 /* Add the Blackfin builtins, some with somewhat complicated types. */
5695 def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
5696 def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
5698 def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
5700 def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
5701 BFIN_BUILTIN_COMPOSE_2X16);
5702 def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
5703 BFIN_BUILTIN_EXTRACTHI);
5704 def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi,
5705 BFIN_BUILTIN_EXTRACTLO);
5707 def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi,
5708 BFIN_BUILTIN_MIN_2X16);
5709 def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi,
5710 BFIN_BUILTIN_MAX_2X16);
5712 def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi,
5713 BFIN_BUILTIN_SSADD_2X16);
5714 def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi,
5715 BFIN_BUILTIN_SSSUB_2X16);
5716 def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi,
5717 BFIN_BUILTIN_SSADDSUB_2X16);
5718 def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi,
5719 BFIN_BUILTIN_SSSUBADD_2X16);
5720 def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi,
5721 BFIN_BUILTIN_MULT_2X16);
5722 def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi,
5723 BFIN_BUILTIN_MULTR_2X16);
5724 def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi,
5725 BFIN_BUILTIN_NEG_2X16);
5726 def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
5727 BFIN_BUILTIN_ABS_2X16);
5729 def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
5730 BFIN_BUILTIN_MIN_1X16);
5731 def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
5732 BFIN_BUILTIN_MAX_1X16);
5734 def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
5735 BFIN_BUILTIN_SSADD_1X16);
5736 def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
5737 BFIN_BUILTIN_SSSUB_1X16);
5738 def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int,
5739 BFIN_BUILTIN_MULT_1X16);
5740 def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int,
5741 BFIN_BUILTIN_MULTR_1X16);
5742 def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short,
5743 BFIN_BUILTIN_NEG_1X16);
5744 def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short,
5745 BFIN_BUILTIN_ABS_1X16);
5746 def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int,
5747 BFIN_BUILTIN_NORM_1X16);
5749 def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi,
5750 BFIN_BUILTIN_SUM_2X16);
5751 def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi,
5752 BFIN_BUILTIN_DIFFHL_2X16);
5753 def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi,
5754 BFIN_BUILTIN_DIFFLH_2X16);
5756 def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi,
5757 BFIN_BUILTIN_MULHISILL);
5758 def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi,
5759 BFIN_BUILTIN_MULHISIHL);
5760 def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi,
5761 BFIN_BUILTIN_MULHISILH);
5762 def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
5763 BFIN_BUILTIN_MULHISIHH);
5765 def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
5766 BFIN_BUILTIN_MIN_1X32);
5767 def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
5768 BFIN_BUILTIN_MAX_1X32);
5770 def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
5771 BFIN_BUILTIN_SSADD_1X32);
5772 def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
5773 BFIN_BUILTIN_SSSUB_1X32);
5774 def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int,
5775 BFIN_BUILTIN_NEG_1X32);
5776 def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int,
5777 BFIN_BUILTIN_ABS_1X32);
5778 def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int,
5779 BFIN_BUILTIN_NORM_1X32);
5780 def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int,
5781 BFIN_BUILTIN_ROUND_1X32);
5782 def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short,
5783 BFIN_BUILTIN_MULT_1X32);
5784 def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int,
5785 BFIN_BUILTIN_MULT_1X32X32);
5786 def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int,
5787 BFIN_BUILTIN_MULT_1X32X32NS);
5789 /* Shifts. */
5790 def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int,
5791 BFIN_BUILTIN_SSASHIFT_1X16);
5792 def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int,
5793 BFIN_BUILTIN_SSASHIFT_2X16);
5794 def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int,
5795 BFIN_BUILTIN_LSHIFT_1X16);
5796 def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int,
5797 BFIN_BUILTIN_LSHIFT_2X16);
5798 def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int,
5799 BFIN_BUILTIN_SSASHIFT_1X32);
5801 /* Complex numbers. */
5802 def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
5803 BFIN_BUILTIN_SSADD_2X16);
5804 def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
5805 BFIN_BUILTIN_SSSUB_2X16);
5806 def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
5807 BFIN_BUILTIN_CPLX_MUL_16);
5808 def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
5809 BFIN_BUILTIN_CPLX_MAC_16);
5810 def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
5811 BFIN_BUILTIN_CPLX_MSU_16);
5812 def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
5813 BFIN_BUILTIN_CPLX_MUL_16_S40);
5814 def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5815 BFIN_BUILTIN_CPLX_MAC_16_S40);
5816 def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5817 BFIN_BUILTIN_CPLX_MSU_16_S40);
5818 def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
5819 BFIN_BUILTIN_CPLX_SQU);
5821 /* "Unaligned" load. */
5822 def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
5823 BFIN_BUILTIN_LOADBYTES);
5828 struct builtin_description
5830 const enum insn_code icode;
5831 const char *const name;
5832 const enum bfin_builtins code;
5833 int macflag;
5836 static const struct builtin_description bdesc_2arg[] =
5838 { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 },
5840 { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 },
5841 { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 },
5842 { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 },
5843 { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 },
5844 { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 },
5846 { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 },
5847 { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 },
5848 { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 },
5849 { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 },
5851 { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 },
5852 { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 },
5853 { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 },
5854 { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 },
5856 { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 },
5857 { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 },
5858 { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 },
5859 { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 },
5860 { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 },
5861 { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 },
5863 { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE },
5864 { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
5865 { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
5866 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
5867 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
5869 { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
5870 { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
5871 { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
5872 { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
5876 static const struct builtin_description bdesc_1arg[] =
5878 { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
5880 { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
5882 { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
5883 { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
5884 { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
5886 { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 },
5887 { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 },
5888 { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 },
5889 { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 },
5891 { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 },
5892 { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 },
5893 { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 },
5894 { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 }
5897 /* Errors in the source file can cause expand_expr to return const0_rtx
5898 where we expect a vector. To avoid crashing, use one of the vector
5899 clear instructions. */
5900 static rtx
5901 safe_vector_operand (rtx x, enum machine_mode mode)
5903 if (x != const0_rtx)
5904 return x;
5905 x = gen_reg_rtx (SImode);
5907 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
5908 return gen_lowpart (mode, x);
5911 /* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1
5912 if this is a normal binary op, or one of the MACFLAG_xxx constants. */
5914 static rtx
5915 bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
5916 int macflag)
5918 rtx pat;
5919 tree arg0 = CALL_EXPR_ARG (exp, 0);
5920 tree arg1 = CALL_EXPR_ARG (exp, 1);
5921 rtx op0 = expand_normal (arg0);
5922 rtx op1 = expand_normal (arg1);
5923 enum machine_mode op0mode = GET_MODE (op0);
5924 enum machine_mode op1mode = GET_MODE (op1);
5925 enum machine_mode tmode = insn_data[icode].operand[0].mode;
5926 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
5927 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
5929 if (VECTOR_MODE_P (mode0))
5930 op0 = safe_vector_operand (op0, mode0);
5931 if (VECTOR_MODE_P (mode1))
5932 op1 = safe_vector_operand (op1, mode1);
5934 if (! target
5935 || GET_MODE (target) != tmode
5936 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
5937 target = gen_reg_rtx (tmode);
5939 if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode)
5941 op0mode = HImode;
5942 op0 = gen_lowpart (HImode, op0);
5944 if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode)
5946 op1mode = HImode;
5947 op1 = gen_lowpart (HImode, op1);
5949 /* In case the insn wants input operands in modes different from
5950 the result, abort. */
5951 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
5952 && (op1mode == mode1 || op1mode == VOIDmode));
5954 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5955 op0 = copy_to_mode_reg (mode0, op0);
5956 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
5957 op1 = copy_to_mode_reg (mode1, op1);
5959 if (macflag == -1)
5960 pat = GEN_FCN (icode) (target, op0, op1);
5961 else
5962 pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag));
5963 if (! pat)
5964 return 0;
5966 emit_insn (pat);
5967 return target;
5970 /* Subroutine of bfin_expand_builtin to take care of unop insns. */
5972 static rtx
5973 bfin_expand_unop_builtin (enum insn_code icode, tree exp,
5974 rtx target)
5976 rtx pat;
5977 tree arg0 = CALL_EXPR_ARG (exp, 0);
5978 rtx op0 = expand_normal (arg0);
5979 enum machine_mode op0mode = GET_MODE (op0);
5980 enum machine_mode tmode = insn_data[icode].operand[0].mode;
5981 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
5983 if (! target
5984 || GET_MODE (target) != tmode
5985 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
5986 target = gen_reg_rtx (tmode);
5988 if (VECTOR_MODE_P (mode0))
5989 op0 = safe_vector_operand (op0, mode0);
5991 if (op0mode == SImode && mode0 == HImode)
5993 op0mode = HImode;
5994 op0 = gen_lowpart (HImode, op0);
5996 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
5998 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5999 op0 = copy_to_mode_reg (mode0, op0);
6001 pat = GEN_FCN (icode) (target, op0);
6002 if (! pat)
6003 return 0;
6004 emit_insn (pat);
6005 return target;
6008 /* Expand an expression EXP that calls a built-in function,
6009 with result going to TARGET if that's convenient
6010 (and in mode MODE if that's convenient).
6011 SUBTARGET may be used as the target for computing one of EXP's operands.
6012 IGNORE is nonzero if the value is to be ignored. */
6014 static rtx
6015 bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6016 rtx subtarget ATTRIBUTE_UNUSED,
6017 enum machine_mode mode ATTRIBUTE_UNUSED,
6018 int ignore ATTRIBUTE_UNUSED)
6020 size_t i;
6021 enum insn_code icode;
6022 const struct builtin_description *d;
6023 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6024 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6025 tree arg0, arg1, arg2;
6026 rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg;
6027 enum machine_mode tmode, mode0;
6029 switch (fcode)
6031 case BFIN_BUILTIN_CSYNC:
6032 emit_insn (gen_csync ());
6033 return 0;
6034 case BFIN_BUILTIN_SSYNC:
6035 emit_insn (gen_ssync ());
6036 return 0;
6038 case BFIN_BUILTIN_DIFFHL_2X16:
6039 case BFIN_BUILTIN_DIFFLH_2X16:
6040 case BFIN_BUILTIN_SUM_2X16:
6041 arg0 = CALL_EXPR_ARG (exp, 0);
6042 op0 = expand_normal (arg0);
6043 icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3
6044 : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3
6045 : CODE_FOR_ssaddhilov2hi3);
6046 tmode = insn_data[icode].operand[0].mode;
6047 mode0 = insn_data[icode].operand[1].mode;
6049 if (! target
6050 || GET_MODE (target) != tmode
6051 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6052 target = gen_reg_rtx (tmode);
6054 if (VECTOR_MODE_P (mode0))
6055 op0 = safe_vector_operand (op0, mode0);
6057 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6058 op0 = copy_to_mode_reg (mode0, op0);
6060 pat = GEN_FCN (icode) (target, op0, op0);
6061 if (! pat)
6062 return 0;
6063 emit_insn (pat);
6064 return target;
    case BFIN_BUILTIN_MULT_1X32X32:
    case BFIN_BUILTIN_MULT_1X32X32NS:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (! target
          || !register_operand (target, SImode))
        target = gen_reg_rtx (SImode);
      if (! register_operand (op0, SImode))
        op0 = copy_to_mode_reg (SImode, op0);
      if (! register_operand (op1, SImode))
        op1 = copy_to_mode_reg (SImode, op1);

      a1reg = gen_rtx_REG (PDImode, REG_A1);
      a0reg = gen_rtx_REG (PDImode, REG_A0);
      tmp1 = gen_lowpart (V2HImode, op0);
      tmp2 = gen_lowpart (V2HImode, op1);
      emit_insn (gen_flag_macinit1hi (a1reg,
                                      gen_lowpart (HImode, op0),
                                      gen_lowpart (HImode, op1),
                                      GEN_INT (MACFLAG_FU)));
      emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));

      if (fcode == BFIN_BUILTIN_MULT_1X32X32)
        emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2,
                                                       const1_rtx, const1_rtx,
                                                       const1_rtx, const0_rtx, a1reg,
                                                       const0_rtx, GEN_INT (MACFLAG_NONE),
                                                       GEN_INT (MACFLAG_M)));
      else
        {
          /* For saturating multiplication, there's exactly one special case
             to be handled: multiplying the smallest negative value with
             itself.  Due to shift correction in fractional multiplies, this
             can overflow.  Iff this happens, OP2 will contain 1, which, when
             added in 32 bits to the smallest negative, wraps to the largest
             positive, which is the result we want.  */
          op2 = gen_reg_rtx (V2HImode);
          emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx));
          emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC),
                                  gen_lowpart (SImode, op2)));
          emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2,
                                                                const1_rtx, const1_rtx,
                                                                const1_rtx, const0_rtx, a1reg,
                                                                const0_rtx, GEN_INT (MACFLAG_NONE),
                                                                GEN_INT (MACFLAG_M)));
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC)));
        }

      emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1,
                                               const1_rtx, const0_rtx,
                                               a1reg, const0_rtx, GEN_INT (MACFLAG_M)));
      emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15)));
      emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg));
      if (fcode == BFIN_BUILTIN_MULT_1X32X32NS)
        emit_insn (gen_addsi3 (target, target, op2));
      return target;

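    /* 16-bit complex multiply: one half of the result receives
       a.re*b.re - a.im*b.im and the other a.re*b.im + a.im*b.re,
       computed with a MAC-initialize / MAC pair.  The _S40 variant
       passes MACFLAG_NONE instead of MACFLAG_W32, presumably selecting
       full 40-bit accumulation rather than 32-bit saturation.  */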
    case BFIN_BUILTIN_CPLX_MUL_16:
    case BFIN_BUILTIN_CPLX_MUL_16_S40:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      accvec = gen_reg_rtx (V2PDImode);
      icode = CODE_FOR_flag_macv2hi_parts;
      tmode = insn_data[icode].operand[0].mode;

      if (! target
          || GET_MODE (target) != V2HImode
          || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
        target = gen_reg_rtx (tmode);
      if (! register_operand (op0, GET_MODE (op0)))
        op0 = copy_to_mode_reg (GET_MODE (op0), op0);
      if (! register_operand (op1, GET_MODE (op1)))
        op1 = copy_to_mode_reg (GET_MODE (op1), op1);

      if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
        emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
                                                const0_rtx, const0_rtx,
                                                const1_rtx, GEN_INT (MACFLAG_W32)));
      else
        emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
                                                const0_rtx, const0_rtx,
                                                const1_rtx, GEN_INT (MACFLAG_NONE)));
      emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
                                         const1_rtx, const1_rtx,
                                         const0_rtx, accvec, const1_rtx, const0_rtx,
                                         GEN_INT (MACFLAG_NONE), accvec));

      return target;

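    /* 16-bit complex multiply-accumulate and multiply-subtract: the
       accumulator pair is preloaded from the first operand (the
       running sum), after which the complex product of the other two
       operands is added to or subtracted from it.  As above, the _S40
       variants use MACFLAG_NONE instead of MACFLAG_W32.  */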
    case BFIN_BUILTIN_CPLX_MAC_16:
    case BFIN_BUILTIN_CPLX_MSU_16:
    case BFIN_BUILTIN_CPLX_MAC_16_S40:
    case BFIN_BUILTIN_CPLX_MSU_16_S40:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      accvec = gen_reg_rtx (V2PDImode);
      icode = CODE_FOR_flag_macv2hi_parts;
      tmode = insn_data[icode].operand[0].mode;

      if (! target
          || GET_MODE (target) != V2HImode
          || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
        target = gen_reg_rtx (tmode);
      if (! register_operand (op1, GET_MODE (op1)))
        op1 = copy_to_mode_reg (GET_MODE (op1), op1);
      if (! register_operand (op2, GET_MODE (op2)))
        op2 = copy_to_mode_reg (GET_MODE (op2), op2);

      tmp1 = gen_reg_rtx (SImode);
      tmp2 = gen_reg_rtx (SImode);
      emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16)));
      emit_move_insn (tmp2, gen_lowpart (SImode, op0));
      emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
      emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
      if (fcode == BFIN_BUILTIN_CPLX_MAC_16
          || fcode == BFIN_BUILTIN_CPLX_MSU_16)
        emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
                                                   const0_rtx, const0_rtx,
                                                   const1_rtx, accvec, const0_rtx,
                                                   const0_rtx,
                                                   GEN_INT (MACFLAG_W32)));
      else
        emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
                                                   const0_rtx, const0_rtx,
                                                   const1_rtx, accvec, const0_rtx,
                                                   const0_rtx,
                                                   GEN_INT (MACFLAG_NONE)));
      if (fcode == BFIN_BUILTIN_CPLX_MAC_16
          || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
        {
          tmp1 = const1_rtx;
          tmp2 = const0_rtx;
        }
      else
        {
          tmp1 = const0_rtx;
          tmp2 = const1_rtx;
        }
      emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
                                         const1_rtx, const1_rtx,
                                         const0_rtx, accvec, tmp1, tmp2,
                                         GEN_INT (MACFLAG_NONE), accvec));

      return target;

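    /* 16-bit complex square: judging by the insns emitted, the low
       half of the result receives re*re - im*im and the high half
       2*re*im, using saturating halfword arithmetic.  */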
    case BFIN_BUILTIN_CPLX_SQU:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      accvec = gen_reg_rtx (V2PDImode);
      icode = CODE_FOR_flag_mulv2hi;
      tmp1 = gen_reg_rtx (V2HImode);
      tmp2 = gen_reg_rtx (V2HImode);

      if (! target
          || GET_MODE (target) != V2HImode
          || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
        target = gen_reg_rtx (V2HImode);
      if (! register_operand (op0, GET_MODE (op0)))
        op0 = copy_to_mode_reg (GET_MODE (op0), op0);

      emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));

      emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
                                       const0_rtx, const1_rtx,
                                       GEN_INT (MACFLAG_NONE)));

      emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
                                          const0_rtx));
      emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
                                         const0_rtx, const1_rtx));

      return target;

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      return bfin_expand_binop_builtin (d->icode, exp, target,
                                        d->macflag);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return bfin_expand_unop_builtin (d->icode, exp, target);

  gcc_unreachable ();
}

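/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
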
static void
bfin_conditional_register_usage (void)
{
  /* Initialize the RTX objects for the condition-code flag and RETS
     registers.  */
  bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC);
  bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS);
  if (TARGET_FDPIC)
    call_used_regs[FDPIC_REGNO] = 1;
  if (!TARGET_FDPIC && flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}

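/* Initialize the GCC target structure.  */
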
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS bfin_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN bfin_expand_builtin

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START output_file_start

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE bfin_attribute_table

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS bfin_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST bfin_address_cost

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER bfin_assemble_integer

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST bfin_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG bfin_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE bfin_option_override

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD bfin_secondary_reload

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P bfin_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE bfin_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY bfin_extra_live_on_entry

/* Passes after sched2 can break the helpful TImode annotations that
   haifa-sched puts on every insn.  Just do scheduling in reorg.  */
#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

struct gcc_target targetm = TARGET_INITIALIZER;