* pa/pa.c (print_operand, case 'Y'): Fix comparisons to handle
[official-gcc.git] / gcc / config / pa / pa.c
blobc8bff3b44eaf5b27a655156b82c6101e6587cf02
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdio.h>
23 #include "config.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
40 /* Save the operands last given to a compare for use when we
41 generate a scc or bcc insn. */
43 rtx hppa_compare_op0, hppa_compare_op1;
44 enum cmp_type hppa_branch_type;
46 /* Which cpu we are scheduling for. */
47 enum processor_type pa_cpu;
49 /* String to hold which cpu we are scheduling for. */
50 char *pa_cpu_string;
52 /* Set by the FUNCTION_PROFILER macro. */
53 int hp_profile_labelno;
55 /* Counts for the number of callee-saved general and floating point
56 registers which were saved by the current function's prologue. */
57 static int gr_saved, fr_saved;
59 /* Whether or not the current function uses an out-of-line prologue
60 and epilogue. */
61 static int out_of_line_prologue_epilogue;
63 static rtx find_addr_reg ();
65 /* Kludgery. We hold the operands to a fmpy insn here so we can
66 compare them with the operands for an fadd/fsub to determine if
67 they can be combined into a fmpyadd/fmpysub insn.
69 This _WILL_ disappear as the code to combine independent insns
70 matures. */
71 static rtx fmpy_operands[3];
73 /* Keep track of the number of bytes we have output in the CODE subspaces
74 during this compilation so we'll know when to emit inline long-calls. */
76 unsigned int total_code_bytes;
78 /* Variables to handle plabels that we discover are necessary at assembly
79 output time. They are output after the current function. */
81 struct defer_plab
83 rtx internal_label;
84 rtx symbol;
85 } *deferred_plabels = 0;
86 int n_deferred_plabels = 0;
88 void
89 override_options ()
91 /* Default to 7100 scheduling. If the 7100LC scheduling ever
92 gets reasonably tuned, it should be the default since that
93 what most PAs sold now are. */
94 if (pa_cpu_string == NULL
95 || ! strcmp (pa_cpu_string, "7100"))
97 pa_cpu_string = "7100";
98 pa_cpu = PROCESSOR_7100;
100 else if (! strcmp (pa_cpu_string, "700"))
102 pa_cpu_string = "700";
103 pa_cpu = PROCESSOR_700;
105 else if (! strcmp (pa_cpu_string, "7100LC"))
107 pa_cpu_string = "7100LC";
108 pa_cpu = PROCESSOR_7100LC;
110 else
112 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
115 if (flag_pic && TARGET_PORTABLE_RUNTIME)
117 warning ("PIC code generation is not supported in the portable runtime model\n");
120 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
122 warning ("PIC code generation is not compatable with fast indirect calls\n");
125 if (flag_pic && profile_flag)
127 warning ("PIC code generation is not compatable with profiling\n");
130 if (TARGET_SPACE && (flag_pic || profile_flag))
132 warning ("Out of line entry/exit sequences are not compatable\n");
133 warning ("with PIC or profiling\n");
136 if (! TARGET_GAS && write_symbols != NO_DEBUG)
138 warning ("-g is only supported when using GAS on this processor,");
139 warning ("-g option disabled.");
140 write_symbols = NO_DEBUG;
145 /* Return non-zero only if OP is a register of mode MODE,
146 or CONST0_RTX. */
148 reg_or_0_operand (op, mode)
149 rtx op;
150 enum machine_mode mode;
152 return (op == CONST0_RTX (mode) || register_operand (op, mode));
155 /* Return non-zero if OP is suitable for use in a call to a named
156 function.
158 (???) For 2.5 try to eliminate either call_operand_address or
159 function_label_operand, they perform very similar functions. */
161 call_operand_address (op, mode)
162 rtx op;
163 enum machine_mode mode;
165 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
168 /* Return 1 if X contains a symbolic expression. We know these
169 expressions will have one of a few well defined forms, so
170 we need only check those forms. */
172 symbolic_expression_p (x)
173 register rtx x;
176 /* Strip off any HIGH. */
177 if (GET_CODE (x) == HIGH)
178 x = XEXP (x, 0);
180 return (symbolic_operand (x, VOIDmode));
184 symbolic_operand (op, mode)
185 register rtx op;
186 enum machine_mode mode;
188 switch (GET_CODE (op))
190 case SYMBOL_REF:
191 case LABEL_REF:
192 return 1;
193 case CONST:
194 op = XEXP (op, 0);
195 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
196 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
197 && GET_CODE (XEXP (op, 1)) == CONST_INT);
198 default:
199 return 0;
203 /* Return truth value of statement that OP is a symbolic memory
204 operand of mode MODE. */
207 symbolic_memory_operand (op, mode)
208 rtx op;
209 enum machine_mode mode;
211 if (GET_CODE (op) == SUBREG)
212 op = SUBREG_REG (op);
213 if (GET_CODE (op) != MEM)
214 return 0;
215 op = XEXP (op, 0);
216 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
217 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
220 /* Return 1 if the operand is either a register or a memory operand that is
221 not symbolic. */
224 reg_or_nonsymb_mem_operand (op, mode)
225 register rtx op;
226 enum machine_mode mode;
228 if (register_operand (op, mode))
229 return 1;
231 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
232 return 1;
234 return 0;
237 /* Return 1 if the operand is either a register, zero, or a memory operand
238 that is not symbolic. */
241 reg_or_0_or_nonsymb_mem_operand (op, mode)
242 register rtx op;
243 enum machine_mode mode;
245 if (register_operand (op, mode))
246 return 1;
248 if (op == CONST0_RTX (mode))
249 return 1;
251 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
252 return 1;
254 return 0;
257 /* Accept any constant that can be moved in one instructions into a
258 general register. */
260 cint_ok_for_move (intval)
261 HOST_WIDE_INT intval;
263 /* OK if ldo, ldil, or zdepi, can be used. */
264 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
265 || zdepi_cint_p (intval));
268 /* Accept anything that can be moved in one instruction into a general
269 register. */
271 move_operand (op, mode)
272 rtx op;
273 enum machine_mode mode;
275 if (register_operand (op, mode))
276 return 1;
278 if (GET_CODE (op) == CONST_INT)
279 return cint_ok_for_move (INTVAL (op));
281 if (GET_CODE (op) == SUBREG)
282 op = SUBREG_REG (op);
283 if (GET_CODE (op) != MEM)
284 return 0;
286 op = XEXP (op, 0);
287 if (GET_CODE (op) == LO_SUM)
288 return (register_operand (XEXP (op, 0), Pmode)
289 && CONSTANT_P (XEXP (op, 1)));
291 /* Since move_operand is only used for source operands, we can always
292 allow scaled indexing! */
293 if (GET_CODE (op) == PLUS
294 && ((GET_CODE (XEXP (op, 0)) == MULT
295 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
296 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
297 && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
298 && GET_CODE (XEXP (op, 1)) == REG)
299 || (GET_CODE (XEXP (op, 1)) == MULT
300 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
301 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
302 && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
303 && GET_CODE (XEXP (op, 0)) == REG)))
304 return 1;
306 return memory_address_p (mode, op);
309 /* Accept REG and any CONST_INT that can be moved in one instruction into a
310 general register. */
312 reg_or_cint_move_operand (op, mode)
313 rtx op;
314 enum machine_mode mode;
316 if (register_operand (op, mode))
317 return 1;
319 if (GET_CODE (op) == CONST_INT)
320 return cint_ok_for_move (INTVAL (op));
322 return 0;
326 pic_label_operand (op, mode)
327 rtx op;
328 enum machine_mode mode;
330 if (!flag_pic)
331 return 0;
333 switch (GET_CODE (op))
335 case LABEL_REF:
336 return 1;
337 case CONST:
338 op = XEXP (op, 0);
339 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
340 && GET_CODE (XEXP (op, 1)) == CONST_INT);
341 default:
342 return 0;
347 fp_reg_operand (op, mode)
348 rtx op;
349 enum machine_mode mode;
351 return reg_renumber && FP_REG_P (op);
356 /* Return truth value of whether OP can be used as an operand in a
357 three operand arithmetic insn that accepts registers of mode MODE
358 or 14-bit signed integers. */
360 arith_operand (op, mode)
361 rtx op;
362 enum machine_mode mode;
364 return (register_operand (op, mode)
365 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
368 /* Return truth value of whether OP can be used as an operand in a
369 three operand arithmetic insn that accepts registers of mode MODE
370 or 11-bit signed integers. */
372 arith11_operand (op, mode)
373 rtx op;
374 enum machine_mode mode;
376 return (register_operand (op, mode)
377 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
380 /* A constant integer suitable for use in a PRE_MODIFY memory
381 reference. */
383 pre_cint_operand (op, mode)
384 rtx op;
385 enum machine_mode mode;
387 return (GET_CODE (op) == CONST_INT
388 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
391 /* A constant integer suitable for use in a POST_MODIFY memory
392 reference. */
394 post_cint_operand (op, mode)
395 rtx op;
396 enum machine_mode mode;
398 return (GET_CODE (op) == CONST_INT
399 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
403 arith_double_operand (op, mode)
404 rtx op;
405 enum machine_mode mode;
407 return (register_operand (op, mode)
408 || (GET_CODE (op) == CONST_DOUBLE
409 && GET_MODE (op) == mode
410 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
411 && (CONST_DOUBLE_HIGH (op) >= 0
412 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
415 /* Return truth value of whether OP is a integer which fits the
416 range constraining immediate operands in three-address insns, or
417 is an integer register. */
420 ireg_or_int5_operand (op, mode)
421 rtx op;
422 enum machine_mode mode;
424 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
425 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
428 /* Return truth value of whether OP is a integer which fits the
429 range constraining immediate operands in three-address insns. */
432 int5_operand (op, mode)
433 rtx op;
434 enum machine_mode mode;
436 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
440 uint5_operand (op, mode)
441 rtx op;
442 enum machine_mode mode;
444 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
448 int11_operand (op, mode)
449 rtx op;
450 enum machine_mode mode;
452 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
456 uint32_operand (op, mode)
457 rtx op;
458 enum machine_mode mode;
460 #if HOST_BITS_PER_WIDE_INT > 32
461 /* All allowed constants will fit a CONST_INT. */
462 return (GET_CODE (op) == CONST_INT
463 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
464 #else
465 return (GET_CODE (op) == CONST_INT
466 || (GET_CODE (op) == CONST_DOUBLE
467 && CONST_DOUBLE_HIGH (op) == 0));
468 #endif
472 arith5_operand (op, mode)
473 rtx op;
474 enum machine_mode mode;
476 return register_operand (op, mode) || int5_operand (op, mode);
479 /* True iff zdepi can be used to generate this CONST_INT. */
481 zdepi_cint_p (x)
482 unsigned HOST_WIDE_INT x;
484 unsigned HOST_WIDE_INT lsb_mask, t;
486 /* This might not be obvious, but it's at least fast.
487 This function is critical; we don't have the time loops would take. */
488 lsb_mask = x & -x;
489 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
490 /* Return true iff t is a power of two. */
491 return ((t & (t - 1)) == 0);
494 /* True iff depi or extru can be used to compute (reg & mask).
495 Accept bit pattern like these:
496 0....01....1
497 1....10....0
498 1..10..01..1 */
500 and_mask_p (mask)
501 unsigned HOST_WIDE_INT mask;
503 mask = ~mask;
504 mask += mask & -mask;
505 return (mask & (mask - 1)) == 0;
508 /* True iff depi or extru can be used to compute (reg & OP). */
510 and_operand (op, mode)
511 rtx op;
512 enum machine_mode mode;
514 return (register_operand (op, mode)
515 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
518 /* True iff depi can be used to compute (reg | MASK). */
520 ior_mask_p (mask)
521 unsigned HOST_WIDE_INT mask;
523 mask += mask & -mask;
524 return (mask & (mask - 1)) == 0;
527 /* True iff depi can be used to compute (reg | OP). */
529 ior_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
537 lhs_lshift_operand (op, mode)
538 rtx op;
539 enum machine_mode mode;
541 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
544 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
545 Such values can be the left hand side x in (x << r), using the zvdepi
546 instruction. */
548 lhs_lshift_cint_operand (op, mode)
549 rtx op;
550 enum machine_mode mode;
552 unsigned HOST_WIDE_INT x;
553 if (GET_CODE (op) != CONST_INT)
554 return 0;
555 x = INTVAL (op) >> 4;
556 return (x & (x + 1)) == 0;
560 arith32_operand (op, mode)
561 rtx op;
562 enum machine_mode mode;
564 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
568 pc_or_label_operand (op, mode)
569 rtx op;
570 enum machine_mode mode;
572 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
575 /* Legitimize PIC addresses. If the address is already
576 position-independent, we return ORIG. Newly generated
577 position-independent addresses go to REG. If we need more
578 than one register, we lose. */
581 legitimize_pic_address (orig, mode, reg)
582 rtx orig, reg;
583 enum machine_mode mode;
585 rtx pic_ref = orig;
587 /* Labels need special handling. */
588 if (pic_label_operand (orig))
590 emit_insn (gen_pic_load_label (reg, orig));
591 current_function_uses_pic_offset_table = 1;
592 return reg;
594 if (GET_CODE (orig) == SYMBOL_REF)
596 if (reg == 0)
597 abort ();
599 if (flag_pic == 2)
601 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
602 pic_ref = gen_rtx (MEM, Pmode,
603 gen_rtx (LO_SUM, Pmode, reg,
604 gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
606 else
607 pic_ref = gen_rtx (MEM, Pmode,
608 gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
609 current_function_uses_pic_offset_table = 1;
610 RTX_UNCHANGING_P (pic_ref) = 1;
611 emit_move_insn (reg, pic_ref);
612 return reg;
614 else if (GET_CODE (orig) == CONST)
616 rtx base;
618 if (GET_CODE (XEXP (orig, 0)) == PLUS
619 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
620 return orig;
622 if (reg == 0)
623 abort ();
625 if (GET_CODE (XEXP (orig, 0)) == PLUS)
627 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
628 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
629 base == reg ? 0 : reg);
631 else abort ();
632 if (GET_CODE (orig) == CONST_INT)
634 if (INT_14_BITS (orig))
635 return plus_constant_for_output (base, INTVAL (orig));
636 orig = force_reg (Pmode, orig);
638 pic_ref = gen_rtx (PLUS, Pmode, base, orig);
639 /* Likewise, should we set special REG_NOTEs here? */
641 return pic_ref;
644 /* Try machine-dependent ways of modifying an illegitimate address
645 to be legitimate. If we find one, return the new, valid address.
646 This macro is used in only one place: `memory_address' in explow.c.
648 OLDX is the address as it was before break_out_memory_refs was called.
649 In some cases it is useful to look at this to decide what needs to be done.
651 MODE and WIN are passed so that this macro can use
652 GO_IF_LEGITIMATE_ADDRESS.
654 It is always safe for this macro to do nothing. It exists to recognize
655 opportunities to optimize the output.
657 For the PA, transform:
659 memory(X + <large int>)
661 into:
663 if (<large int> & mask) >= 16
664 Y = (<large int> & ~mask) + mask + 1 Round up.
665 else
666 Y = (<large int> & ~mask) Round down.
667 Z = X + Y
668 memory (Z + (<large int> - Y));
670 This is for CSE to find several similar references, and only use one Z.
672 X can either be a SYMBOL_REF or REG, but because combine can not
673 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
674 D will not fit in 14 bits.
676 MODE_FLOAT references allow displacements which fit in 5 bits, so use
677 0x1f as the mask.
679 MODE_INT references allow displacements which fit in 14 bits, so use
680 0x3fff as the mask.
682 This relies on the fact that most mode MODE_FLOAT references will use FP
683 registers and most mode MODE_INT references will use integer registers.
684 (In the rare case of an FP register used in an integer MODE, we depend
685 on secondary reloads to clean things up.)
688 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
689 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
690 addressing modes to be used).
692 Put X and Z into registers. Then put the entire expression into
693 a register. */
696 hppa_legitimize_address (x, oldx, mode)
697 rtx x, oldx;
698 enum machine_mode mode;
700 rtx orig = x;
702 if (flag_pic)
703 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
705 /* Strip off CONST. */
706 if (GET_CODE (x) == CONST)
707 x = XEXP (x, 0);
709 /* Special case. Get the SYMBOL_REF into a register and use indexing.
710 That should always be safe. */
711 if (GET_CODE (x) == PLUS
712 && GET_CODE (XEXP (x, 0)) == REG
713 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
715 rtx reg = force_reg (SImode, XEXP (x, 1));
716 return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
719 /* Note we must reject symbols which represent function addresses
720 since the assembler/linker can't handle arithmetic on plabels. */
721 if (GET_CODE (x) == PLUS
722 && GET_CODE (XEXP (x, 1)) == CONST_INT
723 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
724 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
725 || GET_CODE (XEXP (x, 0)) == REG))
727 rtx int_part, ptr_reg;
728 int newoffset;
729 int offset = INTVAL (XEXP (x, 1));
730 int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
732 /* Choose which way to round the offset. Round up if we
733 are >= halfway to the next boundary. */
734 if ((offset & mask) >= ((mask + 1) / 2))
735 newoffset = (offset & ~ mask) + mask + 1;
736 else
737 newoffset = (offset & ~ mask);
739 /* If the newoffset will not fit in 14 bits (ldo), then
740 handling this would take 4 or 5 instructions (2 to load
741 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
742 add the new offset and the SYMBOL_REF.) Combine can
743 not handle 4->2 or 5->2 combinations, so do not create
744 them. */
745 if (! VAL_14_BITS_P (newoffset)
746 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
748 rtx const_part = gen_rtx (CONST, VOIDmode,
749 gen_rtx (PLUS, Pmode,
750 XEXP (x, 0),
751 GEN_INT (newoffset)));
752 rtx tmp_reg
753 = force_reg (Pmode,
754 gen_rtx (HIGH, Pmode, const_part));
755 ptr_reg
756 = force_reg (Pmode,
757 gen_rtx (LO_SUM, Pmode,
758 tmp_reg, const_part));
760 else
762 if (! VAL_14_BITS_P (newoffset))
763 int_part = force_reg (Pmode, GEN_INT (newoffset));
764 else
765 int_part = GEN_INT (newoffset);
767 ptr_reg = force_reg (Pmode,
768 gen_rtx (PLUS, Pmode,
769 force_reg (Pmode, XEXP (x, 0)),
770 int_part));
772 return plus_constant (ptr_reg, offset - newoffset);
775 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
777 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
778 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
779 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
780 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
781 || GET_CODE (XEXP (x, 1)) == SUBREG)
782 && GET_CODE (XEXP (x, 1)) != CONST)
784 int val = INTVAL (XEXP (XEXP (x, 0), 1));
785 rtx reg1, reg2;
787 reg1 = XEXP (x, 1);
788 if (GET_CODE (reg1) != REG)
789 reg1 = force_reg (Pmode, force_operand (reg1, 0));
791 reg2 = XEXP (XEXP (x, 0), 0);
792 if (GET_CODE (reg2) != REG)
793 reg2 = force_reg (Pmode, force_operand (reg2, 0));
795 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
796 gen_rtx (MULT, Pmode,
797 reg2, GEN_INT (val)),
798 reg1));
801 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
803 Only do so for floating point modes since this is more speculative
804 and we lose if it's an integer store. */
805 if (GET_CODE (x) == PLUS
806 && GET_CODE (XEXP (x, 0)) == PLUS
807 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
808 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
809 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
810 && (mode == SFmode || mode == DFmode))
813 /* First, try and figure out what to use as a base register. */
814 rtx reg1, reg2, base, idx, orig_base;
816 reg1 = XEXP (XEXP (x, 0), 1);
817 reg2 = XEXP (x, 1);
818 base = NULL_RTX;
819 idx = NULL_RTX;
821 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
822 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
823 know it's a base register below. */
824 if (GET_CODE (reg1) != REG)
825 reg1 = force_reg (Pmode, force_operand (reg1, 0));
827 if (GET_CODE (reg2) != REG)
828 reg2 = force_reg (Pmode, force_operand (reg2, 0));
830 /* Figure out what the base and index are. */
832 if (GET_CODE (reg1) == REG
833 && REGNO_POINTER_FLAG (REGNO (reg1)))
835 base = reg1;
836 orig_base = XEXP (XEXP (x, 0), 1);
837 idx = gen_rtx (PLUS, Pmode,
838 gen_rtx (MULT, Pmode,
839 XEXP (XEXP (XEXP (x, 0), 0), 0),
840 XEXP (XEXP (XEXP (x, 0), 0), 1)),
841 XEXP (x, 1));
843 else if (GET_CODE (reg2) == REG
844 && REGNO_POINTER_FLAG (REGNO (reg2)))
846 base = reg2;
847 orig_base = XEXP (x, 1);
848 idx = XEXP (x, 0);
851 if (base == 0)
852 return orig;
854 /* If the index adds a large constant, try to scale the
855 constant so that it can be loaded with only one insn. */
856 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
857 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
858 / INTVAL (XEXP (XEXP (idx, 0), 1)))
859 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
861 /* Divide the CONST_INT by the scale factor, then add it to A. */
862 int val = INTVAL (XEXP (idx, 1));
864 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
865 reg1 = XEXP (XEXP (idx, 0), 0);
866 if (GET_CODE (reg1) != REG)
867 reg1 = force_reg (Pmode, force_operand (reg1, 0));
869 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));
871 /* We can now generate a simple scaled indexed address. */
872 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
873 gen_rtx (MULT, Pmode, reg1,
874 XEXP (XEXP (idx, 0), 1)),
875 base));
878 /* If B + C is still a valid base register, then add them. */
879 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
880 && INTVAL (XEXP (idx, 1)) <= 4096
881 && INTVAL (XEXP (idx, 1)) >= -4096)
883 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
884 rtx reg1, reg2;
886 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));
888 reg2 = XEXP (XEXP (idx, 0), 0);
889 if (GET_CODE (reg2) != CONST_INT)
890 reg2 = force_reg (Pmode, force_operand (reg2, 0));
892 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
893 gen_rtx (MULT, Pmode,
894 reg2, GEN_INT (val)),
895 reg1));
898 /* Get the index into a register, then add the base + index and
899 return a register holding the result. */
901 /* First get A into a register. */
902 reg1 = XEXP (XEXP (idx, 0), 0);
903 if (GET_CODE (reg1) != REG)
904 reg1 = force_reg (Pmode, force_operand (reg1, 0));
906 /* And get B into a register. */
907 reg2 = XEXP (idx, 1);
908 if (GET_CODE (reg2) != REG)
909 reg2 = force_reg (Pmode, force_operand (reg2, 0));
911 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
912 gen_rtx (MULT, Pmode, reg1,
913 XEXP (XEXP (idx, 0), 1)),
914 reg2));
916 /* Add the result to our base register and return. */
917 return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
921 /* Uh-oh. We might have an address for x[n-100000]. This needs
922 special handling to avoid creating an indexed memory address
923 with x-100000 as the base.
925 If the constant part is small enough, then it's still safe because
926 there is a guard page at the beginning and end of the data segment.
928 Scaled references are common enough that we want to try and rearrange the
929 terms so that we can use indexing for these addresses too. Only
930 do the optimization for floatint point modes. */
932 if (GET_CODE (x) == PLUS
933 && symbolic_expression_p (XEXP (x, 1)))
935 /* Ugly. We modify things here so that the address offset specified
936 by the index expression is computed first, then added to x to form
937 the entire address. */
939 rtx regx1, regx2, regy1, regy2, y;
941 /* Strip off any CONST. */
942 y = XEXP (x, 1);
943 if (GET_CODE (y) == CONST)
944 y = XEXP (y, 0);
946 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
948 /* See if this looks like
949 (plus (mult (reg) (shadd_const))
950 (const (plus (symbol_ref) (const_int))))
952 Where const_int is small. In that case the const
953 expression is a valid pointer for indexing.
955 If const_int is big, but can be divided evenly by shadd_const
956 and added to (reg). This allows more scaled indexed addresses. */
957 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
958 && GET_CODE (XEXP (x, 0)) == MULT
959 && GET_CODE (XEXP (y, 1)) == CONST_INT
960 && INTVAL (XEXP (y, 1)) >= -4096
961 && INTVAL (XEXP (y, 1)) <= 4095
962 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
963 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
965 int val = INTVAL (XEXP (XEXP (x, 0), 1));
966 rtx reg1, reg2;
968 reg1 = XEXP (x, 1);
969 if (GET_CODE (reg1) != REG)
970 reg1 = force_reg (Pmode, force_operand (reg1, 0));
972 reg2 = XEXP (XEXP (x, 0), 0);
973 if (GET_CODE (reg2) != REG)
974 reg2 = force_reg (Pmode, force_operand (reg2, 0));
976 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
977 gen_rtx (MULT, Pmode,
978 reg2, GEN_INT (val)),
979 reg1));
981 else if ((mode == DFmode || mode == SFmode)
982 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
983 && GET_CODE (XEXP (x, 0)) == MULT
984 && GET_CODE (XEXP (y, 1)) == CONST_INT
985 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
986 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
987 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
989 regx1
990 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
991 / INTVAL (XEXP (XEXP (x, 0), 1))));
992 regx2 = XEXP (XEXP (x, 0), 0);
993 if (GET_CODE (regx2) != REG)
994 regx2 = force_reg (Pmode, force_operand (regx2, 0));
995 regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
996 regx2, regx1));
997 return force_reg (Pmode,
998 gen_rtx (PLUS, Pmode,
999 gen_rtx (MULT, Pmode, regx2,
1000 XEXP (XEXP (x, 0), 1)),
1001 force_reg (Pmode, XEXP (y, 0))));
1003 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1004 && INTVAL (XEXP (y, 1)) >= -4096
1005 && INTVAL (XEXP (y, 1)) <= 4095)
1007 /* This is safe because of the guard page at the
1008 beginning and end of the data space. Just
1009 return the original address. */
1010 return orig;
1012 else
1014 /* Doesn't look like one we can optimize. */
1015 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1016 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1017 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1018 regx1 = force_reg (Pmode,
1019 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
1020 return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
1025 return orig;
1028 /* For the HPPA, REG and REG+CONST is cost 0
1029 and addresses involving symbolic constants are cost 2.
1031 PIC addresses are very expensive.
1033 It is no coincidence that this has the same structure
1034 as GO_IF_LEGITIMATE_ADDRESS. */
1036 hppa_address_cost (X)
1037 rtx X;
1039 if (GET_CODE (X) == PLUS)
1040 return 1;
1041 else if (GET_CODE (X) == LO_SUM)
1042 return 1;
1043 else if (GET_CODE (X) == HIGH)
1044 return 2;
1045 return 4;
1048 /* Emit insns to move operands[1] into operands[0].
1050 Return 1 if we have written out everything that needs to be done to
1051 do the move. Otherwise, return 0 and the caller will emit the move
1052 normally. */
1055 emit_move_sequence (operands, mode, scratch_reg)
1056 rtx *operands;
1057 enum machine_mode mode;
1058 rtx scratch_reg;
1060 register rtx operand0 = operands[0];
1061 register rtx operand1 = operands[1];
1063 if (reload_in_progress && GET_CODE (operand0) == REG
1064 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1065 operand0 = reg_equiv_mem[REGNO (operand0)];
1066 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1067 && GET_CODE (SUBREG_REG (operand0)) == REG
1068 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1070 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1071 operand0 = alter_subreg (operand0);
1074 if (reload_in_progress && GET_CODE (operand1) == REG
1075 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1076 operand1 = reg_equiv_mem[REGNO (operand1)];
1077 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1078 && GET_CODE (SUBREG_REG (operand1)) == REG
1079 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1081 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1082 operand1 = alter_subreg (operand1);
1085 /* Handle secondary reloads for loads/stores of FP registers from
1086 REG+D addresses where D does not fit in 5 bits, including
1087 (subreg (mem (addr))) cases. */
1088 if (fp_reg_operand (operand0, mode)
1089 && ((GET_CODE (operand1) == MEM
1090 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1091 || ((GET_CODE (operand1) == SUBREG
1092 && GET_CODE (XEXP (operand1, 0)) == MEM
1093 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1094 && scratch_reg)
1096 if (GET_CODE (operand1) == SUBREG)
1097 operand1 = XEXP (operand1, 0);
1099 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1101 /* D might not fit in 14 bits either; for such cases load D into
1102 scratch reg. */
1103 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1105 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1106 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1107 SImode,
1108 XEXP (XEXP (operand1, 0), 0),
1109 scratch_reg));
1111 else
1112 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1113 emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
1114 scratch_reg)));
1115 return 1;
1117 else if (fp_reg_operand (operand1, mode)
1118 && ((GET_CODE (operand0) == MEM
1119 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1120 || ((GET_CODE (operand0) == SUBREG)
1121 && GET_CODE (XEXP (operand0, 0)) == MEM
1122 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1123 && scratch_reg)
1125 if (GET_CODE (operand0) == SUBREG)
1126 operand0 = XEXP (operand0, 0);
1128 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1129 /* D might not fit in 14 bits either; for such cases load D into
1130 scratch reg. */
1131 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1133 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1134 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1135 SImode,
1136 XEXP (XEXP (operand0, 0), 0),
1137 scratch_reg));
1139 else
1140 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1141 emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
1142 operand1));
1143 return 1;
1145 /* Handle secondary reloads for loads of FP registers from constant
1146 expressions by forcing the constant into memory.
1148 use scratch_reg to hold the address of the memory location.
1150 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1151 NO_REGS when presented with a const_int and an register class
1152 containing only FP registers. Doing so unfortunately creates
1153 more problems than it solves. Fix this for 2.5. */
1154 else if (fp_reg_operand (operand0, mode)
1155 && CONSTANT_P (operand1)
1156 && scratch_reg)
1158 rtx xoperands[2];
1160 /* Force the constant into memory and put the address of the
1161 memory location into scratch_reg. */
1162 xoperands[0] = scratch_reg;
1163 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1164 emit_move_sequence (xoperands, Pmode, 0);
1166 /* Now load the destination register. */
1167 emit_insn (gen_rtx (SET, mode, operand0,
1168 gen_rtx (MEM, mode, scratch_reg)));
1169 return 1;
1171 /* Handle secondary reloads for SAR. These occur when trying to load
1172 the SAR from memory a FP register, or with a constant. */
1173 else if (GET_CODE (operand0) == REG
1174 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1175 && (GET_CODE (operand1) == MEM
1176 || GET_CODE (operand1) == CONST_INT
1177 || (GET_CODE (operand1) == REG
1178 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1179 && scratch_reg)
1181 emit_move_insn (scratch_reg, operand1);
1182 emit_move_insn (operand0, scratch_reg);
1183 return 1;
1185 /* Handle most common case: storing into a register. */
1186 else if (register_operand (operand0, mode))
1188 if (register_operand (operand1, mode)
1189 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1190 || (operand1 == CONST0_RTX (mode))
1191 || (GET_CODE (operand1) == HIGH
1192 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1193 /* Only `general_operands' can come here, so MEM is ok. */
1194 || GET_CODE (operand1) == MEM)
1196 /* Run this case quickly. */
1197 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1198 return 1;
1201 else if (GET_CODE (operand0) == MEM)
1203 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1205 /* Run this case quickly. */
1206 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1207 return 1;
1209 if (! (reload_in_progress || reload_completed))
1211 operands[0] = validize_mem (operand0);
1212 operands[1] = operand1 = force_reg (mode, operand1);
1216 /* Simplify the source if we need to. */
1217 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1218 || (GET_CODE (operand1) == HIGH
1219 && symbolic_operand (XEXP (operand1, 0), mode)))
1221 int ishighonly = 0;
1223 if (GET_CODE (operand1) == HIGH)
1225 ishighonly = 1;
1226 operand1 = XEXP (operand1, 0);
1228 if (symbolic_operand (operand1, mode))
1230 rtx const_part = NULL;
1232 /* Argh. The assembler and linker can't handle arithmetic
1233 involving plabels. We'll have to split up operand1 here
1234 if it's a function label involved in an arithmetic
1235 expression. Luckily, this only happens with addition
1236 of constants to plabels, which simplifies the test.
1238 We add the constant back in just before returning to
1239 our caller. */
1240 if (GET_CODE (operand1) == CONST
1241 && GET_CODE (XEXP (operand1, 0)) == PLUS
1242 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1244 /* Save away the constant part of the expression. */
1245 const_part = XEXP (XEXP (operand1, 0), 1);
1246 if (GET_CODE (const_part) != CONST_INT)
1247 abort ();
1249 /* Set operand1 to just the SYMBOL_REF. */
1250 operand1 = XEXP (XEXP (operand1, 0), 0);
1253 if (flag_pic)
1255 rtx temp;
1257 if (reload_in_progress || reload_completed)
1258 temp = scratch_reg ? scratch_reg : operand0;
1259 else
1260 temp = gen_reg_rtx (Pmode);
1262 /* If operand1 is a function label, then we've got to
1263 force it to memory, then load op0 from memory. */
1264 if (function_label_operand (operand1, mode))
1266 operands[1] = force_const_mem (mode, operand1);
1267 emit_move_sequence (operands, mode, temp);
1269 /* Likewise for (const (plus (symbol) (const_int))) when
1270 generating pic code during or after reload and const_int
1271 will not fit in 14 bits. */
1272 else if (GET_CODE (operand1) == CONST
1273 && GET_CODE (XEXP (operand1, 0)) == PLUS
1274 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1275 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1276 && (reload_completed || reload_in_progress)
1277 && flag_pic)
1279 operands[1] = force_const_mem (mode, operand1);
1280 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1281 mode, temp);
1282 emit_move_sequence (operands, mode, temp);
1284 else
1286 operands[1] = legitimize_pic_address (operand1, mode, temp);
1287 emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
1290 /* On the HPPA, references to data space are supposed to use dp,
1291 register 27, but showing it in the RTL inhibits various cse
1292 and loop optimizations. */
1293 else
1295 rtx temp, set;
1297 if (reload_in_progress || reload_completed)
1298 temp = scratch_reg ? scratch_reg : operand0;
1299 else
1300 temp = gen_reg_rtx (mode);
1302 /* Loading a SYMBOL_REF into a register makes that register
1303 safe to be used as the base in an indexed address.
1305 Don't mark hard registers though. That loses. */
1306 if (GET_CODE (operand0) == REG
1307 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1308 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1309 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1310 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1311 if (ishighonly)
1312 set = gen_rtx (SET, mode, operand0, temp);
1313 else
1314 set = gen_rtx (SET, VOIDmode,
1315 operand0,
1316 gen_rtx (LO_SUM, mode, temp, operand1));
1318 emit_insn (gen_rtx (SET, VOIDmode,
1319 temp,
1320 gen_rtx (HIGH, mode, operand1)));
1321 emit_insn (set);
1325 /* Add back in the constant part if needed. */
1326 if (const_part != NULL)
1327 expand_inc (operand0, const_part);
1328 return 1;
1330 else if (GET_CODE (operand1) != CONST_INT
1331 || ! cint_ok_for_move (INTVAL (operand1)))
1333 rtx temp;
1335 if (reload_in_progress || reload_completed)
1336 temp = operand0;
1337 else
1338 temp = gen_reg_rtx (mode);
1340 emit_insn (gen_rtx (SET, VOIDmode, temp,
1341 gen_rtx (HIGH, mode, operand1)));
1342 operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1345 /* Now have insn-emit do whatever it normally does. */
1346 return 0;
1349 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1350 it will need a link/runtime reloc. */
1353 reloc_needed (exp)
1354 tree exp;
1356 int reloc = 0;
1358 switch (TREE_CODE (exp))
1360 case ADDR_EXPR:
1361 return 1;
1363 case PLUS_EXPR:
1364 case MINUS_EXPR:
1365 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1366 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1367 break;
1369 case NOP_EXPR:
1370 case CONVERT_EXPR:
1371 case NON_LVALUE_EXPR:
1372 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1373 break;
1375 case CONSTRUCTOR:
1377 register tree link;
1378 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1379 if (TREE_VALUE (link) != 0)
1380 reloc |= reloc_needed (TREE_VALUE (link));
1382 break;
1384 case ERROR_MARK:
1385 break;
1387 return reloc;
1390 /* Does operand (which is a symbolic_operand) live in text space? If
1391 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1394 read_only_operand (operand)
1395 rtx operand;
1397 if (GET_CODE (operand) == CONST)
1398 operand = XEXP (XEXP (operand, 0), 0);
1399 if (flag_pic)
1401 if (GET_CODE (operand) == SYMBOL_REF)
1402 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1404 else
1406 if (GET_CODE (operand) == SYMBOL_REF)
1407 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1409 return 1;
1413 /* Return the best assembler insn template
1414 for moving operands[1] into operands[0] as a fullword. */
1415 char *
1416 singlemove_string (operands)
1417 rtx *operands;
1419 HOST_WIDE_INT intval;
1421 if (GET_CODE (operands[0]) == MEM)
1422 return "stw %r1,%0";
1423 if (GET_CODE (operands[1]) == MEM)
1424 return "ldw %1,%0";
1425 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1427 long i;
1428 REAL_VALUE_TYPE d;
1430 if (GET_MODE (operands[1]) != SFmode)
1431 abort ();
1433 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1434 bit pattern. */
1435 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1436 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1438 operands[1] = GEN_INT (i);
1439 /* Fall through to CONST_INT case. */
1441 if (GET_CODE (operands[1]) == CONST_INT)
1443 intval = INTVAL (operands[1]);
1445 if (VAL_14_BITS_P (intval))
1446 return "ldi %1,%0";
1447 else if ((intval & 0x7ff) == 0)
1448 return "ldil L'%1,%0";
1449 else if (zdepi_cint_p (intval))
1450 return "zdepi %Z1,%0";
1451 else
1452 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1454 return "copy %1,%0";
1458 /* Compute position (in OP[1]) and width (in OP[2])
1459 useful for copying IMM to a register using the zdepi
1460 instructions. Store the immediate value to insert in OP[0]. */
1461 void
1462 compute_zdepi_operands (imm, op)
1463 unsigned HOST_WIDE_INT imm;
1464 unsigned *op;
1466 int lsb, len;
1468 /* Find the least significant set bit in IMM. */
1469 for (lsb = 0; lsb < 32; lsb++)
1471 if ((imm & 1) != 0)
1472 break;
1473 imm >>= 1;
1476 /* Choose variants based on *sign* of the 5-bit field. */
1477 if ((imm & 0x10) == 0)
1478 len = (lsb <= 28) ? 4 : 32 - lsb;
1479 else
1481 /* Find the width of the bitstring in IMM. */
1482 for (len = 5; len < 32; len++)
1484 if ((imm & (1 << len)) == 0)
1485 break;
1488 /* Sign extend IMM as a 5-bit value. */
1489 imm = (imm & 0xf) - 0x10;
1492 op[0] = imm;
1493 op[1] = 31 - lsb;
1494 op[2] = len;
1497 /* Output assembler code to perform a doubleword move insn
1498 with operands OPERANDS. */
1500 char *
1501 output_move_double (operands)
1502 rtx *operands;
1504 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1505 rtx latehalf[2];
1506 rtx addreg0 = 0, addreg1 = 0;
1508 /* First classify both operands. */
1510 if (REG_P (operands[0]))
1511 optype0 = REGOP;
1512 else if (offsettable_memref_p (operands[0]))
1513 optype0 = OFFSOP;
1514 else if (GET_CODE (operands[0]) == MEM)
1515 optype0 = MEMOP;
1516 else
1517 optype0 = RNDOP;
1519 if (REG_P (operands[1]))
1520 optype1 = REGOP;
1521 else if (CONSTANT_P (operands[1]))
1522 optype1 = CNSTOP;
1523 else if (offsettable_memref_p (operands[1]))
1524 optype1 = OFFSOP;
1525 else if (GET_CODE (operands[1]) == MEM)
1526 optype1 = MEMOP;
1527 else
1528 optype1 = RNDOP;
1530 /* Check for the cases that the operand constraints are not
1531 supposed to allow to happen. Abort if we get one,
1532 because generating code for these cases is painful. */
1534 if (optype0 != REGOP && optype1 != REGOP)
1535 abort ();
1537 /* Handle auto decrementing and incrementing loads and stores
1538 specifically, since the structure of the function doesn't work
1539 for them without major modification. Do it better when we learn
1540 this port about the general inc/dec addressing of PA.
1541 (This was written by tege. Chide him if it doesn't work.) */
1543 if (optype0 == MEMOP)
1545 /* We have to output the address syntax ourselves, since print_operand
1546 doesn't deal with the addresses we want to use. Fix this later. */
1548 rtx addr = XEXP (operands[0], 0);
1549 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1551 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1553 operands[0] = XEXP (addr, 0);
1554 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1555 abort ();
1557 if (!reg_overlap_mentioned_p (high_reg, addr))
1559 /* No overlap between high target register and address
1560 register. (We do this in a non-obvious way to
1561 save a register file writeback) */
1562 if (GET_CODE (addr) == POST_INC)
1563 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1564 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1566 else
1567 abort();
1569 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1571 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1573 operands[0] = XEXP (addr, 0);
1574 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1575 abort ();
1577 if (!reg_overlap_mentioned_p (high_reg, addr))
1579 /* No overlap between high target register and address
1580 register. (We do this in a non-obvious way to
1581 save a register file writeback) */
1582 if (GET_CODE (addr) == PRE_INC)
1583 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1584 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1586 else
1587 abort();
1590 if (optype1 == MEMOP)
1592 /* We have to output the address syntax ourselves, since print_operand
1593 doesn't deal with the addresses we want to use. Fix this later. */
1595 rtx addr = XEXP (operands[1], 0);
1596 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1598 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1600 operands[1] = XEXP (addr, 0);
1601 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1602 abort ();
1604 if (!reg_overlap_mentioned_p (high_reg, addr))
1606 /* No overlap between high target register and address
1607 register. (We do this in a non-obvious way to
1608 save a register file writeback) */
1609 if (GET_CODE (addr) == POST_INC)
1610 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1611 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1613 else
1615 /* This is an undefined situation. We should load into the
1616 address register *and* update that register. Probably
1617 we don't need to handle this at all. */
1618 if (GET_CODE (addr) == POST_INC)
1619 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1620 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1623 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1625 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1627 operands[1] = XEXP (addr, 0);
1628 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1629 abort ();
1631 if (!reg_overlap_mentioned_p (high_reg, addr))
1633 /* No overlap between high target register and address
1634 register. (We do this in a non-obvious way to
1635 save a register file writeback) */
1636 if (GET_CODE (addr) == PRE_INC)
1637 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1638 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1640 else
1642 /* This is an undefined situation. We should load into the
1643 address register *and* update that register. Probably
1644 we don't need to handle this at all. */
1645 if (GET_CODE (addr) == PRE_INC)
1646 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1647 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1652 /* If an operand is an unoffsettable memory ref, find a register
1653 we can increment temporarily to make it refer to the second word. */
1655 if (optype0 == MEMOP)
1656 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1658 if (optype1 == MEMOP)
1659 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1661 /* Ok, we can do one word at a time.
1662 Normally we do the low-numbered word first.
1664 In either case, set up in LATEHALF the operands to use
1665 for the high-numbered word and in some cases alter the
1666 operands in OPERANDS to be suitable for the low-numbered word. */
1668 if (optype0 == REGOP)
1669 latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1670 else if (optype0 == OFFSOP)
1671 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1672 else
1673 latehalf[0] = operands[0];
1675 if (optype1 == REGOP)
1676 latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
1677 else if (optype1 == OFFSOP)
1678 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1679 else if (optype1 == CNSTOP)
1680 split_double (operands[1], &operands[1], &latehalf[1]);
1681 else
1682 latehalf[1] = operands[1];
1684 /* If the first move would clobber the source of the second one,
1685 do them in the other order.
1687 This can happen in two cases:
1689 mem -> register where the first half of the destination register
1690 is the same register used in the memory's address. Reload
1691 can create such insns.
1693 mem in this case will be either register indirect or register
1694 indirect plus a valid offset.
1696 register -> register move where REGNO(dst) == REGNO(src + 1)
1697 someone (Tim/Tege?) claimed this can happen for parameter loads.
1699 Handle mem -> register case first. */
1700 if (optype0 == REGOP
1701 && (optype1 == MEMOP || optype1 == OFFSOP)
1702 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1703 operands[1], 0))
1705 /* Do the late half first. */
1706 if (addreg1)
1707 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1708 output_asm_insn (singlemove_string (latehalf), latehalf);
1710 /* Then clobber. */
1711 if (addreg1)
1712 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1713 return singlemove_string (operands);
1716 /* Now handle register -> register case. */
1717 if (optype0 == REGOP && optype1 == REGOP
1718 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1720 output_asm_insn (singlemove_string (latehalf), latehalf);
1721 return singlemove_string (operands);
1724 /* Normal case: do the two words, low-numbered first. */
1726 output_asm_insn (singlemove_string (operands), operands);
1728 /* Make any unoffsettable addresses point at high-numbered word. */
1729 if (addreg0)
1730 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1731 if (addreg1)
1732 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1734 /* Do that word. */
1735 output_asm_insn (singlemove_string (latehalf), latehalf);
1737 /* Undo the adds we just did. */
1738 if (addreg0)
1739 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1740 if (addreg1)
1741 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1743 return "";
1746 char *
1747 output_fp_move_double (operands)
1748 rtx *operands;
1750 if (FP_REG_P (operands[0]))
1752 if (FP_REG_P (operands[1])
1753 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1754 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1755 else
1756 output_asm_insn ("fldd%F1 %1,%0", operands);
1758 else if (FP_REG_P (operands[1]))
1760 output_asm_insn ("fstd%F0 %1,%0", operands);
1762 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1764 if (GET_CODE (operands[0]) == REG)
1766 rtx xoperands[2];
1767 xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1768 xoperands[0] = operands[0];
1769 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1771 /* This is a pain. You have to be prepared to deal with an
1772 arbitrary address here including pre/post increment/decrement.
1774 so avoid this in the MD. */
1775 else
1776 abort ();
1778 else abort ();
1779 return "";
1782 /* Return a REG that occurs in ADDR with coefficient 1.
1783 ADDR can be effectively incremented by incrementing REG. */
1785 static rtx
1786 find_addr_reg (addr)
1787 rtx addr;
1789 while (GET_CODE (addr) == PLUS)
1791 if (GET_CODE (XEXP (addr, 0)) == REG)
1792 addr = XEXP (addr, 0);
1793 else if (GET_CODE (XEXP (addr, 1)) == REG)
1794 addr = XEXP (addr, 1);
1795 else if (CONSTANT_P (XEXP (addr, 0)))
1796 addr = XEXP (addr, 1);
1797 else if (CONSTANT_P (XEXP (addr, 1)))
1798 addr = XEXP (addr, 0);
1799 else
1800 abort ();
1802 if (GET_CODE (addr) == REG)
1803 return addr;
1804 abort ();
1807 /* Emit code to perform a block move.
1809 OPERANDS[0] is the destination pointer as a REG, clobbered.
1810 OPERANDS[1] is the source pointer as a REG, clobbered.
1811 OPERANDS[2] is a register for temporary storage.
1812 OPERANDS[4] is the size as a CONST_INT
1813 OPERANDS[3] is a register for temporary storage.
1814 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
1815    OPERANDS[6] is another temporary register.  */
1817 char *
1818 output_block_move (operands, size_is_constant)
1819 rtx *operands;
1820 int size_is_constant;
1822 int align = INTVAL (operands[5]);
1823 unsigned long n_bytes = INTVAL (operands[4]);
1825 /* We can't move more than four bytes at a time because the PA
1826 has no longer integer move insns. (Could use fp mem ops?) */
1827 if (align > 4)
1828 align = 4;
1830 /* Note that we know each loop below will execute at least twice
1831 (else we would have open-coded the copy). */
1832 switch (align)
1834 case 4:
1835 /* Pre-adjust the loop counter. */
1836 operands[4] = GEN_INT (n_bytes - 8);
1837 output_asm_insn ("ldi %4,%2", operands);
1839 /* Copying loop. */
1840 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1841 output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
1842 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1843 output_asm_insn ("addib,>= -8,%2,.-12", operands);
1844 output_asm_insn ("stws,ma %6,4(0,%0)", operands);
1846 /* Handle the residual. There could be up to 7 bytes of
1847 residual to copy! */
1848 if (n_bytes % 8 != 0)
1850 operands[4] = GEN_INT (n_bytes % 4);
1851 if (n_bytes % 8 >= 4)
1852 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1853 if (n_bytes % 4 != 0)
1854 output_asm_insn ("ldw 0(0,%1),%6", operands);
1855 if (n_bytes % 8 >= 4)
1856 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1857 if (n_bytes % 4 != 0)
1858 output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
1860 return "";
1862 case 2:
1863 /* Pre-adjust the loop counter. */
1864 operands[4] = GEN_INT (n_bytes - 4);
1865 output_asm_insn ("ldi %4,%2", operands);
1867 /* Copying loop. */
1868 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1869 output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
1870 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1871 output_asm_insn ("addib,>= -4,%2,.-12", operands);
1872 output_asm_insn ("sths,ma %6,2(0,%0)", operands);
1874 /* Handle the residual. */
1875 if (n_bytes % 4 != 0)
1877 if (n_bytes % 4 >= 2)
1878 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1879 if (n_bytes % 2 != 0)
1880 output_asm_insn ("ldb 0(0,%1),%6", operands);
1881 if (n_bytes % 4 >= 2)
1882 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1883 if (n_bytes % 2 != 0)
1884 output_asm_insn ("stb %6,0(0,%0)", operands);
1886 return "";
1888 case 1:
1889 /* Pre-adjust the loop counter. */
1890 operands[4] = GEN_INT (n_bytes - 2);
1891 output_asm_insn ("ldi %4,%2", operands);
1893 /* Copying loop. */
1894 output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1895 output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
1896 output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
1897 output_asm_insn ("addib,>= -2,%2,.-12", operands);
1898 output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
1900 /* Handle the residual. */
1901 if (n_bytes % 2 != 0)
1903 output_asm_insn ("ldb 0(0,%1),%3", operands);
1904 output_asm_insn ("stb %3,0(0,%0)", operands);
1906 return "";
1908 default:
1909 abort ();
1913 /* Count the number of insns necessary to handle this block move.
1915 Basic structure is the same as emit_block_move, except that we
1916 count insns rather than emit them. */
1919 compute_movstrsi_length (insn)
1920 rtx insn;
1922 rtx pat = PATTERN (insn);
1923 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1924 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1925 unsigned int n_insns = 0;
1927 /* We can't move more than four bytes at a time because the PA
1928 has no longer integer move insns. (Could use fp mem ops?) */
1929 if (align > 4)
1930 align = 4;
1932 /* The basic opying loop. */
1933 n_insns = 6;
1935 /* Residuals. */
1936 if (n_bytes % (2 * align) != 0)
1938 /* Any residual caused by unrolling the copy loop. */
1939 if (n_bytes % (2 * align) > align)
1940 n_insns += 1;
1942 /* Any residual because the number of bytes was not a
1943 multiple of the alignment. */
1944 if (n_bytes % align != 0)
1945 n_insns += 1;
1948 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
1949 return n_insns * 4;
1953 char *
1954 output_and (operands)
1955 rtx *operands;
1957 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1959 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
1960 int ls0, ls1, ms0, p, len;
1962 for (ls0 = 0; ls0 < 32; ls0++)
1963 if ((mask & (1 << ls0)) == 0)
1964 break;
1966 for (ls1 = ls0; ls1 < 32; ls1++)
1967 if ((mask & (1 << ls1)) != 0)
1968 break;
1970 for (ms0 = ls1; ms0 < 32; ms0++)
1971 if ((mask & (1 << ms0)) == 0)
1972 break;
1974 if (ms0 != 32)
1975 abort();
1977 if (ls1 == 32)
1979 len = ls0;
1981 if (len == 0)
1982 abort ();
1984 operands[2] = GEN_INT (len);
1985 return "extru %1,31,%2,%0";
1987 else
1989 /* We could use this `depi' for the case above as well, but `depi'
1990 requires one more register file access than an `extru'. */
1992 p = 31 - ls0;
1993 len = ls1 - ls0;
1995 operands[2] = GEN_INT (p);
1996 operands[3] = GEN_INT (len);
1997 return "depi 0,%2,%3,%0";
2000 else
2001 return "and %1,%2,%0";
2004 char *
2005 output_ior (operands)
2006 rtx *operands;
2008 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2009 int bs0, bs1, p, len;
2011 if (INTVAL (operands[2]) == 0)
2012 return "copy %1,%0";
2014 for (bs0 = 0; bs0 < 32; bs0++)
2015 if ((mask & (1 << bs0)) != 0)
2016 break;
2018 for (bs1 = bs0; bs1 < 32; bs1++)
2019 if ((mask & (1 << bs1)) == 0)
2020 break;
2022 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2023 abort();
2025 p = 31 - bs0;
2026 len = bs1 - bs0;
2028 operands[2] = GEN_INT (p);
2029 operands[3] = GEN_INT (len);
2030 return "depi -1,%2,%3,%0";
2033 /* Output an ascii string. */
/* Output an ascii string of SIZE bytes at P to FILE as .STRING
   directives, escaping quotes, backslashes and non-printing bytes.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;

      /* Escape up to four source bytes into PARTIAL_OUTPUT.  */
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Emit non-printing bytes as \xNN.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Start a fresh .STRING before exceeding the assembler's
	 input-line limit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
2092 /* Try to rewrite floating point comparisons & branches to avoid
2093 useless add,tr insns.
2095 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2096 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2097 first attempt to remove useless add,tr insns. It is zero
2098 for the second pass as reorg sometimes leaves bogus REG_DEAD
2099 notes lying around.
2101 When CHECK_NOTES is zero we can only eliminate add,tr insns
2102 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2103 instructions. */
2104 void
2105 remove_useless_addtr_insns (insns, check_notes)
2106 rtx insns;
2107 int check_notes;
2109 rtx insn;
2110 int all;
2111 static int pass = 0;
2113 /* This is fairly cheap, so always run it when optimizing. */
2114 if (optimize > 0)
2116 int fcmp_count = 0;
2117 int fbranch_count = 0;
2119 /* Walk all the insns in this function looking for fcmp & fbranch
2120 instructions. Keep track of how many of each we find. */
2121 insns = get_insns ();
2122 for (insn = insns; insn; insn = next_insn (insn))
2124 rtx tmp;
2126 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2127 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2128 continue;
2130 tmp = PATTERN (insn);
2132 /* It must be a set. */
2133 if (GET_CODE (tmp) != SET)
2134 continue;
2136 /* If the destination is CCFP, then we've found an fcmp insn. */
2137 tmp = SET_DEST (tmp);
2138 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2140 fcmp_count++;
2141 continue;
2144 tmp = PATTERN (insn);
2145 /* If this is an fbranch instruction, bump the fbranch counter. */
2146 if (GET_CODE (tmp) == SET
2147 && SET_DEST (tmp) == pc_rtx
2148 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2149 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2150 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2151 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2153 fbranch_count++;
2154 continue;
2159 /* Find all floating point compare + branch insns. If possible,
2160 reverse the comparison & the branch to avoid add,tr insns. */
2161 for (insn = insns; insn; insn = next_insn (insn))
2163 rtx tmp, next;
2165 /* Ignore anything that isn't an INSN. */
2166 if (GET_CODE (insn) != INSN)
2167 continue;
2169 tmp = PATTERN (insn);
2171 /* It must be a set. */
2172 if (GET_CODE (tmp) != SET)
2173 continue;
2175 /* The destination must be CCFP, which is register zero. */
2176 tmp = SET_DEST (tmp);
2177 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2178 continue;
2180 /* INSN should be a set of CCFP.
2182 See if the result of this insn is used in a reversed FP
2183 conditional branch. If so, reverse our condition and
2184 the branch. Doing so avoids useless add,tr insns. */
2185 next = next_insn (insn);
2186 while (next)
2188 /* Jumps, calls and labels stop our search. */
2189 if (GET_CODE (next) == JUMP_INSN
2190 || GET_CODE (next) == CALL_INSN
2191 || GET_CODE (next) == CODE_LABEL)
2192 break;
2194 /* As does another fcmp insn. */
2195 if (GET_CODE (next) == INSN
2196 && GET_CODE (PATTERN (next)) == SET
2197 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2198 && REGNO (SET_DEST (PATTERN (next))) == 0)
2199 break;
2201 next = next_insn (next);
2204 /* Is NEXT_INSN a branch? */
2205 if (next
2206 && GET_CODE (next) == JUMP_INSN)
2208 rtx pattern = PATTERN (next);
2210 /* If it a reversed fp conditional branch (eg uses add,tr)
2211 and CCFP dies, then reverse our conditional and the branch
2212 to avoid the add,tr. */
2213 if (GET_CODE (pattern) == SET
2214 && SET_DEST (pattern) == pc_rtx
2215 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2216 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2217 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2218 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2219 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2220 && (fcmp_count == fbranch_count
2221 || (check_notes
2222 && find_regno_note (next, REG_DEAD, 0))))
2224 /* Reverse the branch. */
2225 tmp = XEXP (SET_SRC (pattern), 1);
2226 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2227 XEXP (SET_SRC (pattern), 2) = tmp;
2228 INSN_CODE (next) = -1;
2230 /* Reverse our condition. */
2231 tmp = PATTERN (insn);
2232 PUT_CODE (XEXP (tmp, 1),
2233 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2239 pass = !pass;
2243 /* You may have trouble believing this, but this is the HP-PA stack
2244 layout. Wow.
2246 Offset Contents
2248 Variable arguments (optional; any number may be allocated)
2250 SP-(4*(N+9)) arg word N
2252 SP-56 arg word 5
2253 SP-52 arg word 4
2255 Fixed arguments (must be allocated; may remain unused)
2257 SP-48 arg word 3
2258 SP-44 arg word 2
2259 SP-40 arg word 1
2260 SP-36 arg word 0
2262 Frame Marker
2264 SP-32 External Data Pointer (DP)
2265 SP-28 External sr4
2266 SP-24 External/stub RP (RP')
2267 SP-20 Current RP
2268 SP-16 Static Link
2269 SP-12 Clean up
2270 SP-8 Calling Stub RP (RP'')
2271 SP-4 Previous SP
2273 Top of Frame
2275 SP-0 Stack Pointer (points to next available address)
2279 /* This function saves registers as follows. Registers marked with ' are
2280 this function's registers (as opposed to the previous function's).
2281 If a frame_pointer isn't needed, r4 is saved as a general register;
2282 the space for the frame pointer is still allocated, though, to keep
2283 things simple.
2286 Top of Frame
2288 SP (FP') Previous FP
2289 SP + 4 Alignment filler (sigh)
2290 SP + 8 Space for locals reserved here.
2294 SP + n All call saved register used.
2298 SP + o All call saved fp registers used.
2302 SP + p (SP') points to next available address.
2306 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2307 Handle case where DISP > 8k by using the add_high_const pattern.
2309 Note in DISP > 8k case, we will leave the high part of the address
2310 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2311 static void
2312 store_reg (reg, disp, base)
2313 int reg, disp, base;
2315 if (VAL_14_BITS_P (disp))
2317 emit_move_insn (gen_rtx (MEM, SImode,
2318 gen_rtx (PLUS, SImode,
2319 gen_rtx (REG, SImode, base),
2320 GEN_INT (disp))),
2321 gen_rtx (REG, SImode, reg));
2323 else
2325 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2326 gen_rtx (REG, SImode, base),
2327 GEN_INT (disp)));
2328 emit_move_insn (gen_rtx (MEM, SImode,
2329 gen_rtx (LO_SUM, SImode,
2330 gen_rtx (REG, SImode, 1),
2331 GEN_INT (disp))),
2332 gen_rtx (REG, SImode, reg));
2336 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2337 Handle case where DISP > 8k by using the add_high_const pattern.
2339 Note in DISP > 8k case, we will leave the high part of the address
2340 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2341 static void
2342 load_reg (reg, disp, base)
2343 int reg, disp, base;
2345 if (VAL_14_BITS_P (disp))
2347 emit_move_insn (gen_rtx (REG, SImode, reg),
2348 gen_rtx (MEM, SImode,
2349 gen_rtx (PLUS, SImode,
2350 gen_rtx (REG, SImode, base),
2351 GEN_INT (disp))));
2353 else
2355 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2356 gen_rtx (REG, SImode, base),
2357 GEN_INT (disp)));
2358 emit_move_insn (gen_rtx (REG, SImode, reg),
2359 gen_rtx (MEM, SImode,
2360 gen_rtx (LO_SUM, SImode,
2361 gen_rtx (REG, SImode, 1),
2362 GEN_INT (disp))));
2366 /* Emit RTL to set REG to the value specified by BASE+DISP.
2367 Handle case where DISP > 8k by using the add_high_const pattern.
2369 Note in DISP > 8k case, we will leave the high part of the address
2370 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2371 static void
2372 set_reg_plus_d(reg, base, disp)
2373 int reg, base, disp;
2375 if (VAL_14_BITS_P (disp))
2377 emit_move_insn (gen_rtx (REG, SImode, reg),
2378 gen_rtx (PLUS, SImode,
2379 gen_rtx (REG, SImode, base),
2380 GEN_INT (disp)));
2382 else
2384 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2385 gen_rtx (REG, SImode, base),
2386 GEN_INT (disp)));
2387 emit_move_insn (gen_rtx (REG, SImode, reg),
2388 gen_rtx (LO_SUM, SImode,
2389 gen_rtx (REG, SImode, 1),
2390 GEN_INT (disp)));
2394 /* Global variables set by FUNCTION_PROLOGUE. */
2395 /* Size of frame. Need to know this to emit return insns from
2396 leaf procedures. */
2397 static int actual_fsize;
2398 static int local_fsize, save_fregs;
/* Compute the total frame size for the current function.  SIZE is the
   size of the function's local variables.  If any callee-saved FP
   register is live, set *FREGS_LIVE to 1 (FREGS_LIVE may be null).
   Returns the frame size rounded up to a 64-byte boundary.  */
int
compute_frame_size (size, fregs_live)
     int size;
     int *fregs_live;
{
  extern int current_function_outgoing_args_size;
  int i, fsize;

  /* 8 is space for frame pointer + filler. If any frame is allocated
     we need to add this in because of STARTING_FRAME_OFFSET. */
  fsize = size + (size || frame_pointer_needed ? 8 : 0);

  /* We must leave enough space for all the callee saved registers
     from 3 .. highest used callee save register since we don't
     know if we're going to have an inline or out of line prologue
     and epilogue.  */
  for (i = 18; i >= 3; i--)
    if (regs_ever_live[i])
      {
	/* Reserve one word for every GR from 3 up to the highest
	   live callee-saved register, then stop scanning.  */
	fsize += 4 * (i - 2);
	break;
      }

  /* Round the stack.  */
  fsize = (fsize + 7) & ~7;

  /* We must leave enough space for all the callee saved registers
     from 3 .. highest used callee save register since we don't
     know if we're going to have an inline or out of line prologue
     and epilogue.  */
  for (i = 66; i >= 48; i -= 2)
    if (regs_ever_live[i] || regs_ever_live[i + 1])
      {
	if (fregs_live)
	  *fregs_live = 1;

	/* Reserve one word per FP register half from 48 up to the
	   highest live callee-saved FP register pair.  */
	fsize += 4 * (i - 46);
	break;
      }

  fsize += current_function_outgoing_args_size;
  /* Non-leaf functions (and any nonzero frame) also need the 32-byte
     frame-marker area -- see the stack layout comment above.  */
  if (! leaf_function_p () || fsize)
    fsize += 32;
  /* Round up to a 64-byte boundary.  */
  return (fsize + 63) & ~63;
}
2446 rtx hp_profile_label_rtx;
2447 static char hp_profile_label_name[8];
/* Output the assembler directives (function label, .PROC, .CALLINFO,
   .ENTRY) which start a function.  The frame and register-save
   information comes from the globals (actual_fsize, gr_saved,
   fr_saved) set up by hppa_expand_prologue; SIZE itself is unused
   here.  Also maintains total_code_bytes for the non-GAS case.  */
void
output_function_prologue (file, size)
     FILE *file;
     int size;
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
  if (regs_ever_live[2] || profile_flag)
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  /* Horrid hack.  emit_function_prologue will modify this RTL in
     place to get the expected results.  */
  if (profile_flag)
    ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
				 hp_profile_labelno);

  /* If we're using GAS and not using the portable runtime model, then
     we don't need to accumulate the total number of code bytes.  */
  if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
    total_code_bytes = 0;
  else if (insn_addresses)
    {
      unsigned int old_total = total_code_bytes;

      total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
      total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;

      /* Be prepared to handle overflows.  Saturate to -1 (all ones)
	 if the running total wrapped around.  */
      total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
    }
  else
    total_code_bytes = -1;

  remove_useless_addtr_insns (get_insns (), 0);
}
/* Expand the function prologue into RTL.  Saves %r2 (RP) into the
   caller's frame, allocates the local frame (setting up the frame
   pointer when needed), emits profiling setup when -p, saves the
   callee-saved general and floating point registers, and copies the
   PIC register to its callee-saved home when generating PIC code.
   Records the number of registers saved in gr_saved/fr_saved for
   output_function_prologue.  May instead emit a call to an
   out-of-line prologue when -mspace decides that is smaller.  */
void
hppa_expand_prologue()
{
  extern char call_used_regs[];
  int size = get_frame_size ();
  int merge_sp_adjust_with_store = 0;
  int i, offset;
  rtx tmpreg, size_rtx;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;
  local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx (REG, SImode, 1);
  size_rtx = GEN_INT (actual_fsize);

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE)
    {
      rtx operands[2];
      int saves = 0;
      int outline_insn_count = 0;
      int inline_insn_count = 0;

      /* Count the number of insns for the inline and out of line
	 variants so we can choose one appropriately.

	 No need to screw with counting actual_fsize operations -- they're
	 done for both inline and out of line prologues.  */
      if (regs_ever_live[2])
	inline_insn_count += 1;

      if (! cint_ok_for_move (local_fsize))
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      /* Put the register save info into %r22.  */
      for (i = 18; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* -1 because the stack adjustment is normally done in
	       the same insn as a register save.  */
	    inline_insn_count += (i - 2) - 1;
	    saves = i;
	    break;
	  }

      for (i = 66; i >= 48; i -= 2)
	if (regs_ever_live[i] || regs_ever_live[i + 1])
	  {
	    /* +1 needed as we load %r1 with the start of the freg
	       save area.  */
	    inline_insn_count += (i/2 - 23) + 1;
	    /* Encode the highest FP save pair in the high half of
	       SAVES; the GR number stays in the low half.  */
	    saves |= ((i/2 - 12 ) << 16);
	    break;
	  }

      if (frame_pointer_needed)
	inline_insn_count += 3;

      if (! cint_ok_for_move (saves))
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      if (TARGET_PORTABLE_RUNTIME)
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      /* If there's a lot of insns in the prologue, then do it as
	 an out-of-line sequence.  */
      if (inline_insn_count > outline_insn_count)
	{
	  /* Put the local_fsize into %r19.  */
	  operands[0] = gen_rtx (REG, SImode, 19);
	  operands[1] = GEN_INT (local_fsize);
	  emit_move_insn (operands[0], operands[1]);

	  /* Put the stack size into %r21.  */
	  operands[0] = gen_rtx (REG, SImode, 21);
	  operands[1] = size_rtx;
	  emit_move_insn (operands[0], operands[1]);

	  /* Put the register save info into %r22.  */
	  operands[0] = gen_rtx (REG, SImode, 22);
	  operands[1] = GEN_INT (saves);
	  emit_move_insn (operands[0], operands[1]);

	  /* Now call the out-of-line prologue.  */
	  emit_insn (gen_outline_prologue_call ());
	  emit_insn (gen_blockage ());

	  /* Note that we're using an out-of-line prologue.  */
	  out_of_line_prologue_epilogue = 1;
	  return;
	}
    }

  out_of_line_prologue_epilogue = 0;

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp-20.  */
  if (regs_ever_live[2] || profile_flag)
    store_reg (2, -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize)
    if (frame_pointer_needed)
      {
	/* Copy the old frame pointer temporarily into %r1.  Set up the
	   new stack pointer, then store away the saved old frame pointer
	   into the stack at sp+actual_fsize and at the same time update
	   the stack pointer by actual_fsize bytes.  Two versions, first
	   handles small (<8k) frames.  The second handles large (>8k)
	   frames.  */
	emit_move_insn (tmpreg, frame_pointer_rtx);
	emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
	if (VAL_14_BITS_P (actual_fsize))
	  emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
	else
	  {
	    /* It is incorrect to store the saved frame pointer at *sp,
	       then increment sp (writes beyond the current stack boundary).

	       So instead use stwm to store at *sp and post-increment the
	       stack pointer as an atomic operation.  Then increment sp to
	       finish allocating the new frame.  */
	    emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
	    set_reg_plus_d (STACK_POINTER_REGNUM,
			    STACK_POINTER_REGNUM,
			    actual_fsize - 64);
	  }
      }
    /* no frame pointer needed.  */
    else
      {
	/* In some cases we can perform the first callee register save
	   and allocating the stack frame at the same time.   If so, just
	   make a note of it and defer allocating the frame until saving
	   the callee registers.  */
	if (VAL_14_BITS_P (-actual_fsize)
	    && local_fsize == 0
	    && ! profile_flag
	    && ! flag_pic)
	  merge_sp_adjust_with_store = 1;
	/* Can not optimize.  Adjust the stack frame by actual_fsize bytes.  */
	else if (actual_fsize != 0)
	  set_reg_plus_d (STACK_POINTER_REGNUM,
			  STACK_POINTER_REGNUM,
			  actual_fsize);
      }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame) when
     generating PIC code.  FIXME:  What is the correct thing to do
     for functions which make no calls and allocate no frame?  Do
     we need to allocate a frame, or can we just omit the save?   For
     now we'll just omit the save.  */
  if (actual_fsize != 0 && flag_pic)
    store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);

  /* Profiling code.

     Instead of taking one argument, the counter label, as most normal
     mcounts do, _mcount appears to behave differently on the HPPA.  It
     takes the return address of the caller, the address of this routine,
     and the address of the label.   Also, it isn't magic, so
     argument registers have to be preserved.  */
  if (profile_flag)
    {
      int pc_offset, i, arg_offset, basereg, offsetadj;

      pc_offset = 4 + (frame_pointer_needed
		       ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
		       : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));

      /* When the function has a frame pointer, use it as the base
	 register for saving/restore registers.  Else use the stack
	 pointer.  Adjust the offset according to the frame size if
	 this function does not have a frame pointer.  */

      basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
				     : STACK_POINTER_REGNUM;
      offsetadj = frame_pointer_needed ? 0 : actual_fsize;

      /* Horrid hack.  emit_function_prologue will modify this RTL in
	 place to get the expected results.   sprintf here is just to
	 put something in the name.  */
      sprintf(hp_profile_label_name, "LP$%04d", -1);
      hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
				      hp_profile_label_name);
      if (current_function_returns_struct)
	store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);

      /* Save the live argument registers (%r23..%r26) around the
	 _mcount call.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
	if (regs_ever_live [i])
	  {
	    store_reg (i, arg_offset, basereg);
	    /* Deal with arg_offset not fitting in 14 bits.  */
	    pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
	  }

      emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
      emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
      emit_move_insn (gen_rtx (REG, SImode, 24),
		      gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
      /* %r25 is set from within the output pattern.  */
      emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));

      /* Restore argument registers.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
	if (regs_ever_live [i])
	  load_reg (i, arg_offset, basereg);

      if (current_function_returns_struct)
	load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    store_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += 4;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		merge_sp_adjust_with_store = 0;
		emit_insn (gen_post_stwm (stack_pointer_rtx,
					  gen_rtx (REG, SImode, i),
					  GEN_INT (-offset)));
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += 4;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM,
			STACK_POINTER_REGNUM,
			actual_fsize);
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Now actually save the FP registers.  %r1 (tmpreg) is
	 post-incremented by each DFmode store.  */
      for (i = 66; i >= 48; i -= 2)
	{
	  if (regs_ever_live[i] || regs_ever_live[i + 1])
	    {
	      emit_move_insn (gen_rtx (MEM, DFmode,
				       gen_rtx (POST_INC, DFmode, tmpreg)),
			      gen_rtx (REG, DFmode, i));
	      fr_saved++;
	    }
	}
    }

  /* When generating PIC code it is necessary to save/restore the
     PIC register around each function call.  We used to do this
     in the call patterns themselves, but that implementation
     made incorrect assumptions about using global variables to hold
     per-function rtl code generated in the backend.

     So instead, we copy the PIC register into a reserved callee saved
     register in the prologue.  Then after each call we reload the PIC
     register from the callee saved register.  We also reload the PIC
     register from the callee saved register in the epilogue ensure the
     PIC register is valid at function exit.

     This may (depending on the exact characteristics of the function)
     even be more efficient.

     Avoid this if the callee saved register wasn't used (these are
     leaf functions).  */
  if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
    emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
		    gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
}
/* Output the assembler directives (.EXIT/.PROCEND) which end a
   function, plus any deferred plabels accumulated while compiling it.
   SIZE is unused; hppa_expand_epilogue already emitted the epilogue
   RTL.  */
void
output_function_epilogue (file, size)
     FILE *file;
     int size;
{
  rtx insn = get_last_insn ();
  int i;

  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    fputs ("\tnop\n", file);

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  /* If we have deferred plabels, then we need to switch into the data
     section and align it to a 4 byte boundary before we output the
     deferred plabels.  */
  if (n_deferred_plabels)
    {
      data_section ();
      ASM_OUTPUT_ALIGN (file, 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol, 4, 1);
    }
  /* Reset for the next function.  */
  n_deferred_plabels = 0;
}
/* Expand the function epilogue into RTL: restore %r2 (RP), the
   callee-saved general and floating point registers, and deallocate
   the stack frame.  Mirrors hppa_expand_prologue, including the
   -mspace out-of-line variant.  */
void
hppa_expand_epilogue ()
{
  rtx tmpreg;
  int offset,i;
  int merge_sp_adjust_with_load = 0;

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE && out_of_line_prologue_epilogue)
    {
      int saves = 0;
      rtx operands[2];

      /* Put the register save info into %r22.  Must match the
	 encoding used by hppa_expand_prologue: highest saved GR in
	 the low half, highest saved FP pair in the high half.  */
      for (i = 18; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    saves = i;
	    break;
	  }

      for (i = 66; i >= 48; i -= 2)
	if (regs_ever_live[i] || regs_ever_live[i + 1])
	  {
	    saves |= ((i/2 - 12 ) << 16);
	    break;
	  }

      emit_insn (gen_blockage ());

      /* Put the local_fsize into %r19.  */
      operands[0] = gen_rtx (REG, SImode, 19);
      operands[1] = GEN_INT (local_fsize);
      emit_move_insn (operands[0], operands[1]);

      /* Put the stack size into %r21.  */
      operands[0] = gen_rtx (REG, SImode, 21);
      operands[1] = GEN_INT (actual_fsize);
      emit_move_insn (operands[0], operands[1]);

      operands[0] = gen_rtx (REG, SImode, 22);
      operands[1] = GEN_INT (saves);
      emit_move_insn (operands[0], operands[1]);

      /* Now call the out-of-line epilogue.  */
      emit_insn (gen_outline_epilogue_call ());
      return;
    }

  /* We will use this often.  */
  tmpreg = gen_rtx (REG, SImode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (frame_pointer_needed
      && (regs_ever_live [2] || profile_flag))
    load_reg (2, -20, FRAME_POINTER_REGNUM);

  /* No frame pointer, and stack is smaller than 8k.  */
  else if (! frame_pointer_needed
	   && VAL_14_BITS_P (actual_fsize + 20)
	   && (regs_ever_live[2] || profile_flag))
    load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    load_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += 4;
	  }
    }
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (VAL_14_BITS_P (actual_fsize + 20)
		  && local_fsize == 0
		  && ! merge_sp_adjust_with_load)
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += 4;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Actually do the restores now.  %r1 (tmpreg) is
	 post-incremented by each DFmode load.  */
      for (i = 66; i >= 48; i -= 2)
	{
	  if (regs_ever_live[i] || regs_ever_live[i + 1])
	    emit_move_insn (gen_rtx (REG, DFmode, i),
			    gen_rtx (MEM, DFmode,
				     gen_rtx (POST_INC, DFmode, tmpreg)));
	}
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());
  /* No frame pointer, but we have a stack greater than 8k.  We restore
     %r2 very late in this case.  (All other cases are restored as early
     as possible.)  */
  if (! frame_pointer_needed
      && ! VAL_14_BITS_P (actual_fsize + 20)
      && (regs_ever_live[2] || profile_flag))
    {
      set_reg_plus_d (STACK_POINTER_REGNUM,
		      STACK_POINTER_REGNUM,
		      - actual_fsize);

      /* This used to try and be clever by not depending on the value in
	 %r30 and instead use the value held in %r1 (so that the 2nd insn
	 which sets %r30 could be put in the delay slot of the return insn).

	 That won't work since if the stack is exactly 8k set_reg_plus_d
	 doesn't set %r1, just %r30.  */
      load_reg (2, - 20, STACK_POINTER_REGNUM);
    }
  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  else if (frame_pointer_needed)
    {
      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
      emit_insn (gen_pre_ldwm (frame_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (-64)));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (! frame_pointer_needed && merge_sp_adjust_with_load)
    emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
				      merge_sp_adjust_with_load),
			     stack_pointer_rtx,
			     GEN_INT (- actual_fsize)));
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM,
		    STACK_POINTER_REGNUM,
		    - actual_fsize);
}
3045 /* This is only valid once reload has completed because it depends on
3046 knowing exactly how much (if any) frame there is and...
3048 It's only valid if there is no frame marker to de-allocate and...
3050 It's only valid if %r2 hasn't been saved into the caller's frame
3051 (we're not profiling and %r2 isn't live anywhere). */
3053 hppa_can_use_return_insn_p ()
3055 return (reload_completed
3056 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3057 && ! profile_flag
3058 && ! regs_ever_live[2]
3059 && ! frame_pointer_needed);
3062 void
3063 emit_bcond_fp (code, operand0)
3064 enum rtx_code code;
3065 rtx operand0;
3067 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3068 gen_rtx (IF_THEN_ELSE, VOIDmode,
3069 gen_rtx (code, VOIDmode,
3070 gen_rtx (REG, CCFPmode, 0),
3071 const0_rtx),
3072 gen_rtx (LABEL_REF, VOIDmode, operand0),
3073 pc_rtx)));
3078 gen_cmp_fp (code, operand0, operand1)
3079 enum rtx_code code;
3080 rtx operand0, operand1;
3082 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3083 gen_rtx (code, CCFPmode, operand0, operand1));
3086 /* Adjust the cost of a scheduling dependency. Return the new cost of
3087 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3090 pa_adjust_cost (insn, link, dep_insn, cost)
3091 rtx insn;
3092 rtx link;
3093 rtx dep_insn;
3094 int cost;
3096 if (! recog_memoized (insn))
3097 return 0;
3099 if (REG_NOTE_KIND (link) == 0)
3101 /* Data dependency; DEP_INSN writes a register that INSN reads some
3102 cycles later. */
3104 if (get_attr_type (insn) == TYPE_FPSTORE)
3106 rtx pat = PATTERN (insn);
3107 rtx dep_pat = PATTERN (dep_insn);
3108 if (GET_CODE (pat) == PARALLEL)
3110 /* This happens for the fstXs,mb patterns. */
3111 pat = XVECEXP (pat, 0, 0);
3113 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3114 /* If this happens, we have to extend this to schedule
3115 optimally. Return 0 for now. */
3116 return 0;
3118 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3120 if (! recog_memoized (dep_insn))
3121 return 0;
3122 /* DEP_INSN is writing its result to the register
3123 being stored in the fpstore INSN. */
3124 switch (get_attr_type (dep_insn))
3126 case TYPE_FPLOAD:
3127 /* This cost 3 cycles, not 2 as the md says for the
3128 700 and 7100. Note scaling of cost for 7100. */
3129 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3131 case TYPE_FPALU:
3132 case TYPE_FPMULSGL:
3133 case TYPE_FPMULDBL:
3134 case TYPE_FPDIVSGL:
3135 case TYPE_FPDIVDBL:
3136 case TYPE_FPSQRTSGL:
3137 case TYPE_FPSQRTDBL:
3138 /* In these important cases, we save one cycle compared to
3139 when flop instruction feed each other. */
3140 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3142 default:
3143 return cost;
3148 /* For other data dependencies, the default cost specified in the
3149 md is correct. */
3150 return cost;
3152 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3154 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3155 cycles later. */
3157 if (get_attr_type (insn) == TYPE_FPLOAD)
3159 rtx pat = PATTERN (insn);
3160 rtx dep_pat = PATTERN (dep_insn);
3161 if (GET_CODE (pat) == PARALLEL)
3163 /* This happens for the fldXs,mb patterns. */
3164 pat = XVECEXP (pat, 0, 0);
3166 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3167 /* If this happens, we have to extend this to schedule
3168 optimally. Return 0 for now. */
3169 return 0;
3171 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3173 if (! recog_memoized (dep_insn))
3174 return 0;
3175 switch (get_attr_type (dep_insn))
3177 case TYPE_FPALU:
3178 case TYPE_FPMULSGL:
3179 case TYPE_FPMULDBL:
3180 case TYPE_FPDIVSGL:
3181 case TYPE_FPDIVDBL:
3182 case TYPE_FPSQRTSGL:
3183 case TYPE_FPSQRTDBL:
3184 /* A fpload can't be issued until one cycle before a
3185 preceding arithmetic operation has finished if
3186 the target of the fpload is any of the sources
3187 (or destination) of the arithmetic operation. */
3188 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3190 default:
3191 return 0;
3195 else if (get_attr_type (insn) == TYPE_FPALU)
3197 rtx pat = PATTERN (insn);
3198 rtx dep_pat = PATTERN (dep_insn);
3199 if (GET_CODE (pat) == PARALLEL)
3201 /* This happens for the fldXs,mb patterns. */
3202 pat = XVECEXP (pat, 0, 0);
3204 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3205 /* If this happens, we have to extend this to schedule
3206 optimally. Return 0 for now. */
3207 return 0;
3209 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3211 if (! recog_memoized (dep_insn))
3212 return 0;
3213 switch (get_attr_type (dep_insn))
3215 case TYPE_FPDIVSGL:
3216 case TYPE_FPDIVDBL:
3217 case TYPE_FPSQRTSGL:
3218 case TYPE_FPSQRTDBL:
3219 /* An ALU flop can't be issued until two cycles before a
3220 preceding divide or sqrt operation has finished if
3221 the target of the ALU flop is any of the sources
3222 (or destination) of the divide or sqrt operation. */
3223 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3225 default:
3226 return 0;
3231 /* For other anti dependencies, the cost is 0. */
3232 return 0;
3234 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3236 /* Output dependency; DEP_INSN writes a register that INSN writes some
3237 cycles later. */
3238 if (get_attr_type (insn) == TYPE_FPLOAD)
3240 rtx pat = PATTERN (insn);
3241 rtx dep_pat = PATTERN (dep_insn);
3242 if (GET_CODE (pat) == PARALLEL)
3244 /* This happens for the fldXs,mb patterns. */
3245 pat = XVECEXP (pat, 0, 0);
3247 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3248 /* If this happens, we have to extend this to schedule
3249 optimally. Return 0 for now. */
3250 return 0;
3252 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3254 if (! recog_memoized (dep_insn))
3255 return 0;
3256 switch (get_attr_type (dep_insn))
3258 case TYPE_FPALU:
3259 case TYPE_FPMULSGL:
3260 case TYPE_FPMULDBL:
3261 case TYPE_FPDIVSGL:
3262 case TYPE_FPDIVDBL:
3263 case TYPE_FPSQRTSGL:
3264 case TYPE_FPSQRTDBL:
3265 /* A fpload can't be issued until one cycle before a
3266 preceding arithmetic operation has finished if
3267 the target of the fpload is the destination of the
3268 arithmetic operation. */
3269 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3271 default:
3272 return 0;
3276 else if (get_attr_type (insn) == TYPE_FPALU)
3278 rtx pat = PATTERN (insn);
3279 rtx dep_pat = PATTERN (dep_insn);
3280 if (GET_CODE (pat) == PARALLEL)
3282 /* This happens for the fldXs,mb patterns. */
3283 pat = XVECEXP (pat, 0, 0);
3285 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3286 /* If this happens, we have to extend this to schedule
3287 optimally. Return 0 for now. */
3288 return 0;
3290 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3292 if (! recog_memoized (dep_insn))
3293 return 0;
3294 switch (get_attr_type (dep_insn))
3296 case TYPE_FPDIVSGL:
3297 case TYPE_FPDIVDBL:
3298 case TYPE_FPSQRTSGL:
3299 case TYPE_FPSQRTDBL:
3300 /* An ALU flop can't be issued until two cycles before a
3301 preceding divide or sqrt operation has finished if
3302 the target of the ALU flop is also the target of
3303 of the divide or sqrt operation. */
3304 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3306 default:
3307 return 0;
3312 /* For other output dependencies, the cost is 0. */
3313 return 0;
3315 else
3316 abort ();
/* Return any length adjustment needed by INSN which already has its length
   computed as LENGTH.   Return zero if no adjustment is necessary.

   For the PA: function calls, millicode calls, and backwards short
   conditional branches with unfilled delay slots need an adjustment by +1
   (to account for the NOP which will be inserted into the instruction stream;
   lengths are in bytes, so the adjustment returned is 4).

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
pa_adjust_insn_length (insn, length)
    rtx insn;
    int length;
{
  rtx pat = PATTERN (insn);

  /* Call insns which are *not* indirect and have unfilled delay slots.  */
  if (GET_CODE (insn) == CALL_INSN)
    {
      if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
	  && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
	return 4;
      else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
	       && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
		  == SYMBOL_REF)
	return 4;
      else
	return 0;
    }
  /* Jumps inside switch tables which have unfilled delay slots
     also need adjustment.  */
  else if (GET_CODE (insn) == JUMP_INSN
	   && simplejump_p (insn)
	   && GET_MODE (PATTERN (insn)) == DImode)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) != SEQUENCE
	   && GET_CODE (pat) != USE
	   && GET_CODE (pat) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movstrsi_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && ! forward_branch_p (insn))
	return 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	return 4;
      else
	return 0;
    }
  return 0;
}
3398 /* Print operand X (an rtx) in assembler syntax to file FILE.
3399 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3400 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3402 void
3403 print_operand (file, x, code)
3404 FILE *file;
3405 rtx x;
3406 int code;
3408 switch (code)
3410 case '#':
3411 /* Output a 'nop' if there's nothing for the delay slot. */
3412 if (dbr_sequence_length () == 0)
3413 fputs ("\n\tnop", file);
3414 return;
3415 case '*':
3416 /* Output an nullification completer if there's nothing for the */
3417 /* delay slot or nullification is requested. */
3418 if (dbr_sequence_length () == 0 ||
3419 (final_sequence &&
3420 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3421 fputs (",n", file);
3422 return;
3423 case 'R':
3424 /* Print out the second register name of a register pair.
3425 I.e., R (6) => 7. */
3426 fputs (reg_names[REGNO (x)+1], file);
3427 return;
3428 case 'r':
3429 /* A register or zero. */
3430 if (x == const0_rtx
3431 || (x == CONST0_RTX (DFmode))
3432 || (x == CONST0_RTX (SFmode)))
3434 fputs ("0", file);
3435 return;
3437 else
3438 break;
3439 case 'C': /* Plain (C)ondition */
3440 case 'X':
3441 switch (GET_CODE (x))
3443 case EQ:
3444 fputs ("=", file); break;
3445 case NE:
3446 fputs ("<>", file); break;
3447 case GT:
3448 fputs (">", file); break;
3449 case GE:
3450 fputs (">=", file); break;
3451 case GEU:
3452 fputs (">>=", file); break;
3453 case GTU:
3454 fputs (">>", file); break;
3455 case LT:
3456 fputs ("<", file); break;
3457 case LE:
3458 fputs ("<=", file); break;
3459 case LEU:
3460 fputs ("<<=", file); break;
3461 case LTU:
3462 fputs ("<<", file); break;
3463 default:
3464 abort ();
3466 return;
3467 case 'N': /* Condition, (N)egated */
3468 switch (GET_CODE (x))
3470 case EQ:
3471 fputs ("<>", file); break;
3472 case NE:
3473 fputs ("=", file); break;
3474 case GT:
3475 fputs ("<=", file); break;
3476 case GE:
3477 fputs ("<", file); break;
3478 case GEU:
3479 fputs ("<<", file); break;
3480 case GTU:
3481 fputs ("<<=", file); break;
3482 case LT:
3483 fputs (">=", file); break;
3484 case LE:
3485 fputs (">", file); break;
3486 case LEU:
3487 fputs (">>", file); break;
3488 case LTU:
3489 fputs (">>=", file); break;
3490 default:
3491 abort ();
3493 return;
3494 /* For floating point comparisons. Need special conditions to deal
3495 with NaNs properly. */
3496 case 'Y':
3497 switch (GET_CODE (x))
3499 case EQ:
3500 fputs ("!=", file); break;
3501 case NE:
3502 fputs ("=", file); break;
3503 case GT:
3504 fputs ("<=", file); break;
3505 case GE:
3506 fputs ("<", file); break;
3507 case LT:
3508 fputs (">=", file); break;
3509 case LE:
3510 fputs (">", file); break;
3511 default:
3512 abort ();
3514 return;
3515 case 'S': /* Condition, operands are (S)wapped. */
3516 switch (GET_CODE (x))
3518 case EQ:
3519 fputs ("=", file); break;
3520 case NE:
3521 fputs ("<>", file); break;
3522 case GT:
3523 fputs ("<", file); break;
3524 case GE:
3525 fputs ("<=", file); break;
3526 case GEU:
3527 fputs ("<<=", file); break;
3528 case GTU:
3529 fputs ("<<", file); break;
3530 case LT:
3531 fputs (">", file); break;
3532 case LE:
3533 fputs (">=", file); break;
3534 case LEU:
3535 fputs (">>=", file); break;
3536 case LTU:
3537 fputs (">>", file); break;
3538 default:
3539 abort ();
3541 return;
3542 case 'B': /* Condition, (B)oth swapped and negate. */
3543 switch (GET_CODE (x))
3545 case EQ:
3546 fputs ("<>", file); break;
3547 case NE:
3548 fputs ("=", file); break;
3549 case GT:
3550 fputs (">=", file); break;
3551 case GE:
3552 fputs (">", file); break;
3553 case GEU:
3554 fputs (">>", file); break;
3555 case GTU:
3556 fputs (">>=", file); break;
3557 case LT:
3558 fputs ("<=", file); break;
3559 case LE:
3560 fputs ("<", file); break;
3561 case LEU:
3562 fputs ("<<", file); break;
3563 case LTU:
3564 fputs ("<<=", file); break;
3565 default:
3566 abort ();
3568 return;
3569 case 'k':
3570 if (GET_CODE (x) == CONST_INT)
3572 fprintf (file, "%d", ~INTVAL (x));
3573 return;
3575 abort();
3576 case 'L':
3577 if (GET_CODE (x) == CONST_INT)
3579 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3580 return;
3582 abort();
3583 case 'O':
3584 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3586 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3587 return;
3589 abort();
3590 case 'P':
3591 if (GET_CODE (x) == CONST_INT)
3593 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3594 return;
3596 abort();
3597 case 'I':
3598 if (GET_CODE (x) == CONST_INT)
3599 fputs ("i", file);
3600 return;
3601 case 'M':
3602 case 'F':
3603 switch (GET_CODE (XEXP (x, 0)))
3605 case PRE_DEC:
3606 case PRE_INC:
3607 fputs ("s,mb", file);
3608 break;
3609 case POST_DEC:
3610 case POST_INC:
3611 fputs ("s,ma", file);
3612 break;
3613 case PLUS:
3614 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3615 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3616 fputs ("x,s", file);
3617 else if (code == 'F')
3618 fputs ("s", file);
3619 break;
3620 default:
3621 if (code == 'F')
3622 fputs ("s", file);
3623 break;
3625 return;
3626 case 'G':
3627 output_global_address (file, x, 0);
3628 return;
3629 case 'H':
3630 output_global_address (file, x, 1);
3631 return;
3632 case 0: /* Don't do anything special */
3633 break;
3634 case 'Z':
3636 unsigned op[3];
3637 compute_zdepi_operands (INTVAL (x), op);
3638 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3639 return;
3641 default:
3642 abort ();
3644 if (GET_CODE (x) == REG)
3646 fputs (reg_names [REGNO (x)], file);
3647 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3648 fputs ("L", file);
3650 else if (GET_CODE (x) == MEM)
3652 int size = GET_MODE_SIZE (GET_MODE (x));
3653 rtx base = XEXP (XEXP (x, 0), 0);
3654 switch (GET_CODE (XEXP (x, 0)))
3656 case PRE_DEC:
3657 case POST_DEC:
3658 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3659 break;
3660 case PRE_INC:
3661 case POST_INC:
3662 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3663 break;
3664 default:
3665 if (GET_CODE (XEXP (x, 0)) == PLUS
3666 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3667 fprintf (file, "%s(0,%s)",
3668 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3669 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3670 else if (GET_CODE (XEXP (x, 0)) == PLUS
3671 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3672 fprintf (file, "%s(0,%s)",
3673 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3674 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3675 else
3676 output_address (XEXP (x, 0));
3677 break;
3680 else
3681 output_addr_const (file, x);
/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.

   ROUND_CONSTANT nonzero means round the constant offset to a multiple
   of 8k for use with an LR field selector (see comment below).
   Non-read-only, non-PIC symbols are emitted relative to $global$.  */

void
output_global_address (file, x, round_constant)
     FILE *file;
     rtx x;
     int round_constant;
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
    assemble_name (file, XSTR (x, 0));
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      assemble_name (file, XSTR (x, 0));
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base;

      /* One arm of the PLUS/MINUS is the symbol, the other the
	 constant offset; accept them in either order.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0), 0));
      else abort ();

      if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0),1));
      else abort ();

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      if (GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	}
      else if (GET_CODE (XEXP (x, 0)) == MINUS
	       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
	sep = "-";
      else abort ();

      if (!read_only_operand (base) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file,"%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

/* The supported millicode routines; end1000 is a sentinel giving the
   number of entries for sizing the `imported' table below.  */
enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
/* Nonzero once the .IMPORT for the corresponding routine was emitted.  */
static char imported[(int)end1000];
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
/* Directive template; the 4-character routine name is patched in at
   offset MILLI_START (just after ".IMPORT $$").  */
static char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
/* Emit a .IMPORT directive for millicode routine CODE, the first time
   it is used in this translation unit.  */
static void
import_milli (code)
     enum millicodes code;
{
  char str[sizeof (import_string)];

  if (!imported[(int)code])
    {
      imported[(int)code] = 1;
      strcpy (str, import_string);
      /* Overwrite the "...." placeholder with the 4-char routine name;
	 strncpy is intentional here -- no terminator is wanted.  */
      strncpy (str + MILLI_START, milli_names[(int)code], 4);
      output_asm_insn (str, 0);
    }
}
/* The register constraints have put the operands and return value in
   the proper registers.  */

/* Output a call to the $$mulI multiply millicode routine.  UNSIGNEDP is
   currently unused; INSN is the call insn being output.  Returns the
   assembler template for the call.  */
char *
output_mul_insn (unsignedp, insn)
     int unsignedp;
     rtx insn;
{
  import_milli (mulI);
  return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
}
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
/* Indexed by divisor 0..15; nonzero means a $$divI_n / $$divU_n
   millicode routine exists for that divisor.  */
static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
			   1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
3813 div_operand (op, mode)
3814 rtx op;
3815 enum machine_mode mode;
3817 return (mode == SImode
3818 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3819 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3820 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
/* If OPERANDS[2] is a divisor with a magic millicode, emit the rtl for a
   division by that constant: move the dividend into %r26, emit a
   DIV/UDIV parallel whose result lands in %r29 (clobbering %r25, %r26,
   %r31 and OPERANDS[3]), and copy %r29 into OPERANDS[0].  Returns 1 if
   handled this way, 0 otherwise (implicit int return, K&R).  */
emit_hpdiv_const (operands, unsignedp)
     rtx *operands;
     int unsignedp;
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
      emit
	(gen_rtx
	 (PARALLEL, VOIDmode,
	  gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
				 gen_rtx (unsignedp ? UDIV : DIV, SImode,
					  gen_rtx (REG, SImode, 26),
					  operands[2])),
		     gen_rtx (CLOBBER, VOIDmode, operands[3]),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
      emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
      return 1;
    }
  return 0;
}
/* Output a division millicode call.  OPERANDS[0] is the divisor (a
   constant or register per the constraints), UNSIGNEDP selects the
   $$divU/$$divI family, INSN is the call insn.  Returns the assembler
   template for the call.  */
char *
output_div_insn (operands, unsignedp, insn)
     rtx *operands;
     int unsignedp;
     rtx insn;
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes .*/
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      /* Static: the returned template must outlive this call.  */
      static char buf[100];
      divisor = INTVAL (operands[0]);
      /* Emit the .IMPORT for this specific divisor only once.  */
      if (!div_milli[divisor][unsignedp])
	{
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx (SYMBOL_REF, SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx (SYMBOL_REF, SImode, buf));
	}
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return output_millicode_call (insn,
					gen_rtx (SYMBOL_REF, SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return output_millicode_call (insn,
					gen_rtx (SYMBOL_REF, SImode, "$$divI"));
	}
    }
}
3904 /* Output a $$rem millicode to do mod. */
3906 char *
3907 output_mod_insn (unsignedp, insn)
3908 int unsignedp;
3909 rtx insn;
3911 if (unsignedp)
3913 import_milli (remU);
3914 return output_millicode_call (insn,
3915 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
3917 else
3919 import_milli (remI);
3920 return output_millicode_call (insn,
3921 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
/* Emit the .CALL argument-relocation descriptor for CALL_INSN, telling
   the assembler/linker which argument words are in general (GR) versus
   floating (FR/FU) registers, based on the USEs recorded in
   CALL_INSN_FUNCTION_USAGE.  */
void
output_arg_descriptor (call_insn)
     rtx call_insn;
{
  char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  if (GET_CODE (call_insn) != CALL_INSN)
    abort ();
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      /* Only USEs of argument registers matter.  */
      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      /* General argument registers %r23..%r26 map to ARGW3..ARGW0;
	 a DImode argument occupies two consecutive words.  */
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      /* FP argument registers; SFmode uses one word, wider modes a
	 FR/FU pair whose order depends on the descriptor convention.  */
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
/* Return the class of any secondary reload register that is needed to
   move IN into a register in class CLASS using mode MODE.

   Profiling has showed this routine and its descendants account for
   a significant amount of compile time (~7%).  So it has been
   optimized to reduce redundant computations and eliminate useless
   function calls.

   It might be worthwhile to try and make this a leaf function too.  */

enum reg_class
secondary_reload_class (class, mode, in)
     enum reg_class class;
     enum machine_mode mode;
     rtx in;
{
  int regno, is_symbolic;

  /* Trying to load a constant into a FP register during PIC code
     generation will require %r1 as a scratch register.  */
  if (flag_pic == 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (class)
      && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
    return R1_REGS;

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside secondary_reload_class.  */

  /* Resolve IN to a hard register number when possible; -1 means
     "not a register at all".  */
  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = true_regnum (in);
    }
  else if (GET_CODE (in) == SUBREG)
    regno = true_regnum (in);
  else
    regno = -1;

  /* Integer loads into FP regs from non-hard-regs, and shift-register
     loads from anything but general registers, go through a GR.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    return GENERAL_REGS;

  if (GET_CODE (in) == HIGH)
    in = XEXP (in, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside secondary_reload_class.

     We use an inline copy and only compute its return value once to avoid
     useless work.  */
  switch (GET_CODE (in))
    {
      rtx tmp;

    case SYMBOL_REF:
    case LABEL_REF:
      is_symbolic = 1;
      break;
    case CONST:
      tmp = XEXP (in, 0);
      is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
      break;
    default:
      is_symbolic = 0;
      break;
    }

  /* Read-only symbolic addresses need no scratch outside PIC.  */
  if (!flag_pic
      && is_symbolic
      && read_only_operand (in))
    return NO_REGS;

  /* Other symbolic loads need %r1 as scratch.  */
  if (class != R1_REGS && is_symbolic)
    return R1_REGS;

  return NO_REGS;
}
4079 enum direction
4080 function_arg_padding (mode, type)
4081 enum machine_mode mode;
4082 tree type;
4084 int size;
4086 if (mode == BLKmode)
4088 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4089 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4090 else
4091 return upward; /* Don't know if this is right, but */
4092 /* same as old definition. */
4094 else
4095 size = GET_MODE_BITSIZE (mode);
4096 if (size < PARM_BOUNDARY)
4097 return downward;
4098 else if (size % PARM_BOUNDARY)
4099 return upward;
4100 else
4101 return none;
/* Do what is necessary for `va_start'.  The argument is ignored;
   We look at the current function to determine if stdargs or varargs
   is used and fill in an initial va_list.  A pointer to this constructor
   is returned.  */

struct rtx_def *
hppa_builtin_saveregs (arglist)
     tree arglist;
{
  rtx offset;
  tree fntype = TREE_TYPE (current_function_decl);
  /* For old-style varargs (no named args ending in void), step back one
     word so the va_list starts at the first anonymous argument.  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  /* Store general registers on the stack.  */
  move_block_from_reg (23,
		       gen_rtx (MEM, BLKmode,
				plus_constant
				(current_function_internal_arg_pointer, -16)),
		       4, 4 * UNITS_PER_WORD);
  return copy_to_reg (expand_binop (Pmode, add_optab,
				    current_function_internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   NULLIFY nonzero means annul the delay slot; LENGTH is the branch
   length in bytes (4 = short, 8 = long); NEGATED inverts the condition;
   INSN is the branch insn itself.  */

char *
output_cbranch (operands, nullify, length, negated, insn)
     rtx *operands;
     int nullify, length, negated;
     rtx insn;
{
  static char buf[100];
  int useskip = 0;

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  This can happen when not optimizing.

     In such cases it is safe to emit nothing.  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    return "";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "com%I2clr,");
      else
	strcpy (buf, "com%I2b,");
      if (negated)
	strcat (buf, "%B3");
      else
	strcat (buf, "%S3");
      if (useskip)
	strcat (buf, " %2,%1,0");
      else if (nullify)
	strcat (buf, ",n %2,%1,%0");
      else
	strcat (buf, " %2,%1,%0");
      break;

      /* All long conditionals.  Note an short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 with is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && insn_addresses
	       && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				 - insn_addresses[INSN_UID (insn)] - 8))
	{
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%B3 %2,%1,%0%#");
	  else
	    strcat (buf, "%S3 %2,%1,%0%#");
	}
      /* Long branch: skip over the bl with a comclr of the opposite
	 sense.  */
      else
	{
	  strcpy (buf, "com%I2clr,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  if (nullify)
	    strcat (buf, " %2,%1,0\n\tbl,n %0,0");
	  else
	    strcat (buf, " %2,%1,0\n\tbl %0,0");
	}
      break;

    default:
      abort();
    }
  return buf;
}
4254 /* This routine handles all the branch-on-bit conditional branch sequences we
4255 might need to generate. It handles nullification of delay slots,
4256 varying length branches, negated branches and all combinations of the
4257 above. it returns the appropriate output template to emit the branch. */
4259 char *
4260 output_bb (operands, nullify, length, negated, insn, which)
4261 rtx *operands;
4262 int nullify, length, negated;
4263 rtx insn;
4264 int which;
4266 static char buf[100];
4267 int useskip = 0;
4269 /* A conditional branch to the following instruction (eg the delay slot) is
4270 asking for a disaster. I do not think this can happen as this pattern
4271 is only used when optimizing; jump optimization should eliminate the
4272 jump. But be prepared just in case. */
4274 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4275 return "";
4277 /* If this is a long branch with its delay slot unfilled, set `nullify'
4278 as it can nullify the delay slot and save a nop. */
4279 if (length == 8 && dbr_sequence_length () == 0)
4280 nullify = 1;
4282 /* If this is a short forward conditional branch which did not get
4283 its delay slot filled, the delay slot can still be nullified. */
4284 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4285 nullify = forward_branch_p (insn);
4287 /* A forward branch over a single nullified insn can be done with a
4288 extrs instruction. This avoids a single cycle penalty due to
4289 mis-predicted branch if we fall through (branch not taken). */
4291 if (length == 4
4292 && next_real_insn (insn) != 0
4293 && get_attr_length (next_real_insn (insn)) == 4
4294 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4295 && nullify)
4296 useskip = 1;
4298 switch (length)
4301 /* All short conditional branches except backwards with an unfilled
4302 delay slot. */
4303 case 4:
4304 if (useskip)
4305 strcpy (buf, "extrs,");
4306 else
4307 strcpy (buf, "bb,");
4308 if ((which == 0 && negated)
4309 || (which == 1 && ! negated))
4310 strcat (buf, ">=");
4311 else
4312 strcat (buf, "<");
4313 if (useskip)
4314 strcat (buf, " %0,%1,1,0");
4315 else if (nullify && negated)
4316 strcat (buf, ",n %0,%1,%3");
4317 else if (nullify && ! negated)
4318 strcat (buf, ",n %0,%1,%2");
4319 else if (! nullify && negated)
4320 strcat (buf, "%0,%1,%3");
4321 else if (! nullify && ! negated)
4322 strcat (buf, " %0,%1,%2");
4323 break;
4325 /* All long conditionals. Note an short backward branch with an
4326 unfilled delay slot is treated just like a long backward branch
4327 with an unfilled delay slot. */
4328 case 8:
4329 /* Handle weird backwards branch with a filled delay slot
4330 with is nullified. */
4331 if (dbr_sequence_length () != 0
4332 && ! forward_branch_p (insn)
4333 && nullify)
4335 strcpy (buf, "bb,");
4336 if ((which == 0 && negated)
4337 || (which == 1 && ! negated))
4338 strcat (buf, "<");
4339 else
4340 strcat (buf, ">=");
4341 if (negated)
4342 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4343 else
4344 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4346 /* Handle short backwards branch with an unfilled delay slot.
4347 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4348 taken and untaken branches. */
4349 else if (dbr_sequence_length () == 0
4350 && ! forward_branch_p (insn)
4351 && insn_addresses
4352 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4353 - insn_addresses[INSN_UID (insn)] - 8))
4355 strcpy (buf, "bb,");
4356 if ((which == 0 && negated)
4357 || (which == 1 && ! negated))
4358 strcat (buf, ">=");
4359 else
4360 strcat (buf, "<");
4361 if (negated)
4362 strcat (buf, " %0,%1,%3%#");
4363 else
4364 strcat (buf, " %0,%1,%2%#");
4366 else
4368 strcpy (buf, "extrs,");
4369 if ((which == 0 && negated)
4370 || (which == 1 && ! negated))
4371 strcat (buf, "<");
4372 else
4373 strcat (buf, ">=");
4374 if (nullify && negated)
4375 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4376 else if (nullify && ! negated)
4377 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4378 else if (negated)
4379 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4380 else
4381 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4383 break;
4385 default:
4386 abort();
4388 return buf;
4391 /* This routine handles all the branch-on-variable-bit conditional branch
4392 sequences we might need to generate. It handles nullification of delay
4393 slots, varying length branches, negated branches and all combinations
4394 of the above. it returns the appropriate output template to emit the
4395 branch. */
4397 char *
4398 output_bvb (operands, nullify, length, negated, insn, which)
4399 rtx *operands;
4400 int nullify, length, negated;
4401 rtx insn;
4402 int which;
4404 static char buf[100];
4405 int useskip = 0;
4407 /* A conditional branch to the following instruction (eg the delay slot) is
4408 asking for a disaster. I do not think this can happen as this pattern
4409 is only used when optimizing; jump optimization should eliminate the
4410 jump. But be prepared just in case. */
4412 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4413 return "";
4415 /* If this is a long branch with its delay slot unfilled, set `nullify'
4416 as it can nullify the delay slot and save a nop. */
4417 if (length == 8 && dbr_sequence_length () == 0)
4418 nullify = 1;
4420 /* If this is a short forward conditional branch which did not get
4421 its delay slot filled, the delay slot can still be nullified. */
4422 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4423 nullify = forward_branch_p (insn);
4425 /* A forward branch over a single nullified insn can be done with a
4426 extrs instruction. This avoids a single cycle penalty due to
4427 mis-predicted branch if we fall through (branch not taken). */
4429 if (length == 4
4430 && next_real_insn (insn) != 0
4431 && get_attr_length (next_real_insn (insn)) == 4
4432 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4433 && nullify)
4434 useskip = 1;
4436 switch (length)
4439 /* All short conditional branches except backwards with an unfilled
4440 delay slot. */
4441 case 4:
4442 if (useskip)
4443 strcpy (buf, "vextrs,");
4444 else
4445 strcpy (buf, "bvb,");
4446 if ((which == 0 && negated)
4447 || (which == 1 && ! negated))
4448 strcat (buf, ">=");
4449 else
4450 strcat (buf, "<");
4451 if (useskip)
4452 strcat (buf, " %0,1,0");
4453 else if (nullify && negated)
4454 strcat (buf, ",n %0,%3");
4455 else if (nullify && ! negated)
4456 strcat (buf, ",n %0,%2");
4457 else if (! nullify && negated)
4458 strcat (buf, "%0,%3");
4459 else if (! nullify && ! negated)
4460 strcat (buf, " %0,%2");
4461 break;
4463 /* All long conditionals. Note an short backward branch with an
4464 unfilled delay slot is treated just like a long backward branch
4465 with an unfilled delay slot. */
4466 case 8:
4467 /* Handle weird backwards branch with a filled delay slot
4468 with is nullified. */
4469 if (dbr_sequence_length () != 0
4470 && ! forward_branch_p (insn)
4471 && nullify)
4473 strcpy (buf, "bvb,");
4474 if ((which == 0 && negated)
4475 || (which == 1 && ! negated))
4476 strcat (buf, "<");
4477 else
4478 strcat (buf, ">=");
4479 if (negated)
4480 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4481 else
4482 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4484 /* Handle short backwards branch with an unfilled delay slot.
4485 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4486 taken and untaken branches. */
4487 else if (dbr_sequence_length () == 0
4488 && ! forward_branch_p (insn)
4489 && insn_addresses
4490 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4491 - insn_addresses[INSN_UID (insn)] - 8))
4493 strcpy (buf, "bvb,");
4494 if ((which == 0 && negated)
4495 || (which == 1 && ! negated))
4496 strcat (buf, ">=");
4497 else
4498 strcat (buf, "<");
4499 if (negated)
4500 strcat (buf, " %0,%3%#");
4501 else
4502 strcat (buf, " %0,%2%#");
4504 else
4506 strcpy (buf, "vextrs,");
4507 if ((which == 0 && negated)
4508 || (which == 1 && ! negated))
4509 strcat (buf, "<");
4510 else
4511 strcat (buf, ">=");
4512 if (nullify && negated)
4513 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4514 else if (nullify && ! negated)
4515 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4516 else if (negated)
4517 strcat (buf, " %0,1,0\n\tbl %3,0");
4518 else
4519 strcat (buf, " %0,1,0\n\tbl %2,0");
4521 break;
4523 default:
4524 abort();
4526 return buf;
4529 /* Return the output template for emitting a dbra type insn.
4531 Note it may perform some output operations on its own before
4532 returning the final output string. */
4533 char *
4534 output_dbra (operands, insn, which_alternative)
4535 rtx *operands;
4536 rtx insn;
4537 int which_alternative;
4540 /* A conditional branch to the following instruction (eg the delay slot) is
4541 asking for a disaster. Be prepared! */
4543 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4545 if (which_alternative == 0)
4546 return "ldo %1(%0),%0";
4547 else if (which_alternative == 1)
4549 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4550 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4551 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4552 return "fldws -16(0,%%r30),%0";
4554 else
4556 output_asm_insn ("ldw %0,%4", operands);
4557 return "ldo %1(%4),%4\n\tstw %4,%0";
4561 if (which_alternative == 0)
4563 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4564 int length = get_attr_length (insn);
4566 /* If this is a long branch with its delay slot unfilled, set `nullify'
4567 as it can nullify the delay slot and save a nop. */
4568 if (length == 8 && dbr_sequence_length () == 0)
4569 nullify = 1;
4571 /* If this is a short forward conditional branch which did not get
4572 its delay slot filled, the delay slot can still be nullified. */
4573 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4574 nullify = forward_branch_p (insn);
4576 /* Handle short versions first. */
4577 if (length == 4 && nullify)
4578 return "addib,%C2,n %1,%0,%3";
4579 else if (length == 4 && ! nullify)
4580 return "addib,%C2 %1,%0,%3";
4581 else if (length == 8)
4583 /* Handle weird backwards branch with a fulled delay slot
4584 which is nullified. */
4585 if (dbr_sequence_length () != 0
4586 && ! forward_branch_p (insn)
4587 && nullify)
4588 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4589 /* Handle short backwards branch with an unfilled delay slot.
4590 Using a addb;nop rather than addi;bl saves 1 cycle for both
4591 taken and untaken branches. */
4592 else if (dbr_sequence_length () == 0
4593 && ! forward_branch_p (insn)
4594 && insn_addresses
4595 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4596 - insn_addresses[INSN_UID (insn)] - 8))
4597 return "addib,%C2 %1,%0,%3%#";
4599 /* Handle normal cases. */
4600 if (nullify)
4601 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4602 else
4603 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4605 else
4606 abort();
4608 /* Deal with gross reload from FP register case. */
4609 else if (which_alternative == 1)
4611 /* Move loop counter from FP register to MEM then into a GR,
4612 increment the GR, store the GR into MEM, and finally reload
4613 the FP register from MEM from within the branch's delay slot. */
4614 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4615 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4616 if (get_attr_length (insn) == 24)
4617 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4618 else
4619 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4621 /* Deal with gross reload from memory case. */
4622 else
4624 /* Reload loop counter from memory, the store back to memory
4625 happens in the branch's delay slot. */
4626 output_asm_insn ("ldw %0,%4", operands);
4627 if (get_attr_length (insn) == 12)
4628 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4629 else
4630 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
4634 /* Return the output template for emitting a dbra type insn.
4636 Note it may perform some output operations on its own before
4637 returning the final output string. */
4638 char *
4639 output_movb (operands, insn, which_alternative, reverse_comparison)
4640 rtx *operands;
4641 rtx insn;
4642 int which_alternative;
4643 int reverse_comparison;
4646 /* A conditional branch to the following instruction (eg the delay slot) is
4647 asking for a disaster. Be prepared! */
4649 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4651 if (which_alternative == 0)
4652 return "copy %1,%0";
4653 else if (which_alternative == 1)
4655 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4656 return "fldws -16(0,%%r30),%0";
4658 else if (which_alternative == 2)
4659 return "stw %1,%0";
4660 else
4661 return "mtsar %r1";
4664 /* Support the second variant. */
4665 if (reverse_comparison)
4666 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4668 if (which_alternative == 0)
4670 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4671 int length = get_attr_length (insn);
4673 /* If this is a long branch with its delay slot unfilled, set `nullify'
4674 as it can nullify the delay slot and save a nop. */
4675 if (length == 8 && dbr_sequence_length () == 0)
4676 nullify = 1;
4678 /* If this is a short forward conditional branch which did not get
4679 its delay slot filled, the delay slot can still be nullified. */
4680 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4681 nullify = forward_branch_p (insn);
4683 /* Handle short versions first. */
4684 if (length == 4 && nullify)
4685 return "movb,%C2,n %1,%0,%3";
4686 else if (length == 4 && ! nullify)
4687 return "movb,%C2 %1,%0,%3";
4688 else if (length == 8)
4690 /* Handle weird backwards branch with a filled delay slot
4691 which is nullified. */
4692 if (dbr_sequence_length () != 0
4693 && ! forward_branch_p (insn)
4694 && nullify)
4695 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4697 /* Handle short backwards branch with an unfilled delay slot.
4698 Using a movb;nop rather than or;bl saves 1 cycle for both
4699 taken and untaken branches. */
4700 else if (dbr_sequence_length () == 0
4701 && ! forward_branch_p (insn)
4702 && insn_addresses
4703 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4704 - insn_addresses[INSN_UID (insn)] - 8))
4705 return "movb,%C2 %1,%0,%3%#";
4706 /* Handle normal cases. */
4707 if (nullify)
4708 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4709 else
4710 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4712 else
4713 abort();
4715 /* Deal with gross reload from FP register case. */
4716 else if (which_alternative == 1)
4718 /* Move loop counter from FP register to MEM then into a GR,
4719 increment the GR, store the GR into MEM, and finally reload
4720 the FP register from MEM from within the branch's delay slot. */
4721 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4722 if (get_attr_length (insn) == 12)
4723 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
4724 else
4725 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4727 /* Deal with gross reload from memory case. */
4728 else if (which_alternative == 2)
4730 /* Reload loop counter from memory, the store back to memory
4731 happens in the branch's delay slot. */
4732 if (get_attr_length (insn) == 8)
4733 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
4734 else
4735 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
4737 /* Handle SAR as a destination. */
4738 else
4740 if (get_attr_length (insn) == 8)
4741 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
4742 else
4743 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
4748 /* INSN is a millicode call. It may have an unconditional jump in its delay
4749 slot.
4751 CALL_DEST is the routine we are calling. */
4753 char *
4754 output_millicode_call (insn, call_dest)
4755 rtx insn;
4756 rtx call_dest;
4758 int distance;
4759 rtx xoperands[4];
4760 rtx seq_insn;
4762 /* Handle common case -- empty delay slot or no jump in the delay slot,
4763 and we're sure that the branch will reach the beginning of the $CODE$
4764 subspace. */
4765 if ((dbr_sequence_length () == 0
4766 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
4767 || (dbr_sequence_length () != 0
4768 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4769 && get_attr_length (insn) == 4))
4771 xoperands[0] = call_dest;
4772 output_asm_insn ("bl %0,%%r31%#", xoperands);
4773 return "";
4776 /* This call may not reach the beginning of the $CODE$ subspace. */
4777 if (get_attr_length (insn) > 4)
4779 int delay_insn_deleted = 0;
4780 rtx xoperands[2];
4781 rtx link;
4783 /* We need to emit an inline long-call branch. */
4784 if (dbr_sequence_length () != 0
4785 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
4787 /* A non-jump insn in the delay slot. By definition we can
4788 emit this insn before the call. */
4789 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
4791 /* Now delete the delay insn. */
4792 PUT_CODE (NEXT_INSN (insn), NOTE);
4793 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4794 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4795 delay_insn_deleted = 1;
4798 /* If we're allowed to use be/ble instructions, then this is the
4799 best sequence to use for a long millicode call. */
4800 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
4801 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
4803 xoperands[0] = call_dest;
4804 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
4805 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
4806 output_asm_insn ("nop", xoperands);
4808 /* Pure portable runtime doesn't allow be/ble; we also don't have
4809 PIC support int he assembler/linker, so this sequence is needed. */
4810 else if (TARGET_PORTABLE_RUNTIME)
4812 xoperands[0] = call_dest;
4813 /* Get the address of our target into %r29. */
4814 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
4815 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
4817 /* Get our return address into %r31. */
4818 output_asm_insn ("blr 0,%%r31", xoperands);
4820 /* Jump to our target address in %r29. */
4821 output_asm_insn ("bv,n 0(%%r29)", xoperands);
4823 /* Empty delay slot. Note this insn gets fetched twice and
4824 executed once. To be safe we use a nop. */
4825 output_asm_insn ("nop", xoperands);
4826 return "";
4828 /* PIC long millicode call sequence. */
4829 else
4831 xoperands[0] = call_dest;
4832 xoperands[1] = gen_label_rtx ();
4833 /* Get our address + 8 into %r1. */
4834 output_asm_insn ("bl .+8,%%r1", xoperands);
4836 /* Add %r1 to the offset of our target from the next insn. */
4837 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
4838 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4839 CODE_LABEL_NUMBER (xoperands[1]));
4840 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
4842 /* Get the return address into %r31. */
4843 output_asm_insn ("blr 0,%%r31", xoperands);
4845 /* Branch to our target which is in %r1. */
4846 output_asm_insn ("bv,n 0(%%r1)", xoperands);
4848 /* Empty delay slot. Note this insn gets fetched twice and
4849 executed once. To be safe we use a nop. */
4850 output_asm_insn ("nop", xoperands);
4853 /* If we had a jump in the call's delay slot, output it now. */
4854 if (dbr_sequence_length () != 0
4855 && !delay_insn_deleted)
4857 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4858 output_asm_insn ("b,n %0", xoperands);
4860 /* Now delete the delay insn. */
4861 PUT_CODE (NEXT_INSN (insn), NOTE);
4862 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4863 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4865 return "";
4868 /* This call has an unconditional jump in its delay slot and the
4869 call is known to reach its target or the beginning of the current
4870 subspace. */
4872 /* Use the containing sequence insn's address. */
4873 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
4875 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
4876 - insn_addresses[INSN_UID (seq_insn)] - 8;
4878 /* If the branch was too far away, emit a normal call followed
4879 by a nop, followed by the unconditional branch.
4881 If the branch is close, then adjust %r2 from within the
4882 call's delay slot. */
4884 xoperands[0] = call_dest;
4885 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4886 if (! VAL_14_BITS_P (distance))
4887 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
4888 else
4890 xoperands[3] = gen_label_rtx ();
4891 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
4892 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4893 CODE_LABEL_NUMBER (xoperands[3]));
4896 /* Delete the jump. */
4897 PUT_CODE (NEXT_INSN (insn), NOTE);
4898 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4899 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4900 return "";
4903 /* INSN is either a function call. It may have an unconditional jump
4904 in its delay slot.
4906 CALL_DEST is the routine we are calling. */
4908 char *
4909 output_call (insn, call_dest)
4910 rtx insn;
4911 rtx call_dest;
4913 int distance;
4914 rtx xoperands[4];
4915 rtx seq_insn;
4917 /* Handle common case -- empty delay slot or no jump in the delay slot,
4918 and we're sure that the branch will reach the beginning of the $CODE$
4919 subspace. */
4920 if ((dbr_sequence_length () == 0
4921 && get_attr_length (insn) == 8)
4922 || (dbr_sequence_length () != 0
4923 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4924 && get_attr_length (insn) == 4))
4926 xoperands[0] = call_dest;
4927 output_asm_insn ("bl %0,%%r2%#", xoperands);
4928 return "";
4931 /* This call may not reach the beginning of the $CODE$ subspace. */
4932 if (get_attr_length (insn) > 8)
4934 int delay_insn_deleted = 0;
4935 rtx xoperands[2];
4936 rtx link;
4938 /* We need to emit an inline long-call branch. Furthermore,
4939 because we're changing a named function call into an indirect
4940 function call well after the parameters have been set up, we
4941 need to make sure any FP args appear in both the integer
4942 and FP registers. Also, we need move any delay slot insn
4943 out of the delay slot. And finally, we can't rely on the linker
4944 being able to fix the call to $$dyncall! -- Yuk!. */
4945 if (dbr_sequence_length () != 0
4946 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
4948 /* A non-jump insn in the delay slot. By definition we can
4949 emit this insn before the call (and in fact before argument
4950 relocating. */
4951 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
4953 /* Now delete the delay insn. */
4954 PUT_CODE (NEXT_INSN (insn), NOTE);
4955 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4956 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4957 delay_insn_deleted = 1;
4960 /* Now copy any FP arguments into integer registers. */
4961 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
4963 int arg_mode, regno;
4964 rtx use = XEXP (link, 0);
4965 if (! (GET_CODE (use) == USE
4966 && GET_CODE (XEXP (use, 0)) == REG
4967 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4968 continue;
4970 arg_mode = GET_MODE (XEXP (use, 0));
4971 regno = REGNO (XEXP (use, 0));
4972 /* Is it a floating point register? */
4973 if (regno >= 32 && regno <= 39)
4975 /* Copy from the FP register into an integer register
4976 (via memory). */
4977 if (arg_mode == SFmode)
4979 xoperands[0] = XEXP (use, 0);
4980 xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
4981 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
4982 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
4984 else
4986 xoperands[0] = XEXP (use, 0);
4987 xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
4988 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
4989 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
4990 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
4996 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
4997 we don't have any direct calls in that case. */
4998 if (flag_pic)
5000 /* We have to load the address of the function using a procedure
5001 label (plabel). The LP and RP relocs don't work reliably for PIC,
5002 so we make a plain 32 bit plabel in the data segment instead. We
5003 have to defer outputting it of course... Not pretty. */
5005 xoperands[0] = gen_label_rtx ();
5006 xoperands[1] = gen_label_rtx ();
5007 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5008 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5009 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5011 if (deferred_plabels == 0)
5012 deferred_plabels = (struct defer_plab *)
5013 xmalloc (1 * sizeof (struct defer_plab));
5014 else
5015 deferred_plabels = (struct defer_plab *)
5016 xrealloc (deferred_plabels,
5017 (n_deferred_plabels + 1) * sizeof (struct defer_plab));
5018 deferred_plabels[n_deferred_plabels].internal_label = xoperands[0];
5019 deferred_plabels[n_deferred_plabels].symbol = call_dest;
5020 n_deferred_plabels++;
5022 /* Get our address + 8 into %r1. */
5023 output_asm_insn ("bl .+8,%%r1", xoperands);
5025 /* Add %r1 to the offset of dyncall from the next insn. */
5026 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5027 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5028 CODE_LABEL_NUMBER (xoperands[1]));
5029 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5031 /* Get the return address into %r31. */
5032 output_asm_insn ("blr 0,%%r31", xoperands);
5034 /* Branch to our target which is in %r1. */
5035 output_asm_insn ("bv 0(%%r1)", xoperands);
5037 /* Copy the return address into %r2 also. */
5038 output_asm_insn ("copy %%r31,%%r2", xoperands);
5040 else
5042 /* No PIC stuff to worry about. We can use ldil;ble. */
5043 xoperands[0] = call_dest;
5045 /* Get the address of our target into %r22. */
5046 output_asm_insn ("ldil LP%%%0,%%r22", xoperands);
5047 output_asm_insn ("ldo RP%%%0(%%r22),%%r22", xoperands);
5049 /* Get the high part of the address of $dyncall into %r2, then
5050 add in the low part in the branch instruction. */
5051 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5052 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5054 /* Copy the return pointer into both %r31 and %r2. */
5055 output_asm_insn ("copy %%r31,%%r2", xoperands);
5058 /* If we had a jump in the call's delay slot, output it now. */
5059 if (dbr_sequence_length () != 0
5060 && !delay_insn_deleted)
5062 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5063 output_asm_insn ("b,n %0", xoperands);
5065 /* Now delete the delay insn. */
5066 PUT_CODE (NEXT_INSN (insn), NOTE);
5067 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5068 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5070 return "";
5073 /* This call has an unconditional jump in its delay slot and the
5074 call is known to reach its target or the beginning of the current
5075 subspace. */
5077 /* Use the containing sequence insn's address. */
5078 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5080 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5081 - insn_addresses[INSN_UID (seq_insn)] - 8;
5083 /* If the branch was too far away, emit a normal call followed
5084 by a nop, followed by the unconditional branch.
5086 If the branch is close, then adjust %r2 from within the
5087 call's delay slot. */
5089 xoperands[0] = call_dest;
5090 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5091 if (! VAL_14_BITS_P (distance))
5092 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5093 else
5095 xoperands[3] = gen_label_rtx ();
5096 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5097 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5098 CODE_LABEL_NUMBER (xoperands[3]));
5101 /* Delete the jump. */
5102 PUT_CODE (NEXT_INSN (insn), NOTE);
5103 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5104 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5105 return "";
5108 extern struct obstack permanent_obstack;
5109 extern struct obstack *saveable_obstack;
5111 /* In HPUX 8.0's shared library scheme, special relocations are needed
5112 for function labels if they might be passed to a function
5113 in a shared library (because shared libraries don't live in code
5114 space), and special magic is needed to construct their address.
5116 For reasons too disgusting to describe storage for the new name
5117 is allocated either on the saveable_obstack (released at function
5118 exit) or on the permanent_obstack for things that can never change
5119 (libcall names for example). */
5121 void
5122 hppa_encode_label (sym, permanent)
5123 rtx sym;
5124 int permanent;
5126 char *str = XSTR (sym, 0);
5127 int len = strlen (str);
5128 char *newstr;
5130 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5131 len + 2);
5133 if (str[0] == '*')
5134 *newstr++ = *str++;
5135 strcpy (newstr + 1, str);
5136 *newstr = '@';
5137 XSTR (sym,0) = newstr;
5141 function_label_operand (op, mode)
5142 rtx op;
5143 enum machine_mode mode;
5145 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5148 /* Returns 1 if OP is a function label involved in a simple addition
5149 with a constant. Used to keep certain patterns from matching
5150 during instruction combination. */
5152 is_function_label_plus_const (op)
5153 rtx op;
5155 /* Strip off any CONST. */
5156 if (GET_CODE (op) == CONST)
5157 op = XEXP (op, 0);
5159 return (GET_CODE (op) == PLUS
5160 && function_label_operand (XEXP (op, 0), Pmode)
5161 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5164 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5165 use in fmpyadd instructions. */
5167 fmpyaddoperands (operands)
5168 rtx *operands;
5170 enum machine_mode mode = GET_MODE (operands[0]);
5172 /* Must be a floating point mode. */
5173 if (mode != SFmode && mode != DFmode)
5174 return 0;
5176 /* All modes must be the same. */
5177 if (! (mode == GET_MODE (operands[1])
5178 && mode == GET_MODE (operands[2])
5179 && mode == GET_MODE (operands[3])
5180 && mode == GET_MODE (operands[4])
5181 && mode == GET_MODE (operands[5])))
5182 return 0;
5184 /* All operands must be registers. */
5185 if (! (GET_CODE (operands[1]) == REG
5186 && GET_CODE (operands[2]) == REG
5187 && GET_CODE (operands[3]) == REG
5188 && GET_CODE (operands[4]) == REG
5189 && GET_CODE (operands[5]) == REG))
5190 return 0;
5192 /* Only 2 real operands to the addition. One of the input operands must
5193 be the same as the output operand. */
5194 if (! rtx_equal_p (operands[3], operands[4])
5195 && ! rtx_equal_p (operands[3], operands[5]))
5196 return 0;
5198 /* Inout operand of add can not conflict with any operands from multiply. */
5199 if (rtx_equal_p (operands[3], operands[0])
5200 || rtx_equal_p (operands[3], operands[1])
5201 || rtx_equal_p (operands[3], operands[2]))
5202 return 0;
5204 /* multiply can not feed into addition operands. */
5205 if (rtx_equal_p (operands[4], operands[0])
5206 || rtx_equal_p (operands[5], operands[0]))
5207 return 0;
5209 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5210 if (mode == SFmode
5211 && (REGNO (operands[0]) < 57
5212 || REGNO (operands[1]) < 57
5213 || REGNO (operands[2]) < 57
5214 || REGNO (operands[3]) < 57
5215 || REGNO (operands[4]) < 57
5216 || REGNO (operands[5]) < 57))
5217 return 0;
5219 /* Passed. Operands are suitable for fmpyadd. */
5220 return 1;
5223 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5224 use in fmpysub instructions. */
5226 fmpysuboperands (operands)
5227 rtx *operands;
5229 enum machine_mode mode = GET_MODE (operands[0]);
5231 /* Must be a floating point mode. */
5232 if (mode != SFmode && mode != DFmode)
5233 return 0;
5235 /* All modes must be the same. */
5236 if (! (mode == GET_MODE (operands[1])
5237 && mode == GET_MODE (operands[2])
5238 && mode == GET_MODE (operands[3])
5239 && mode == GET_MODE (operands[4])
5240 && mode == GET_MODE (operands[5])))
5241 return 0;
5243 /* All operands must be registers. */
5244 if (! (GET_CODE (operands[1]) == REG
5245 && GET_CODE (operands[2]) == REG
5246 && GET_CODE (operands[3]) == REG
5247 && GET_CODE (operands[4]) == REG
5248 && GET_CODE (operands[5]) == REG))
5249 return 0;
5251 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5252 operation, so operands[4] must be the same as operand[3]. */
5253 if (! rtx_equal_p (operands[3], operands[4]))
5254 return 0;
5256 /* multiply can not feed into subtraction. */
5257 if (rtx_equal_p (operands[5], operands[0]))
5258 return 0;
5260 /* Inout operand of sub can not conflict with any operands from multiply. */
5261 if (rtx_equal_p (operands[3], operands[0])
5262 || rtx_equal_p (operands[3], operands[1])
5263 || rtx_equal_p (operands[3], operands[2]))
5264 return 0;
5266 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5267 if (mode == SFmode
5268 && (REGNO (operands[0]) < 57
5269 || REGNO (operands[1]) < 57
5270 || REGNO (operands[2]) < 57
5271 || REGNO (operands[3]) < 57
5272 || REGNO (operands[4]) < 57
5273 || REGNO (operands[5]) < 57))
5274 return 0;
5276 /* Passed. Operands are suitable for fmpysub. */
5277 return 1;
5281 plus_xor_ior_operator (op, mode)
5282 rtx op;
5283 enum machine_mode mode;
5285 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5286 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
int
shadd_constant_p (val)
     int val;
{
  return (val == 2 || val == 4 || val == 8);
}
5301 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5302 the valid constant for shadd instructions. */
5304 shadd_operand (op, mode)
5305 rtx op;
5306 enum machine_mode mode;
5308 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5311 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5314 basereg_operand (op, mode)
5315 rtx op;
5316 enum machine_mode mode;
5318 /* cse will create some unscaled indexed addresses, however; it
5319 generally isn't a win on the PA, so avoid creating unscaled
5320 indexed addresses until after cse is finished. */
5321 if (!cse_not_expected)
5322 return 0;
5324 /* Once reload has started everything is considered valid. Reload should
5325 only create indexed addresses using the stack/frame pointer, and any
5326 others were checked for validity when created by the combine pass.
5328 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5329 we don't have to worry about the braindamaged implicit space register
5330 selection using the basereg only (rather than effective address)
5331 screwing us over. */
5332 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5333 return (GET_CODE (op) == REG);
5335 /* Stack is always OK for indexing. */
5336 if (op == stack_pointer_rtx)
5337 return 1;
5339 /* While it's always safe to index off the frame pointer, it's not
5340 always profitable, particularly when the frame pointer is being
5341 eliminated. */
5342 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5343 return 1;
5345 /* The only other valid OPs are pseudo registers with
5346 REGNO_POINTER_FLAG set. */
5347 if (GET_CODE (op) != REG
5348 || REGNO (op) < FIRST_PSEUDO_REGISTER
5349 || ! register_operand (op, mode))
5350 return 0;
5352 return REGNO_POINTER_FLAG (REGNO (op));
5355 /* Return 1 if this operand is anything other than a hard register. */
5358 non_hard_reg_operand (op, mode)
5359 rtx op;
5360 enum machine_mode mode;
5362 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5365 /* Return 1 if INSN branches forward. Should be using insn_addresses
5366 to avoid walking through all the insns... */
5368 forward_branch_p (insn)
5369 rtx insn;
5371 rtx label = JUMP_LABEL (insn);
5373 while (insn)
5375 if (insn == label)
5376 break;
5377 else
5378 insn = NEXT_INSN (insn);
5381 return (insn == label);
5384 /* Return 1 if OP is an equality comparison, else return 0. */
5386 eq_neq_comparison_operator (op, mode)
5387 rtx op;
5388 enum machine_mode mode;
5390 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5393 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5395 movb_comparison_operator (op, mode)
5396 rtx op;
5397 enum machine_mode mode;
5399 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5400 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5403 /* Return 1 if INSN is in the delay slot of a call instruction. */
5405 jump_in_call_delay (insn)
5406 rtx insn;
5409 if (GET_CODE (insn) != JUMP_INSN)
5410 return 0;
5412 if (PREV_INSN (insn)
5413 && PREV_INSN (PREV_INSN (insn))
5414 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5416 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5418 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5419 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5422 else
5423 return 0;
5426 /* Output an unconditional move and branch insn. */
5428 char *
5429 output_parallel_movb (operands, length)
5430 rtx *operands;
5431 int length;
5433 /* These are the cases in which we win. */
5434 if (length == 4)
5435 return "mov%I1b,tr %1,%0,%2";
5437 /* None of these cases wins, but they don't lose either. */
5438 if (dbr_sequence_length () == 0)
5440 /* Nothing in the delay slot, fake it by putting the combined
5441 insn (the copy or add) in the delay slot of a bl. */
5442 if (GET_CODE (operands[1]) == CONST_INT)
5443 return "bl %2,0\n\tldi %1,%0";
5444 else
5445 return "bl %2,0\n\tcopy %1,%0";
5447 else
5449 /* Something in the delay slot, but we've got a long branch. */
5450 if (GET_CODE (operands[1]) == CONST_INT)
5451 return "ldi %1,%0\n\tbl %2,0";
5452 else
5453 return "copy %1,%0\n\tbl %2,0";
5457 /* Output an unconditional add and branch insn. */
5459 char *
5460 output_parallel_addb (operands, length)
5461 rtx *operands;
5462 int length;
5464 /* To make life easy we want operand0 to be the shared input/output
5465 operand and operand1 to be the readonly operand. */
5466 if (operands[0] == operands[1])
5467 operands[1] = operands[2];
5469 /* These are the cases in which we win. */
5470 if (length == 4)
5471 return "add%I1b,tr %1,%0,%3";
5473 /* None of these cases win, but they don't lose either. */
5474 if (dbr_sequence_length () == 0)
5476 /* Nothing in the delay slot, fake it by putting the combined
5477 insn (the copy or add) in the delay slot of a bl. */
5478 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5480 else
5482 /* Something in the delay slot, but we've got a long branch. */
5483 return "add%I1 %1,%0,%0\n\tbl %3,0";
5487 /* Return nonzero if INSN represents an integer add which might be
5488 combinable with an unconditional branch. */
5490 combinable_add (insn)
5491 rtx insn;
5493 rtx src, dest, prev, pattern = PATTERN (insn);
5495 /* Must be a (set (reg) (plus (reg) (reg/5_bit_int))) */
5496 if (GET_CODE (pattern) != SET
5497 || GET_CODE (SET_SRC (pattern)) != PLUS
5498 || GET_CODE (SET_DEST (pattern)) != REG)
5499 return 0;
5501 src = SET_SRC (pattern);
5502 dest = SET_DEST (pattern);
5504 /* Must be an integer add. */
5505 if (GET_MODE (src) != SImode
5506 || GET_MODE (dest) != SImode)
5507 return 0;
5509 /* Each operand must be an integer register and/or 5 bit immediate. */
5510 if (!ireg_or_int5_operand (dest, VOIDmode)
5511 || !ireg_or_int5_operand (XEXP (src, 0), VOIDmode)
5512 || !ireg_or_int5_operand (XEXP (src, 1), VOIDmode))
5513 return 0;
5515 /* The destination must also be one of the sources. */
5516 return (dest == XEXP (src, 0) || dest == XEXP (src, 1));
5519 /* Return nonzero if INSN represents an integer load/copy which might be
5520 combinable with an unconditional branch. */
5522 combinable_copy (insn)
5523 rtx insn;
5525 rtx src, dest, pattern = PATTERN (insn);
5526 enum machine_mode mode;
5528 /* Must be a (set (reg) (reg/5_bit_int)). */
5529 if (GET_CODE (pattern) != SET)
5530 return 0;
5532 src = SET_SRC (pattern);
5533 dest = SET_DEST (pattern);
5535 /* Must be a mode that corresponds to a single integer register. */
5536 mode = GET_MODE (dest);
5537 if (mode != SImode
5538 && mode != SFmode
5539 && mode != HImode
5540 && mode != QImode)
5541 return 0;
5543 /* Each operand must be a register or 5 bit integer. */
5544 if (!ireg_or_int5_operand (dest, VOIDmode)
5545 || !ireg_or_int5_operand (src, VOIDmode))
5546 return 0;
5548 return 1;
5551 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5552 is used to discourage creating parallel movb/addb insns since a jump
5553 which immediately follows a call can execute in the delay slot of the
5554 call. */
5556 following_call (insn)
5557 rtx insn;
5559 /* Find the previous real insn, skipping NOTEs. */
5560 insn = PREV_INSN (insn);
5561 while (insn && GET_CODE (insn) == NOTE)
5562 insn = PREV_INSN (insn);
5564 /* Check for CALL_INSNs and millicode calls. */
5565 if (insn
5566 && (GET_CODE (insn) == CALL_INSN
5567 || (GET_CODE (insn) == INSN
5568 && GET_CODE (PATTERN (insn)) != SEQUENCE
5569 && GET_CODE (PATTERN (insn)) != USE
5570 && GET_CODE (PATTERN (insn)) != CLOBBER
5571 && get_attr_type (insn) == TYPE_MILLI)))
5572 return 1;
5574 return 0;
5577 /* Return nonzero if this is a floating point multiply (fmpy) which
5578 could be combined with a suitable floating point add or sub insn. */
5580 combinable_fmpy (insn)
5581 rtx insn;
5583 rtx src, dest, pattern = PATTERN (insn);
5584 enum machine_mode mode;
5586 /* Only on 1.1 and later cpus. */
5587 if (!TARGET_SNAKE)
5588 return 0;
5590 /* Must be a (set (reg) (mult (reg) (reg))). */
5591 if (GET_CODE (pattern) != SET
5592 || GET_CODE (SET_SRC (pattern)) != MULT
5593 || GET_CODE (SET_DEST (pattern)) != REG)
5594 return 0;
5596 src = SET_SRC (pattern);
5597 dest = SET_DEST (pattern);
5599 /* Must be registers. */
5600 if (GET_CODE (XEXP (src, 0)) != REG
5601 || GET_CODE (XEXP (src, 1)) != REG)
5602 return 0;
5604 /* Must be a floating point mode. Must match the mode of the fmul. */
5605 mode = GET_MODE (dest);
5606 if (mode != DFmode && mode != SFmode)
5607 return 0;
5609 /* SFmode limits the registers which can be used to the upper
5610 32 32bit FP registers. */
5611 if (mode == SFmode
5612 && (REGNO (dest) < 57
5613 || REGNO (XEXP (src, 0)) < 57
5614 || REGNO (XEXP (src, 1)) < 57))
5615 return 0;
5617 /* Save our operands, we'll need to verify they don't conflict with
5618 those in the fadd or fsub. XXX This needs to disasppear soon. */
5619 fmpy_operands[0] = dest;
5620 fmpy_operands[1] = XEXP (src, 0);
5621 fmpy_operands[2] = XEXP (src, 1);
5623 return 1;
5626 /* Return nonzero if INSN is a floating point add suitable for combining
5627 with the most recently examined floating point multiply. */
5629 combinable_fadd (insn)
5630 rtx insn;
5632 rtx src, dest, pattern = PATTERN (insn);
5633 enum machine_mode mode;
5635 /* Must be a (set (reg) (plus (reg) (reg))). */
5636 if (GET_CODE (pattern) != SET
5637 || GET_CODE (SET_SRC (pattern)) != PLUS
5638 || GET_CODE (SET_DEST (pattern)) != REG)
5639 return 0;
5641 src = SET_SRC (pattern);
5642 dest = SET_DEST (pattern);
5644 /* Must be registers. */
5645 if (GET_CODE (XEXP (src, 0)) != REG
5646 || GET_CODE (XEXP (src, 1)) != REG)
5647 return 0;
5649 /* Must be a floating point mode. Must match the mode of the fmul. */
5650 mode = GET_MODE (dest);
5651 if (mode != DFmode && mode != SFmode)
5652 return 0;
5654 if (mode != GET_MODE (fmpy_operands[0]))
5655 return 0;
5657 /* SFmode limits the registers which can be used to the upper
5658 32 32bit FP registers. */
5659 if (mode == SFmode
5660 && (REGNO (dest) < 57
5661 || REGNO (XEXP (src, 0)) < 57
5662 || REGNO (XEXP (src, 1)) < 57))
5663 return 0;
5665 /* Only 2 real operands to the addition. One of the input operands
5666 must be the same as the output operand. */
5667 if (! rtx_equal_p (dest, XEXP (src, 0))
5668 && ! rtx_equal_p (dest, XEXP (src, 1)))
5669 return 0;
5671 /* Inout operand of the add can not conflict with any operands from the
5672 multiply. */
5673 if (rtx_equal_p (dest, fmpy_operands[0])
5674 || rtx_equal_p (dest, fmpy_operands[1])
5675 || rtx_equal_p (dest, fmpy_operands[2]))
5676 return 0;
5678 /* The multiply can not feed into the addition. */
5679 if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
5680 || rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
5681 return 0;
5683 return 1;
5686 /* Return nonzero if INSN is a floating point sub suitable for combining
5687 with the most recently examined floating point multiply. */
5689 combinable_fsub (insn)
5690 rtx insn;
5692 rtx src, dest, pattern = PATTERN (insn);
5693 enum machine_mode mode;
5695 /* Must be (set (reg) (minus (reg) (reg))). */
5696 if (GET_CODE (pattern) != SET
5697 || GET_CODE (SET_SRC (pattern)) != MINUS
5698 || GET_CODE (SET_DEST (pattern)) != REG)
5699 return 0;
5701 src = SET_SRC (pattern);
5702 dest = SET_DEST (pattern);
5704 if (GET_CODE (XEXP (src, 0)) != REG
5705 || GET_CODE (XEXP (src, 1)) != REG)
5706 return 0;
5708 /* Must be a floating point mode. Must match the mode of the fmul. */
5709 mode = GET_MODE (dest);
5710 if (mode != DFmode && mode != SFmode)
5711 return 0;
5713 if (mode != GET_MODE (fmpy_operands[0]))
5714 return 0;
5716 /* SFmode limits the registers which can be used to the upper
5717 32 32bit FP registers. */
5718 if (mode == SFmode && (REGNO (dest) < 57 || REGNO (XEXP (src, 1)) < 57))
5719 return 0;
5721 /* Only 2 real operands to the subtraction. Output must be the
5722 same as the first operand of the MINUS. */
5723 if (! rtx_equal_p (dest, XEXP (src, 0)))
5724 return 0;
5726 /* Inout operand of the sub can not conflict with any operands from the
5727 multiply. */
5728 if (rtx_equal_p (dest, fmpy_operands[0])
5729 || rtx_equal_p (dest, fmpy_operands[1])
5730 || rtx_equal_p (dest, fmpy_operands[2]))
5731 return 0;
5733 /* The multiply can not feed into the subtraction. */
5734 if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
5735 || rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
5736 return 0;
5738 return 1;
5741 /* We use this hook to perform a PA specific optimization which is difficult
5742 to do in earlier passes.
5744 We want the delay slots of branches within jump tables to be filled.
5745 None of the compiler passes at the moment even has the notion that a
5746 PA jump table doesn't contain addresses, but instead contains actual
5747 instructions!
5749 Because we actually jump into the table, the addresses of each entry
5750 must stay constant in relation to the beginning of the table (which
5751 itself must stay constant relative to the instruction to jump into
5752 it). I don't believe we can guarantee earlier passes of the compiler
5753 will adhere to those rules.
5755 So, late in the compilation process we find all the jump tables, and
5756 expand them into real code -- eg each entry in the jump table vector
5757 will get an appropriate label followed by a jump to the final target.
5759 Reorg and the final jump pass can then optimize these branches and
5760 fill their delay slots. We end up with smaller, more efficient code.
5762 The jump instructions within the table are special; we must be able
5763 to identify them during assembly output (if the jumps don't get filled
5764    we need to emit a nop rather than nullifying the delay slot).  We
5765 identify jumps in switch tables by marking the SET with DImode. */
5767 pa_reorg (insns)
5768 rtx insns;
5770 rtx insn;
5772 remove_useless_addtr_insns (insns, 1);
5774 /* This is fairly cheap, so always run it if optimizing. */
5775 if (optimize > 0)
5777 /* Find and explode all ADDR_VEC insns. */
5778 insns = get_insns ();
5779 for (insn = insns; insn; insn = NEXT_INSN (insn))
5781 rtx pattern, tmp, location;
5782 unsigned int length, i;
5784 /* Find an ADDR_VEC insn to explode. */
5785 if (GET_CODE (insn) != JUMP_INSN
5786 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5787 continue;
5789 /* If needed, emit marker for the beginning of the branch table. */
5790 if (TARGET_GAS)
5791 emit_insn_before (gen_begin_brtab (), insn);
5793 pattern = PATTERN (insn);
5794 location = PREV_INSN (insn);
5795 length = XVECLEN (pattern, 0);
5797 for (i = 0; i < length; i++)
5799 /* Emit the jump itself. */
5800 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
5801 tmp = emit_jump_insn_after (tmp, location);
5802 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
5803 LABEL_NUSES (JUMP_LABEL (tmp))++;
5805 /* Emit a BARRIER after the jump. */
5806 location = NEXT_INSN (location);
5807 emit_barrier_after (location);
5809 /* Put a CODE_LABEL before each so jump.c does not optimize
5810 the jumps away. */
5811 location = NEXT_INSN (location);
5812 tmp = gen_label_rtx ();
5813 LABEL_NUSES (tmp) = 1;
5814 emit_label_after (tmp, location);
5815 location = NEXT_INSN (location);
5818 /* If needed, emit marker for the end of the branch table. */
5819 if (TARGET_GAS)
5820 emit_insn_before (gen_end_brtab (), location);
5821 /* Delete the ADDR_VEC. */
5822 delete_insn (insn);
5825 else if (TARGET_GAS)
5827       /* Still need an end_brtab insn.  */
5828 insns = get_insns ();
5829 for (insn = insns; insn; insn = NEXT_INSN (insn))
5831 /* Find an ADDR_VEC insn. */
5832 if (GET_CODE (insn) != JUMP_INSN
5833 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5834 continue;
5836 /* Now generate markers for the beginning and end of the
5837          branch table.  */
5838 emit_insn_before (gen_begin_brtab (), insn);
5839 emit_insn_after (gen_end_brtab (), insn);