* pa/pa.c (compute_movstrsi_length): Handle residuals correctly.
[official-gcc.git] / gcc / config / pa / pa.c
blob95b6c00dc811f29c7d1c3fac052a03bc4516a4b2
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdio.h>
23 #include "config.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
40 /* Save the operands last given to a compare for use when we
41 generate a scc or bcc insn. */
43 rtx hppa_compare_op0, hppa_compare_op1;
44 enum cmp_type hppa_branch_type;
46 /* Which cpu we are scheduling for. */
47 enum processor_type pa_cpu;
49 /* String to hold which cpu we are scheduling for. */
50 char *pa_cpu_string;
52 /* Set by the FUNCTION_PROFILER macro. */
53 int hp_profile_labelno;
55 /* Counts for the number of callee-saved general and floating point
56 registers which were saved by the current function's prologue. */
57 static int gr_saved, fr_saved;
59 /* Whether or not the current function uses an out-of-line prologue
60 and epilogue. */
61 static int out_of_line_prologue_epilogue;
63 static rtx find_addr_reg ();
65 /* Keep track of the number of bytes we have output in the CODE subspaces
66 during this compilation so we'll know when to emit inline long-calls. */
68 unsigned int total_code_bytes;
70 /* Variables to handle plabels that we discover are necessary at assembly
71 output time. They are output after the current function. */
73 struct defer_plab
75 rtx internal_label;
76 rtx symbol;
77 } *deferred_plabels = 0;
78 int n_deferred_plabels = 0;
80 void
81 override_options ()
83 /* Default to 7100 scheduling. If the 7100LC scheduling ever
84 gets reasonably tuned, it should be the default since that
85 what most PAs sold now are. */
86 if (pa_cpu_string == NULL
87 || ! strcmp (pa_cpu_string, "7100"))
89 pa_cpu_string = "7100";
90 pa_cpu = PROCESSOR_7100;
92 else if (! strcmp (pa_cpu_string, "700"))
94 pa_cpu_string = "700";
95 pa_cpu = PROCESSOR_700;
97 else if (! strcmp (pa_cpu_string, "7100LC"))
99 pa_cpu_string = "7100LC";
100 pa_cpu = PROCESSOR_7100LC;
102 else
104 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
107 if (flag_pic && TARGET_PORTABLE_RUNTIME)
109 warning ("PIC code generation is not supported in the portable runtime model\n");
112 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
114 warning ("PIC code generation is not compatable with fast indirect calls\n");
117 if (flag_pic && profile_flag)
119 warning ("PIC code generation is not compatable with profiling\n");
122 if (TARGET_SPACE && (flag_pic || profile_flag))
124 warning ("Out of line entry/exit sequences are not compatable\n");
125 warning ("with PIC or profiling\n");
128 if (! TARGET_GAS && write_symbols != NO_DEBUG)
130 warning ("-g is only supported when using GAS on this processor,");
131 warning ("-g option disabled.");
132 write_symbols = NO_DEBUG;
137 /* Return non-zero only if OP is a register of mode MODE,
138 or CONST0_RTX. */
140 reg_or_0_operand (op, mode)
141 rtx op;
142 enum machine_mode mode;
144 return (op == CONST0_RTX (mode) || register_operand (op, mode));
147 /* Return non-zero if OP is suitable for use in a call to a named
148 function.
150 (???) For 2.5 try to eliminate either call_operand_address or
151 function_label_operand, they perform very similar functions. */
153 call_operand_address (op, mode)
154 rtx op;
155 enum machine_mode mode;
157 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
160 /* Return 1 if X contains a symbolic expression. We know these
161 expressions will have one of a few well defined forms, so
162 we need only check those forms. */
164 symbolic_expression_p (x)
165 register rtx x;
168 /* Strip off any HIGH. */
169 if (GET_CODE (x) == HIGH)
170 x = XEXP (x, 0);
172 return (symbolic_operand (x, VOIDmode));
176 symbolic_operand (op, mode)
177 register rtx op;
178 enum machine_mode mode;
180 switch (GET_CODE (op))
182 case SYMBOL_REF:
183 case LABEL_REF:
184 return 1;
185 case CONST:
186 op = XEXP (op, 0);
187 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
188 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
189 && GET_CODE (XEXP (op, 1)) == CONST_INT);
190 default:
191 return 0;
195 /* Return truth value of statement that OP is a symbolic memory
196 operand of mode MODE. */
199 symbolic_memory_operand (op, mode)
200 rtx op;
201 enum machine_mode mode;
203 if (GET_CODE (op) == SUBREG)
204 op = SUBREG_REG (op);
205 if (GET_CODE (op) != MEM)
206 return 0;
207 op = XEXP (op, 0);
208 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
209 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
212 /* Return 1 if the operand is either a register or a memory operand that is
213 not symbolic. */
216 reg_or_nonsymb_mem_operand (op, mode)
217 register rtx op;
218 enum machine_mode mode;
220 if (register_operand (op, mode))
221 return 1;
223 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
224 return 1;
226 return 0;
229 /* Return 1 if the operand is either a register, zero, or a memory operand
230 that is not symbolic. */
233 reg_or_0_or_nonsymb_mem_operand (op, mode)
234 register rtx op;
235 enum machine_mode mode;
237 if (register_operand (op, mode))
238 return 1;
240 if (op == CONST0_RTX (mode))
241 return 1;
243 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
244 return 1;
246 return 0;
249 /* Accept any constant that can be moved in one instructions into a
250 general register. */
252 cint_ok_for_move (intval)
253 HOST_WIDE_INT intval;
255 /* OK if ldo, ldil, or zdepi, can be used. */
256 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
257 || zdepi_cint_p (intval));
260 /* Accept anything that can be moved in one instruction into a general
261 register. */
263 move_operand (op, mode)
264 rtx op;
265 enum machine_mode mode;
267 if (register_operand (op, mode))
268 return 1;
270 if (GET_CODE (op) == CONST_INT)
271 return cint_ok_for_move (INTVAL (op));
273 if (GET_CODE (op) == SUBREG)
274 op = SUBREG_REG (op);
275 if (GET_CODE (op) != MEM)
276 return 0;
278 op = XEXP (op, 0);
279 if (GET_CODE (op) == LO_SUM)
280 return (register_operand (XEXP (op, 0), Pmode)
281 && CONSTANT_P (XEXP (op, 1)));
283 /* Since move_operand is only used for source operands, we can always
284 allow scaled indexing! */
285 if (GET_CODE (op) == PLUS
286 && ((GET_CODE (XEXP (op, 0)) == MULT
287 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
288 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
289 && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
290 && GET_CODE (XEXP (op, 1)) == REG)
291 || (GET_CODE (XEXP (op, 1)) == MULT
292 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
293 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
294 && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
295 && GET_CODE (XEXP (op, 0)) == REG)))
296 return 1;
298 return memory_address_p (mode, op);
301 /* Accept REG and any CONST_INT that can be moved in one instruction into a
302 general register. */
304 reg_or_cint_move_operand (op, mode)
305 rtx op;
306 enum machine_mode mode;
308 if (register_operand (op, mode))
309 return 1;
311 if (GET_CODE (op) == CONST_INT)
312 return cint_ok_for_move (INTVAL (op));
314 return 0;
318 pic_label_operand (op, mode)
319 rtx op;
320 enum machine_mode mode;
322 if (!flag_pic)
323 return 0;
325 switch (GET_CODE (op))
327 case LABEL_REF:
328 return 1;
329 case CONST:
330 op = XEXP (op, 0);
331 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
332 && GET_CODE (XEXP (op, 1)) == CONST_INT);
333 default:
334 return 0;
339 fp_reg_operand (op, mode)
340 rtx op;
341 enum machine_mode mode;
343 return reg_renumber && FP_REG_P (op);
348 /* Return truth value of whether OP can be used as an operand in a
349 three operand arithmetic insn that accepts registers of mode MODE
350 or 14-bit signed integers. */
352 arith_operand (op, mode)
353 rtx op;
354 enum machine_mode mode;
356 return (register_operand (op, mode)
357 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
360 /* Return truth value of whether OP can be used as an operand in a
361 three operand arithmetic insn that accepts registers of mode MODE
362 or 11-bit signed integers. */
364 arith11_operand (op, mode)
365 rtx op;
366 enum machine_mode mode;
368 return (register_operand (op, mode)
369 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
372 /* A constant integer suitable for use in a PRE_MODIFY memory
373 reference. */
375 pre_cint_operand (op, mode)
376 rtx op;
377 enum machine_mode mode;
379 return (GET_CODE (op) == CONST_INT
380 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
383 /* A constant integer suitable for use in a POST_MODIFY memory
384 reference. */
386 post_cint_operand (op, mode)
387 rtx op;
388 enum machine_mode mode;
390 return (GET_CODE (op) == CONST_INT
391 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
395 arith_double_operand (op, mode)
396 rtx op;
397 enum machine_mode mode;
399 return (register_operand (op, mode)
400 || (GET_CODE (op) == CONST_DOUBLE
401 && GET_MODE (op) == mode
402 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
403 && (CONST_DOUBLE_HIGH (op) >= 0
404 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
407 /* Return truth value of whether OP is a integer which fits the
408 range constraining immediate operands in three-address insns, or
409 is an integer register. */
412 ireg_or_int5_operand (op, mode)
413 rtx op;
414 enum machine_mode mode;
416 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
417 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
420 /* Return truth value of whether OP is a integer which fits the
421 range constraining immediate operands in three-address insns. */
424 int5_operand (op, mode)
425 rtx op;
426 enum machine_mode mode;
428 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
432 uint5_operand (op, mode)
433 rtx op;
434 enum machine_mode mode;
436 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
440 int11_operand (op, mode)
441 rtx op;
442 enum machine_mode mode;
444 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
448 uint32_operand (op, mode)
449 rtx op;
450 enum machine_mode mode;
452 #if HOST_BITS_PER_WIDE_INT > 32
453 /* All allowed constants will fit a CONST_INT. */
454 return (GET_CODE (op) == CONST_INT
455 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
456 #else
457 return (GET_CODE (op) == CONST_INT
458 || (GET_CODE (op) == CONST_DOUBLE
459 && CONST_DOUBLE_HIGH (op) == 0));
460 #endif
464 arith5_operand (op, mode)
465 rtx op;
466 enum machine_mode mode;
468 return register_operand (op, mode) || int5_operand (op, mode);
471 /* True iff zdepi can be used to generate this CONST_INT. */
473 zdepi_cint_p (x)
474 unsigned HOST_WIDE_INT x;
476 unsigned HOST_WIDE_INT lsb_mask, t;
478 /* This might not be obvious, but it's at least fast.
479 This function is critical; we don't have the time loops would take. */
480 lsb_mask = x & -x;
481 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
482 /* Return true iff t is a power of two. */
483 return ((t & (t - 1)) == 0);
486 /* True iff depi or extru can be used to compute (reg & mask).
487 Accept bit pattern like these:
488 0....01....1
489 1....10....0
490 1..10..01..1 */
492 and_mask_p (mask)
493 unsigned HOST_WIDE_INT mask;
495 mask = ~mask;
496 mask += mask & -mask;
497 return (mask & (mask - 1)) == 0;
500 /* True iff depi or extru can be used to compute (reg & OP). */
502 and_operand (op, mode)
503 rtx op;
504 enum machine_mode mode;
506 return (register_operand (op, mode)
507 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
510 /* True iff depi can be used to compute (reg | MASK). */
512 ior_mask_p (mask)
513 unsigned HOST_WIDE_INT mask;
515 mask += mask & -mask;
516 return (mask & (mask - 1)) == 0;
519 /* True iff depi can be used to compute (reg | OP). */
521 ior_operand (op, mode)
522 rtx op;
523 enum machine_mode mode;
525 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
529 lhs_lshift_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
536 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
537 Such values can be the left hand side x in (x << r), using the zvdepi
538 instruction. */
540 lhs_lshift_cint_operand (op, mode)
541 rtx op;
542 enum machine_mode mode;
544 unsigned HOST_WIDE_INT x;
545 if (GET_CODE (op) != CONST_INT)
546 return 0;
547 x = INTVAL (op) >> 4;
548 return (x & (x + 1)) == 0;
552 arith32_operand (op, mode)
553 rtx op;
554 enum machine_mode mode;
556 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
560 pc_or_label_operand (op, mode)
561 rtx op;
562 enum machine_mode mode;
564 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
567 /* Legitimize PIC addresses. If the address is already
568 position-independent, we return ORIG. Newly generated
569 position-independent addresses go to REG. If we need more
570 than one register, we lose. */
573 legitimize_pic_address (orig, mode, reg)
574 rtx orig, reg;
575 enum machine_mode mode;
577 rtx pic_ref = orig;
579 /* Labels need special handling. */
580 if (pic_label_operand (orig))
582 emit_insn (gen_pic_load_label (reg, orig));
583 current_function_uses_pic_offset_table = 1;
584 return reg;
586 if (GET_CODE (orig) == SYMBOL_REF)
588 if (reg == 0)
589 abort ();
591 if (flag_pic == 2)
593 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
594 pic_ref = gen_rtx (MEM, Pmode,
595 gen_rtx (LO_SUM, Pmode, reg,
596 gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
598 else
599 pic_ref = gen_rtx (MEM, Pmode,
600 gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
601 current_function_uses_pic_offset_table = 1;
602 RTX_UNCHANGING_P (pic_ref) = 1;
603 emit_move_insn (reg, pic_ref);
604 return reg;
606 else if (GET_CODE (orig) == CONST)
608 rtx base;
610 if (GET_CODE (XEXP (orig, 0)) == PLUS
611 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
612 return orig;
614 if (reg == 0)
615 abort ();
617 if (GET_CODE (XEXP (orig, 0)) == PLUS)
619 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
620 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
621 base == reg ? 0 : reg);
623 else abort ();
624 if (GET_CODE (orig) == CONST_INT)
626 if (INT_14_BITS (orig))
627 return plus_constant_for_output (base, INTVAL (orig));
628 orig = force_reg (Pmode, orig);
630 pic_ref = gen_rtx (PLUS, Pmode, base, orig);
631 /* Likewise, should we set special REG_NOTEs here? */
633 return pic_ref;
636 /* Try machine-dependent ways of modifying an illegitimate address
637 to be legitimate. If we find one, return the new, valid address.
638 This macro is used in only one place: `memory_address' in explow.c.
640 OLDX is the address as it was before break_out_memory_refs was called.
641 In some cases it is useful to look at this to decide what needs to be done.
643 MODE and WIN are passed so that this macro can use
644 GO_IF_LEGITIMATE_ADDRESS.
646 It is always safe for this macro to do nothing. It exists to recognize
647 opportunities to optimize the output.
649 For the PA, transform:
651 memory(X + <large int>)
653 into:
655 if (<large int> & mask) >= 16
656 Y = (<large int> & ~mask) + mask + 1 Round up.
657 else
658 Y = (<large int> & ~mask) Round down.
659 Z = X + Y
660 memory (Z + (<large int> - Y));
662 This is for CSE to find several similar references, and only use one Z.
664 X can either be a SYMBOL_REF or REG, but because combine can not
665 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
666 D will not fit in 14 bits.
668 MODE_FLOAT references allow displacements which fit in 5 bits, so use
669 0x1f as the mask.
671 MODE_INT references allow displacements which fit in 14 bits, so use
672 0x3fff as the mask.
674 This relies on the fact that most mode MODE_FLOAT references will use FP
675 registers and most mode MODE_INT references will use integer registers.
676 (In the rare case of an FP register used in an integer MODE, we depend
677 on secondary reloads to clean things up.)
680 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
681 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
682 addressing modes to be used).
684 Put X and Z into registers. Then put the entire expression into
685 a register. */
688 hppa_legitimize_address (x, oldx, mode)
689 rtx x, oldx;
690 enum machine_mode mode;
692 rtx orig = x;
694 if (flag_pic)
695 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
697 /* Strip off CONST. */
698 if (GET_CODE (x) == CONST)
699 x = XEXP (x, 0);
701 /* Special case. Get the SYMBOL_REF into a register and use indexing.
702 That should always be safe. */
703 if (GET_CODE (x) == PLUS
704 && GET_CODE (XEXP (x, 0)) == REG
705 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
707 rtx reg = force_reg (SImode, XEXP (x, 1));
708 return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
711 /* Note we must reject symbols which represent function addresses
712 since the assembler/linker can't handle arithmetic on plabels. */
713 if (GET_CODE (x) == PLUS
714 && GET_CODE (XEXP (x, 1)) == CONST_INT
715 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
716 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
717 || GET_CODE (XEXP (x, 0)) == REG))
719 rtx int_part, ptr_reg;
720 int newoffset;
721 int offset = INTVAL (XEXP (x, 1));
722 int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
724 /* Choose which way to round the offset. Round up if we
725 are >= halfway to the next boundary. */
726 if ((offset & mask) >= ((mask + 1) / 2))
727 newoffset = (offset & ~ mask) + mask + 1;
728 else
729 newoffset = (offset & ~ mask);
731 /* If the newoffset will not fit in 14 bits (ldo), then
732 handling this would take 4 or 5 instructions (2 to load
733 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
734 add the new offset and the SYMBOL_REF.) Combine can
735 not handle 4->2 or 5->2 combinations, so do not create
736 them. */
737 if (! VAL_14_BITS_P (newoffset)
738 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
740 rtx const_part = gen_rtx (CONST, VOIDmode,
741 gen_rtx (PLUS, Pmode,
742 XEXP (x, 0),
743 GEN_INT (newoffset)));
744 rtx tmp_reg
745 = force_reg (Pmode,
746 gen_rtx (HIGH, Pmode, const_part));
747 ptr_reg
748 = force_reg (Pmode,
749 gen_rtx (LO_SUM, Pmode,
750 tmp_reg, const_part));
752 else
754 if (! VAL_14_BITS_P (newoffset))
755 int_part = force_reg (Pmode, GEN_INT (newoffset));
756 else
757 int_part = GEN_INT (newoffset);
759 ptr_reg = force_reg (Pmode,
760 gen_rtx (PLUS, Pmode,
761 force_reg (Pmode, XEXP (x, 0)),
762 int_part));
764 return plus_constant (ptr_reg, offset - newoffset);
767 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
769 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
771 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
772 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
773 || GET_CODE (XEXP (x, 1)) == SUBREG)
774 && GET_CODE (XEXP (x, 1)) != CONST)
776 int val = INTVAL (XEXP (XEXP (x, 0), 1));
777 rtx reg1, reg2;
779 reg1 = XEXP (x, 1);
780 if (GET_CODE (reg1) != REG)
781 reg1 = force_reg (Pmode, force_operand (reg1, 0));
783 reg2 = XEXP (XEXP (x, 0), 0);
784 if (GET_CODE (reg2) != REG)
785 reg2 = force_reg (Pmode, force_operand (reg2, 0));
787 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
788 gen_rtx (MULT, Pmode,
789 reg2, GEN_INT (val)),
790 reg1));
793 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
795 Only do so for floating point modes since this is more speculative
796 and we lose if it's an integer store. */
797 if (GET_CODE (x) == PLUS
798 && GET_CODE (XEXP (x, 0)) == PLUS
799 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
800 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
801 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
802 && (mode == SFmode || mode == DFmode))
805 /* First, try and figure out what to use as a base register. */
806 rtx reg1, reg2, base, idx, orig_base;
808 reg1 = XEXP (XEXP (x, 0), 1);
809 reg2 = XEXP (x, 1);
810 base = NULL_RTX;
811 idx = NULL_RTX;
813 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
814 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
815 know it's a base register below. */
816 if (GET_CODE (reg1) != REG)
817 reg1 = force_reg (Pmode, force_operand (reg1, 0));
819 if (GET_CODE (reg2) != REG)
820 reg2 = force_reg (Pmode, force_operand (reg2, 0));
822 /* Figure out what the base and index are. */
824 if (GET_CODE (reg1) == REG
825 && REGNO_POINTER_FLAG (REGNO (reg1)))
827 base = reg1;
828 orig_base = XEXP (XEXP (x, 0), 1);
829 idx = gen_rtx (PLUS, Pmode,
830 gen_rtx (MULT, Pmode,
831 XEXP (XEXP (XEXP (x, 0), 0), 0),
832 XEXP (XEXP (XEXP (x, 0), 0), 1)),
833 XEXP (x, 1));
835 else if (GET_CODE (reg2) == REG
836 && REGNO_POINTER_FLAG (REGNO (reg2)))
838 base = reg2;
839 orig_base = XEXP (x, 1);
840 idx = XEXP (x, 0);
843 if (base == 0)
844 return orig;
846 /* If the index adds a large constant, try to scale the
847 constant so that it can be loaded with only one insn. */
848 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
849 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
850 / INTVAL (XEXP (XEXP (idx, 0), 1)))
851 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
853 /* Divide the CONST_INT by the scale factor, then add it to A. */
854 int val = INTVAL (XEXP (idx, 1));
856 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
857 reg1 = XEXP (XEXP (idx, 0), 0);
858 if (GET_CODE (reg1) != REG)
859 reg1 = force_reg (Pmode, force_operand (reg1, 0));
861 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));
863 /* We can now generate a simple scaled indexed address. */
864 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
865 gen_rtx (MULT, Pmode, reg1,
866 XEXP (XEXP (idx, 0), 1)),
867 base));
870 /* If B + C is still a valid base register, then add them. */
871 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
872 && INTVAL (XEXP (idx, 1)) <= 4096
873 && INTVAL (XEXP (idx, 1)) >= -4096)
875 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
876 rtx reg1, reg2;
878 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));
880 reg2 = XEXP (XEXP (idx, 0), 0);
881 if (GET_CODE (reg2) != CONST_INT)
882 reg2 = force_reg (Pmode, force_operand (reg2, 0));
884 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
885 gen_rtx (MULT, Pmode,
886 reg2, GEN_INT (val)),
887 reg1));
890 /* Get the index into a register, then add the base + index and
891 return a register holding the result. */
893 /* First get A into a register. */
894 reg1 = XEXP (XEXP (idx, 0), 0);
895 if (GET_CODE (reg1) != REG)
896 reg1 = force_reg (Pmode, force_operand (reg1, 0));
898 /* And get B into a register. */
899 reg2 = XEXP (idx, 1);
900 if (GET_CODE (reg2) != REG)
901 reg2 = force_reg (Pmode, force_operand (reg2, 0));
903 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
904 gen_rtx (MULT, Pmode, reg1,
905 XEXP (XEXP (idx, 0), 1)),
906 reg2));
908 /* Add the result to our base register and return. */
909 return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
913 /* Uh-oh. We might have an address for x[n-100000]. This needs
914 special handling to avoid creating an indexed memory address
915 with x-100000 as the base.
917 If the constant part is small enough, then it's still safe because
918 there is a guard page at the beginning and end of the data segment.
920 Scaled references are common enough that we want to try and rearrange the
921 terms so that we can use indexing for these addresses too. Only
922 do the optimization for floatint point modes. */
924 if (GET_CODE (x) == PLUS
925 && symbolic_expression_p (XEXP (x, 1)))
927 /* Ugly. We modify things here so that the address offset specified
928 by the index expression is computed first, then added to x to form
929 the entire address. */
931 rtx regx1, regx2, regy1, regy2, y;
933 /* Strip off any CONST. */
934 y = XEXP (x, 1);
935 if (GET_CODE (y) == CONST)
936 y = XEXP (y, 0);
938 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
940 /* See if this looks like
941 (plus (mult (reg) (shadd_const))
942 (const (plus (symbol_ref) (const_int))))
944 Where const_int is small. In that case the const
945 expression is a valid pointer for indexing.
947 If const_int is big, but can be divided evenly by shadd_const
948 and added to (reg). This allows more scaled indexed addresses. */
949 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
950 && GET_CODE (XEXP (x, 0)) == MULT
951 && GET_CODE (XEXP (y, 1)) == CONST_INT
952 && INTVAL (XEXP (y, 1)) >= -4096
953 && INTVAL (XEXP (y, 1)) <= 4095
954 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
955 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
957 int val = INTVAL (XEXP (XEXP (x, 0), 1));
958 rtx reg1, reg2;
960 reg1 = XEXP (x, 1);
961 if (GET_CODE (reg1) != REG)
962 reg1 = force_reg (Pmode, force_operand (reg1, 0));
964 reg2 = XEXP (XEXP (x, 0), 0);
965 if (GET_CODE (reg2) != REG)
966 reg2 = force_reg (Pmode, force_operand (reg2, 0));
968 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
969 gen_rtx (MULT, Pmode,
970 reg2, GEN_INT (val)),
971 reg1));
973 else if ((mode == DFmode || mode == SFmode)
974 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
975 && GET_CODE (XEXP (x, 0)) == MULT
976 && GET_CODE (XEXP (y, 1)) == CONST_INT
977 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
978 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
979 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
981 regx1
982 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
983 / INTVAL (XEXP (XEXP (x, 0), 1))));
984 regx2 = XEXP (XEXP (x, 0), 0);
985 if (GET_CODE (regx2) != REG)
986 regx2 = force_reg (Pmode, force_operand (regx2, 0));
987 regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
988 regx2, regx1));
989 return force_reg (Pmode,
990 gen_rtx (PLUS, Pmode,
991 gen_rtx (MULT, Pmode, regx2,
992 XEXP (XEXP (x, 0), 1)),
993 force_reg (Pmode, XEXP (y, 0))));
995 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
996 && INTVAL (XEXP (y, 1)) >= -4096
997 && INTVAL (XEXP (y, 1)) <= 4095)
999 /* This is safe because of the guard page at the
1000 beginning and end of the data space. Just
1001 return the original address. */
1002 return orig;
1004 else
1006 /* Doesn't look like one we can optimize. */
1007 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1008 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1009 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1010 regx1 = force_reg (Pmode,
1011 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
1012 return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
1017 return orig;
1020 /* For the HPPA, REG and REG+CONST is cost 0
1021 and addresses involving symbolic constants are cost 2.
1023 PIC addresses are very expensive.
1025 It is no coincidence that this has the same structure
1026 as GO_IF_LEGITIMATE_ADDRESS. */
1028 hppa_address_cost (X)
1029 rtx X;
1031 if (GET_CODE (X) == PLUS)
1032 return 1;
1033 else if (GET_CODE (X) == LO_SUM)
1034 return 1;
1035 else if (GET_CODE (X) == HIGH)
1036 return 2;
1037 return 4;
1040 /* Emit insns to move operands[1] into operands[0].
1042 Return 1 if we have written out everything that needs to be done to
1043 do the move. Otherwise, return 0 and the caller will emit the move
1044 normally. */
1047 emit_move_sequence (operands, mode, scratch_reg)
1048 rtx *operands;
1049 enum machine_mode mode;
1050 rtx scratch_reg;
1052 register rtx operand0 = operands[0];
1053 register rtx operand1 = operands[1];
1055 if (reload_in_progress && GET_CODE (operand0) == REG
1056 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1057 operand0 = reg_equiv_mem[REGNO (operand0)];
1058 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1059 && GET_CODE (SUBREG_REG (operand0)) == REG
1060 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1062 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1063 operand0 = alter_subreg (operand0);
1066 if (reload_in_progress && GET_CODE (operand1) == REG
1067 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1068 operand1 = reg_equiv_mem[REGNO (operand1)];
1069 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1070 && GET_CODE (SUBREG_REG (operand1)) == REG
1071 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1073 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1074 operand1 = alter_subreg (operand1);
1077 /* Handle secondary reloads for loads/stores of FP registers from
1078 REG+D addresses where D does not fit in 5 bits, including
1079 (subreg (mem (addr))) cases. */
1080 if (fp_reg_operand (operand0, mode)
1081 && ((GET_CODE (operand1) == MEM
1082 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1083 || ((GET_CODE (operand1) == SUBREG
1084 && GET_CODE (XEXP (operand1, 0)) == MEM
1085 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1086 && scratch_reg)
1088 if (GET_CODE (operand1) == SUBREG)
1089 operand1 = XEXP (operand1, 0);
1091 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1093 /* D might not fit in 14 bits either; for such cases load D into
1094 scratch reg. */
1095 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1097 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1098 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1099 SImode,
1100 XEXP (XEXP (operand1, 0), 0),
1101 scratch_reg));
1103 else
1104 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1105 emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
1106 scratch_reg)));
1107 return 1;
1109 else if (fp_reg_operand (operand1, mode)
1110 && ((GET_CODE (operand0) == MEM
1111 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1112 || ((GET_CODE (operand0) == SUBREG)
1113 && GET_CODE (XEXP (operand0, 0)) == MEM
1114 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1115 && scratch_reg)
1117 if (GET_CODE (operand0) == SUBREG)
1118 operand0 = XEXP (operand0, 0);
1120 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1121 /* D might not fit in 14 bits either; for such cases load D into
1122 scratch reg. */
1123 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1125 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1126 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1127 SImode,
1128 XEXP (XEXP (operand0, 0), 0),
1129 scratch_reg));
1131 else
1132 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1133 emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
1134 operand1));
1135 return 1;
1137 /* Handle secondary reloads for loads of FP registers from constant
1138 expressions by forcing the constant into memory.
1140 use scratch_reg to hold the address of the memory location.
1142 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1143 NO_REGS when presented with a const_int and an register class
1144 containing only FP registers. Doing so unfortunately creates
1145 more problems than it solves. Fix this for 2.5. */
1146 else if (fp_reg_operand (operand0, mode)
1147 && CONSTANT_P (operand1)
1148 && scratch_reg)
1150 rtx xoperands[2];
1152 /* Force the constant into memory and put the address of the
1153 memory location into scratch_reg. */
1154 xoperands[0] = scratch_reg;
1155 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1156 emit_move_sequence (xoperands, Pmode, 0);
1158 /* Now load the destination register. */
1159 emit_insn (gen_rtx (SET, mode, operand0,
1160 gen_rtx (MEM, mode, scratch_reg)));
1161 return 1;
1163 /* Handle secondary reloads for SAR. These occur when trying to load
   the SAR from memory, an FP register, or with a constant.  */
1165 else if (GET_CODE (operand0) == REG
1166 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1167 && (GET_CODE (operand1) == MEM
1168 || GET_CODE (operand1) == CONST_INT
1169 || (GET_CODE (operand1) == REG
1170 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1171 && scratch_reg)
1173 /* D might not fit in 14 bits either; for such cases load D into
1174 scratch reg. */
1175 if (GET_CODE (operand1) == MEM
1176 && !memory_address_p (SImode, XEXP (operand1, 0)))
1178 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1179 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1180 SImode,
1181 XEXP (XEXP (operand1, 0), 0),
1182 scratch_reg));
1183 emit_move_insn (scratch_reg, gen_rtx (MEM, GET_MODE (operand1),
1184 scratch_reg));
1186 else
1187 emit_move_insn (scratch_reg, operand1);
1188 emit_move_insn (operand0, scratch_reg);
1189 return 1;
1191 /* Handle most common case: storing into a register. */
1192 else if (register_operand (operand0, mode))
1194 if (register_operand (operand1, mode)
1195 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1196 || (operand1 == CONST0_RTX (mode))
1197 || (GET_CODE (operand1) == HIGH
1198 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1199 /* Only `general_operands' can come here, so MEM is ok. */
1200 || GET_CODE (operand1) == MEM)
1202 /* Run this case quickly. */
1203 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1204 return 1;
1207 else if (GET_CODE (operand0) == MEM)
1209 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1210 && !(reload_in_progress || reload_completed))
1212 rtx temp = gen_reg_rtx (DFmode);
1214 emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
1215 emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
1216 return 1;
1218 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1220 /* Run this case quickly. */
1221 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1222 return 1;
1224 if (! (reload_in_progress || reload_completed))
1226 operands[0] = validize_mem (operand0);
1227 operands[1] = operand1 = force_reg (mode, operand1);
1231 /* Simplify the source if we need to. */
1232 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1233 || (GET_CODE (operand1) == HIGH
1234 && symbolic_operand (XEXP (operand1, 0), mode)))
1236 int ishighonly = 0;
1238 if (GET_CODE (operand1) == HIGH)
1240 ishighonly = 1;
1241 operand1 = XEXP (operand1, 0);
1243 if (symbolic_operand (operand1, mode))
1245 rtx const_part = NULL;
1247 /* Argh. The assembler and linker can't handle arithmetic
1248 involving plabels. We'll have to split up operand1 here
1249 if it's a function label involved in an arithmetic
1250 expression. Luckily, this only happens with addition
1251 of constants to plabels, which simplifies the test.
1253 We add the constant back in just before returning to
1254 our caller. */
1255 if (GET_CODE (operand1) == CONST
1256 && GET_CODE (XEXP (operand1, 0)) == PLUS
1257 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1259 /* Save away the constant part of the expression. */
1260 const_part = XEXP (XEXP (operand1, 0), 1);
1261 if (GET_CODE (const_part) != CONST_INT)
1262 abort ();
1264 /* Set operand1 to just the SYMBOL_REF. */
1265 operand1 = XEXP (XEXP (operand1, 0), 0);
1268 if (flag_pic)
1270 rtx temp;
1272 if (reload_in_progress || reload_completed)
1273 temp = scratch_reg ? scratch_reg : operand0;
1274 else
1275 temp = gen_reg_rtx (Pmode);
1277 /* If operand1 is a function label, then we've got to
1278 force it to memory, then load op0 from memory. */
1279 if (function_label_operand (operand1, mode))
1281 operands[1] = force_const_mem (mode, operand1);
1282 emit_move_sequence (operands, mode, temp);
1284 /* Likewise for (const (plus (symbol) (const_int))) when
1285 generating pic code during or after reload and const_int
1286 will not fit in 14 bits. */
1287 else if (GET_CODE (operand1) == CONST
1288 && GET_CODE (XEXP (operand1, 0)) == PLUS
1289 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1290 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1291 && (reload_completed || reload_in_progress)
1292 && flag_pic)
1294 operands[1] = force_const_mem (mode, operand1);
1295 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1296 mode, temp);
1297 emit_move_sequence (operands, mode, temp);
1299 else
1301 operands[1] = legitimize_pic_address (operand1, mode, temp);
1302 emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
1305 /* On the HPPA, references to data space are supposed to use dp,
1306 register 27, but showing it in the RTL inhibits various cse
1307 and loop optimizations. */
1308 else
1310 rtx temp, set;
1312 if (reload_in_progress || reload_completed)
1313 temp = scratch_reg ? scratch_reg : operand0;
1314 else
1315 temp = gen_reg_rtx (mode);
1317 /* Loading a SYMBOL_REF into a register makes that register
1318 safe to be used as the base in an indexed address.
1320 Don't mark hard registers though. That loses. */
1321 if (GET_CODE (operand0) == REG
1322 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1323 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1324 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1325 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1326 if (ishighonly)
1327 set = gen_rtx (SET, mode, operand0, temp);
1328 else
1329 set = gen_rtx (SET, VOIDmode,
1330 operand0,
1331 gen_rtx (LO_SUM, mode, temp, operand1));
1333 emit_insn (gen_rtx (SET, VOIDmode,
1334 temp,
1335 gen_rtx (HIGH, mode, operand1)));
1336 emit_insn (set);
1340 /* Add back in the constant part if needed. */
1341 if (const_part != NULL)
1342 expand_inc (operand0, const_part);
1343 return 1;
1345 else if (GET_CODE (operand1) != CONST_INT
1346 || ! cint_ok_for_move (INTVAL (operand1)))
1348 rtx temp;
1350 if (reload_in_progress || reload_completed)
1351 temp = operand0;
1352 else
1353 temp = gen_reg_rtx (mode);
1355 emit_insn (gen_rtx (SET, VOIDmode, temp,
1356 gen_rtx (HIGH, mode, operand1)));
1357 operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1360 /* Now have insn-emit do whatever it normally does. */
1361 return 0;
1364 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1365 it will need a link/runtime reloc). */
1368 reloc_needed (exp)
1369 tree exp;
1371 int reloc = 0;
1373 switch (TREE_CODE (exp))
1375 case ADDR_EXPR:
1376 return 1;
1378 case PLUS_EXPR:
1379 case MINUS_EXPR:
1380 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1381 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1382 break;
1384 case NOP_EXPR:
1385 case CONVERT_EXPR:
1386 case NON_LVALUE_EXPR:
1387 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1388 break;
1390 case CONSTRUCTOR:
1392 register tree link;
1393 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1394 if (TREE_VALUE (link) != 0)
1395 reloc |= reloc_needed (TREE_VALUE (link));
1397 break;
1399 case ERROR_MARK:
1400 break;
1402 return reloc;
1405 /* Does operand (which is a symbolic_operand) live in text space? If
1406 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1409 read_only_operand (operand)
1410 rtx operand;
1412 if (GET_CODE (operand) == CONST)
1413 operand = XEXP (XEXP (operand, 0), 0);
1414 if (flag_pic)
1416 if (GET_CODE (operand) == SYMBOL_REF)
1417 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1419 else
1421 if (GET_CODE (operand) == SYMBOL_REF)
1422 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1424 return 1;
1428 /* Return the best assembler insn template
1429 for moving operands[1] into operands[0] as a fullword. */
1430 char *
1431 singlemove_string (operands)
1432 rtx *operands;
1434 HOST_WIDE_INT intval;
1436 if (GET_CODE (operands[0]) == MEM)
1437 return "stw %r1,%0";
1438 if (GET_CODE (operands[1]) == MEM)
1439 return "ldw %1,%0";
1440 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1442 long i;
1443 REAL_VALUE_TYPE d;
1445 if (GET_MODE (operands[1]) != SFmode)
1446 abort ();
1448 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1449 bit pattern. */
1450 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1451 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1453 operands[1] = GEN_INT (i);
1454 /* Fall through to CONST_INT case. */
1456 if (GET_CODE (operands[1]) == CONST_INT)
1458 intval = INTVAL (operands[1]);
1460 if (VAL_14_BITS_P (intval))
1461 return "ldi %1,%0";
1462 else if ((intval & 0x7ff) == 0)
1463 return "ldil L'%1,%0";
1464 else if (zdepi_cint_p (intval))
1465 return "zdepi %Z1,%0";
1466 else
1467 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1469 return "copy %1,%0";
1473 /* Compute position (in OP[1]) and width (in OP[2])
1474 useful for copying IMM to a register using the zdepi
1475 instructions. Store the immediate value to insert in OP[0]. */
1476 void
1477 compute_zdepi_operands (imm, op)
1478 unsigned HOST_WIDE_INT imm;
1479 unsigned *op;
1481 int lsb, len;
1483 /* Find the least significant set bit in IMM. */
1484 for (lsb = 0; lsb < 32; lsb++)
1486 if ((imm & 1) != 0)
1487 break;
1488 imm >>= 1;
1491 /* Choose variants based on *sign* of the 5-bit field. */
1492 if ((imm & 0x10) == 0)
1493 len = (lsb <= 28) ? 4 : 32 - lsb;
1494 else
1496 /* Find the width of the bitstring in IMM. */
1497 for (len = 5; len < 32; len++)
1499 if ((imm & (1 << len)) == 0)
1500 break;
1503 /* Sign extend IMM as a 5-bit value. */
1504 imm = (imm & 0xf) - 0x10;
1507 op[0] = imm;
1508 op[1] = 31 - lsb;
1509 op[2] = len;
1512 /* Output assembler code to perform a doubleword move insn
1513 with operands OPERANDS. */
1515 char *
1516 output_move_double (operands)
1517 rtx *operands;
1519 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1520 rtx latehalf[2];
1521 rtx addreg0 = 0, addreg1 = 0;
1523 /* First classify both operands. */
1525 if (REG_P (operands[0]))
1526 optype0 = REGOP;
1527 else if (offsettable_memref_p (operands[0]))
1528 optype0 = OFFSOP;
1529 else if (GET_CODE (operands[0]) == MEM)
1530 optype0 = MEMOP;
1531 else
1532 optype0 = RNDOP;
1534 if (REG_P (operands[1]))
1535 optype1 = REGOP;
1536 else if (CONSTANT_P (operands[1]))
1537 optype1 = CNSTOP;
1538 else if (offsettable_memref_p (operands[1]))
1539 optype1 = OFFSOP;
1540 else if (GET_CODE (operands[1]) == MEM)
1541 optype1 = MEMOP;
1542 else
1543 optype1 = RNDOP;
1545 /* Check for the cases that the operand constraints are not
1546 supposed to allow to happen. Abort if we get one,
1547 because generating code for these cases is painful. */
1549 if (optype0 != REGOP && optype1 != REGOP)
1550 abort ();
1552 /* Handle auto decrementing and incrementing loads and stores
1553 specifically, since the structure of the function doesn't work
1554 for them without major modification. Do it better when we learn
1555 this port about the general inc/dec addressing of PA.
1556 (This was written by tege. Chide him if it doesn't work.) */
1558 if (optype0 == MEMOP)
1560 /* We have to output the address syntax ourselves, since print_operand
1561 doesn't deal with the addresses we want to use. Fix this later. */
1563 rtx addr = XEXP (operands[0], 0);
1564 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1566 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1568 operands[0] = XEXP (addr, 0);
1569 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1570 abort ();
1572 if (!reg_overlap_mentioned_p (high_reg, addr))
1574 /* No overlap between high target register and address
1575 register. (We do this in a non-obvious way to
1576 save a register file writeback) */
1577 if (GET_CODE (addr) == POST_INC)
1578 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1579 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1581 else
1582 abort();
1584 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1586 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1588 operands[0] = XEXP (addr, 0);
1589 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1590 abort ();
1592 if (!reg_overlap_mentioned_p (high_reg, addr))
1594 /* No overlap between high target register and address
1595 register. (We do this in a non-obvious way to
1596 save a register file writeback) */
1597 if (GET_CODE (addr) == PRE_INC)
1598 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1599 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1601 else
1602 abort();
1605 if (optype1 == MEMOP)
1607 /* We have to output the address syntax ourselves, since print_operand
1608 doesn't deal with the addresses we want to use. Fix this later. */
1610 rtx addr = XEXP (operands[1], 0);
1611 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1613 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1615 operands[1] = XEXP (addr, 0);
1616 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1617 abort ();
1619 if (!reg_overlap_mentioned_p (high_reg, addr))
1621 /* No overlap between high target register and address
1622 register. (We do this in a non-obvious way to
1623 save a register file writeback) */
1624 if (GET_CODE (addr) == POST_INC)
1625 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1626 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1628 else
1630 /* This is an undefined situation. We should load into the
1631 address register *and* update that register. Probably
1632 we don't need to handle this at all. */
1633 if (GET_CODE (addr) == POST_INC)
1634 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1635 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1638 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1640 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1642 operands[1] = XEXP (addr, 0);
1643 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1644 abort ();
1646 if (!reg_overlap_mentioned_p (high_reg, addr))
1648 /* No overlap between high target register and address
1649 register. (We do this in a non-obvious way to
1650 save a register file writeback) */
1651 if (GET_CODE (addr) == PRE_INC)
1652 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1653 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1655 else
1657 /* This is an undefined situation. We should load into the
1658 address register *and* update that register. Probably
1659 we don't need to handle this at all. */
1660 if (GET_CODE (addr) == PRE_INC)
1661 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1662 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1667 /* If an operand is an unoffsettable memory ref, find a register
1668 we can increment temporarily to make it refer to the second word. */
1670 if (optype0 == MEMOP)
1671 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1673 if (optype1 == MEMOP)
1674 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1676 /* Ok, we can do one word at a time.
1677 Normally we do the low-numbered word first.
1679 In either case, set up in LATEHALF the operands to use
1680 for the high-numbered word and in some cases alter the
1681 operands in OPERANDS to be suitable for the low-numbered word. */
1683 if (optype0 == REGOP)
1684 latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1685 else if (optype0 == OFFSOP)
1686 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1687 else
1688 latehalf[0] = operands[0];
1690 if (optype1 == REGOP)
1691 latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
1692 else if (optype1 == OFFSOP)
1693 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1694 else if (optype1 == CNSTOP)
1695 split_double (operands[1], &operands[1], &latehalf[1]);
1696 else
1697 latehalf[1] = operands[1];
1699 /* If the first move would clobber the source of the second one,
1700 do them in the other order.
1702 This can happen in two cases:
1704 mem -> register where the first half of the destination register
1705 is the same register used in the memory's address. Reload
1706 can create such insns.
1708 mem in this case will be either register indirect or register
1709 indirect plus a valid offset.
1711 register -> register move where REGNO(dst) == REGNO(src + 1)
1712 someone (Tim/Tege?) claimed this can happen for parameter loads.
1714 Handle mem -> register case first. */
1715 if (optype0 == REGOP
1716 && (optype1 == MEMOP || optype1 == OFFSOP)
1717 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1718 operands[1], 0))
1720 /* Do the late half first. */
1721 if (addreg1)
1722 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1723 output_asm_insn (singlemove_string (latehalf), latehalf);
1725 /* Then clobber. */
1726 if (addreg1)
1727 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1728 return singlemove_string (operands);
1731 /* Now handle register -> register case. */
1732 if (optype0 == REGOP && optype1 == REGOP
1733 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1735 output_asm_insn (singlemove_string (latehalf), latehalf);
1736 return singlemove_string (operands);
1739 /* Normal case: do the two words, low-numbered first. */
1741 output_asm_insn (singlemove_string (operands), operands);
1743 /* Make any unoffsettable addresses point at high-numbered word. */
1744 if (addreg0)
1745 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1746 if (addreg1)
1747 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1749 /* Do that word. */
1750 output_asm_insn (singlemove_string (latehalf), latehalf);
1752 /* Undo the adds we just did. */
1753 if (addreg0)
1754 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1755 if (addreg1)
1756 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1758 return "";
1761 char *
1762 output_fp_move_double (operands)
1763 rtx *operands;
1765 if (FP_REG_P (operands[0]))
1767 if (FP_REG_P (operands[1])
1768 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1769 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1770 else
1771 output_asm_insn ("fldd%F1 %1,%0", operands);
1773 else if (FP_REG_P (operands[1]))
1775 output_asm_insn ("fstd%F0 %1,%0", operands);
1777 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1779 if (GET_CODE (operands[0]) == REG)
1781 rtx xoperands[2];
1782 xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1783 xoperands[0] = operands[0];
1784 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1786 /* This is a pain. You have to be prepared to deal with an
1787 arbitrary address here including pre/post increment/decrement.
1789 so avoid this in the MD. */
1790 else
1791 abort ();
1793 else abort ();
1794 return "";
1797 /* Return a REG that occurs in ADDR with coefficient 1.
1798 ADDR can be effectively incremented by incrementing REG. */
1800 static rtx
1801 find_addr_reg (addr)
1802 rtx addr;
1804 while (GET_CODE (addr) == PLUS)
1806 if (GET_CODE (XEXP (addr, 0)) == REG)
1807 addr = XEXP (addr, 0);
1808 else if (GET_CODE (XEXP (addr, 1)) == REG)
1809 addr = XEXP (addr, 1);
1810 else if (CONSTANT_P (XEXP (addr, 0)))
1811 addr = XEXP (addr, 1);
1812 else if (CONSTANT_P (XEXP (addr, 1)))
1813 addr = XEXP (addr, 0);
1814 else
1815 abort ();
1817 if (GET_CODE (addr) == REG)
1818 return addr;
1819 abort ();
1822 /* Emit code to perform a block move.
1824 OPERANDS[0] is the destination pointer as a REG, clobbered.
1825 OPERANDS[1] is the source pointer as a REG, clobbered.
1826 OPERANDS[2] is a register for temporary storage.
1827 OPERANDS[4] is the size as a CONST_INT
1828 OPERANDS[3] is a register for temporary storage.
1829 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */
1832 char *
1833 output_block_move (operands, size_is_constant)
1834 rtx *operands;
1835 int size_is_constant;
1837 int align = INTVAL (operands[5]);
1838 unsigned long n_bytes = INTVAL (operands[4]);
1840 /* We can't move more than four bytes at a time because the PA
1841 has no longer integer move insns. (Could use fp mem ops?) */
1842 if (align > 4)
1843 align = 4;
1845 /* Note that we know each loop below will execute at least twice
1846 (else we would have open-coded the copy). */
1847 switch (align)
1849 case 4:
1850 /* Pre-adjust the loop counter. */
1851 operands[4] = GEN_INT (n_bytes - 8);
1852 output_asm_insn ("ldi %4,%2", operands);
1854 /* Copying loop. */
1855 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1856 output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
1857 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1858 output_asm_insn ("addib,>= -8,%2,.-12", operands);
1859 output_asm_insn ("stws,ma %6,4(0,%0)", operands);
1861 /* Handle the residual. There could be up to 7 bytes of
1862 residual to copy! */
1863 if (n_bytes % 8 != 0)
1865 operands[4] = GEN_INT (n_bytes % 4);
1866 if (n_bytes % 8 >= 4)
1867 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1868 if (n_bytes % 4 != 0)
1869 output_asm_insn ("ldw 0(0,%1),%6", operands);
1870 if (n_bytes % 8 >= 4)
1871 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1872 if (n_bytes % 4 != 0)
1873 output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
1875 return "";
1877 case 2:
1878 /* Pre-adjust the loop counter. */
1879 operands[4] = GEN_INT (n_bytes - 4);
1880 output_asm_insn ("ldi %4,%2", operands);
1882 /* Copying loop. */
1883 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1884 output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
1885 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1886 output_asm_insn ("addib,>= -4,%2,.-12", operands);
1887 output_asm_insn ("sths,ma %6,2(0,%0)", operands);
1889 /* Handle the residual. */
1890 if (n_bytes % 4 != 0)
1892 if (n_bytes % 4 >= 2)
1893 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1894 if (n_bytes % 2 != 0)
1895 output_asm_insn ("ldb 0(0,%1),%6", operands);
1896 if (n_bytes % 4 >= 2)
1897 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1898 if (n_bytes % 2 != 0)
1899 output_asm_insn ("stb %6,0(0,%0)", operands);
1901 return "";
1903 case 1:
1904 /* Pre-adjust the loop counter. */
1905 operands[4] = GEN_INT (n_bytes - 2);
1906 output_asm_insn ("ldi %4,%2", operands);
1908 /* Copying loop. */
1909 output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1910 output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
1911 output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
1912 output_asm_insn ("addib,>= -2,%2,.-12", operands);
1913 output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
1915 /* Handle the residual. */
1916 if (n_bytes % 2 != 0)
1918 output_asm_insn ("ldb 0(0,%1),%3", operands);
1919 output_asm_insn ("stb %3,0(0,%0)", operands);
1921 return "";
1923 default:
1924 abort ();
1928 /* Count the number of insns necessary to handle this block move.
1930 Basic structure is the same as emit_block_move, except that we
1931 count insns rather than emit them. */
1934 compute_movstrsi_length (insn)
1935 rtx insn;
1937 rtx pat = PATTERN (insn);
1938 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1939 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1940 unsigned int n_insns = 0;
1942 /* We can't move more than four bytes at a time because the PA
1943 has no longer integer move insns. (Could use fp mem ops?) */
1944 if (align > 4)
1945 align = 4;
1947 /* The basic copying loop. */
1948 n_insns = 6;
1950 /* Residuals. */
1951 if (n_bytes % (2 * align) != 0)
1953 if ((n_bytes % (2 * align)) >= align)
1954 n_insns += 2;
1956 if ((n_bytes % align) != 0)
1957 n_insns += 2;
1960 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
1961 return n_insns * 4;
1965 char *
1966 output_and (operands)
1967 rtx *operands;
1969 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1971 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
1972 int ls0, ls1, ms0, p, len;
1974 for (ls0 = 0; ls0 < 32; ls0++)
1975 if ((mask & (1 << ls0)) == 0)
1976 break;
1978 for (ls1 = ls0; ls1 < 32; ls1++)
1979 if ((mask & (1 << ls1)) != 0)
1980 break;
1982 for (ms0 = ls1; ms0 < 32; ms0++)
1983 if ((mask & (1 << ms0)) == 0)
1984 break;
1986 if (ms0 != 32)
1987 abort();
1989 if (ls1 == 32)
1991 len = ls0;
1993 if (len == 0)
1994 abort ();
1996 operands[2] = GEN_INT (len);
1997 return "extru %1,31,%2,%0";
1999 else
2001 /* We could use this `depi' for the case above as well, but `depi'
2002 requires one more register file access than an `extru'. */
2004 p = 31 - ls0;
2005 len = ls1 - ls0;
2007 operands[2] = GEN_INT (p);
2008 operands[3] = GEN_INT (len);
2009 return "depi 0,%2,%3,%0";
2012 else
2013 return "and %1,%2,%0";
2016 char *
2017 output_ior (operands)
2018 rtx *operands;
2020 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2021 int bs0, bs1, p, len;
2023 if (INTVAL (operands[2]) == 0)
2024 return "copy %1,%0";
2026 for (bs0 = 0; bs0 < 32; bs0++)
2027 if ((mask & (1 << bs0)) != 0)
2028 break;
2030 for (bs1 = bs0; bs1 < 32; bs1++)
2031 if ((mask & (1 << bs1)) == 0)
2032 break;
2034 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2035 abort();
2037 p = 31 - bs0;
2038 len = bs1 - bs0;
2040 operands[2] = GEN_INT (p);
2041 operands[3] = GEN_INT (len);
2042 return "depi -1,%2,%3,%0";
2045 /* Output an ascii string. */
2046 void
2047 output_ascii (file, p, size)
2048 FILE *file;
2049 unsigned char *p;
2050 int size;
2052 int i;
2053 int chars_output;
2054 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2056 /* The HP assembler can only take strings of 256 characters at one
2057 time. This is a limitation on input line length, *not* the
2058 length of the string. Sigh. Even worse, it seems that the
2059 restriction is in number of input characters (see \xnn &
2060 \whatever). So we have to do this very carefully. */
2062 fputs ("\t.STRING \"", file);
2064 chars_output = 0;
2065 for (i = 0; i < size; i += 4)
2067 int co = 0;
2068 int io = 0;
2069 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2071 register unsigned int c = p[i + io];
2073 if (c == '\"' || c == '\\')
2074 partial_output[co++] = '\\';
2075 if (c >= ' ' && c < 0177)
2076 partial_output[co++] = c;
2077 else
2079 unsigned int hexd;
2080 partial_output[co++] = '\\';
2081 partial_output[co++] = 'x';
2082 hexd = c / 16 - 0 + '0';
2083 if (hexd > '9')
2084 hexd -= '9' - 'a' + 1;
2085 partial_output[co++] = hexd;
2086 hexd = c % 16 - 0 + '0';
2087 if (hexd > '9')
2088 hexd -= '9' - 'a' + 1;
2089 partial_output[co++] = hexd;
2092 if (chars_output + co > 243)
2094 fputs ("\"\n\t.STRING \"", file);
2095 chars_output = 0;
2097 fwrite (partial_output, 1, co, file);
2098 chars_output += co;
2099 co = 0;
2101 fputs ("\"\n", file);
2104 /* Try to rewrite floating point comparisons & branches to avoid
2105 useless add,tr insns.
2107 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2108 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2109 first attempt to remove useless add,tr insns. It is zero
2110 for the second pass as reorg sometimes leaves bogus REG_DEAD
2111 notes lying around.
2113 When CHECK_NOTES is zero we can only eliminate add,tr insns
2114 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2115 instructions. */
2116 void
2117 remove_useless_addtr_insns (insns, check_notes)
2118 rtx insns;
2119 int check_notes;
2121 rtx insn;
2122 int all;
2123 static int pass = 0;
2125 /* This is fairly cheap, so always run it when optimizing. */
2126 if (optimize > 0)
2128 int fcmp_count = 0;
2129 int fbranch_count = 0;
2131 /* Walk all the insns in this function looking for fcmp & fbranch
2132 instructions. Keep track of how many of each we find. */
2133 insns = get_insns ();
2134 for (insn = insns; insn; insn = next_insn (insn))
2136 rtx tmp;
2138 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2139 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2140 continue;
2142 tmp = PATTERN (insn);
2144 /* It must be a set. */
2145 if (GET_CODE (tmp) != SET)
2146 continue;
2148 /* If the destination is CCFP, then we've found an fcmp insn. */
2149 tmp = SET_DEST (tmp);
2150 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2152 fcmp_count++;
2153 continue;
2156 tmp = PATTERN (insn);
2157 /* If this is an fbranch instruction, bump the fbranch counter. */
2158 if (GET_CODE (tmp) == SET
2159 && SET_DEST (tmp) == pc_rtx
2160 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2161 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2162 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2163 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2165 fbranch_count++;
2166 continue;
2171 /* Find all floating point compare + branch insns. If possible,
2172 reverse the comparison & the branch to avoid add,tr insns. */
2173 for (insn = insns; insn; insn = next_insn (insn))
2175 rtx tmp, next;
2177 /* Ignore anything that isn't an INSN. */
2178 if (GET_CODE (insn) != INSN)
2179 continue;
2181 tmp = PATTERN (insn);
2183 /* It must be a set. */
2184 if (GET_CODE (tmp) != SET)
2185 continue;
2187 /* The destination must be CCFP, which is register zero. */
2188 tmp = SET_DEST (tmp);
2189 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2190 continue;
2192 /* INSN should be a set of CCFP.
2194 See if the result of this insn is used in a reversed FP
2195 conditional branch. If so, reverse our condition and
2196 the branch. Doing so avoids useless add,tr insns. */
2197 next = next_insn (insn);
2198 while (next)
2200 /* Jumps, calls and labels stop our search. */
2201 if (GET_CODE (next) == JUMP_INSN
2202 || GET_CODE (next) == CALL_INSN
2203 || GET_CODE (next) == CODE_LABEL)
2204 break;
2206 /* As does another fcmp insn. */
2207 if (GET_CODE (next) == INSN
2208 && GET_CODE (PATTERN (next)) == SET
2209 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2210 && REGNO (SET_DEST (PATTERN (next))) == 0)
2211 break;
2213 next = next_insn (next);
2216 /* Is NEXT_INSN a branch? */
2217 if (next
2218 && GET_CODE (next) == JUMP_INSN)
2220 rtx pattern = PATTERN (next);
2222 /* If it a reversed fp conditional branch (eg uses add,tr)
2223 and CCFP dies, then reverse our conditional and the branch
2224 to avoid the add,tr. */
2225 if (GET_CODE (pattern) == SET
2226 && SET_DEST (pattern) == pc_rtx
2227 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2228 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2229 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2230 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2231 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2232 && (fcmp_count == fbranch_count
2233 || (check_notes
2234 && find_regno_note (next, REG_DEAD, 0))))
2236 /* Reverse the branch. */
2237 tmp = XEXP (SET_SRC (pattern), 1);
2238 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2239 XEXP (SET_SRC (pattern), 2) = tmp;
2240 INSN_CODE (next) = -1;
2242 /* Reverse our condition. */
2243 tmp = PATTERN (insn);
2244 PUT_CODE (XEXP (tmp, 1),
2245 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2251 pass = !pass;
2255 /* You may have trouble believing this, but this is the HP-PA stack
2256 layout. Wow.
2258 Offset Contents
2260 Variable arguments (optional; any number may be allocated)
2262 SP-(4*(N+9)) arg word N
2264 SP-56 arg word 5
2265 SP-52 arg word 4
2267 Fixed arguments (must be allocated; may remain unused)
2269 SP-48 arg word 3
2270 SP-44 arg word 2
2271 SP-40 arg word 1
2272 SP-36 arg word 0
2274 Frame Marker
2276 SP-32 External Data Pointer (DP)
2277 SP-28 External sr4
2278 SP-24 External/stub RP (RP')
2279 SP-20 Current RP
2280 SP-16 Static Link
2281 SP-12 Clean up
2282 SP-8 Calling Stub RP (RP'')
2283 SP-4 Previous SP
2285 Top of Frame
2287 SP-0 Stack Pointer (points to next available address)
2291 /* This function saves registers as follows. Registers marked with ' are
2292 this function's registers (as opposed to the previous function's).
2293 If a frame_pointer isn't needed, r4 is saved as a general register;
2294 the space for the frame pointer is still allocated, though, to keep
2295 things simple.
2298 Top of Frame
2300 SP (FP') Previous FP
2301 SP + 4 Alignment filler (sigh)
2302 SP + 8 Space for locals reserved here.
2306 SP + n All call saved register used.
2310 SP + o All call saved fp registers used.
2314 SP + p (SP') points to next available address.
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const pattern.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.

   REG, DISP and BASE are plain integers -- a source register number, a
   byte displacement, and a base register number -- not rtl objects.  */
static void
store_reg (reg, disp, base)
     int reg, disp, base;
{
  if (VAL_14_BITS_P (disp))
    /* The displacement fits in a 14-bit immediate field, so a single
       store using a reg+d address will do.  */
    emit_move_insn (gen_rtx (MEM, SImode,
                             gen_rtx (PLUS, SImode,
                                      gen_rtx (REG, SImode, base),
                                      GEN_INT (disp))),
                    gen_rtx (REG, SImode, reg));
  else
    {
      /* Large displacement: put the high part of BASE+DISP into %r1
         via the add_high_const pattern, then store through a LO_SUM
         address built from %r1.  */
      emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
                                     gen_rtx (REG, SImode, base),
                                     GEN_INT (disp)));
      emit_move_insn (gen_rtx (MEM, SImode,
                               gen_rtx (LO_SUM, SImode,
                                        gen_rtx (REG, SImode, 1),
                                        GEN_INT (disp))),
                      gen_rtx (REG, SImode, reg));
    }
}
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const pattern.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.

   REG, DISP and BASE are plain integers -- a destination register
   number, a byte displacement, and a base register number.  */
static void
load_reg (reg, disp, base)
     int reg, disp, base;
{
  if (VAL_14_BITS_P (disp))
    /* The displacement fits in a 14-bit immediate field, so a single
       load using a reg+d address will do.  */
    emit_move_insn (gen_rtx (REG, SImode, reg),
                    gen_rtx (MEM, SImode,
                             gen_rtx (PLUS, SImode,
                                      gen_rtx (REG, SImode, base),
                                      GEN_INT (disp))));
  else
    {
      /* Large displacement: put the high part of BASE+DISP into %r1
         via the add_high_const pattern, then load through a LO_SUM
         address built from %r1.  */
      emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
                                     gen_rtx (REG, SImode, base),
                                     GEN_INT (disp)));
      emit_move_insn (gen_rtx (REG, SImode, reg),
                      gen_rtx (MEM, SImode,
                               gen_rtx (LO_SUM, SImode,
                                        gen_rtx (REG, SImode, 1),
                                        GEN_INT (disp))));
    }
}
/* Emit RTL to set REG to the value specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const pattern.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.

   REG, BASE and DISP are plain integers (register numbers and a byte
   displacement), not rtl.  */
static void
set_reg_plus_d(reg, base, disp)
     int reg, base, disp;
{
  if (VAL_14_BITS_P (disp))
    /* DISP fits in a 14-bit immediate; one add suffices.  */
    emit_move_insn (gen_rtx (REG, SImode, reg),
                    gen_rtx (PLUS, SImode,
                             gen_rtx (REG, SImode, base),
                             GEN_INT (disp)));
  else
    {
      /* Large displacement: build the high part in %r1, then form
         the final value with a LO_SUM.  */
      emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
                                     gen_rtx (REG, SImode, base),
                                     GEN_INT (disp)));
      emit_move_insn (gen_rtx (REG, SImode, reg),
                      gen_rtx (LO_SUM, SImode,
                               gen_rtx (REG, SImode, 1),
                               GEN_INT (disp)));
    }
}
/* Global variables set by FUNCTION_PROLOGUE.  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static int actual_fsize;
/* local_fsize is the size of the function's local storage (plus the
   frame pointer/filler space when a frame exists); save_fregs is
   nonzero when any callee-saved fp register must be saved.  Both are
   set by hppa_expand_prologue and read by hppa_expand_epilogue.  */
static int local_fsize, save_fregs;
/* Compute the size in bytes of the stack frame for the current
   function, rounded up to a 64-byte boundary.  SIZE is the size of
   the function's local variables.  If FREGS_LIVE is nonzero,
   *FREGS_LIVE is set to 1 when any callee-saved floating point
   register must be saved.  */
int
compute_frame_size (size, fregs_live)
     int size;
     int *fregs_live;
{
  extern int current_function_outgoing_args_size;
  int i, fsize;

  /* 8 is space for frame pointer + filler. If any frame is allocated
     we need to add this in because of STARTING_FRAME_OFFSET. */
  fsize = size + (size || frame_pointer_needed ? 8 : 0);

  /* We must leave enough space for all the callee saved registers
     from 3 .. highest used callee save register since we don't
     know if we're going to have an inline or out of line prologue
     and epilogue.  */
  for (i = 18; i >= 3; i--)
    if (regs_ever_live[i])
      {
        /* Registers %r3 .. %r_i, 4 bytes each.  */
        fsize += 4 * (i - 2);
        break;
      }

  /* Round the stack.  */
  fsize = (fsize + 7) & ~7;

  /* Likewise leave space for all the callee saved floating point
     registers (scanned in pairs from the highest-numbered pair
     66/67 down to 48/49).  */
  for (i = 66; i >= 48; i -= 2)
    if (regs_ever_live[i] || regs_ever_live[i + 1])
      {
        if (fregs_live)
          *fregs_live = 1;

        /* 4 bytes for each hard register in 48 .. i+1.  */
        fsize += 4 * (i - 46);
        break;
      }

  fsize += current_function_outgoing_args_size;
  /* Allow for the 32-byte frame marker unless this is a leaf
     function with no frame at all.  */
  if (! leaf_function_p () || fsize)
    fsize += 32;
  return (fsize + 63) & ~63;
}
/* SYMBOL_REF and name buffer for the per-function profiling label.
   The name is written by ASM_GENERATE_INTERNAL_LABEL/sprintf with the
   "LP$%04d" shape in hppa_expand_prologue.  NOTE(review): 8 bytes only
   fits "LP$" plus a 4-digit number and the terminating NUL -- verify
   label numbers cannot exceed four digits.  */
rtx hp_profile_label_rtx;
static char hp_profile_label_name[8];
/* Output the function entry assembler directives (label, .PROC,
   .CALLINFO, .ENTRY) for the current function to FILE, and update the
   running total of code bytes.  SIZE is the frame size passed in by
   the FUNCTION_PROLOGUE macro; it is unused here -- actual_fsize
   computed by hppa_expand_prologue is used instead.  */
void
output_function_prologue (file, size)
     FILE *file;
     int size;
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
  if (regs_ever_live[2] || profile_flag)
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  /* Horrid hack.  emit_function_prologue will modify this RTL in
     place to get the expected results.  */
  if (profile_flag)
    ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
                                 hp_profile_labelno);

  /* If we're using GAS and not using the portable runtime model, then
     we don't need to accumulate the total number of code bytes.  */
  if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
    total_code_bytes = 0;
  else if (insn_addresses)
    {
      unsigned int old_total = total_code_bytes;

      total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
      total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;

      /* Be prepared to handle overflows.  */
      total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
    }
  else
    total_code_bytes = -1;

  /* Remove useless add,tr fp compare/branch sequences now that final
     insn lengths are known (second argument 0 => REG_DEAD notes are
     not consulted).  */
  remove_useless_addtr_insns (get_insns (), 0);
}
/* Expand RTL for the prologue of the current function: save the
   return pointer, allocate the stack frame (possibly via the
   out-of-line prologue when optimizing for space), save callee-saved
   general and floating point registers, and emit profiling support
   code when enabled.  Sets the file-static actual_fsize, local_fsize,
   save_fregs, gr_saved and fr_saved used later by
   output_function_prologue and hppa_expand_epilogue.  */
void
hppa_expand_prologue()
{
  extern char call_used_regs[];
  int size = get_frame_size ();
  int merge_sp_adjust_with_store = 0;
  int i, offset;
  rtx tmpreg, size_rtx;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;
  local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx (REG, SImode, 1);
  size_rtx = GEN_INT (actual_fsize);

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE)
    {
      rtx operands[2];
      int saves = 0;
      int outline_insn_count = 0;
      int inline_insn_count = 0;

      /* Count the number of insns for the inline and out of line
         variants so we can choose one appropriately.

         No need to screw with counting actual_fsize operations -- they're
         done for both inline and out of line prologues.  */
      if (regs_ever_live[2])
        inline_insn_count += 1;

      if (! cint_ok_for_move (local_fsize))
        outline_insn_count += 2;
      else
        outline_insn_count += 1;

      /* Put the register save info into %r22.  */
      for (i = 18; i >= 3; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            /* -1 because the stack adjustment is normally done in
               the same insn as a register save.  */
            inline_insn_count += (i - 2) - 1;
            saves = i;
            break;
          }

      for (i = 66; i >= 48; i -= 2)
        if (regs_ever_live[i] || regs_ever_live[i + 1])
          {
            /* +1 needed as we load %r1 with the start of the freg
               save area.  */
            inline_insn_count += (i/2 - 23) + 1;
            saves |= ((i/2 - 12 ) << 16);
            break;
          }

      if (frame_pointer_needed)
        inline_insn_count += 3;

      if (! cint_ok_for_move (saves))
        outline_insn_count += 2;
      else
        outline_insn_count += 1;

      if (TARGET_PORTABLE_RUNTIME)
        outline_insn_count += 2;
      else
        outline_insn_count += 1;

      /* If there's a lot of insns in the prologue, then do it as
         an out-of-line sequence.  */
      if (inline_insn_count > outline_insn_count)
        {
          /* Put the local_fsize into %r19.  */
          operands[0] = gen_rtx (REG, SImode, 19);
          operands[1] = GEN_INT (local_fsize);
          emit_move_insn (operands[0], operands[1]);

          /* Put the stack size into %r21.  */
          operands[0] = gen_rtx (REG, SImode, 21);
          operands[1] = size_rtx;
          emit_move_insn (operands[0], operands[1]);

          /* Put the register save info into %r22.  */
          operands[0] = gen_rtx (REG, SImode, 22);
          operands[1] = GEN_INT (saves);
          emit_move_insn (operands[0], operands[1]);

          /* Now call the out-of-line prologue.  */
          emit_insn (gen_outline_prologue_call ());
          emit_insn (gen_blockage ());

          /* Note that we're using an out-of-line prologue.  */
          out_of_line_prologue_epilogue = 1;
          return;
        }
    }

  out_of_line_prologue_epilogue = 0;

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp-20.  */
  if (regs_ever_live[2] || profile_flag)
    store_reg (2, -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize)
    if (frame_pointer_needed)
      {
        /* Copy the old frame pointer temporarily into %r1.  Set up the
           new stack pointer, then store away the saved old frame pointer
           into the stack at sp+actual_fsize and at the same time update
           the stack pointer by actual_fsize bytes.  Two versions, first
           handles small (<8k) frames.  The second handles large (>8k)
           frames.  */
        emit_move_insn (tmpreg, frame_pointer_rtx);
        emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
        if (VAL_14_BITS_P (actual_fsize))
          emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
        else
          {
            /* It is incorrect to store the saved frame pointer at *sp,
               then increment sp (writes beyond the current stack boundary).

               So instead use stwm to store at *sp and post-increment the
               stack pointer as an atomic operation.  Then increment sp to
               finish allocating the new frame.  */
            emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
            set_reg_plus_d (STACK_POINTER_REGNUM,
                            STACK_POINTER_REGNUM,
                            actual_fsize - 64);
          }
      }
    /* no frame pointer needed.  */
    else
      {
        /* In some cases we can perform the first callee register save
           and allocating the stack frame at the same time.   If so, just
           make a note of it and defer allocating the frame until saving
           the callee registers.  */
        if (VAL_14_BITS_P (-actual_fsize)
            && local_fsize == 0
            && ! profile_flag
            && ! flag_pic)
          merge_sp_adjust_with_store = 1;
        /* Can not optimize.  Adjust the stack frame by actual_fsize bytes.  */
        else if (actual_fsize != 0)
          set_reg_plus_d (STACK_POINTER_REGNUM,
                          STACK_POINTER_REGNUM,
                          actual_fsize);
      }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)  when
     generating PIC code.  FIXME:  What is the correct thing to do
     for functions which make no calls and allocate no frame?  Do
     we need to allocate a frame, or can we just omit the save?   For
     now we'll just omit the save.  */
  if (actual_fsize != 0 && flag_pic)
    store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);

  /* Profiling code.

     Instead of taking one argument, the counter label, as most normal
     mcounts do, _mcount appears to behave differently on the HPPA.  It
     takes the return address of the caller, the address of this routine,
     and the address of the label.  Also, it isn't magic, so
     argument registers have to be preserved.  */
  if (profile_flag)
    {
      int pc_offset, i, arg_offset, basereg, offsetadj;

      pc_offset = 4 + (frame_pointer_needed
                       ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
                       : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));

      /* When the function has a frame pointer, use it as the base
         register for saving/restore registers.  Else use the stack
         pointer.  Adjust the offset according to the frame size if
         this function does not have a frame pointer.  */
      basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
                                     : STACK_POINTER_REGNUM;
      offsetadj = frame_pointer_needed ? 0 : actual_fsize;

      /* Horrid hack.  emit_function_prologue will modify this RTL in
         place to get the expected results.   sprintf here is just to
         put something in the name.  */
      sprintf(hp_profile_label_name, "LP$%04d", -1);
      hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
                                      hp_profile_label_name);
      if (current_function_returns_struct)
        store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);

      /* Save the argument registers %r23 .. %r26 around the call.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
        if (regs_ever_live [i])
          {
            store_reg (i, arg_offset, basereg);
            /* Deal with arg_offset not fitting in 14 bits.  */
            pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
          }

      emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
      emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
      emit_move_insn (gen_rtx (REG, SImode, 24),
                      gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
      /* %r25 is set from within the output pattern.  */
      emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));

      /* Restore argument registers.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
        if (regs_ever_live [i])
          load_reg (i, arg_offset, basereg);

      if (current_function_returns_struct)
        load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            store_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += 4;
            gr_saved++;
          }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            /* If merge_sp_adjust_with_store is nonzero, then we can
               optimize the first GR save.  */
            if (merge_sp_adjust_with_store)
              {
                merge_sp_adjust_with_store = 0;
                emit_insn (gen_post_stwm (stack_pointer_rtx,
                                          gen_rtx (REG, SImode, i),
                                          GEN_INT (-offset)));
              }
            else
              store_reg (i, offset, STACK_POINTER_REGNUM);
            offset += 4;
            gr_saved++;
          }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
         did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
        set_reg_plus_d (STACK_POINTER_REGNUM,
                        STACK_POINTER_REGNUM,
                        actual_fsize);
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      /* First get the frame or stack pointer to the start of the FP register
         save area.  */
      if (frame_pointer_needed)
        set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
        set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Now actually save the FP registers, post-incrementing %r1
         (tmpreg) through the save area.  */
      for (i = 66; i >= 48; i -= 2)
        {
          if (regs_ever_live[i] || regs_ever_live[i + 1])
            {
              emit_move_insn (gen_rtx (MEM, DFmode,
                                       gen_rtx (POST_INC, DFmode, tmpreg)),
                              gen_rtx (REG, DFmode, i));
              fr_saved++;
            }
        }
    }

  /* When generating PIC code it is necessary to save/restore the
     PIC register around each function call.  We used to do this
     in the call patterns themselves, but that implementation
     made incorrect assumptions about using global variables to hold
     per-function rtl code generated in the backend.

     So instead, we copy the PIC register into a reserved callee saved
     register in the prologue.  Then after each call we reload the PIC
     register from the callee saved register.  We also reload the PIC
     register from the callee saved register in the epilogue ensure the
     PIC register is valid at function exit.

     This may (depending on the exact characteristics of the function)
     even be more efficient.

     Avoid this if the callee saved register wasn't used (these are
     leaf functions).  */
  if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
    emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
                    gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
}
/* Output the function exit assembler directives (.EXIT/.PROCEND) to
   FILE and emit any deferred plabels.  SIZE is passed in by the
   FUNCTION_EPILOGUE macro and is unused here.  */
void
output_function_epilogue (file, size)
     FILE *file;
     int size;
{
  rtx insn = get_last_insn ();
  int i;

  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    fputs ("\tnop\n", file);

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  /* If we have deferred plabels, then we need to switch into the data
     section and align it to a 4 byte boundary before we output the
     deferred plabels.  */
  if (n_deferred_plabels)
    {
      data_section ();
      ASM_OUTPUT_ALIGN (file, 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol, 4, 1);
    }
  /* Reset the count so the plabels are only emitted once.  */
  n_deferred_plabels = 0;
}
/* Expand RTL for the epilogue of the current function: restore the
   return pointer and the callee-saved general/fp registers, then
   deallocate the stack frame.  Mirrors hppa_expand_prologue and uses
   the out-of-line epilogue when the prologue was emitted out of
   line.  Relies on the file-statics actual_fsize, local_fsize and
   save_fregs set by hppa_expand_prologue.  */
void
hppa_expand_epilogue ()
{
  rtx tmpreg;
  int offset,i;
  int merge_sp_adjust_with_load = 0;

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE && out_of_line_prologue_epilogue)
    {
      int saves = 0;
      rtx operands[2];

      /* Put the register save info into %r22.  */
      for (i = 18; i >= 3; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            saves = i;
            break;
          }

      for (i = 66; i >= 48; i -= 2)
        if (regs_ever_live[i] || regs_ever_live[i + 1])
          {
            saves |= ((i/2 - 12 ) << 16);
            break;
          }

      emit_insn (gen_blockage ());

      /* Put the local_fsize into %r19.  */
      operands[0] = gen_rtx (REG, SImode, 19);
      operands[1] = GEN_INT (local_fsize);
      emit_move_insn (operands[0], operands[1]);

      /* Put the stack size into %r21.  */
      operands[0] = gen_rtx (REG, SImode, 21);
      operands[1] = GEN_INT (actual_fsize);
      emit_move_insn (operands[0], operands[1]);

      /* Put the register save info into %r22.  */
      operands[0] = gen_rtx (REG, SImode, 22);
      operands[1] = GEN_INT (saves);
      emit_move_insn (operands[0], operands[1]);

      /* Now call the out-of-line epilogue.  */
      emit_insn (gen_outline_epilogue_call ());
      return;
    }

  /* We will use this often.  */
  tmpreg = gen_rtx (REG, SImode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (frame_pointer_needed
      && (regs_ever_live [2] || profile_flag))
    load_reg (2, -20, FRAME_POINTER_REGNUM);

  /* No frame pointer, and stack is smaller than 8k.  */
  else if (! frame_pointer_needed
           && VAL_14_BITS_P (actual_fsize + 20)
           && (regs_ever_live[2] || profile_flag))
    load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            load_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += 4;
          }
    }
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              /* Only for the first load.
                 merge_sp_adjust_with_load holds the register load
                 with which we will merge the sp adjustment.  */
              if (VAL_14_BITS_P (actual_fsize + 20)
                  && local_fsize == 0
                  && ! merge_sp_adjust_with_load)
                merge_sp_adjust_with_load = i;
              else
                load_reg (i, offset, STACK_POINTER_REGNUM);
              offset += 4;
            }
        }
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
        set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
        set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Actually do the restores now, post-incrementing %r1 (tmpreg)
         through the save area just as the prologue did.  */
      for (i = 66; i >= 48; i -= 2)
        {
          if (regs_ever_live[i] || regs_ever_live[i + 1])
            emit_move_insn (gen_rtx (REG, DFmode, i),
                            gen_rtx (MEM, DFmode,
                                     gen_rtx (POST_INC, DFmode, tmpreg)));
        }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());
  /* No frame pointer, but we have a stack greater than 8k.  We restore
     %r2 very late in this case.  (All other cases are restored as early
     as possible.)  */
  if (! frame_pointer_needed
      && ! VAL_14_BITS_P (actual_fsize + 20)
      && (regs_ever_live[2] || profile_flag))
    {
      set_reg_plus_d (STACK_POINTER_REGNUM,
                      STACK_POINTER_REGNUM,
                      - actual_fsize);

      /* This used to try and be clever by not depending on the value in
         %r30 and instead use the value held in %r1 (so that the 2nd insn
         which sets %r30 could be put in the delay slot of the return insn).

         That won't work since if the stack is exactly 8k set_reg_plus_d
         doesn't set %r1, just %r30.  */
      load_reg (2, - 20, STACK_POINTER_REGNUM);
    }
  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  else if (frame_pointer_needed)
    {
      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
      emit_insn (gen_pre_ldwm (frame_pointer_rtx,
                               stack_pointer_rtx,
                               GEN_INT (-64)));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (! frame_pointer_needed && merge_sp_adjust_with_load)
    emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
                                      merge_sp_adjust_with_load),
                             stack_pointer_rtx,
                             GEN_INT (- actual_fsize)));
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM,
                    STACK_POINTER_REGNUM,
                    - actual_fsize);
}
3057 /* Fetch the return address for the frame COUNT steps up from
3058 the current frame, after the prologue. FRAMEADDR is the
3059 frame pointer of the COUNT frame.
3061 We want to ignore any export stub remnants here.
3063 The value returned is used in two different ways:
3065 1. To find a function's caller.
3067 2. To change the return address for a function.
3069 This function handles most instances of case 1; however, it will
3070 fail if there are two levels of stubs to execute on the return
3071 path. The only way I believe that can happen is if the return value
3072 needs a parameter relocation, which never happens for C code.
3074 This function handles most instances of case 2; however, it will
3075 fail if we did not originally have stub code on the return path
3076 but will need code on the new return path. This can happen if
3077 the caller & callee are both in the main program, but the new
3078 return location is in a shared library.
   To handle this correctly we need to set the return pointer at
   frame-20 to point to a return stub, and frame-24 to point to the
   location we wish to return to.  */
/* See the long comment above for the full contract.  COUNT is ignored
   by this implementation; only the frame addressed by FRAMEADDR is
   examined.  Returns a MEM referencing the slot holding the real
   return address.  */
rtx
return_addr_rtx (count, frameaddr)
     int count;
     rtx frameaddr;
{
  rtx label;
  rtx saved_rp;
  rtx ins;

  saved_rp = gen_reg_rtx (Pmode);

  /* First, we start off with the normal return address pointer from
     -20[frameaddr].  */
  emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx (AND, Pmode,
                              copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
                              MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub:

        0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
        0x004010a1 | stub+12:  ldsid (sr0,rp),r1
        0x00011820 | stub+16:  mtsp r1,sr0
        0xe0400002 | stub+20:  be,n 0(sr0,rp)

     If it is an export stub, then our return address is really in
     -24[frameaddr].  Any word that fails to match branches to LABEL,
     keeping the -20[frameaddr] guess.  */
  emit_cmp_insn (gen_rtx (MEM, SImode, ins),
                 GEN_INT (0x4bc23fd1),
                 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
                 GEN_INT (0x004010a1),
                 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
                 GEN_INT (0x00011820),
                 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
                 GEN_INT (0xe0400002),
                 NE, NULL_RTX, SImode, 1, 0);

  /* If there is no export stub then just use our initial guess of
     -20[frameaddr].  */
  emit_jump_insn (gen_bne (label));

  /* Here we know that our return address pointer points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address that leads back into user code.
     That return address is stored at -24[frameaddr].  */
  emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));

  emit_label (label);
  return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp));
}
3155 /* This is only valid once reload has completed because it depends on
3156 knowing exactly how much (if any) frame there is and...
3158 It's only valid if there is no frame marker to de-allocate and...
3160 It's only valid if %r2 hasn't been saved into the caller's frame
3161 (we're not profiling and %r2 isn't live anywhere). */
3163 hppa_can_use_return_insn_p ()
3165 return (reload_completed
3166 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3167 && ! profile_flag
3168 && ! regs_ever_live[2]
3169 && ! frame_pointer_needed);
3172 void
3173 emit_bcond_fp (code, operand0)
3174 enum rtx_code code;
3175 rtx operand0;
3177 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3178 gen_rtx (IF_THEN_ELSE, VOIDmode,
3179 gen_rtx (code, VOIDmode,
3180 gen_rtx (REG, CCFPmode, 0),
3181 const0_rtx),
3182 gen_rtx (LABEL_REF, VOIDmode, operand0),
3183 pc_rtx)));
3188 gen_cmp_fp (code, operand0, operand1)
3189 enum rtx_code code;
3190 rtx operand0, operand1;
3192 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3193 gen_rtx (code, CCFPmode, operand0, operand1));
3196 /* Adjust the cost of a scheduling dependency. Return the new cost of
3197 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3200 pa_adjust_cost (insn, link, dep_insn, cost)
3201 rtx insn;
3202 rtx link;
3203 rtx dep_insn;
3204 int cost;
3206 if (! recog_memoized (insn))
3207 return 0;
3209 if (REG_NOTE_KIND (link) == 0)
3211 /* Data dependency; DEP_INSN writes a register that INSN reads some
3212 cycles later. */
3214 if (get_attr_type (insn) == TYPE_FPSTORE)
3216 rtx pat = PATTERN (insn);
3217 rtx dep_pat = PATTERN (dep_insn);
3218 if (GET_CODE (pat) == PARALLEL)
3220 /* This happens for the fstXs,mb patterns. */
3221 pat = XVECEXP (pat, 0, 0);
3223 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3224 /* If this happens, we have to extend this to schedule
3225 optimally. Return 0 for now. */
3226 return 0;
3228 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3230 if (! recog_memoized (dep_insn))
3231 return 0;
3232 /* DEP_INSN is writing its result to the register
3233 being stored in the fpstore INSN. */
3234 switch (get_attr_type (dep_insn))
3236 case TYPE_FPLOAD:
3237 /* This cost 3 cycles, not 2 as the md says for the
3238 700 and 7100. Note scaling of cost for 7100. */
3239 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3241 case TYPE_FPALU:
3242 case TYPE_FPMULSGL:
3243 case TYPE_FPMULDBL:
3244 case TYPE_FPDIVSGL:
3245 case TYPE_FPDIVDBL:
3246 case TYPE_FPSQRTSGL:
3247 case TYPE_FPSQRTDBL:
3248 /* In these important cases, we save one cycle compared to
3249 when flop instruction feed each other. */
3250 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3252 default:
3253 return cost;
3258 /* For other data dependencies, the default cost specified in the
3259 md is correct. */
3260 return cost;
3262 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3264 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3265 cycles later. */
3267 if (get_attr_type (insn) == TYPE_FPLOAD)
3269 rtx pat = PATTERN (insn);
3270 rtx dep_pat = PATTERN (dep_insn);
3271 if (GET_CODE (pat) == PARALLEL)
3273 /* This happens for the fldXs,mb patterns. */
3274 pat = XVECEXP (pat, 0, 0);
3276 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3277 /* If this happens, we have to extend this to schedule
3278 optimally. Return 0 for now. */
3279 return 0;
3281 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3283 if (! recog_memoized (dep_insn))
3284 return 0;
3285 switch (get_attr_type (dep_insn))
3287 case TYPE_FPALU:
3288 case TYPE_FPMULSGL:
3289 case TYPE_FPMULDBL:
3290 case TYPE_FPDIVSGL:
3291 case TYPE_FPDIVDBL:
3292 case TYPE_FPSQRTSGL:
3293 case TYPE_FPSQRTDBL:
3294 /* A fpload can't be issued until one cycle before a
3295 preceding arithmetic operation has finished if
3296 the target of the fpload is any of the sources
3297 (or destination) of the arithmetic operation. */
3298 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3300 default:
3301 return 0;
3305 else if (get_attr_type (insn) == TYPE_FPALU)
3307 rtx pat = PATTERN (insn);
3308 rtx dep_pat = PATTERN (dep_insn);
3309 if (GET_CODE (pat) == PARALLEL)
3311 /* This happens for the fldXs,mb patterns. */
3312 pat = XVECEXP (pat, 0, 0);
3314 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3315 /* If this happens, we have to extend this to schedule
3316 optimally. Return 0 for now. */
3317 return 0;
3319 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3321 if (! recog_memoized (dep_insn))
3322 return 0;
3323 switch (get_attr_type (dep_insn))
3325 case TYPE_FPDIVSGL:
3326 case TYPE_FPDIVDBL:
3327 case TYPE_FPSQRTSGL:
3328 case TYPE_FPSQRTDBL:
3329 /* An ALU flop can't be issued until two cycles before a
3330 preceding divide or sqrt operation has finished if
3331 the target of the ALU flop is any of the sources
3332 (or destination) of the divide or sqrt operation. */
3333 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3335 default:
3336 return 0;
3341 /* For other anti dependencies, the cost is 0. */
3342 return 0;
3344 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3346 /* Output dependency; DEP_INSN writes a register that INSN writes some
3347 cycles later. */
3348 if (get_attr_type (insn) == TYPE_FPLOAD)
3350 rtx pat = PATTERN (insn);
3351 rtx dep_pat = PATTERN (dep_insn);
3352 if (GET_CODE (pat) == PARALLEL)
3354 /* This happens for the fldXs,mb patterns. */
3355 pat = XVECEXP (pat, 0, 0);
3357 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3358 /* If this happens, we have to extend this to schedule
3359 optimally. Return 0 for now. */
3360 return 0;
3362 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3364 if (! recog_memoized (dep_insn))
3365 return 0;
3366 switch (get_attr_type (dep_insn))
3368 case TYPE_FPALU:
3369 case TYPE_FPMULSGL:
3370 case TYPE_FPMULDBL:
3371 case TYPE_FPDIVSGL:
3372 case TYPE_FPDIVDBL:
3373 case TYPE_FPSQRTSGL:
3374 case TYPE_FPSQRTDBL:
3375 /* A fpload can't be issued until one cycle before a
3376 preceding arithmetic operation has finished if
3377 the target of the fpload is the destination of the
3378 arithmetic operation. */
3379 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3381 default:
3382 return 0;
3386 else if (get_attr_type (insn) == TYPE_FPALU)
3388 rtx pat = PATTERN (insn);
3389 rtx dep_pat = PATTERN (dep_insn);
3390 if (GET_CODE (pat) == PARALLEL)
3392 /* This happens for the fldXs,mb patterns. */
3393 pat = XVECEXP (pat, 0, 0);
3395 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3396 /* If this happens, we have to extend this to schedule
3397 optimally. Return 0 for now. */
3398 return 0;
3400 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3402 if (! recog_memoized (dep_insn))
3403 return 0;
3404 switch (get_attr_type (dep_insn))
3406 case TYPE_FPDIVSGL:
3407 case TYPE_FPDIVDBL:
3408 case TYPE_FPSQRTSGL:
3409 case TYPE_FPSQRTDBL:
3410 /* An ALU flop can't be issued until two cycles before a
3411 preceding divide or sqrt operation has finished if
3412 the target of the ALU flop is also the target of
3413 of the divide or sqrt operation. */
3414 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3416 default:
3417 return 0;
3422 /* For other output dependencies, the cost is 0. */
3423 return 0;
3425 else
3426 abort ();
3429 /* Return any length adjustment needed by INSN which already has its length
3430 computed as LENGTH. Return zero if no adjustment is necessary.
3432 For the PA: function calls, millicode calls, and backwards short
3433 conditional branches with unfilled delay slots need an adjustment by +1
3434 (to account for the NOP which will be inserted into the instruction stream).
3436 Also compute the length of an inline block move here as it is too
3437 complicated to express as a length attribute in pa.md. */
3439 pa_adjust_insn_length (insn, length)
3440 rtx insn;
3441 int length;
3443 rtx pat = PATTERN (insn);
3445 /* Call insns which are *not* indirect and have unfilled delay slots. */
3446 if (GET_CODE (insn) == CALL_INSN)
3449 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3450 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3451 return 4;
3452 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3453 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3454 == SYMBOL_REF)
3455 return 4;
3456 else
3457 return 0;
3459 /* Jumps inside switch tables which have unfilled delay slots
3460 also need adjustment. */
3461 else if (GET_CODE (insn) == JUMP_INSN
3462 && simplejump_p (insn)
3463 && GET_MODE (PATTERN (insn)) == DImode)
3464 return 4;
3465 /* Millicode insn with an unfilled delay slot. */
3466 else if (GET_CODE (insn) == INSN
3467 && GET_CODE (pat) != SEQUENCE
3468 && GET_CODE (pat) != USE
3469 && GET_CODE (pat) != CLOBBER
3470 && get_attr_type (insn) == TYPE_MILLI)
3471 return 4;
3472 /* Block move pattern. */
3473 else if (GET_CODE (insn) == INSN
3474 && GET_CODE (pat) == PARALLEL
3475 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3476 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3477 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3478 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3479 return compute_movstrsi_length (insn) - 4;
3480 /* Conditional branch with an unfilled delay slot. */
3481 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3483 /* Adjust a short backwards conditional with an unfilled delay slot. */
3484 if (GET_CODE (pat) == SET
3485 && length == 4
3486 && ! forward_branch_p (insn))
3487 return 4;
3488 else if (GET_CODE (pat) == PARALLEL
3489 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3490 && length == 4)
3491 return 4;
3492 /* Adjust dbra insn with short backwards conditional branch with
3493 unfilled delay slot -- only for case where counter is in a
3494 general register register. */
3495 else if (GET_CODE (pat) == PARALLEL
3496 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3497 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3498 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3499 && length == 4
3500 && ! forward_branch_p (insn))
3501 return 4;
3502 else
3503 return 0;
3505 return 0;
3508 /* Print operand X (an rtx) in assembler syntax to file FILE.
3509 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3510 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3512 void
3513 print_operand (file, x, code)
3514 FILE *file;
3515 rtx x;
3516 int code;
3518 switch (code)
3520 case '#':
3521 /* Output a 'nop' if there's nothing for the delay slot. */
3522 if (dbr_sequence_length () == 0)
3523 fputs ("\n\tnop", file);
3524 return;
3525 case '*':
3526 /* Output an nullification completer if there's nothing for the */
3527 /* delay slot or nullification is requested. */
3528 if (dbr_sequence_length () == 0 ||
3529 (final_sequence &&
3530 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3531 fputs (",n", file);
3532 return;
3533 case 'R':
3534 /* Print out the second register name of a register pair.
3535 I.e., R (6) => 7. */
3536 fputs (reg_names[REGNO (x)+1], file);
3537 return;
3538 case 'r':
3539 /* A register or zero. */
3540 if (x == const0_rtx
3541 || (x == CONST0_RTX (DFmode))
3542 || (x == CONST0_RTX (SFmode)))
3544 fputs ("0", file);
3545 return;
3547 else
3548 break;
3549 case 'C': /* Plain (C)ondition */
3550 case 'X':
3551 switch (GET_CODE (x))
3553 case EQ:
3554 fputs ("=", file); break;
3555 case NE:
3556 fputs ("<>", file); break;
3557 case GT:
3558 fputs (">", file); break;
3559 case GE:
3560 fputs (">=", file); break;
3561 case GEU:
3562 fputs (">>=", file); break;
3563 case GTU:
3564 fputs (">>", file); break;
3565 case LT:
3566 fputs ("<", file); break;
3567 case LE:
3568 fputs ("<=", file); break;
3569 case LEU:
3570 fputs ("<<=", file); break;
3571 case LTU:
3572 fputs ("<<", file); break;
3573 default:
3574 abort ();
3576 return;
3577 case 'N': /* Condition, (N)egated */
3578 switch (GET_CODE (x))
3580 case EQ:
3581 fputs ("<>", file); break;
3582 case NE:
3583 fputs ("=", file); break;
3584 case GT:
3585 fputs ("<=", file); break;
3586 case GE:
3587 fputs ("<", file); break;
3588 case GEU:
3589 fputs ("<<", file); break;
3590 case GTU:
3591 fputs ("<<=", file); break;
3592 case LT:
3593 fputs (">=", file); break;
3594 case LE:
3595 fputs (">", file); break;
3596 case LEU:
3597 fputs (">>", file); break;
3598 case LTU:
3599 fputs (">>=", file); break;
3600 default:
3601 abort ();
3603 return;
3604 /* For floating point comparisons. Need special conditions to deal
3605 with NaNs properly. */
3606 case 'Y':
3607 switch (GET_CODE (x))
3609 case EQ:
3610 fputs ("!=", file); break;
3611 case NE:
3612 fputs ("=", file); break;
3613 case GT:
3614 fputs ("<=", file); break;
3615 case GE:
3616 fputs ("<", file); break;
3617 case LT:
3618 fputs (">=", file); break;
3619 case LE:
3620 fputs (">", file); break;
3621 default:
3622 abort ();
3624 return;
3625 case 'S': /* Condition, operands are (S)wapped. */
3626 switch (GET_CODE (x))
3628 case EQ:
3629 fputs ("=", file); break;
3630 case NE:
3631 fputs ("<>", file); break;
3632 case GT:
3633 fputs ("<", file); break;
3634 case GE:
3635 fputs ("<=", file); break;
3636 case GEU:
3637 fputs ("<<=", file); break;
3638 case GTU:
3639 fputs ("<<", file); break;
3640 case LT:
3641 fputs (">", file); break;
3642 case LE:
3643 fputs (">=", file); break;
3644 case LEU:
3645 fputs (">>=", file); break;
3646 case LTU:
3647 fputs (">>", file); break;
3648 default:
3649 abort ();
3651 return;
3652 case 'B': /* Condition, (B)oth swapped and negate. */
3653 switch (GET_CODE (x))
3655 case EQ:
3656 fputs ("<>", file); break;
3657 case NE:
3658 fputs ("=", file); break;
3659 case GT:
3660 fputs (">=", file); break;
3661 case GE:
3662 fputs (">", file); break;
3663 case GEU:
3664 fputs (">>", file); break;
3665 case GTU:
3666 fputs (">>=", file); break;
3667 case LT:
3668 fputs ("<=", file); break;
3669 case LE:
3670 fputs ("<", file); break;
3671 case LEU:
3672 fputs ("<<", file); break;
3673 case LTU:
3674 fputs ("<<=", file); break;
3675 default:
3676 abort ();
3678 return;
3679 case 'k':
3680 if (GET_CODE (x) == CONST_INT)
3682 fprintf (file, "%d", ~INTVAL (x));
3683 return;
3685 abort();
3686 case 'L':
3687 if (GET_CODE (x) == CONST_INT)
3689 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3690 return;
3692 abort();
3693 case 'O':
3694 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3696 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3697 return;
3699 abort();
3700 case 'P':
3701 if (GET_CODE (x) == CONST_INT)
3703 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3704 return;
3706 abort();
3707 case 'I':
3708 if (GET_CODE (x) == CONST_INT)
3709 fputs ("i", file);
3710 return;
3711 case 'M':
3712 case 'F':
3713 switch (GET_CODE (XEXP (x, 0)))
3715 case PRE_DEC:
3716 case PRE_INC:
3717 fputs ("s,mb", file);
3718 break;
3719 case POST_DEC:
3720 case POST_INC:
3721 fputs ("s,ma", file);
3722 break;
3723 case PLUS:
3724 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3725 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3726 fputs ("x,s", file);
3727 else if (code == 'F')
3728 fputs ("s", file);
3729 break;
3730 default:
3731 if (code == 'F')
3732 fputs ("s", file);
3733 break;
3735 return;
3736 case 'G':
3737 output_global_address (file, x, 0);
3738 return;
3739 case 'H':
3740 output_global_address (file, x, 1);
3741 return;
3742 case 0: /* Don't do anything special */
3743 break;
3744 case 'Z':
3746 unsigned op[3];
3747 compute_zdepi_operands (INTVAL (x), op);
3748 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3749 return;
3751 default:
3752 abort ();
3754 if (GET_CODE (x) == REG)
3756 fputs (reg_names [REGNO (x)], file);
3757 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3758 fputs ("L", file);
3760 else if (GET_CODE (x) == MEM)
3762 int size = GET_MODE_SIZE (GET_MODE (x));
3763 rtx base = XEXP (XEXP (x, 0), 0);
3764 switch (GET_CODE (XEXP (x, 0)))
3766 case PRE_DEC:
3767 case POST_DEC:
3768 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3769 break;
3770 case PRE_INC:
3771 case POST_INC:
3772 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3773 break;
3774 default:
3775 if (GET_CODE (XEXP (x, 0)) == PLUS
3776 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3777 fprintf (file, "%s(0,%s)",
3778 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3779 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3780 else if (GET_CODE (XEXP (x, 0)) == PLUS
3781 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3782 fprintf (file, "%s(0,%s)",
3783 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3784 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3785 else
3786 output_address (XEXP (x, 0));
3787 break;
3790 else
3791 output_addr_const (file, x);
3794 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3796 void
3797 output_global_address (file, x, round_constant)
3798 FILE *file;
3799 rtx x;
3800 int round_constant;
3803 /* Imagine (high (const (plus ...))). */
3804 if (GET_CODE (x) == HIGH)
3805 x = XEXP (x, 0);
3807 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3808 assemble_name (file, XSTR (x, 0));
3809 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3811 assemble_name (file, XSTR (x, 0));
3812 fputs ("-$global$", file);
3814 else if (GET_CODE (x) == CONST)
3816 char *sep = "";
3817 int offset = 0; /* assembler wants -$global$ at end */
3818 rtx base;
3820 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3822 base = XEXP (XEXP (x, 0), 0);
3823 output_addr_const (file, base);
3825 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3826 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3827 else abort ();
3829 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3831 base = XEXP (XEXP (x, 0), 1);
3832 output_addr_const (file, base);
3834 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3835 offset = INTVAL (XEXP (XEXP (x, 0),1));
3836 else abort ();
3838 /* How bogus. The compiler is apparently responsible for
3839 rounding the constant if it uses an LR field selector.
3841 The linker and/or assembler seem a better place since
3842 they have to do this kind of thing already.
3844 If we fail to do this, HP's optimizing linker may eliminate
3845 an addil, but not update the ldw/stw/ldo instruction that
3846 uses the result of the addil. */
3847 if (round_constant)
3848 offset = ((offset + 0x1000) & ~0x1fff);
3850 if (GET_CODE (XEXP (x, 0)) == PLUS)
3852 if (offset < 0)
3854 offset = -offset;
3855 sep = "-";
3857 else
3858 sep = "+";
3860 else if (GET_CODE (XEXP (x, 0)) == MINUS
3861 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3862 sep = "-";
3863 else abort ();
3865 if (!read_only_operand (base) && !flag_pic)
3866 fputs ("-$global$", file);
3867 if (offset)
3868 fprintf (file,"%s%d", sep, offset);
3870 else
3871 output_addr_const (file, x);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
static char imported[(int)end1000];
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
static char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." placeholder within import_string, where the
   4-character millicode name is spliced in.  */
#define MILLI_START 10

/* Emit a .IMPORT directive for millicode routine CODE, but only the
   first time it is requested (imported[] records which ones are done).  */
static void
import_milli (code)
     enum millicodes code;
{
  char str[sizeof (import_string)];

  if (!imported[(int)code])
    {
      imported[(int)code] = 1;
      strcpy (str, import_string);
      /* Overwrite the "...." placeholder; all millicode names used
	 here are exactly 4 characters, so no terminator is needed.  */
      strncpy (str + MILLI_START, milli_names[(int)code], 4);
      output_asm_insn (str, 0);
    }
}
3898 /* The register constraints have put the operands and return value in
3899 the proper registers. */
3901 char *
3902 output_mul_insn (unsignedp, insn)
3903 int unsignedp;
3904 rtx insn;
3906 import_milli (mulI);
3907 return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
3910 /* Emit the rtl for doing a division by a constant. */
3912 /* Do magic division millicodes exist for this value? */
3913 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
3914 1, 1};
3916 /* We'll use an array to keep track of the magic millicodes and
3917 whether or not we've used them already. [n][0] is signed, [n][1] is
3918 unsigned. */
3920 static int div_milli[16][2];
3923 div_operand (op, mode)
3924 rtx op;
3925 enum machine_mode mode;
3927 return (mode == SImode
3928 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3929 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3930 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3934 emit_hpdiv_const (operands, unsignedp)
3935 rtx *operands;
3936 int unsignedp;
3938 if (GET_CODE (operands[2]) == CONST_INT
3939 && INTVAL (operands[2]) > 0
3940 && INTVAL (operands[2]) < 16
3941 && magic_milli[INTVAL (operands[2])])
3943 emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
3944 emit
3945 (gen_rtx
3946 (PARALLEL, VOIDmode,
3947 gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3948 gen_rtx (unsignedp ? UDIV : DIV, SImode,
3949 gen_rtx (REG, SImode, 26),
3950 operands[2])),
3951 gen_rtx (CLOBBER, VOIDmode, operands[3]),
3952 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3953 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3954 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3955 emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3956 return 1;
3958 return 0;
3961 char *
3962 output_div_insn (operands, unsignedp, insn)
3963 rtx *operands;
3964 int unsignedp;
3965 rtx insn;
3967 int divisor;
3969 /* If the divisor is a constant, try to use one of the special
3970 opcodes .*/
3971 if (GET_CODE (operands[0]) == CONST_INT)
3973 static char buf[100];
3974 divisor = INTVAL (operands[0]);
3975 if (!div_milli[divisor][unsignedp])
3977 div_milli[divisor][unsignedp] = 1;
3978 if (unsignedp)
3979 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
3980 else
3981 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
3983 if (unsignedp)
3985 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
3986 return output_millicode_call (insn,
3987 gen_rtx (SYMBOL_REF, SImode, buf));
3989 else
3991 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
3992 return output_millicode_call (insn,
3993 gen_rtx (SYMBOL_REF, SImode, buf));
3996 /* Divisor isn't a special constant. */
3997 else
3999 if (unsignedp)
4001 import_milli (divU);
4002 return output_millicode_call (insn,
4003 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
4005 else
4007 import_milli (divI);
4008 return output_millicode_call (insn,
4009 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
4014 /* Output a $$rem millicode to do mod. */
4016 char *
4017 output_mod_insn (unsignedp, insn)
4018 int unsignedp;
4019 rtx insn;
4021 if (unsignedp)
4023 import_milli (remU);
4024 return output_millicode_call (insn,
4025 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
4027 else
4029 import_milli (remI);
4030 return output_millicode_call (insn,
4031 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
4035 void
4036 output_arg_descriptor (call_insn)
4037 rtx call_insn;
4039 char *arg_regs[4];
4040 enum machine_mode arg_mode;
4041 rtx link;
4042 int i, output_flag = 0;
4043 int regno;
4045 for (i = 0; i < 4; i++)
4046 arg_regs[i] = 0;
4048 /* Specify explicitly that no argument relocations should take place
4049 if using the portable runtime calling conventions. */
4050 if (TARGET_PORTABLE_RUNTIME)
4052 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4053 asm_out_file);
4054 return;
4057 if (GET_CODE (call_insn) != CALL_INSN)
4058 abort ();
4059 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4061 rtx use = XEXP (link, 0);
4063 if (! (GET_CODE (use) == USE
4064 && GET_CODE (XEXP (use, 0)) == REG
4065 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4066 continue;
4068 arg_mode = GET_MODE (XEXP (use, 0));
4069 regno = REGNO (XEXP (use, 0));
4070 if (regno >= 23 && regno <= 26)
4072 arg_regs[26 - regno] = "GR";
4073 if (arg_mode == DImode)
4074 arg_regs[25 - regno] = "GR";
4076 else if (regno >= 32 && regno <= 39)
4078 if (arg_mode == SFmode)
4079 arg_regs[(regno - 32) / 2] = "FR";
4080 else
4082 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4083 arg_regs[(regno - 34) / 2] = "FR";
4084 arg_regs[(regno - 34) / 2 + 1] = "FU";
4085 #else
4086 arg_regs[(regno - 34) / 2] = "FU";
4087 arg_regs[(regno - 34) / 2 + 1] = "FR";
4088 #endif
4092 fputs ("\t.CALL ", asm_out_file);
4093 for (i = 0; i < 4; i++)
4095 if (arg_regs[i])
4097 if (output_flag++)
4098 fputc (',', asm_out_file);
4099 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4102 fputc ('\n', asm_out_file);
4105 /* Return the class of any secondary reload register that is needed to
4106 move IN into a register in class CLASS using mode MODE.
4108 Profiling has showed this routine and its descendants account for
4109 a significant amount of compile time (~7%). So it has been
4110 optimized to reduce redundant computations and eliminate useless
4111 function calls.
4113 It might be worthwhile to try and make this a leaf function too. */
4115 enum reg_class
4116 secondary_reload_class (class, mode, in)
4117 enum reg_class class;
4118 enum machine_mode mode;
4119 rtx in;
4121 int regno, is_symbolic;
4123 /* Trying to load a constant into a FP register during PIC code
4124 generation will require %r1 as a scratch register. */
4125 if (flag_pic == 2
4126 && GET_MODE_CLASS (mode) == MODE_INT
4127 && FP_REG_CLASS_P (class)
4128 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4129 return R1_REGS;
4131 /* Profiling showed the PA port spends about 1.3% of its compilation
4132 time in true_regnum from calls inside secondary_reload_class. */
4134 if (GET_CODE (in) == REG)
4136 regno = REGNO (in);
4137 if (regno >= FIRST_PSEUDO_REGISTER)
4138 regno = true_regnum (in);
4140 else if (GET_CODE (in) == SUBREG)
4141 regno = true_regnum (in);
4142 else
4143 regno = -1;
4145 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4146 && GET_MODE_CLASS (mode) == MODE_INT
4147 && FP_REG_CLASS_P (class))
4148 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4149 return GENERAL_REGS;
4151 if (GET_CODE (in) == HIGH)
4152 in = XEXP (in, 0);
4154 /* Profiling has showed GCC spends about 2.6% of its compilation
4155 time in symbolic_operand from calls inside secondary_reload_class.
4157 We use an inline copy and only compute its return value once to avoid
4158 useless work. */
4159 switch (GET_CODE (in))
4161 rtx tmp;
4163 case SYMBOL_REF:
4164 case LABEL_REF:
4165 is_symbolic = 1;
4166 break;
4167 case CONST:
4168 tmp = XEXP (in, 0);
4169 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4170 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4171 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4172 break;
4173 default:
4174 is_symbolic = 0;
4175 break;
4178 if (!flag_pic
4179 && is_symbolic
4180 && read_only_operand (in))
4181 return NO_REGS;
4183 if (class != R1_REGS && is_symbolic)
4184 return R1_REGS;
4186 return NO_REGS;
4189 enum direction
4190 function_arg_padding (mode, type)
4191 enum machine_mode mode;
4192 tree type;
4194 int size;
4196 if (mode == BLKmode)
4198 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4199 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4200 else
4201 return upward; /* Don't know if this is right, but */
4202 /* same as old definition. */
4204 else
4205 size = GET_MODE_BITSIZE (mode);
4206 if (size < PARM_BOUNDARY)
4207 return downward;
4208 else if (size % PARM_BOUNDARY)
4209 return upward;
4210 else
4211 return none;
4215 /* Do what is necessary for `va_start'. The argument is ignored;
4216 We look at the current function to determine if stdargs or varargs
4217 is used and fill in an initial va_list. A pointer to this constructor
4218 is returned. */
4220 struct rtx_def *
4221 hppa_builtin_saveregs (arglist)
4222 tree arglist;
4224 rtx offset;
4225 tree fntype = TREE_TYPE (current_function_decl);
4226 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4227 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4228 != void_type_node)))
4229 ? UNITS_PER_WORD : 0);
4231 if (argadj)
4232 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4233 else
4234 offset = current_function_arg_offset_rtx;
4236 /* Store general registers on the stack. */
4237 move_block_from_reg (23,
4238 gen_rtx (MEM, BLKmode,
4239 plus_constant
4240 (current_function_internal_arg_pointer, -16)),
4241 4, 4 * UNITS_PER_WORD);
4242 return copy_to_reg (expand_binop (Pmode, add_optab,
4243 current_function_internal_arg_pointer,
4244 offset, 0, 0, OPTAB_LIB_WIDEN));
4247 /* This routine handles all the normal conditional branch sequences we
4248 might need to generate. It handles compare immediate vs compare
4249 register, nullification of delay slots, varying length branches,
4250 negated branches, and all combinations of the above. It returns the
4251 output appropriate to emit the branch corresponding to all given
4252 parameters. */
4254 char *
4255 output_cbranch (operands, nullify, length, negated, insn)
4256 rtx *operands;
4257 int nullify, length, negated;
4258 rtx insn;
4260 static char buf[100];
4261 int useskip = 0;
4263 /* A conditional branch to the following instruction (eg the delay slot) is
4264 asking for a disaster. This can happen when not optimizing.
4266 In such cases it is safe to emit nothing. */
4268 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4269 return "";
4271 /* If this is a long branch with its delay slot unfilled, set `nullify'
4272 as it can nullify the delay slot and save a nop. */
4273 if (length == 8 && dbr_sequence_length () == 0)
4274 nullify = 1;
4276 /* If this is a short forward conditional branch which did not get
4277 its delay slot filled, the delay slot can still be nullified. */
4278 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4279 nullify = forward_branch_p (insn);
4281 /* A forward branch over a single nullified insn can be done with a
4282 comclr instruction. This avoids a single cycle penalty due to
4283 mis-predicted branch if we fall through (branch not taken). */
4284 if (length == 4
4285 && next_real_insn (insn) != 0
4286 && get_attr_length (next_real_insn (insn)) == 4
4287 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4288 && nullify)
4289 useskip = 1;
4291 switch (length)
4293 /* All short conditional branches except backwards with an unfilled
4294 delay slot. */
4295 case 4:
4296 if (useskip)
4297 strcpy (buf, "com%I2clr,");
4298 else
4299 strcpy (buf, "com%I2b,");
4300 if (negated)
4301 strcat (buf, "%B3");
4302 else
4303 strcat (buf, "%S3");
4304 if (useskip)
4305 strcat (buf, " %2,%1,0");
4306 else if (nullify)
4307 strcat (buf, ",n %2,%1,%0");
4308 else
4309 strcat (buf, " %2,%1,%0");
4310 break;
4312 /* All long conditionals. Note an short backward branch with an
4313 unfilled delay slot is treated just like a long backward branch
4314 with an unfilled delay slot. */
4315 case 8:
4316 /* Handle weird backwards branch with a filled delay slot
4317 with is nullified. */
4318 if (dbr_sequence_length () != 0
4319 && ! forward_branch_p (insn)
4320 && nullify)
4322 strcpy (buf, "com%I2b,");
4323 if (negated)
4324 strcat (buf, "%S3");
4325 else
4326 strcat (buf, "%B3");
4327 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4329 /* Handle short backwards branch with an unfilled delay slot.
4330 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4331 taken and untaken branches. */
4332 else if (dbr_sequence_length () == 0
4333 && ! forward_branch_p (insn)
4334 && insn_addresses
4335 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4336 - insn_addresses[INSN_UID (insn)] - 8))
4338 strcpy (buf, "com%I2b,");
4339 if (negated)
4340 strcat (buf, "%B3 %2,%1,%0%#");
4341 else
4342 strcat (buf, "%S3 %2,%1,%0%#");
4344 else
4346 strcpy (buf, "com%I2clr,");
4347 if (negated)
4348 strcat (buf, "%S3");
4349 else
4350 strcat (buf, "%B3");
4351 if (nullify)
4352 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4353 else
4354 strcat (buf, " %2,%1,0\n\tbl %0,0");
4356 break;
4358 default:
4359 abort();
4361 return buf;
4364 /* This routine handles all the branch-on-bit conditional branch sequences we
4365 might need to generate. It handles nullification of delay slots,
4366 varying length branches, negated branches and all combinations of the
4367 above. it returns the appropriate output template to emit the branch. */
4369 char *
4370 output_bb (operands, nullify, length, negated, insn, which)
4371 rtx *operands;
4372 int nullify, length, negated;
4373 rtx insn;
4374 int which;
4376 static char buf[100];
4377 int useskip = 0;
4379 /* A conditional branch to the following instruction (eg the delay slot) is
4380 asking for a disaster. I do not think this can happen as this pattern
4381 is only used when optimizing; jump optimization should eliminate the
4382 jump. But be prepared just in case. */
4384 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4385 return "";
4387 /* If this is a long branch with its delay slot unfilled, set `nullify'
4388 as it can nullify the delay slot and save a nop. */
4389 if (length == 8 && dbr_sequence_length () == 0)
4390 nullify = 1;
4392 /* If this is a short forward conditional branch which did not get
4393 its delay slot filled, the delay slot can still be nullified. */
4394 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4395 nullify = forward_branch_p (insn);
4397 /* A forward branch over a single nullified insn can be done with a
4398 extrs instruction. This avoids a single cycle penalty due to
4399 mis-predicted branch if we fall through (branch not taken). */
4401 if (length == 4
4402 && next_real_insn (insn) != 0
4403 && get_attr_length (next_real_insn (insn)) == 4
4404 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4405 && nullify)
4406 useskip = 1;
4408 switch (length)
4411 /* All short conditional branches except backwards with an unfilled
4412 delay slot. */
4413 case 4:
4414 if (useskip)
4415 strcpy (buf, "extrs,");
4416 else
4417 strcpy (buf, "bb,");
4418 if ((which == 0 && negated)
4419 || (which == 1 && ! negated))
4420 strcat (buf, ">=");
4421 else
4422 strcat (buf, "<");
4423 if (useskip)
4424 strcat (buf, " %0,%1,1,0");
4425 else if (nullify && negated)
4426 strcat (buf, ",n %0,%1,%3");
4427 else if (nullify && ! negated)
4428 strcat (buf, ",n %0,%1,%2");
4429 else if (! nullify && negated)
4430 strcat (buf, "%0,%1,%3");
4431 else if (! nullify && ! negated)
4432 strcat (buf, " %0,%1,%2");
4433 break;
4435 /* All long conditionals. Note an short backward branch with an
4436 unfilled delay slot is treated just like a long backward branch
4437 with an unfilled delay slot. */
4438 case 8:
4439 /* Handle weird backwards branch with a filled delay slot
4440 with is nullified. */
4441 if (dbr_sequence_length () != 0
4442 && ! forward_branch_p (insn)
4443 && nullify)
4445 strcpy (buf, "bb,");
4446 if ((which == 0 && negated)
4447 || (which == 1 && ! negated))
4448 strcat (buf, "<");
4449 else
4450 strcat (buf, ">=");
4451 if (negated)
4452 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4453 else
4454 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4456 /* Handle short backwards branch with an unfilled delay slot.
4457 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4458 taken and untaken branches. */
4459 else if (dbr_sequence_length () == 0
4460 && ! forward_branch_p (insn)
4461 && insn_addresses
4462 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4463 - insn_addresses[INSN_UID (insn)] - 8))
4465 strcpy (buf, "bb,");
4466 if ((which == 0 && negated)
4467 || (which == 1 && ! negated))
4468 strcat (buf, ">=");
4469 else
4470 strcat (buf, "<");
4471 if (negated)
4472 strcat (buf, " %0,%1,%3%#");
4473 else
4474 strcat (buf, " %0,%1,%2%#");
4476 else
4478 strcpy (buf, "extrs,");
4479 if ((which == 0 && negated)
4480 || (which == 1 && ! negated))
4481 strcat (buf, "<");
4482 else
4483 strcat (buf, ">=");
4484 if (nullify && negated)
4485 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4486 else if (nullify && ! negated)
4487 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4488 else if (negated)
4489 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4490 else
4491 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4493 break;
4495 default:
4496 abort();
4498 return buf;
4501 /* This routine handles all the branch-on-variable-bit conditional branch
4502 sequences we might need to generate. It handles nullification of delay
4503 slots, varying length branches, negated branches and all combinations
4504 of the above. it returns the appropriate output template to emit the
4505 branch. */
4507 char *
4508 output_bvb (operands, nullify, length, negated, insn, which)
4509 rtx *operands;
4510 int nullify, length, negated;
4511 rtx insn;
4512 int which;
4514 static char buf[100];
4515 int useskip = 0;
4517 /* A conditional branch to the following instruction (eg the delay slot) is
4518 asking for a disaster. I do not think this can happen as this pattern
4519 is only used when optimizing; jump optimization should eliminate the
4520 jump. But be prepared just in case. */
4522 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4523 return "";
4525 /* If this is a long branch with its delay slot unfilled, set `nullify'
4526 as it can nullify the delay slot and save a nop. */
4527 if (length == 8 && dbr_sequence_length () == 0)
4528 nullify = 1;
4530 /* If this is a short forward conditional branch which did not get
4531 its delay slot filled, the delay slot can still be nullified. */
4532 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4533 nullify = forward_branch_p (insn);
4535 /* A forward branch over a single nullified insn can be done with a
4536 extrs instruction. This avoids a single cycle penalty due to
4537 mis-predicted branch if we fall through (branch not taken). */
4539 if (length == 4
4540 && next_real_insn (insn) != 0
4541 && get_attr_length (next_real_insn (insn)) == 4
4542 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4543 && nullify)
4544 useskip = 1;
4546 switch (length)
4549 /* All short conditional branches except backwards with an unfilled
4550 delay slot. */
4551 case 4:
4552 if (useskip)
4553 strcpy (buf, "vextrs,");
4554 else
4555 strcpy (buf, "bvb,");
4556 if ((which == 0 && negated)
4557 || (which == 1 && ! negated))
4558 strcat (buf, ">=");
4559 else
4560 strcat (buf, "<");
4561 if (useskip)
4562 strcat (buf, " %0,1,0");
4563 else if (nullify && negated)
4564 strcat (buf, ",n %0,%3");
4565 else if (nullify && ! negated)
4566 strcat (buf, ",n %0,%2");
4567 else if (! nullify && negated)
4568 strcat (buf, "%0,%3");
4569 else if (! nullify && ! negated)
4570 strcat (buf, " %0,%2");
4571 break;
4573 /* All long conditionals. Note an short backward branch with an
4574 unfilled delay slot is treated just like a long backward branch
4575 with an unfilled delay slot. */
4576 case 8:
4577 /* Handle weird backwards branch with a filled delay slot
4578 with is nullified. */
4579 if (dbr_sequence_length () != 0
4580 && ! forward_branch_p (insn)
4581 && nullify)
4583 strcpy (buf, "bvb,");
4584 if ((which == 0 && negated)
4585 || (which == 1 && ! negated))
4586 strcat (buf, "<");
4587 else
4588 strcat (buf, ">=");
4589 if (negated)
4590 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4591 else
4592 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4594 /* Handle short backwards branch with an unfilled delay slot.
4595 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4596 taken and untaken branches. */
4597 else if (dbr_sequence_length () == 0
4598 && ! forward_branch_p (insn)
4599 && insn_addresses
4600 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4601 - insn_addresses[INSN_UID (insn)] - 8))
4603 strcpy (buf, "bvb,");
4604 if ((which == 0 && negated)
4605 || (which == 1 && ! negated))
4606 strcat (buf, ">=");
4607 else
4608 strcat (buf, "<");
4609 if (negated)
4610 strcat (buf, " %0,%3%#");
4611 else
4612 strcat (buf, " %0,%2%#");
4614 else
4616 strcpy (buf, "vextrs,");
4617 if ((which == 0 && negated)
4618 || (which == 1 && ! negated))
4619 strcat (buf, "<");
4620 else
4621 strcat (buf, ">=");
4622 if (nullify && negated)
4623 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4624 else if (nullify && ! negated)
4625 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4626 else if (negated)
4627 strcat (buf, " %0,1,0\n\tbl %3,0");
4628 else
4629 strcat (buf, " %0,1,0\n\tbl %2,0");
4631 break;
4633 default:
4634 abort();
4636 return buf;
/* Return the output template for emitting a dbra type insn.

   OPERANDS are the insn's operands, INSN is the branch itself, and
   WHICH_ALTERNATIVE selects the register class of the loop counter
   (0 = GR, 1 = FP register reloaded through memory, 2 = memory).

   Note it may perform some output operations on its own before
   returning the final output string.  */
char *
output_dbra (operands, insn, which_alternative)
     rtx *operands;
     rtx insn;
     int which_alternative;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* Branch is a no-op; just perform the decrement.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  /* FP counter: bounce through the stack slot at -16(%r30) to
	     increment it in a GR, then reload the FP register.  */
	  output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
	  output_asm_insn ("ldw -16(0,%%r30),%4",operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "addib,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "addib,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				      - insn_addresses[INSN_UID (insn)] - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
	  else
	    return "addi,%N2 %1,%0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
      if (get_attr_length (insn) == 24)
	return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (get_attr_length (insn) == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else
	return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
    }
}
/* Return the output template for emitting a movb type insn.

   OPERANDS are the insn's operands, INSN is the branch itself,
   WHICH_ALTERNATIVE selects the destination class (0 = GR, 1 = FP
   register, 2 = memory, 3 = SAR), and REVERSE_COMPARISON requests the
   inverted condition (the comparison code in operands[2] is flipped
   in place).

   Note it may perform some output operations on its own before
   returning the final output string.  */
char *
output_movb (operands, insn, which_alternative, reverse_comparison)
     rtx *operands;
     rtx insn;
     int which_alternative;
     int reverse_comparison;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* Branch is a no-op; just perform the move.  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  /* FP destination: go through the stack slot at -16(%r30).  */
	  output_asm_insn ("stw %1,-16(0,%%r30)",operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "movb,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "movb,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				      - insn_addresses[INSN_UID (insn)] - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move source into the FP register via a stack slot; the reload
	 happens from within the branch's delay slot.  */
      output_asm_insn ("stw %1,-16(0,%%r30)",operands);
      if (get_attr_length (insn) == 12)
	return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Store to memory from within the branch's delay slot.  */
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tstw %1,%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tmtsar %r1";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
    }
}
/* INSN is a millicode call.  It may have an unconditional jump in its delay
   slot.

   CALL_DEST is the routine we are calling.

   Returns the final output template; may emit parts of the call
   sequence itself via output_asm_insn before returning.  */

char *
output_millicode_call (insn, call_dest)
     rtx insn;
     rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r31%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 4)
    {
      int delay_insn_deleted = 0;
      /* NOTE: this declaration shadows the outer xoperands array for the
	 rest of this block.  */
      rtx xoperands[2];
      rtx link;

      /* We need to emit an inline long-call branch.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call.  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* If we're allowed to use be/ble instructions, then this is the
	 best sequence to use for a long millicode call.  */
      if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
	  || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
	{
	  xoperands[0] = call_dest;
	  output_asm_insn ("ldil L%%%0,%%r31", xoperands);
	  output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
	  output_asm_insn ("nop", xoperands);
	}
      /* Pure portable runtime doesn't allow be/ble; we also don't have
	 PIC support in the assembler/linker, so this sequence is needed.  */
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  xoperands[0] = call_dest;
	  /* Get the address of our target into %r29.  */
	  output_asm_insn ("ldil L%%%0,%%r29", xoperands);
	  output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Jump to our target address in %r29.  */
	  output_asm_insn ("bv,n 0(%%r29)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	  /* NOTE(review): this branch returns without performing the
	     delay-slot jump handling below — presumably portable-runtime
	     millicode calls never have a jump in the delay slot; confirm.  */
	  return "";
	}
      /* PIC long millicode call sequence.  */
      else
	{
	  xoperands[0] = call_dest;
	  xoperands[1] = gen_label_rtx ();
	  /* Get our address + 8 into %r1.  */
	  output_asm_insn ("bl .+8,%%r1", xoperands);

	  /* Add %r1 to the offset of our target from the next insn.  */
	  output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);

	  /* Get the return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Branch to our target which is in %r1.  */
	  output_asm_insn ("bv,n 0(%%r1)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	}

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	       - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Returns the final output template; may emit parts of the call
   sequence itself via output_asm_insn before returning.  */

char *
output_call (insn, call_dest)
     rtx insn;
     rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && get_attr_length (insn) == 8)
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r2%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 8)
    {
      int delay_insn_deleted = 0;
      /* NOTE: this declaration shadows the outer xoperands array for the
	 rest of this block.  */
      rtx xoperands[2];
      rtx link;

      /* We need to emit an inline long-call branch.  Furthermore,
	 because we're changing a named function call into an indirect
	 function call well after the parameters have been set up, we
	 need to make sure any FP args appear in both the integer
	 and FP registers.  Also, we need move any delay slot insn
	 out of the delay slot.  And finally, we can't rely on the linker
	 being able to fix the call to $$dyncall!  -- Yuk!.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call (and in fact before argument
	     relocating.  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* Now copy any FP arguments into integer registers.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
	{
	  int arg_mode, regno;
	  rtx use = XEXP (link, 0);
	  /* Only (use (reg ...)) entries for argument registers matter.  */
	  if (! (GET_CODE (use) == USE
		 && GET_CODE (XEXP (use, 0)) == REG
		 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	    continue;

	  arg_mode = GET_MODE (XEXP (use, 0));
	  regno = REGNO (XEXP (use, 0));
	  /* Is it a floating point register?  */
	  if (regno >= 32 && regno <= 39)
	    {
	      /* Copy from the FP register into an integer register
		 (via memory).  */
	      if (arg_mode == SFmode)
		{
		  xoperands[0] = XEXP (use, 0);
		  xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
		  output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	      else
		{
		  /* DFmode argument: reload both word halves.  */
		  xoperands[0] = XEXP (use, 0);
		  xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
		  output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	    }
	}

      /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
	 we don't have any direct calls in that case.  */
      if (flag_pic)
	{
	  /* We have to load the address of the function using a procedure
	     label (plabel).  The LP and RP relocs don't work reliably for PIC,
	     so we make a plain 32 bit plabel in the data segment instead.  We
	     have to defer outputting it of course...  Not pretty.  */

	  xoperands[0] = gen_label_rtx ();
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil LT%%%0,%%r19", xoperands);
	  output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);

	  /* Record the plabel so it is emitted later (grow the array by
	     one entry each time).  */
	  if (deferred_plabels == 0)
	    deferred_plabels = (struct defer_plab *)
	      xmalloc (1 * sizeof (struct defer_plab));
	  else
	    deferred_plabels = (struct defer_plab *)
	      xrealloc (deferred_plabels,
			(n_deferred_plabels + 1) * sizeof (struct defer_plab));
	  deferred_plabels[n_deferred_plabels].internal_label = xoperands[0];
	  deferred_plabels[n_deferred_plabels].symbol = call_dest;
	  n_deferred_plabels++;

	  /* Get our address + 8 into %r1.  */
	  output_asm_insn ("bl .+8,%%r1", xoperands);

	  /* Add %r1 to the offset of dyncall from the next insn.  */
	  output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);

	  /* Get the return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Branch to our target which is in %r1.  */
	  output_asm_insn ("bv 0(%%r1)", xoperands);

	  /* Copy the return address into %r2 also.  */
	  output_asm_insn ("copy %%r31,%%r2", xoperands);
	}
      else
	{
	  /* No PIC stuff to worry about.  We can use ldil;ble.  */
	  xoperands[0] = call_dest;

	  /* Get the address of our target into %r22.  */
	  output_asm_insn ("ldil LP%%%0,%%r22", xoperands);
	  output_asm_insn ("ldo RP%%%0(%%r22),%%r22", xoperands);

	  /* Get the high part of the address of $dyncall into %r2, then
	     add in the low part in the branch instruction.  */
	  output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
	  output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);

	  /* Copy the return pointer into both %r31 and %r2.  */
	  output_asm_insn ("copy %%r31,%%r2", xoperands);
	}

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	       - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
5218 extern struct obstack permanent_obstack;
5219 extern struct obstack *saveable_obstack;
/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.

   For reasons too disgusting to describe storage for the new name
   is allocated either on the saveable_obstack (released at function
   exit) or on the permanent_obstack for things that can never change
   (libcall names for example).  */

/* Mark the SYMBOL_REF SYM as a function label by rewriting its name
   with a '@' marker character inserted; PERMANENT selects which
   obstack receives the len+2 byte copy ('@' + name + NUL).  */
void
hppa_encode_label (sym, permanent)
     rtx sym;
     int permanent;
{
  char *str = XSTR (sym, 0);
  int len = strlen (str);
  char *newstr;

  newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
			  len + 2);

  /* NOTE(review): when the name begins with '*', the '*' is copied but
     newstr is advanced past it, so the pointer stored below starts at
     the '@' — presumably intentional for assembler-name handling;
     confirm.  */
  if (str[0] == '*')
    *newstr++ = *str++;
  strcpy (newstr + 1, str);
  *newstr = '@';
  XSTR (sym,0) = newstr;
}
5251 function_label_operand (op, mode)
5252 rtx op;
5253 enum machine_mode mode;
5255 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5258 /* Returns 1 if OP is a function label involved in a simple addition
5259 with a constant. Used to keep certain patterns from matching
5260 during instruction combination. */
5262 is_function_label_plus_const (op)
5263 rtx op;
5265 /* Strip off any CONST. */
5266 if (GET_CODE (op) == CONST)
5267 op = XEXP (op, 0);
5269 return (GET_CODE (op) == PLUS
5270 && function_label_operand (XEXP (op, 0), Pmode)
5271 && GET_CODE (XEXP (op, 1)) == CONST_INT);
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.

   Operands 0-2 are the multiply (dest, src1, src2); operands 3-5 are
   the addition (in/out, src1, src2).  All must be FP registers of the
   same mode and must not conflict in the ways checked below.  */
fmpyaddoperands (operands)
     rtx *operands;
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
	 && mode == GET_MODE (operands[2])
	 && mode == GET_MODE (operands[3])
	 && mode == GET_MODE (operands[4])
	 && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
	 && GET_CODE (operands[2]) == REG
	 && GET_CODE (operands[3]) == REG
	 && GET_CODE (operands[4]) == REG
	 && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add can not conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply can not feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO (operands[0]) < 57
	  || REGNO (operands[1]) < 57
	  || REGNO (operands[2]) < 57
	  || REGNO (operands[3]) < 57
	  || REGNO (operands[4]) < 57
	  || REGNO (operands[5]) < 57))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.

   Operands 0-2 are the multiply (dest, src1, src2); operands 3-5 are
   the subtraction (in/out, minuend, subtrahend).  All must be FP
   registers of the same mode and must not conflict in the ways
   checked below.  */
fmpysuboperands (operands)
     rtx *operands;
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
	 && mode == GET_MODE (operands[2])
	 && mode == GET_MODE (operands[3])
	 && mode == GET_MODE (operands[4])
	 && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
	 && GET_CODE (operands[2]) == REG
	 && GET_CODE (operands[3]) == REG
	 && GET_CODE (operands[4]) == REG
	 && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
     operation, so operands[4] must be the same as operand[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply can not feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub can not conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO (operands[0]) < 57
	  || REGNO (operands[1]) < 57
	  || REGNO (operands[2]) < 57
	  || REGNO (operands[3]) < 57
	  || REGNO (operands[4]) < 57
	  || REGNO (operands[5]) < 57))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}
5391 plus_xor_ior_operator (op, mode)
5392 rtx op;
5393 enum machine_mode mode;
5395 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5396 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
shadd_constant_p (val)
     int val;
{
  /* The shift-and-add scale factor must be 2, 4 or 8.  */
  return (val == 2 || val == 4 || val == 8);
}
5411 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5412 the valid constant for shadd instructions. */
5414 shadd_operand (op, mode)
5415 rtx op;
5416 enum machine_mode mode;
5418 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
/* Return 1 if OP is valid as a base register in a reg + reg address.
   MODE is passed through to register_operand.  */

basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* cse will create some unscaled indexed addresses, however; it
     generally isn't a win on the PA, so avoid creating unscaled
     indexed addresses until after cse is finished.  */
  if (!cse_not_expected)
    return 0;

  /* Once reload has started everything is considered valid.  Reload should
     only create indexed addresses using the stack/frame pointer, and any
     others were checked for validity when created by the combine pass.

     Also allow any register when TARGET_NO_SPACE_REGS is in effect since
     we don't have to worry about the braindamaged implicit space register
     selection using the basereg only (rather than effective address)
     screwing us over.  */
  if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
    return (GET_CODE (op) == REG);

  /* Stack is always OK for indexing.  */
  if (op == stack_pointer_rtx)
    return 1;

  /* While it's always safe to index off the frame pointer, it's not
     always profitable, particularly when the frame pointer is being
     eliminated.  */
  if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
    return 1;

  /* The only other valid OPs are pseudo registers with
     REGNO_POINTER_FLAG set.  */
  if (GET_CODE (op) != REG
      || REGNO (op) < FIRST_PSEUDO_REGISTER
      || ! register_operand (op, mode))
    return 0;

  return REGNO_POINTER_FLAG (REGNO (op));
}
5465 /* Return 1 if this operand is anything other than a hard register. */
5468 non_hard_reg_operand (op, mode)
5469 rtx op;
5470 enum machine_mode mode;
5472 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5475 /* Return 1 if INSN branches forward. Should be using insn_addresses
5476 to avoid walking through all the insns... */
5478 forward_branch_p (insn)
5479 rtx insn;
5481 rtx label = JUMP_LABEL (insn);
5483 while (insn)
5485 if (insn == label)
5486 break;
5487 else
5488 insn = NEXT_INSN (insn);
5491 return (insn == label);
5494 /* Return 1 if OP is an equality comparison, else return 0. */
5496 eq_neq_comparison_operator (op, mode)
5497 rtx op;
5498 enum machine_mode mode;
5500 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5503 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5505 movb_comparison_operator (op, mode)
5506 rtx op;
5507 enum machine_mode mode;
5509 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5510 || GET_CODE (op) == LT || GET_CODE (op) == GE);
/* Return 1 if INSN is in the delay slot of a call instruction.  */
jump_in_call_delay (insn)
     rtx insn;
{

  /* Only jumps can sit in a call's delay slot.  */
  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  /* Look two insns back for the SEQUENCE that would contain a filled
     delay slot; its element 1 is the delay-slot insn.  */
  if (PREV_INSN (insn)
      && PREV_INSN (PREV_INSN (insn))
      && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
    {
      rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));

      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);

    }
  else
    return 0;
}
5536 /* Output an unconditional move and branch insn. */
5538 char *
5539 output_parallel_movb (operands, length)
5540 rtx *operands;
5541 int length;
5543 /* These are the cases in which we win. */
5544 if (length == 4)
5545 return "mov%I1b,tr %1,%0,%2";
5547 /* None of these cases wins, but they don't lose either. */
5548 if (dbr_sequence_length () == 0)
5550 /* Nothing in the delay slot, fake it by putting the combined
5551 insn (the copy or add) in the delay slot of a bl. */
5552 if (GET_CODE (operands[1]) == CONST_INT)
5553 return "bl %2,0\n\tldi %1,%0";
5554 else
5555 return "bl %2,0\n\tcopy %1,%0";
5557 else
5559 /* Something in the delay slot, but we've got a long branch. */
5560 if (GET_CODE (operands[1]) == CONST_INT)
5561 return "ldi %1,%0\n\tbl %2,0";
5562 else
5563 return "copy %1,%0\n\tbl %2,0";
5567 /* Output an unconditional add and branch insn. */
5569 char *
5570 output_parallel_addb (operands, length)
5571 rtx *operands;
5572 int length;
5574 /* To make life easy we want operand0 to be the shared input/output
5575 operand and operand1 to be the readonly operand. */
5576 if (operands[0] == operands[1])
5577 operands[1] = operands[2];
5579 /* These are the cases in which we win. */
5580 if (length == 4)
5581 return "add%I1b,tr %1,%0,%3";
5583 /* None of these cases win, but they don't lose either. */
5584 if (dbr_sequence_length () == 0)
5586 /* Nothing in the delay slot, fake it by putting the combined
5587 insn (the copy or add) in the delay slot of a bl. */
5588 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5590 else
5592 /* Something in the delay slot, but we've got a long branch. */
5593 return "add%I1 %1,%0,%0\n\tbl %3,0";
5597 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5598 is used to discourage creating parallel movb/addb insns since a jump
5599 which immediately follows a call can execute in the delay slot of the
5600 call. */
5602 following_call (insn)
5603 rtx insn;
5605 /* Find the previous real insn, skipping NOTEs. */
5606 insn = PREV_INSN (insn);
5607 while (insn && GET_CODE (insn) == NOTE)
5608 insn = PREV_INSN (insn);
5610 /* Check for CALL_INSNs and millicode calls. */
5611 if (insn
5612 && (GET_CODE (insn) == CALL_INSN
5613 || (GET_CODE (insn) == INSN
5614 && GET_CODE (PATTERN (insn)) != SEQUENCE
5615 && GET_CODE (PATTERN (insn)) != USE
5616 && GET_CODE (PATTERN (insn)) != CLOBBER
5617 && get_attr_type (insn) == TYPE_MILLI)))
5618 return 1;
5620 return 0;
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- eg each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot)).  We
   identify jumps in switch tables by marking the SET with DImode.

   INSNS is the head of the insn chain for the current function.  */
pa_reorg (insns)
     rtx insns;
{
  rtx insn;

  remove_useless_addtr_insns (insns, 1);

  pa_combine_instructions (get_insns ());

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0)
    {
      /* Find and explode all ADDR_VEC insns.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, tmp, location;
	  unsigned int length, i;

	  /* Find an ADDR_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || GET_CODE (PATTERN (insn)) != ADDR_VEC)
	    continue;

	  /* If needed, emit marker for the beginning of the branch table.  */
	  if (TARGET_GAS)
	    emit_insn_before (gen_begin_brtab (), insn);

	  pattern = PATTERN (insn);
	  location = PREV_INSN (insn);
	  length = XVECLEN (pattern, 0);

	  /* For each table entry emit: jump to target, BARRIER,
	     CODE_LABEL.  LOCATION tracks the last emitted insn so
	     each triple is appended in order.  */
	  for (i = 0; i < length; i++)
	    {
	      /* Emit the jump itself.  */
	      tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
	      tmp = emit_jump_insn_after (tmp, location);
	      JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
	      LABEL_NUSES (JUMP_LABEL (tmp))++;

	      /* Emit a BARRIER after the jump.  */
	      location = NEXT_INSN (location);
	      emit_barrier_after (location);

	      /* Put a CODE_LABEL before each so jump.c does not optimize
		 the jumps away.  */
	      location = NEXT_INSN (location);
	      tmp = gen_label_rtx ();
	      /* Mark the label used so it is never deleted.  */
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);
	    }

	  /* If needed, emit marker for the end of the branch table.  */
	  if (TARGET_GAS)
	    emit_insn_before (gen_end_brtab (), location);
	  /* Delete the ADDR_VEC.  */
	  delete_insn (insn);
	}
    }
  else if (TARGET_GAS)
    {
      /* Still need an end_brtab insn.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	{
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || GET_CODE (PATTERN (insn)) != ADDR_VEC)
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
	}
    }
}
/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

     * addb can add two registers or a register and a small integer
     and jump to a nearby (+-8k) location.  Normally the jump to the
     nearby location is conditional on the result of the add, but by
     using the "true" condition we can make the jump unconditional.
     Thus addb can perform two independent operations in one insn.

     * movb is similar to addb in that it can perform a reg->reg
     or small immediate->reg copy and jump to a nearby (+-8k location).

     * fmpyadd and fmpysub can perform a FP multiply and either an
     FP add or FP sub if the operands of the multiply and add/sub are
     independent (there are other minor restrictions).  Note both
     the fmpy and fadd/fsub can in theory move to better spots according
     to data dependencies, but for now we require the fmpy stay at a
     fixed location.

     * Many of the memory operations can perform pre & post updates
     of index registers.  GCC's pre/post increment/decrement addressing
     is far too simple to take advantage of all the possibilities.  This
     pass may not be suitable since those insns may not be independent.

     * comclr can compare two ints or an int and a register, nullify
     the following instruction and zero some other register.  This
     is more difficult to use as it's harder to find an insn which
     will generate a comclr than finding something like an unconditional
     branch.  (conditional moves & long branches create comclr insns).

     * Most arithmetic operations can conditionally skip the next
     instruction.  They can be viewed as "perform this operation
     and conditionally jump to this nearby location" (where nearby
     is an insns away).  These are difficult to use due to the
     branch length restrictions.

   INSNS is the head of the insn chain (unused directly; we rescan
   via get_insns).  */
pa_combine_instructions (insns)
     rtx insns;
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  /* Scratch PARALLEL insn reused by pa_can_combine_p for trial
     recognition of each candidate pairing.  */
  new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if (GET_CODE (anchor) != INSN
	  && GET_CODE (anchor) != JUMP_INSN
	  && GET_CODE (anchor) != CALL_INSN
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx floater;

	  /* First scan backwards from the anchor for a floater.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      /* NOTEs and USE/CLOBBER insns are transparent.  */
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		{
		  floater = NULL_RTX;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}
	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  /* An addb-style floater is a PLUS; a movb-style
		     floater is a plain copy.  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		    {
		      floater = NULL_RTX;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
					 gen_rtvec (2, PATTERN (anchor),
						    PATTERN (floater))),
				anchor);
	      /* Turn the anchor into a deleted-insn note rather than
		 unlinking it from the chain.  */
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      rtx temp;
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
						     gen_rtvec (2, PATTERN (anchor),
								PATTERN (floater))),
					    anchor);
	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
/* Return 1 if ANCHOR and FLOATER may be combined into a single insn.

   NEW is a scratch PARALLEL insn whose two slots are overwritten with
   the patterns of ANCHOR and FLOATER for trial recognition.  REVERSED
   is nonzero when FLOATER follows ANCHOR in the insn stream (forward
   scan) and zero when it precedes it (backward scan).  DEST is the
   floater's output; SRC1 and SRC2 are its inputs (they may be the
   same rtx for a plain copy).  */
pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
     rtx new, anchor, floater;
     int reversed;
     rtx dest, src1, src2;
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  /* Force re-recognition; NEW is reused across candidates.  */
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || !constrain_operands (insn_code_number, 1))
    return 0;

  /* Order START/END so they follow the insn stream regardless of
     which direction the caller scanned in.  */
  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}