* pa/pa.c (emit_move_sequence): Don't copy 0.0 (double precision)
[official-gcc.git] / gcc / config / pa / pa.c
blob221166fb664d08a092e43596c193891666a7e241
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdio.h>
23 #include "config.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
40 /* Save the operands last given to a compare for use when we
41 generate a scc or bcc insn. */
43 rtx hppa_compare_op0, hppa_compare_op1;
44 enum cmp_type hppa_branch_type;
46 /* Which cpu we are scheduling for. */
47 enum processor_type pa_cpu;
49 /* String to hold which cpu we are scheduling for. */
50 char *pa_cpu_string;
52 /* Set by the FUNCTION_PROFILER macro. */
53 int hp_profile_labelno;
55 /* Counts for the number of callee-saved general and floating point
56 registers which were saved by the current function's prologue. */
57 static int gr_saved, fr_saved;
59 /* Whether or not the current function uses an out-of-line prologue
60 and epilogue. */
61 static int out_of_line_prologue_epilogue;
63 static rtx find_addr_reg ();
65 /* Keep track of the number of bytes we have output in the CODE subspaces
66 during this compilation so we'll know when to emit inline long-calls. */
68 unsigned int total_code_bytes;
70 /* Variables to handle plabels that we discover are necessary at assembly
71 output time. They are output after the current function. */
73 struct defer_plab
75 rtx internal_label;
76 rtx symbol;
77 } *deferred_plabels = 0;
78 int n_deferred_plabels = 0;
80 void
81 override_options ()
83 /* Default to 7100 scheduling. If the 7100LC scheduling ever
84 gets reasonably tuned, it should be the default since that
85 what most PAs sold now are. */
86 if (pa_cpu_string == NULL
87 || ! strcmp (pa_cpu_string, "7100"))
89 pa_cpu_string = "7100";
90 pa_cpu = PROCESSOR_7100;
92 else if (! strcmp (pa_cpu_string, "700"))
94 pa_cpu_string = "700";
95 pa_cpu = PROCESSOR_700;
97 else if (! strcmp (pa_cpu_string, "7100LC"))
99 pa_cpu_string = "7100LC";
100 pa_cpu = PROCESSOR_7100LC;
102 else
104 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
107 if (flag_pic && TARGET_PORTABLE_RUNTIME)
109 warning ("PIC code generation is not supported in the portable runtime model\n");
112 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
114 warning ("PIC code generation is not compatable with fast indirect calls\n");
117 if (flag_pic && profile_flag)
119 warning ("PIC code generation is not compatable with profiling\n");
122 if (TARGET_SPACE && (flag_pic || profile_flag))
124 warning ("Out of line entry/exit sequences are not compatable\n");
125 warning ("with PIC or profiling\n");
128 if (! TARGET_GAS && write_symbols != NO_DEBUG)
130 warning ("-g is only supported when using GAS on this processor,");
131 warning ("-g option disabled.");
132 write_symbols = NO_DEBUG;
137 /* Return non-zero only if OP is a register of mode MODE,
138 or CONST0_RTX. */
140 reg_or_0_operand (op, mode)
141 rtx op;
142 enum machine_mode mode;
144 return (op == CONST0_RTX (mode) || register_operand (op, mode));
147 /* Return non-zero if OP is suitable for use in a call to a named
148 function.
150 (???) For 2.5 try to eliminate either call_operand_address or
151 function_label_operand, they perform very similar functions. */
153 call_operand_address (op, mode)
154 rtx op;
155 enum machine_mode mode;
157 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
160 /* Return 1 if X contains a symbolic expression. We know these
161 expressions will have one of a few well defined forms, so
162 we need only check those forms. */
164 symbolic_expression_p (x)
165 register rtx x;
168 /* Strip off any HIGH. */
169 if (GET_CODE (x) == HIGH)
170 x = XEXP (x, 0);
172 return (symbolic_operand (x, VOIDmode));
176 symbolic_operand (op, mode)
177 register rtx op;
178 enum machine_mode mode;
180 switch (GET_CODE (op))
182 case SYMBOL_REF:
183 case LABEL_REF:
184 return 1;
185 case CONST:
186 op = XEXP (op, 0);
187 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
188 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
189 && GET_CODE (XEXP (op, 1)) == CONST_INT);
190 default:
191 return 0;
195 /* Return truth value of statement that OP is a symbolic memory
196 operand of mode MODE. */
199 symbolic_memory_operand (op, mode)
200 rtx op;
201 enum machine_mode mode;
203 if (GET_CODE (op) == SUBREG)
204 op = SUBREG_REG (op);
205 if (GET_CODE (op) != MEM)
206 return 0;
207 op = XEXP (op, 0);
208 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
209 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
212 /* Return 1 if the operand is either a register or a memory operand that is
213 not symbolic. */
216 reg_or_nonsymb_mem_operand (op, mode)
217 register rtx op;
218 enum machine_mode mode;
220 if (register_operand (op, mode))
221 return 1;
223 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
224 return 1;
226 return 0;
229 /* Return 1 if the operand is either a register, zero, or a memory operand
230 that is not symbolic. */
233 reg_or_0_or_nonsymb_mem_operand (op, mode)
234 register rtx op;
235 enum machine_mode mode;
237 if (register_operand (op, mode))
238 return 1;
240 if (op == CONST0_RTX (mode))
241 return 1;
243 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
244 return 1;
246 return 0;
249 /* Accept any constant that can be moved in one instructions into a
250 general register. */
252 cint_ok_for_move (intval)
253 HOST_WIDE_INT intval;
255 /* OK if ldo, ldil, or zdepi, can be used. */
256 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
257 || zdepi_cint_p (intval));
260 /* Accept anything that can be moved in one instruction into a general
261 register. */
263 move_operand (op, mode)
264 rtx op;
265 enum machine_mode mode;
267 if (register_operand (op, mode))
268 return 1;
270 if (GET_CODE (op) == CONST_INT)
271 return cint_ok_for_move (INTVAL (op));
273 if (GET_CODE (op) == SUBREG)
274 op = SUBREG_REG (op);
275 if (GET_CODE (op) != MEM)
276 return 0;
278 op = XEXP (op, 0);
279 if (GET_CODE (op) == LO_SUM)
280 return (register_operand (XEXP (op, 0), Pmode)
281 && CONSTANT_P (XEXP (op, 1)));
283 /* Since move_operand is only used for source operands, we can always
284 allow scaled indexing! */
285 if (GET_CODE (op) == PLUS
286 && ((GET_CODE (XEXP (op, 0)) == MULT
287 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
288 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
289 && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
290 && GET_CODE (XEXP (op, 1)) == REG)
291 || (GET_CODE (XEXP (op, 1)) == MULT
292 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
293 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
294 && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
295 && GET_CODE (XEXP (op, 0)) == REG)))
296 return 1;
298 return memory_address_p (mode, op);
301 /* Accept REG and any CONST_INT that can be moved in one instruction into a
302 general register. */
304 reg_or_cint_move_operand (op, mode)
305 rtx op;
306 enum machine_mode mode;
308 if (register_operand (op, mode))
309 return 1;
311 if (GET_CODE (op) == CONST_INT)
312 return cint_ok_for_move (INTVAL (op));
314 return 0;
318 pic_label_operand (op, mode)
319 rtx op;
320 enum machine_mode mode;
322 if (!flag_pic)
323 return 0;
325 switch (GET_CODE (op))
327 case LABEL_REF:
328 return 1;
329 case CONST:
330 op = XEXP (op, 0);
331 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
332 && GET_CODE (XEXP (op, 1)) == CONST_INT);
333 default:
334 return 0;
339 fp_reg_operand (op, mode)
340 rtx op;
341 enum machine_mode mode;
343 return reg_renumber && FP_REG_P (op);
348 /* Return truth value of whether OP can be used as an operand in a
349 three operand arithmetic insn that accepts registers of mode MODE
350 or 14-bit signed integers. */
352 arith_operand (op, mode)
353 rtx op;
354 enum machine_mode mode;
356 return (register_operand (op, mode)
357 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
360 /* Return truth value of whether OP can be used as an operand in a
361 three operand arithmetic insn that accepts registers of mode MODE
362 or 11-bit signed integers. */
364 arith11_operand (op, mode)
365 rtx op;
366 enum machine_mode mode;
368 return (register_operand (op, mode)
369 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
372 /* A constant integer suitable for use in a PRE_MODIFY memory
373 reference. */
375 pre_cint_operand (op, mode)
376 rtx op;
377 enum machine_mode mode;
379 return (GET_CODE (op) == CONST_INT
380 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
383 /* A constant integer suitable for use in a POST_MODIFY memory
384 reference. */
386 post_cint_operand (op, mode)
387 rtx op;
388 enum machine_mode mode;
390 return (GET_CODE (op) == CONST_INT
391 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
395 arith_double_operand (op, mode)
396 rtx op;
397 enum machine_mode mode;
399 return (register_operand (op, mode)
400 || (GET_CODE (op) == CONST_DOUBLE
401 && GET_MODE (op) == mode
402 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
403 && (CONST_DOUBLE_HIGH (op) >= 0
404 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
407 /* Return truth value of whether OP is a integer which fits the
408 range constraining immediate operands in three-address insns, or
409 is an integer register. */
412 ireg_or_int5_operand (op, mode)
413 rtx op;
414 enum machine_mode mode;
416 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
417 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
420 /* Return truth value of whether OP is a integer which fits the
421 range constraining immediate operands in three-address insns. */
424 int5_operand (op, mode)
425 rtx op;
426 enum machine_mode mode;
428 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
432 uint5_operand (op, mode)
433 rtx op;
434 enum machine_mode mode;
436 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
440 int11_operand (op, mode)
441 rtx op;
442 enum machine_mode mode;
444 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
448 uint32_operand (op, mode)
449 rtx op;
450 enum machine_mode mode;
452 #if HOST_BITS_PER_WIDE_INT > 32
453 /* All allowed constants will fit a CONST_INT. */
454 return (GET_CODE (op) == CONST_INT
455 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
456 #else
457 return (GET_CODE (op) == CONST_INT
458 || (GET_CODE (op) == CONST_DOUBLE
459 && CONST_DOUBLE_HIGH (op) == 0));
460 #endif
464 arith5_operand (op, mode)
465 rtx op;
466 enum machine_mode mode;
468 return register_operand (op, mode) || int5_operand (op, mode);
471 /* True iff zdepi can be used to generate this CONST_INT. */
473 zdepi_cint_p (x)
474 unsigned HOST_WIDE_INT x;
476 unsigned HOST_WIDE_INT lsb_mask, t;
478 /* This might not be obvious, but it's at least fast.
479 This function is critical; we don't have the time loops would take. */
480 lsb_mask = x & -x;
481 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
482 /* Return true iff t is a power of two. */
483 return ((t & (t - 1)) == 0);
486 /* True iff depi or extru can be used to compute (reg & mask).
487 Accept bit pattern like these:
488 0....01....1
489 1....10....0
490 1..10..01..1 */
492 and_mask_p (mask)
493 unsigned HOST_WIDE_INT mask;
495 mask = ~mask;
496 mask += mask & -mask;
497 return (mask & (mask - 1)) == 0;
500 /* True iff depi or extru can be used to compute (reg & OP). */
502 and_operand (op, mode)
503 rtx op;
504 enum machine_mode mode;
506 return (register_operand (op, mode)
507 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
510 /* True iff depi can be used to compute (reg | MASK). */
512 ior_mask_p (mask)
513 unsigned HOST_WIDE_INT mask;
515 mask += mask & -mask;
516 return (mask & (mask - 1)) == 0;
519 /* True iff depi can be used to compute (reg | OP). */
521 ior_operand (op, mode)
522 rtx op;
523 enum machine_mode mode;
525 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
529 lhs_lshift_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
536 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
537 Such values can be the left hand side x in (x << r), using the zvdepi
538 instruction. */
540 lhs_lshift_cint_operand (op, mode)
541 rtx op;
542 enum machine_mode mode;
544 unsigned HOST_WIDE_INT x;
545 if (GET_CODE (op) != CONST_INT)
546 return 0;
547 x = INTVAL (op) >> 4;
548 return (x & (x + 1)) == 0;
552 arith32_operand (op, mode)
553 rtx op;
554 enum machine_mode mode;
556 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
560 pc_or_label_operand (op, mode)
561 rtx op;
562 enum machine_mode mode;
564 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
567 /* Legitimize PIC addresses. If the address is already
568 position-independent, we return ORIG. Newly generated
569 position-independent addresses go to REG. If we need more
570 than one register, we lose. */
573 legitimize_pic_address (orig, mode, reg)
574 rtx orig, reg;
575 enum machine_mode mode;
577 rtx pic_ref = orig;
579 /* Labels need special handling. */
580 if (pic_label_operand (orig))
582 emit_insn (gen_pic_load_label (reg, orig));
583 current_function_uses_pic_offset_table = 1;
584 return reg;
586 if (GET_CODE (orig) == SYMBOL_REF)
588 if (reg == 0)
589 abort ();
591 if (flag_pic == 2)
593 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
594 pic_ref = gen_rtx (MEM, Pmode,
595 gen_rtx (LO_SUM, Pmode, reg,
596 gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
598 else
599 pic_ref = gen_rtx (MEM, Pmode,
600 gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
601 current_function_uses_pic_offset_table = 1;
602 RTX_UNCHANGING_P (pic_ref) = 1;
603 emit_move_insn (reg, pic_ref);
604 return reg;
606 else if (GET_CODE (orig) == CONST)
608 rtx base;
610 if (GET_CODE (XEXP (orig, 0)) == PLUS
611 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
612 return orig;
614 if (reg == 0)
615 abort ();
617 if (GET_CODE (XEXP (orig, 0)) == PLUS)
619 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
620 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
621 base == reg ? 0 : reg);
623 else abort ();
624 if (GET_CODE (orig) == CONST_INT)
626 if (INT_14_BITS (orig))
627 return plus_constant_for_output (base, INTVAL (orig));
628 orig = force_reg (Pmode, orig);
630 pic_ref = gen_rtx (PLUS, Pmode, base, orig);
631 /* Likewise, should we set special REG_NOTEs here? */
633 return pic_ref;
636 /* Try machine-dependent ways of modifying an illegitimate address
637 to be legitimate. If we find one, return the new, valid address.
638 This macro is used in only one place: `memory_address' in explow.c.
640 OLDX is the address as it was before break_out_memory_refs was called.
641 In some cases it is useful to look at this to decide what needs to be done.
643 MODE and WIN are passed so that this macro can use
644 GO_IF_LEGITIMATE_ADDRESS.
646 It is always safe for this macro to do nothing. It exists to recognize
647 opportunities to optimize the output.
649 For the PA, transform:
651 memory(X + <large int>)
653 into:
655 if (<large int> & mask) >= 16
656 Y = (<large int> & ~mask) + mask + 1 Round up.
657 else
658 Y = (<large int> & ~mask) Round down.
659 Z = X + Y
660 memory (Z + (<large int> - Y));
662 This is for CSE to find several similar references, and only use one Z.
664 X can either be a SYMBOL_REF or REG, but because combine can not
665 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
666 D will not fit in 14 bits.
668 MODE_FLOAT references allow displacements which fit in 5 bits, so use
669 0x1f as the mask.
671 MODE_INT references allow displacements which fit in 14 bits, so use
672 0x3fff as the mask.
674 This relies on the fact that most mode MODE_FLOAT references will use FP
675 registers and most mode MODE_INT references will use integer registers.
676 (In the rare case of an FP register used in an integer MODE, we depend
677 on secondary reloads to clean things up.)
680 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
681 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
682 addressing modes to be used).
684 Put X and Z into registers. Then put the entire expression into
685 a register. */
688 hppa_legitimize_address (x, oldx, mode)
689 rtx x, oldx;
690 enum machine_mode mode;
692 rtx orig = x;
694 if (flag_pic)
695 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
697 /* Strip off CONST. */
698 if (GET_CODE (x) == CONST)
699 x = XEXP (x, 0);
701 /* Special case. Get the SYMBOL_REF into a register and use indexing.
702 That should always be safe. */
703 if (GET_CODE (x) == PLUS
704 && GET_CODE (XEXP (x, 0)) == REG
705 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
707 rtx reg = force_reg (SImode, XEXP (x, 1));
708 return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
711 /* Note we must reject symbols which represent function addresses
712 since the assembler/linker can't handle arithmetic on plabels. */
713 if (GET_CODE (x) == PLUS
714 && GET_CODE (XEXP (x, 1)) == CONST_INT
715 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
716 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
717 || GET_CODE (XEXP (x, 0)) == REG))
719 rtx int_part, ptr_reg;
720 int newoffset;
721 int offset = INTVAL (XEXP (x, 1));
722 int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
724 /* Choose which way to round the offset. Round up if we
725 are >= halfway to the next boundary. */
726 if ((offset & mask) >= ((mask + 1) / 2))
727 newoffset = (offset & ~ mask) + mask + 1;
728 else
729 newoffset = (offset & ~ mask);
731 /* If the newoffset will not fit in 14 bits (ldo), then
732 handling this would take 4 or 5 instructions (2 to load
733 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
734 add the new offset and the SYMBOL_REF.) Combine can
735 not handle 4->2 or 5->2 combinations, so do not create
736 them. */
737 if (! VAL_14_BITS_P (newoffset)
738 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
740 rtx const_part = gen_rtx (CONST, VOIDmode,
741 gen_rtx (PLUS, Pmode,
742 XEXP (x, 0),
743 GEN_INT (newoffset)));
744 rtx tmp_reg
745 = force_reg (Pmode,
746 gen_rtx (HIGH, Pmode, const_part));
747 ptr_reg
748 = force_reg (Pmode,
749 gen_rtx (LO_SUM, Pmode,
750 tmp_reg, const_part));
752 else
754 if (! VAL_14_BITS_P (newoffset))
755 int_part = force_reg (Pmode, GEN_INT (newoffset));
756 else
757 int_part = GEN_INT (newoffset);
759 ptr_reg = force_reg (Pmode,
760 gen_rtx (PLUS, Pmode,
761 force_reg (Pmode, XEXP (x, 0)),
762 int_part));
764 return plus_constant (ptr_reg, offset - newoffset);
767 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
769 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
771 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
772 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
773 || GET_CODE (XEXP (x, 1)) == SUBREG)
774 && GET_CODE (XEXP (x, 1)) != CONST)
776 int val = INTVAL (XEXP (XEXP (x, 0), 1));
777 rtx reg1, reg2;
779 reg1 = XEXP (x, 1);
780 if (GET_CODE (reg1) != REG)
781 reg1 = force_reg (Pmode, force_operand (reg1, 0));
783 reg2 = XEXP (XEXP (x, 0), 0);
784 if (GET_CODE (reg2) != REG)
785 reg2 = force_reg (Pmode, force_operand (reg2, 0));
787 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
788 gen_rtx (MULT, Pmode,
789 reg2, GEN_INT (val)),
790 reg1));
793 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
795 Only do so for floating point modes since this is more speculative
796 and we lose if it's an integer store. */
797 if (GET_CODE (x) == PLUS
798 && GET_CODE (XEXP (x, 0)) == PLUS
799 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
800 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
801 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
802 && (mode == SFmode || mode == DFmode))
805 /* First, try and figure out what to use as a base register. */
806 rtx reg1, reg2, base, idx, orig_base;
808 reg1 = XEXP (XEXP (x, 0), 1);
809 reg2 = XEXP (x, 1);
810 base = NULL_RTX;
811 idx = NULL_RTX;
813 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
814 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
815 know it's a base register below. */
816 if (GET_CODE (reg1) != REG)
817 reg1 = force_reg (Pmode, force_operand (reg1, 0));
819 if (GET_CODE (reg2) != REG)
820 reg2 = force_reg (Pmode, force_operand (reg2, 0));
822 /* Figure out what the base and index are. */
824 if (GET_CODE (reg1) == REG
825 && REGNO_POINTER_FLAG (REGNO (reg1)))
827 base = reg1;
828 orig_base = XEXP (XEXP (x, 0), 1);
829 idx = gen_rtx (PLUS, Pmode,
830 gen_rtx (MULT, Pmode,
831 XEXP (XEXP (XEXP (x, 0), 0), 0),
832 XEXP (XEXP (XEXP (x, 0), 0), 1)),
833 XEXP (x, 1));
835 else if (GET_CODE (reg2) == REG
836 && REGNO_POINTER_FLAG (REGNO (reg2)))
838 base = reg2;
839 orig_base = XEXP (x, 1);
840 idx = XEXP (x, 0);
843 if (base == 0)
844 return orig;
846 /* If the index adds a large constant, try to scale the
847 constant so that it can be loaded with only one insn. */
848 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
849 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
850 / INTVAL (XEXP (XEXP (idx, 0), 1)))
851 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
853 /* Divide the CONST_INT by the scale factor, then add it to A. */
854 int val = INTVAL (XEXP (idx, 1));
856 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
857 reg1 = XEXP (XEXP (idx, 0), 0);
858 if (GET_CODE (reg1) != REG)
859 reg1 = force_reg (Pmode, force_operand (reg1, 0));
861 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));
863 /* We can now generate a simple scaled indexed address. */
864 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
865 gen_rtx (MULT, Pmode, reg1,
866 XEXP (XEXP (idx, 0), 1)),
867 base));
870 /* If B + C is still a valid base register, then add them. */
871 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
872 && INTVAL (XEXP (idx, 1)) <= 4096
873 && INTVAL (XEXP (idx, 1)) >= -4096)
875 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
876 rtx reg1, reg2;
878 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));
880 reg2 = XEXP (XEXP (idx, 0), 0);
881 if (GET_CODE (reg2) != CONST_INT)
882 reg2 = force_reg (Pmode, force_operand (reg2, 0));
884 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
885 gen_rtx (MULT, Pmode,
886 reg2, GEN_INT (val)),
887 reg1));
890 /* Get the index into a register, then add the base + index and
891 return a register holding the result. */
893 /* First get A into a register. */
894 reg1 = XEXP (XEXP (idx, 0), 0);
895 if (GET_CODE (reg1) != REG)
896 reg1 = force_reg (Pmode, force_operand (reg1, 0));
898 /* And get B into a register. */
899 reg2 = XEXP (idx, 1);
900 if (GET_CODE (reg2) != REG)
901 reg2 = force_reg (Pmode, force_operand (reg2, 0));
903 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
904 gen_rtx (MULT, Pmode, reg1,
905 XEXP (XEXP (idx, 0), 1)),
906 reg2));
908 /* Add the result to our base register and return. */
909 return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
913 /* Uh-oh. We might have an address for x[n-100000]. This needs
914 special handling to avoid creating an indexed memory address
915 with x-100000 as the base.
917 If the constant part is small enough, then it's still safe because
918 there is a guard page at the beginning and end of the data segment.
920 Scaled references are common enough that we want to try and rearrange the
921 terms so that we can use indexing for these addresses too. Only
922 do the optimization for floatint point modes. */
924 if (GET_CODE (x) == PLUS
925 && symbolic_expression_p (XEXP (x, 1)))
927 /* Ugly. We modify things here so that the address offset specified
928 by the index expression is computed first, then added to x to form
929 the entire address. */
931 rtx regx1, regx2, regy1, regy2, y;
933 /* Strip off any CONST. */
934 y = XEXP (x, 1);
935 if (GET_CODE (y) == CONST)
936 y = XEXP (y, 0);
938 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
940 /* See if this looks like
941 (plus (mult (reg) (shadd_const))
942 (const (plus (symbol_ref) (const_int))))
944 Where const_int is small. In that case the const
945 expression is a valid pointer for indexing.
947 If const_int is big, but can be divided evenly by shadd_const
948 and added to (reg). This allows more scaled indexed addresses. */
949 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
950 && GET_CODE (XEXP (x, 0)) == MULT
951 && GET_CODE (XEXP (y, 1)) == CONST_INT
952 && INTVAL (XEXP (y, 1)) >= -4096
953 && INTVAL (XEXP (y, 1)) <= 4095
954 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
955 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
957 int val = INTVAL (XEXP (XEXP (x, 0), 1));
958 rtx reg1, reg2;
960 reg1 = XEXP (x, 1);
961 if (GET_CODE (reg1) != REG)
962 reg1 = force_reg (Pmode, force_operand (reg1, 0));
964 reg2 = XEXP (XEXP (x, 0), 0);
965 if (GET_CODE (reg2) != REG)
966 reg2 = force_reg (Pmode, force_operand (reg2, 0));
968 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
969 gen_rtx (MULT, Pmode,
970 reg2, GEN_INT (val)),
971 reg1));
973 else if ((mode == DFmode || mode == SFmode)
974 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
975 && GET_CODE (XEXP (x, 0)) == MULT
976 && GET_CODE (XEXP (y, 1)) == CONST_INT
977 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
978 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
979 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
981 regx1
982 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
983 / INTVAL (XEXP (XEXP (x, 0), 1))));
984 regx2 = XEXP (XEXP (x, 0), 0);
985 if (GET_CODE (regx2) != REG)
986 regx2 = force_reg (Pmode, force_operand (regx2, 0));
987 regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
988 regx2, regx1));
989 return force_reg (Pmode,
990 gen_rtx (PLUS, Pmode,
991 gen_rtx (MULT, Pmode, regx2,
992 XEXP (XEXP (x, 0), 1)),
993 force_reg (Pmode, XEXP (y, 0))));
995 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
996 && INTVAL (XEXP (y, 1)) >= -4096
997 && INTVAL (XEXP (y, 1)) <= 4095)
999 /* This is safe because of the guard page at the
1000 beginning and end of the data space. Just
1001 return the original address. */
1002 return orig;
1004 else
1006 /* Doesn't look like one we can optimize. */
1007 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1008 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1009 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1010 regx1 = force_reg (Pmode,
1011 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
1012 return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
1017 return orig;
1020 /* For the HPPA, REG and REG+CONST is cost 0
1021 and addresses involving symbolic constants are cost 2.
1023 PIC addresses are very expensive.
1025 It is no coincidence that this has the same structure
1026 as GO_IF_LEGITIMATE_ADDRESS. */
1028 hppa_address_cost (X)
1029 rtx X;
1031 if (GET_CODE (X) == PLUS)
1032 return 1;
1033 else if (GET_CODE (X) == LO_SUM)
1034 return 1;
1035 else if (GET_CODE (X) == HIGH)
1036 return 2;
1037 return 4;
1040 /* Emit insns to move operands[1] into operands[0].
1042 Return 1 if we have written out everything that needs to be done to
1043 do the move. Otherwise, return 0 and the caller will emit the move
1044 normally. */
1047 emit_move_sequence (operands, mode, scratch_reg)
1048 rtx *operands;
1049 enum machine_mode mode;
1050 rtx scratch_reg;
1052 register rtx operand0 = operands[0];
1053 register rtx operand1 = operands[1];
1055 if (reload_in_progress && GET_CODE (operand0) == REG
1056 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1057 operand0 = reg_equiv_mem[REGNO (operand0)];
1058 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1059 && GET_CODE (SUBREG_REG (operand0)) == REG
1060 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1062 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1063 operand0 = alter_subreg (operand0);
1066 if (reload_in_progress && GET_CODE (operand1) == REG
1067 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1068 operand1 = reg_equiv_mem[REGNO (operand1)];
1069 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1070 && GET_CODE (SUBREG_REG (operand1)) == REG
1071 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1073 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1074 operand1 = alter_subreg (operand1);
1077 /* Handle secondary reloads for loads/stores of FP registers from
1078 REG+D addresses where D does not fit in 5 bits, including
1079 (subreg (mem (addr))) cases. */
1080 if (fp_reg_operand (operand0, mode)
1081 && ((GET_CODE (operand1) == MEM
1082 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1083 || ((GET_CODE (operand1) == SUBREG
1084 && GET_CODE (XEXP (operand1, 0)) == MEM
1085 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1086 && scratch_reg)
1088 if (GET_CODE (operand1) == SUBREG)
1089 operand1 = XEXP (operand1, 0);
1091 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1093 /* D might not fit in 14 bits either; for such cases load D into
1094 scratch reg. */
1095 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1097 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1098 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1099 SImode,
1100 XEXP (XEXP (operand1, 0), 0),
1101 scratch_reg));
1103 else
1104 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1105 emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
1106 scratch_reg)));
1107 return 1;
1109 else if (fp_reg_operand (operand1, mode)
1110 && ((GET_CODE (operand0) == MEM
1111 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1112 || ((GET_CODE (operand0) == SUBREG)
1113 && GET_CODE (XEXP (operand0, 0)) == MEM
1114 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1115 && scratch_reg)
1117 if (GET_CODE (operand0) == SUBREG)
1118 operand0 = XEXP (operand0, 0);
1120 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1121 /* D might not fit in 14 bits either; for such cases load D into
1122 scratch reg. */
1123 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1125 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1126 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1127 SImode,
1128 XEXP (XEXP (operand0, 0), 0),
1129 scratch_reg));
1131 else
1132 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1133 emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
1134 operand1));
1135 return 1;
1137 /* Handle secondary reloads for loads of FP registers from constant
1138 expressions by forcing the constant into memory.
1140 use scratch_reg to hold the address of the memory location.
1142 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1143 NO_REGS when presented with a const_int and an register class
1144 containing only FP registers. Doing so unfortunately creates
1145 more problems than it solves. Fix this for 2.5. */
1146 else if (fp_reg_operand (operand0, mode)
1147 && CONSTANT_P (operand1)
1148 && scratch_reg)
1150 rtx xoperands[2];
1152 /* Force the constant into memory and put the address of the
1153 memory location into scratch_reg. */
1154 xoperands[0] = scratch_reg;
1155 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1156 emit_move_sequence (xoperands, Pmode, 0);
1158 /* Now load the destination register. */
1159 emit_insn (gen_rtx (SET, mode, operand0,
1160 gen_rtx (MEM, mode, scratch_reg)));
1161 return 1;
1163 /* Handle secondary reloads for SAR. These occur when trying to load
1164 the SAR from memory a FP register, or with a constant. */
1165 else if (GET_CODE (operand0) == REG
1166 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1167 && (GET_CODE (operand1) == MEM
1168 || GET_CODE (operand1) == CONST_INT
1169 || (GET_CODE (operand1) == REG
1170 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1171 && scratch_reg)
1173 emit_move_insn (scratch_reg, operand1);
1174 emit_move_insn (operand0, scratch_reg);
1175 return 1;
1177 /* Handle most common case: storing into a register. */
1178 else if (register_operand (operand0, mode))
1180 if (register_operand (operand1, mode)
1181 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1182 || (operand1 == CONST0_RTX (mode))
1183 || (GET_CODE (operand1) == HIGH
1184 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1185 /* Only `general_operands' can come here, so MEM is ok. */
1186 || GET_CODE (operand1) == MEM)
1188 /* Run this case quickly. */
1189 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1190 return 1;
1193 else if (GET_CODE (operand0) == MEM)
1195 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1196 && !(reload_in_progress || reload_completed))
1198 rtx temp = gen_reg_rtx (DFmode);
1200 emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
1201 emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
1202 return 1;
1204 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1206 /* Run this case quickly. */
1207 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1208 return 1;
1210 if (! (reload_in_progress || reload_completed))
1212 operands[0] = validize_mem (operand0);
1213 operands[1] = operand1 = force_reg (mode, operand1);
1217 /* Simplify the source if we need to. */
1218 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1219 || (GET_CODE (operand1) == HIGH
1220 && symbolic_operand (XEXP (operand1, 0), mode)))
1222 int ishighonly = 0;
1224 if (GET_CODE (operand1) == HIGH)
1226 ishighonly = 1;
1227 operand1 = XEXP (operand1, 0);
1229 if (symbolic_operand (operand1, mode))
1231 rtx const_part = NULL;
1233 /* Argh. The assembler and linker can't handle arithmetic
1234 involving plabels. We'll have to split up operand1 here
1235 if it's a function label involved in an arithmetic
1236 expression. Luckily, this only happens with addition
1237 of constants to plabels, which simplifies the test.
1239 We add the constant back in just before returning to
1240 our caller. */
1241 if (GET_CODE (operand1) == CONST
1242 && GET_CODE (XEXP (operand1, 0)) == PLUS
1243 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1245 /* Save away the constant part of the expression. */
1246 const_part = XEXP (XEXP (operand1, 0), 1);
1247 if (GET_CODE (const_part) != CONST_INT)
1248 abort ();
1250 /* Set operand1 to just the SYMBOL_REF. */
1251 operand1 = XEXP (XEXP (operand1, 0), 0);
1254 if (flag_pic)
1256 rtx temp;
1258 if (reload_in_progress || reload_completed)
1259 temp = scratch_reg ? scratch_reg : operand0;
1260 else
1261 temp = gen_reg_rtx (Pmode);
1263 /* If operand1 is a function label, then we've got to
1264 force it to memory, then load op0 from memory. */
1265 if (function_label_operand (operand1, mode))
1267 operands[1] = force_const_mem (mode, operand1);
1268 emit_move_sequence (operands, mode, temp);
1270 /* Likewise for (const (plus (symbol) (const_int))) when
1271 generating pic code during or after reload and const_int
1272 will not fit in 14 bits. */
1273 else if (GET_CODE (operand1) == CONST
1274 && GET_CODE (XEXP (operand1, 0)) == PLUS
1275 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1276 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1277 && (reload_completed || reload_in_progress)
1278 && flag_pic)
1280 operands[1] = force_const_mem (mode, operand1);
1281 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1282 mode, temp);
1283 emit_move_sequence (operands, mode, temp);
1285 else
1287 operands[1] = legitimize_pic_address (operand1, mode, temp);
1288 emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
1291 /* On the HPPA, references to data space are supposed to use dp,
1292 register 27, but showing it in the RTL inhibits various cse
1293 and loop optimizations. */
1294 else
1296 rtx temp, set;
1298 if (reload_in_progress || reload_completed)
1299 temp = scratch_reg ? scratch_reg : operand0;
1300 else
1301 temp = gen_reg_rtx (mode);
1303 /* Loading a SYMBOL_REF into a register makes that register
1304 safe to be used as the base in an indexed address.
1306 Don't mark hard registers though. That loses. */
1307 if (GET_CODE (operand0) == REG
1308 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1309 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1310 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1311 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1312 if (ishighonly)
1313 set = gen_rtx (SET, mode, operand0, temp);
1314 else
1315 set = gen_rtx (SET, VOIDmode,
1316 operand0,
1317 gen_rtx (LO_SUM, mode, temp, operand1));
1319 emit_insn (gen_rtx (SET, VOIDmode,
1320 temp,
1321 gen_rtx (HIGH, mode, operand1)));
1322 emit_insn (set);
1326 /* Add back in the constant part if needed. */
1327 if (const_part != NULL)
1328 expand_inc (operand0, const_part);
1329 return 1;
1331 else if (GET_CODE (operand1) != CONST_INT
1332 || ! cint_ok_for_move (INTVAL (operand1)))
1334 rtx temp;
1336 if (reload_in_progress || reload_completed)
1337 temp = operand0;
1338 else
1339 temp = gen_reg_rtx (mode);
1341 emit_insn (gen_rtx (SET, VOIDmode, temp,
1342 gen_rtx (HIGH, mode, operand1)));
1343 operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1346 /* Now have insn-emit do whatever it normally does. */
1347 return 0;
1350 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1351 it will need a link/runtime reloc). */
1354 reloc_needed (exp)
1355 tree exp;
1357 int reloc = 0;
1359 switch (TREE_CODE (exp))
1361 case ADDR_EXPR:
1362 return 1;
1364 case PLUS_EXPR:
1365 case MINUS_EXPR:
1366 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1367 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1368 break;
1370 case NOP_EXPR:
1371 case CONVERT_EXPR:
1372 case NON_LVALUE_EXPR:
1373 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1374 break;
1376 case CONSTRUCTOR:
1378 register tree link;
1379 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1380 if (TREE_VALUE (link) != 0)
1381 reloc |= reloc_needed (TREE_VALUE (link));
1383 break;
1385 case ERROR_MARK:
1386 break;
1388 return reloc;
1391 /* Does operand (which is a symbolic_operand) live in text space? If
1392 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1395 read_only_operand (operand)
1396 rtx operand;
1398 if (GET_CODE (operand) == CONST)
1399 operand = XEXP (XEXP (operand, 0), 0);
1400 if (flag_pic)
1402 if (GET_CODE (operand) == SYMBOL_REF)
1403 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1405 else
1407 if (GET_CODE (operand) == SYMBOL_REF)
1408 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1410 return 1;
1414 /* Return the best assembler insn template
1415 for moving operands[1] into operands[0] as a fullword. */
1416 char *
1417 singlemove_string (operands)
1418 rtx *operands;
1420 HOST_WIDE_INT intval;
1422 if (GET_CODE (operands[0]) == MEM)
1423 return "stw %r1,%0";
1424 if (GET_CODE (operands[1]) == MEM)
1425 return "ldw %1,%0";
1426 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1428 long i;
1429 REAL_VALUE_TYPE d;
1431 if (GET_MODE (operands[1]) != SFmode)
1432 abort ();
1434 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1435 bit pattern. */
1436 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1437 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1439 operands[1] = GEN_INT (i);
1440 /* Fall through to CONST_INT case. */
1442 if (GET_CODE (operands[1]) == CONST_INT)
1444 intval = INTVAL (operands[1]);
1446 if (VAL_14_BITS_P (intval))
1447 return "ldi %1,%0";
1448 else if ((intval & 0x7ff) == 0)
1449 return "ldil L'%1,%0";
1450 else if (zdepi_cint_p (intval))
1451 return "zdepi %Z1,%0";
1452 else
1453 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1455 return "copy %1,%0";
/* Compute position (in OP[1]) and width (in OP[2]) useful for copying
   IMM to a register using the zdepi instructions.  Store the
   immediate value to insert in OP[0].

   IMM is first shifted right until its least significant set bit
   reaches bit 0 (at most 31 shifts); OP[1] is 31 minus that shift
   count.  If bit 4 of the shifted value is clear, the shifted value
   itself is the immediate and the width is 4, or 32 - lsb when fewer
   than 4 bits remain above the field.  If bit 4 is set, the width is
   the index of the first clear bit at or above bit 5 (32 if there is
   none) and the immediate is the low four bits sign-extended as a
   5-bit value.

   NOTE(review): presumably callers only pass values accepted by
   zdepi_cint_p; nothing here verifies that.  */
void
compute_zdepi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  The probe bit is
         built in the unsigned wide type: LEN reaches 31 here, and
         shifting a plain int 1 that far is undefined (and would
         sign-extend when HOST_WIDE_INT is wider than int).  */
      for (len = 5; len < 32; len++)
        {
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
            break;
        }

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
1498 /* Output assembler code to perform a doubleword move insn
1499 with operands OPERANDS. */
1501 char *
1502 output_move_double (operands)
1503 rtx *operands;
1505 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1506 rtx latehalf[2];
1507 rtx addreg0 = 0, addreg1 = 0;
1509 /* First classify both operands. */
1511 if (REG_P (operands[0]))
1512 optype0 = REGOP;
1513 else if (offsettable_memref_p (operands[0]))
1514 optype0 = OFFSOP;
1515 else if (GET_CODE (operands[0]) == MEM)
1516 optype0 = MEMOP;
1517 else
1518 optype0 = RNDOP;
1520 if (REG_P (operands[1]))
1521 optype1 = REGOP;
1522 else if (CONSTANT_P (operands[1]))
1523 optype1 = CNSTOP;
1524 else if (offsettable_memref_p (operands[1]))
1525 optype1 = OFFSOP;
1526 else if (GET_CODE (operands[1]) == MEM)
1527 optype1 = MEMOP;
1528 else
1529 optype1 = RNDOP;
1531 /* Check for the cases that the operand constraints are not
1532 supposed to allow to happen. Abort if we get one,
1533 because generating code for these cases is painful. */
1535 if (optype0 != REGOP && optype1 != REGOP)
1536 abort ();
1538 /* Handle auto decrementing and incrementing loads and stores
1539 specifically, since the structure of the function doesn't work
1540 for them without major modification. Do it better when we learn
1541 this port about the general inc/dec addressing of PA.
1542 (This was written by tege. Chide him if it doesn't work.) */
1544 if (optype0 == MEMOP)
1546 /* We have to output the address syntax ourselves, since print_operand
1547 doesn't deal with the addresses we want to use. Fix this later. */
1549 rtx addr = XEXP (operands[0], 0);
1550 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1552 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1554 operands[0] = XEXP (addr, 0);
1555 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1556 abort ();
1558 if (!reg_overlap_mentioned_p (high_reg, addr))
1560 /* No overlap between high target register and address
1561 register. (We do this in a non-obvious way to
1562 save a register file writeback) */
1563 if (GET_CODE (addr) == POST_INC)
1564 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1565 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1567 else
1568 abort();
1570 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1572 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1574 operands[0] = XEXP (addr, 0);
1575 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1576 abort ();
1578 if (!reg_overlap_mentioned_p (high_reg, addr))
1580 /* No overlap between high target register and address
1581 register. (We do this in a non-obvious way to
1582 save a register file writeback) */
1583 if (GET_CODE (addr) == PRE_INC)
1584 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1585 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1587 else
1588 abort();
1591 if (optype1 == MEMOP)
1593 /* We have to output the address syntax ourselves, since print_operand
1594 doesn't deal with the addresses we want to use. Fix this later. */
1596 rtx addr = XEXP (operands[1], 0);
1597 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1599 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1601 operands[1] = XEXP (addr, 0);
1602 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1603 abort ();
1605 if (!reg_overlap_mentioned_p (high_reg, addr))
1607 /* No overlap between high target register and address
1608 register. (We do this in a non-obvious way to
1609 save a register file writeback) */
1610 if (GET_CODE (addr) == POST_INC)
1611 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1612 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1614 else
1616 /* This is an undefined situation. We should load into the
1617 address register *and* update that register. Probably
1618 we don't need to handle this at all. */
1619 if (GET_CODE (addr) == POST_INC)
1620 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1621 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1624 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1626 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1628 operands[1] = XEXP (addr, 0);
1629 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1630 abort ();
1632 if (!reg_overlap_mentioned_p (high_reg, addr))
1634 /* No overlap between high target register and address
1635 register. (We do this in a non-obvious way to
1636 save a register file writeback) */
1637 if (GET_CODE (addr) == PRE_INC)
1638 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1639 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1641 else
1643 /* This is an undefined situation. We should load into the
1644 address register *and* update that register. Probably
1645 we don't need to handle this at all. */
1646 if (GET_CODE (addr) == PRE_INC)
1647 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1648 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1653 /* If an operand is an unoffsettable memory ref, find a register
1654 we can increment temporarily to make it refer to the second word. */
1656 if (optype0 == MEMOP)
1657 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1659 if (optype1 == MEMOP)
1660 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1662 /* Ok, we can do one word at a time.
1663 Normally we do the low-numbered word first.
1665 In either case, set up in LATEHALF the operands to use
1666 for the high-numbered word and in some cases alter the
1667 operands in OPERANDS to be suitable for the low-numbered word. */
1669 if (optype0 == REGOP)
1670 latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1671 else if (optype0 == OFFSOP)
1672 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1673 else
1674 latehalf[0] = operands[0];
1676 if (optype1 == REGOP)
1677 latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
1678 else if (optype1 == OFFSOP)
1679 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1680 else if (optype1 == CNSTOP)
1681 split_double (operands[1], &operands[1], &latehalf[1]);
1682 else
1683 latehalf[1] = operands[1];
1685 /* If the first move would clobber the source of the second one,
1686 do them in the other order.
1688 This can happen in two cases:
1690 mem -> register where the first half of the destination register
1691 is the same register used in the memory's address. Reload
1692 can create such insns.
1694 mem in this case will be either register indirect or register
1695 indirect plus a valid offset.
1697 register -> register move where REGNO(dst) == REGNO(src + 1)
1698 someone (Tim/Tege?) claimed this can happen for parameter loads.
1700 Handle mem -> register case first. */
1701 if (optype0 == REGOP
1702 && (optype1 == MEMOP || optype1 == OFFSOP)
1703 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1704 operands[1], 0))
1706 /* Do the late half first. */
1707 if (addreg1)
1708 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1709 output_asm_insn (singlemove_string (latehalf), latehalf);
1711 /* Then clobber. */
1712 if (addreg1)
1713 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1714 return singlemove_string (operands);
1717 /* Now handle register -> register case. */
1718 if (optype0 == REGOP && optype1 == REGOP
1719 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1721 output_asm_insn (singlemove_string (latehalf), latehalf);
1722 return singlemove_string (operands);
1725 /* Normal case: do the two words, low-numbered first. */
1727 output_asm_insn (singlemove_string (operands), operands);
1729 /* Make any unoffsettable addresses point at high-numbered word. */
1730 if (addreg0)
1731 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1732 if (addreg1)
1733 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1735 /* Do that word. */
1736 output_asm_insn (singlemove_string (latehalf), latehalf);
1738 /* Undo the adds we just did. */
1739 if (addreg0)
1740 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1741 if (addreg1)
1742 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1744 return "";
1747 char *
1748 output_fp_move_double (operands)
1749 rtx *operands;
1751 if (FP_REG_P (operands[0]))
1753 if (FP_REG_P (operands[1])
1754 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1755 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1756 else
1757 output_asm_insn ("fldd%F1 %1,%0", operands);
1759 else if (FP_REG_P (operands[1]))
1761 output_asm_insn ("fstd%F0 %1,%0", operands);
1763 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1765 if (GET_CODE (operands[0]) == REG)
1767 rtx xoperands[2];
1768 xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1769 xoperands[0] = operands[0];
1770 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1772 /* This is a pain. You have to be prepared to deal with an
1773 arbitrary address here including pre/post increment/decrement.
1775 so avoid this in the MD. */
1776 else
1777 abort ();
1779 else abort ();
1780 return "";
1783 /* Return a REG that occurs in ADDR with coefficient 1.
1784 ADDR can be effectively incremented by incrementing REG. */
1786 static rtx
1787 find_addr_reg (addr)
1788 rtx addr;
1790 while (GET_CODE (addr) == PLUS)
1792 if (GET_CODE (XEXP (addr, 0)) == REG)
1793 addr = XEXP (addr, 0);
1794 else if (GET_CODE (XEXP (addr, 1)) == REG)
1795 addr = XEXP (addr, 1);
1796 else if (CONSTANT_P (XEXP (addr, 0)))
1797 addr = XEXP (addr, 1);
1798 else if (CONSTANT_P (XEXP (addr, 1)))
1799 addr = XEXP (addr, 0);
1800 else
1801 abort ();
1803 if (GET_CODE (addr) == REG)
1804 return addr;
1805 abort ();
1808 /* Emit code to perform a block move.
1810 OPERANDS[0] is the destination pointer as a REG, clobbered.
1811 OPERANDS[1] is the source pointer as a REG, clobbered.
1812 OPERANDS[2] is a register for temporary storage.
1813 OPERANDS[4] is the size as a CONST_INT
1814 OPERANDS[3] is a register for temporary storage.
1815 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
1816 OPERNADS[6] is another temporary register. */
1818 char *
1819 output_block_move (operands, size_is_constant)
1820 rtx *operands;
1821 int size_is_constant;
1823 int align = INTVAL (operands[5]);
1824 unsigned long n_bytes = INTVAL (operands[4]);
1826 /* We can't move more than four bytes at a time because the PA
1827 has no longer integer move insns. (Could use fp mem ops?) */
1828 if (align > 4)
1829 align = 4;
1831 /* Note that we know each loop below will execute at least twice
1832 (else we would have open-coded the copy). */
1833 switch (align)
1835 case 4:
1836 /* Pre-adjust the loop counter. */
1837 operands[4] = GEN_INT (n_bytes - 8);
1838 output_asm_insn ("ldi %4,%2", operands);
1840 /* Copying loop. */
1841 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1842 output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
1843 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1844 output_asm_insn ("addib,>= -8,%2,.-12", operands);
1845 output_asm_insn ("stws,ma %6,4(0,%0)", operands);
1847 /* Handle the residual. There could be up to 7 bytes of
1848 residual to copy! */
1849 if (n_bytes % 8 != 0)
1851 operands[4] = GEN_INT (n_bytes % 4);
1852 if (n_bytes % 8 >= 4)
1853 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1854 if (n_bytes % 4 != 0)
1855 output_asm_insn ("ldw 0(0,%1),%6", operands);
1856 if (n_bytes % 8 >= 4)
1857 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1858 if (n_bytes % 4 != 0)
1859 output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
1861 return "";
1863 case 2:
1864 /* Pre-adjust the loop counter. */
1865 operands[4] = GEN_INT (n_bytes - 4);
1866 output_asm_insn ("ldi %4,%2", operands);
1868 /* Copying loop. */
1869 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1870 output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
1871 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1872 output_asm_insn ("addib,>= -4,%2,.-12", operands);
1873 output_asm_insn ("sths,ma %6,2(0,%0)", operands);
1875 /* Handle the residual. */
1876 if (n_bytes % 4 != 0)
1878 if (n_bytes % 4 >= 2)
1879 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1880 if (n_bytes % 2 != 0)
1881 output_asm_insn ("ldb 0(0,%1),%6", operands);
1882 if (n_bytes % 4 >= 2)
1883 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1884 if (n_bytes % 2 != 0)
1885 output_asm_insn ("stb %6,0(0,%0)", operands);
1887 return "";
1889 case 1:
1890 /* Pre-adjust the loop counter. */
1891 operands[4] = GEN_INT (n_bytes - 2);
1892 output_asm_insn ("ldi %4,%2", operands);
1894 /* Copying loop. */
1895 output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1896 output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
1897 output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
1898 output_asm_insn ("addib,>= -2,%2,.-12", operands);
1899 output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
1901 /* Handle the residual. */
1902 if (n_bytes % 2 != 0)
1904 output_asm_insn ("ldb 0(0,%1),%3", operands);
1905 output_asm_insn ("stb %3,0(0,%0)", operands);
1907 return "";
1909 default:
1910 abort ();
1914 /* Count the number of insns necessary to handle this block move.
1916 Basic structure is the same as emit_block_move, except that we
1917 count insns rather than emit them. */
1920 compute_movstrsi_length (insn)
1921 rtx insn;
1923 rtx pat = PATTERN (insn);
1924 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1925 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1926 unsigned int n_insns = 0;
1928 /* We can't move more than four bytes at a time because the PA
1929 has no longer integer move insns. (Could use fp mem ops?) */
1930 if (align > 4)
1931 align = 4;
1933 /* The basic opying loop. */
1934 n_insns = 6;
1936 /* Residuals. */
1937 if (n_bytes % (2 * align) != 0)
1939 /* Any residual caused by unrolling the copy loop. */
1940 if (n_bytes % (2 * align) > align)
1941 n_insns += 1;
1943 /* Any residual because the number of bytes was not a
1944 multiple of the alignment. */
1945 if (n_bytes % align != 0)
1946 n_insns += 1;
1949 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
1950 return n_insns * 4;
1954 char *
1955 output_and (operands)
1956 rtx *operands;
1958 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1960 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
1961 int ls0, ls1, ms0, p, len;
1963 for (ls0 = 0; ls0 < 32; ls0++)
1964 if ((mask & (1 << ls0)) == 0)
1965 break;
1967 for (ls1 = ls0; ls1 < 32; ls1++)
1968 if ((mask & (1 << ls1)) != 0)
1969 break;
1971 for (ms0 = ls1; ms0 < 32; ms0++)
1972 if ((mask & (1 << ms0)) == 0)
1973 break;
1975 if (ms0 != 32)
1976 abort();
1978 if (ls1 == 32)
1980 len = ls0;
1982 if (len == 0)
1983 abort ();
1985 operands[2] = GEN_INT (len);
1986 return "extru %1,31,%2,%0";
1988 else
1990 /* We could use this `depi' for the case above as well, but `depi'
1991 requires one more register file access than an `extru'. */
1993 p = 31 - ls0;
1994 len = ls1 - ls0;
1996 operands[2] = GEN_INT (p);
1997 operands[3] = GEN_INT (len);
1998 return "depi 0,%2,%3,%0";
2001 else
2002 return "and %1,%2,%0";
2005 char *
2006 output_ior (operands)
2007 rtx *operands;
2009 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2010 int bs0, bs1, p, len;
2012 if (INTVAL (operands[2]) == 0)
2013 return "copy %1,%0";
2015 for (bs0 = 0; bs0 < 32; bs0++)
2016 if ((mask & (1 << bs0)) != 0)
2017 break;
2019 for (bs1 = bs0; bs1 < 32; bs1++)
2020 if ((mask & (1 << bs1)) == 0)
2021 break;
2023 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2024 abort();
2026 p = 31 - bs0;
2027 len = bs1 - bs0;
2029 operands[2] = GEN_INT (p);
2030 operands[3] = GEN_INT (len);
2031 return "depi -1,%2,%3,%0";
/* Output an ascii string: write the SIZE bytes at P to FILE as one or
   more .STRING directives.

   Double quotes and backslashes are preceded by a backslash; bytes
   outside the printable range ' ' .. 0176 are written as \xNN with
   lower-case hex digits.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  static char hex_digits[] = "0123456789abcdef";
  unsigned char chunk[16];	/* Max space 4 chars can occupy.  */
  int line_len = 0;
  int pos;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully: the input is
     encoded four bytes at a time, and a new directive is started
     whenever the next encoded group would push the current one past
     243 characters.  */

  fputs ("\t.STRING \"", file);

  for (pos = 0; pos < size; pos += 4)
    {
      int n_in = size - pos;
      int n_out = 0;
      int k;

      if (n_in > 4)
	n_in = 4;

      for (k = 0; k < n_in; k++)
	{
	  unsigned int c = p[pos + k];

	  if (c == '\"' || c == '\\')
	    chunk[n_out++] = '\\';

	  if (c >= ' ' && c < 0177)
	    chunk[n_out++] = c;
	  else
	    {
	      chunk[n_out++] = '\\';
	      chunk[n_out++] = 'x';
	      chunk[n_out++] = hex_digits[c / 16];
	      chunk[n_out++] = hex_digits[c % 16];
	    }
	}

      if (line_len + n_out > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  line_len = 0;
	}

      fwrite (chunk, 1, n_out, file);
      line_len += n_out;
    }

  fputs ("\"\n", file);
}
2093 /* Try to rewrite floating point comparisons & branches to avoid
2094 useless add,tr insns.
2096 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2097 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2098 first attempt to remove useless add,tr insns. It is zero
2099 for the second pass as reorg sometimes leaves bogus REG_DEAD
2100 notes lying around.
2102 When CHECK_NOTES is zero we can only eliminate add,tr insns
2103 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2104 instructions. */
2105 void
2106 remove_useless_addtr_insns (insns, check_notes)
2107 rtx insns;
2108 int check_notes;
2110 rtx insn;
2111 int all;
2112 static int pass = 0;
2114 /* This is fairly cheap, so always run it when optimizing. */
2115 if (optimize > 0)
2117 int fcmp_count = 0;
2118 int fbranch_count = 0;
2120 /* Walk all the insns in this function looking for fcmp & fbranch
2121 instructions. Keep track of how many of each we find. */
2122 insns = get_insns ();
2123 for (insn = insns; insn; insn = next_insn (insn))
2125 rtx tmp;
2127 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2128 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2129 continue;
2131 tmp = PATTERN (insn);
2133 /* It must be a set. */
2134 if (GET_CODE (tmp) != SET)
2135 continue;
2137 /* If the destination is CCFP, then we've found an fcmp insn. */
2138 tmp = SET_DEST (tmp);
2139 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2141 fcmp_count++;
2142 continue;
2145 tmp = PATTERN (insn);
2146 /* If this is an fbranch instruction, bump the fbranch counter. */
2147 if (GET_CODE (tmp) == SET
2148 && SET_DEST (tmp) == pc_rtx
2149 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2150 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2151 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2152 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2154 fbranch_count++;
2155 continue;
2160 /* Find all floating point compare + branch insns. If possible,
2161 reverse the comparison & the branch to avoid add,tr insns. */
2162 for (insn = insns; insn; insn = next_insn (insn))
2164 rtx tmp, next;
2166 /* Ignore anything that isn't an INSN. */
2167 if (GET_CODE (insn) != INSN)
2168 continue;
2170 tmp = PATTERN (insn);
2172 /* It must be a set. */
2173 if (GET_CODE (tmp) != SET)
2174 continue;
2176 /* The destination must be CCFP, which is register zero. */
2177 tmp = SET_DEST (tmp);
2178 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2179 continue;
2181 /* INSN should be a set of CCFP.
2183 See if the result of this insn is used in a reversed FP
2184 conditional branch. If so, reverse our condition and
2185 the branch. Doing so avoids useless add,tr insns. */
2186 next = next_insn (insn);
2187 while (next)
2189 /* Jumps, calls and labels stop our search. */
2190 if (GET_CODE (next) == JUMP_INSN
2191 || GET_CODE (next) == CALL_INSN
2192 || GET_CODE (next) == CODE_LABEL)
2193 break;
2195 /* As does another fcmp insn. */
2196 if (GET_CODE (next) == INSN
2197 && GET_CODE (PATTERN (next)) == SET
2198 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2199 && REGNO (SET_DEST (PATTERN (next))) == 0)
2200 break;
2202 next = next_insn (next);
2205 /* Is NEXT_INSN a branch? */
2206 if (next
2207 && GET_CODE (next) == JUMP_INSN)
2209 rtx pattern = PATTERN (next);
2211 /* If it a reversed fp conditional branch (eg uses add,tr)
2212 and CCFP dies, then reverse our conditional and the branch
2213 to avoid the add,tr. */
2214 if (GET_CODE (pattern) == SET
2215 && SET_DEST (pattern) == pc_rtx
2216 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2217 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2218 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2219 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2220 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2221 && (fcmp_count == fbranch_count
2222 || (check_notes
2223 && find_regno_note (next, REG_DEAD, 0))))
2225 /* Reverse the branch. */
2226 tmp = XEXP (SET_SRC (pattern), 1);
2227 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2228 XEXP (SET_SRC (pattern), 2) = tmp;
2229 INSN_CODE (next) = -1;
2231 /* Reverse our condition. */
2232 tmp = PATTERN (insn);
2233 PUT_CODE (XEXP (tmp, 1),
2234 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2240 pass = !pass;
2244 /* You may have trouble believing this, but this is the HP-PA stack
2245 layout. Wow.
2247 Offset Contents
2249 Variable arguments (optional; any number may be allocated)
2251 SP-(4*(N+9)) arg word N
2253 SP-56 arg word 5
2254 SP-52 arg word 4
2256 Fixed arguments (must be allocated; may remain unused)
2258 SP-48 arg word 3
2259 SP-44 arg word 2
2260 SP-40 arg word 1
2261 SP-36 arg word 0
2263 Frame Marker
2265 SP-32 External Data Pointer (DP)
2266 SP-28 External sr4
2267 SP-24 External/stub RP (RP')
2268 SP-20 Current RP
2269 SP-16 Static Link
2270 SP-12 Clean up
2271 SP-8 Calling Stub RP (RP'')
2272 SP-4 Previous SP
2274 Top of Frame
2276 SP-0 Stack Pointer (points to next available address)
/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.


   Top of Frame

       SP (FP')         Previous FP
       SP + 4           Alignment filler (sigh)
       SP + 8           Space for locals reserved here.
       .
       .
       .
       SP + n           All call saved registers used.
       .
       .
       .
       SP + o           All call saved fp registers used.
       .
       .
       .
       SP + p (SP')     points to next available address.

*/
2307 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2308 Handle case where DISP > 8k by using the add_high_const pattern.
2310 Note in DISP > 8k case, we will leave the high part of the address
2311 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2312 static void
2313 store_reg (reg, disp, base)
2314 int reg, disp, base;
2316 if (VAL_14_BITS_P (disp))
2318 emit_move_insn (gen_rtx (MEM, SImode,
2319 gen_rtx (PLUS, SImode,
2320 gen_rtx (REG, SImode, base),
2321 GEN_INT (disp))),
2322 gen_rtx (REG, SImode, reg));
2324 else
2326 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2327 gen_rtx (REG, SImode, base),
2328 GEN_INT (disp)));
2329 emit_move_insn (gen_rtx (MEM, SImode,
2330 gen_rtx (LO_SUM, SImode,
2331 gen_rtx (REG, SImode, 1),
2332 GEN_INT (disp))),
2333 gen_rtx (REG, SImode, reg));
2337 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2338 Handle case where DISP > 8k by using the add_high_const pattern.
2340 Note in DISP > 8k case, we will leave the high part of the address
2341 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2342 static void
2343 load_reg (reg, disp, base)
2344 int reg, disp, base;
2346 if (VAL_14_BITS_P (disp))
2348 emit_move_insn (gen_rtx (REG, SImode, reg),
2349 gen_rtx (MEM, SImode,
2350 gen_rtx (PLUS, SImode,
2351 gen_rtx (REG, SImode, base),
2352 GEN_INT (disp))));
2354 else
2356 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2357 gen_rtx (REG, SImode, base),
2358 GEN_INT (disp)));
2359 emit_move_insn (gen_rtx (REG, SImode, reg),
2360 gen_rtx (MEM, SImode,
2361 gen_rtx (LO_SUM, SImode,
2362 gen_rtx (REG, SImode, 1),
2363 GEN_INT (disp))));
2367 /* Emit RTL to set REG to the value specified by BASE+DISP.
2368 Handle case where DISP > 8k by using the add_high_const pattern.
2370 Note in DISP > 8k case, we will leave the high part of the address
2371 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2372 static void
2373 set_reg_plus_d(reg, base, disp)
2374 int reg, base, disp;
2376 if (VAL_14_BITS_P (disp))
2378 emit_move_insn (gen_rtx (REG, SImode, reg),
2379 gen_rtx (PLUS, SImode,
2380 gen_rtx (REG, SImode, base),
2381 GEN_INT (disp)));
2383 else
2385 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2386 gen_rtx (REG, SImode, base),
2387 GEN_INT (disp)));
2388 emit_move_insn (gen_rtx (REG, SImode, reg),
2389 gen_rtx (LO_SUM, SImode,
2390 gen_rtx (REG, SImode, 1),
2391 GEN_INT (disp)));
/* Global variables set by FUNCTION_PROLOGUE.  */

/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static int actual_fsize;

/* Size of the local part of the frame; hppa_expand_prologue computes it
   as the frame size plus 8 bytes whenever any frame is allocated or a
   frame pointer is needed.  */
static int local_fsize;

/* Nonzero when compute_frame_size found a live register in the 48..67
   range, i.e. the prologue/epilogue must save/restore FP registers.  */
static int save_fregs;
2402 compute_frame_size (size, fregs_live)
2403 int size;
2404 int *fregs_live;
2406 extern int current_function_outgoing_args_size;
2407 int i, fsize;
2409 /* 8 is space for frame pointer + filler. If any frame is allocated
2410 we need to add this in because of STARTING_FRAME_OFFSET. */
2411 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2413 /* We must leave enough space for all the callee saved registers
2414 from 3 .. highest used callee save register since we don't
2415 know if we're going to have an inline or out of line prologue
2416 and epilogue. */
2417 for (i = 18; i >= 3; i--)
2418 if (regs_ever_live[i])
2420 fsize += 4 * (i - 2);
2421 break;
2424 /* Round the stack. */
2425 fsize = (fsize + 7) & ~7;
2427 /* We must leave enough space for all the callee saved registers
2428 from 3 .. highest used callee save register since we don't
2429 know if we're going to have an inline or out of line prologue
2430 and epilogue. */
2431 for (i = 66; i >= 48; i -= 2)
2432 if (regs_ever_live[i] || regs_ever_live[i + 1])
2434 if (fregs_live)
2435 *fregs_live = 1;
2437 fsize += 4 * (i - 46);
2438 break;
2441 fsize += current_function_outgoing_args_size;
2442 if (! leaf_function_p () || fsize)
2443 fsize += 32;
2444 return (fsize + 63) & ~63;
2447 rtx hp_profile_label_rtx;
2448 static char hp_profile_label_name[8];
2449 void
2450 output_function_prologue (file, size)
2451 FILE *file;
2452 int size;
2454 /* The function's label and associated .PROC must never be
2455 separated and must be output *after* any profiling declarations
2456 to avoid changing spaces/subspaces within a procedure. */
2457 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2458 fputs ("\t.PROC\n", file);
2460 /* hppa_expand_prologue does the dirty work now. We just need
2461 to output the assembler directives which denote the start
2462 of a function. */
2463 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2464 if (regs_ever_live[2] || profile_flag)
2465 fputs (",CALLS,SAVE_RP", file);
2466 else
2467 fputs (",NO_CALLS", file);
2469 if (frame_pointer_needed)
2470 fputs (",SAVE_SP", file);
2472 /* Pass on information about the number of callee register saves
2473 performed in the prologue.
2475 The compiler is supposed to pass the highest register number
2476 saved, the assembler then has to adjust that number before
2477 entering it into the unwind descriptor (to account for any
2478 caller saved registers with lower register numbers than the
2479 first callee saved register). */
2480 if (gr_saved)
2481 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2483 if (fr_saved)
2484 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2486 fputs ("\n\t.ENTRY\n", file);
2488 /* Horrid hack. emit_function_prologue will modify this RTL in
2489 place to get the expected results. */
2490 if (profile_flag)
2491 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2492 hp_profile_labelno);
2494 /* If we're using GAS and not using the portable runtime model, then
2495 we don't need to accumulate the total number of code bytes. */
2496 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2497 total_code_bytes = 0;
2498 else if (insn_addresses)
2500 unsigned int old_total = total_code_bytes;
2502 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2503 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2505 /* Be prepared to handle overflows. */
2506 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2508 else
2509 total_code_bytes = -1;
2511 remove_useless_addtr_insns (get_insns (), 0);
2514 void
2515 hppa_expand_prologue()
2517 extern char call_used_regs[];
2518 int size = get_frame_size ();
2519 int merge_sp_adjust_with_store = 0;
2520 int i, offset;
2521 rtx tmpreg, size_rtx;
2523 gr_saved = 0;
2524 fr_saved = 0;
2525 save_fregs = 0;
2526 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2527 actual_fsize = compute_frame_size (size, &save_fregs);
2529 /* Compute a few things we will use often. */
2530 tmpreg = gen_rtx (REG, SImode, 1);
2531 size_rtx = GEN_INT (actual_fsize);
2533 /* Handle out of line prologues and epilogues. */
2534 if (TARGET_SPACE)
2536 rtx operands[2];
2537 int saves = 0;
2538 int outline_insn_count = 0;
2539 int inline_insn_count = 0;
2541 /* Count the number of insns for the inline and out of line
2542 variants so we can choose one appropriately.
2544 No need to screw with counting actual_fsize operations -- they're
2545 done for both inline and out of line prologues. */
2546 if (regs_ever_live[2])
2547 inline_insn_count += 1;
2549 if (! cint_ok_for_move (local_fsize))
2550 outline_insn_count += 2;
2551 else
2552 outline_insn_count += 1;
2554 /* Put the register save info into %r22. */
2555 for (i = 18; i >= 3; i--)
2556 if (regs_ever_live[i] && ! call_used_regs[i])
2558 /* -1 because the stack adjustment is normally done in
2559 the same insn as a register save. */
2560 inline_insn_count += (i - 2) - 1;
2561 saves = i;
2562 break;
2565 for (i = 66; i >= 48; i -= 2)
2566 if (regs_ever_live[i] || regs_ever_live[i + 1])
2568 /* +1 needed as we load %r1 with the start of the freg
2569 save area. */
2570 inline_insn_count += (i/2 - 23) + 1;
2571 saves |= ((i/2 - 12 ) << 16);
2572 break;
2575 if (frame_pointer_needed)
2576 inline_insn_count += 3;
2578 if (! cint_ok_for_move (saves))
2579 outline_insn_count += 2;
2580 else
2581 outline_insn_count += 1;
2583 if (TARGET_PORTABLE_RUNTIME)
2584 outline_insn_count += 2;
2585 else
2586 outline_insn_count += 1;
2588 /* If there's a lot of insns in the prologue, then do it as
2589 an out-of-line sequence. */
2590 if (inline_insn_count > outline_insn_count)
2592 /* Put the local_fisze into %r19. */
2593 operands[0] = gen_rtx (REG, SImode, 19);
2594 operands[1] = GEN_INT (local_fsize);
2595 emit_move_insn (operands[0], operands[1]);
2597 /* Put the stack size into %r21. */
2598 operands[0] = gen_rtx (REG, SImode, 21);
2599 operands[1] = size_rtx;
2600 emit_move_insn (operands[0], operands[1]);
2602 operands[0] = gen_rtx (REG, SImode, 22);
2603 operands[1] = GEN_INT (saves);
2604 emit_move_insn (operands[0], operands[1]);
2606 /* Now call the out-of-line prologue. */
2607 emit_insn (gen_outline_prologue_call ());
2608 emit_insn (gen_blockage ());
2610 /* Note that we're using an out-of-line prologue. */
2611 out_of_line_prologue_epilogue = 1;
2612 return;
2616 out_of_line_prologue_epilogue = 0;
2618 /* Save RP first. The calling conventions manual states RP will
2619 always be stored into the caller's frame at sp-20. */
2620 if (regs_ever_live[2] || profile_flag)
2621 store_reg (2, -20, STACK_POINTER_REGNUM);
2623 /* Allocate the local frame and set up the frame pointer if needed. */
2624 if (actual_fsize)
2625 if (frame_pointer_needed)
2627 /* Copy the old frame pointer temporarily into %r1. Set up the
2628 new stack pointer, then store away the saved old frame pointer
2629 into the stack at sp+actual_fsize and at the same time update
2630 the stack pointer by actual_fsize bytes. Two versions, first
2631 handles small (<8k) frames. The second handles large (>8k)
2632 frames. */
2633 emit_move_insn (tmpreg, frame_pointer_rtx);
2634 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2635 if (VAL_14_BITS_P (actual_fsize))
2636 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2637 else
2639 /* It is incorrect to store the saved frame pointer at *sp,
2640 then increment sp (writes beyond the current stack boundary).
2642 So instead use stwm to store at *sp and post-increment the
2643 stack pointer as an atomic operation. Then increment sp to
2644 finish allocating the new frame. */
2645 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2646 set_reg_plus_d (STACK_POINTER_REGNUM,
2647 STACK_POINTER_REGNUM,
2648 actual_fsize - 64);
2651 /* no frame pointer needed. */
2652 else
2654 /* In some cases we can perform the first callee register save
2655 and allocating the stack frame at the same time. If so, just
2656 make a note of it and defer allocating the frame until saving
2657 the callee registers. */
2658 if (VAL_14_BITS_P (-actual_fsize)
2659 && local_fsize == 0
2660 && ! profile_flag
2661 && ! flag_pic)
2662 merge_sp_adjust_with_store = 1;
2663 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2664 else if (actual_fsize != 0)
2665 set_reg_plus_d (STACK_POINTER_REGNUM,
2666 STACK_POINTER_REGNUM,
2667 actual_fsize);
2669 /* The hppa calling conventions say that that %r19, the pic offset
2670 register, is saved at sp - 32 (in this function's frame) when
2671 generating PIC code. FIXME: What is the correct thing to do
2672 for functions which make no calls and allocate no frame? Do
2673 we need to allocate a frame, or can we just omit the save? For
2674 now we'll just omit the save. */
2675 if (actual_fsize != 0 && flag_pic)
2676 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2678 /* Profiling code.
2680 Instead of taking one argument, the counter label, as most normal
2681 mcounts do, _mcount appears to behave differently on the HPPA. It
2682 takes the return address of the caller, the address of this routine,
2683 and the address of the label. Also, it isn't magic, so
2684 argument registers have to be preserved. */
2685 if (profile_flag)
2687 int pc_offset, i, arg_offset, basereg, offsetadj;
2689 pc_offset = 4 + (frame_pointer_needed
2690 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2691 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2693 /* When the function has a frame pointer, use it as the base
2694 register for saving/restore registers. Else use the stack
2695 pointer. Adjust the offset according to the frame size if
2696 this function does not have a frame pointer. */
2698 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2699 : STACK_POINTER_REGNUM;
2700 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2702 /* Horrid hack. emit_function_prologue will modify this RTL in
2703 place to get the expected results. sprintf here is just to
2704 put something in the name. */
2705 sprintf(hp_profile_label_name, "LP$%04d", -1);
2706 hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2707 hp_profile_label_name);
2708 if (current_function_returns_struct)
2709 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2711 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2712 if (regs_ever_live [i])
2714 store_reg (i, arg_offset, basereg);
2715 /* Deal with arg_offset not fitting in 14 bits. */
2716 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2719 emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2720 emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2721 emit_move_insn (gen_rtx (REG, SImode, 24),
2722 gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2723 /* %r25 is set from within the output pattern. */
2724 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2726 /* Restore argument registers. */
2727 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2728 if (regs_ever_live [i])
2729 load_reg (i, arg_offset, basereg);
2731 if (current_function_returns_struct)
2732 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2736 /* Normal register save.
2738 Do not save the frame pointer in the frame_pointer_needed case. It
2739 was done earlier. */
2740 if (frame_pointer_needed)
2742 for (i = 18, offset = local_fsize; i >= 4; i--)
2743 if (regs_ever_live[i] && ! call_used_regs[i])
2745 store_reg (i, offset, FRAME_POINTER_REGNUM);
2746 offset += 4;
2747 gr_saved++;
2749 /* Account for %r3 which is saved in a special place. */
2750 gr_saved++;
2752 /* No frame pointer needed. */
2753 else
2755 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2756 if (regs_ever_live[i] && ! call_used_regs[i])
2758 /* If merge_sp_adjust_with_store is nonzero, then we can
2759 optimize the first GR save. */
2760 if (merge_sp_adjust_with_store)
2762 merge_sp_adjust_with_store = 0;
2763 emit_insn (gen_post_stwm (stack_pointer_rtx,
2764 gen_rtx (REG, SImode, i),
2765 GEN_INT (-offset)));
2767 else
2768 store_reg (i, offset, STACK_POINTER_REGNUM);
2769 offset += 4;
2770 gr_saved++;
2773 /* If we wanted to merge the SP adjustment with a GR save, but we never
2774 did any GR saves, then just emit the adjustment here. */
2775 if (merge_sp_adjust_with_store)
2776 set_reg_plus_d (STACK_POINTER_REGNUM,
2777 STACK_POINTER_REGNUM,
2778 actual_fsize);
2781 /* Align pointer properly (doubleword boundary). */
2782 offset = (offset + 7) & ~7;
2784 /* Floating point register store. */
2785 if (save_fregs)
2787 /* First get the frame or stack pointer to the start of the FP register
2788 save area. */
2789 if (frame_pointer_needed)
2790 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2791 else
2792 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2794 /* Now actually save the FP registers. */
2795 for (i = 66; i >= 48; i -= 2)
2797 if (regs_ever_live[i] || regs_ever_live[i + 1])
2799 emit_move_insn (gen_rtx (MEM, DFmode,
2800 gen_rtx (POST_INC, DFmode, tmpreg)),
2801 gen_rtx (REG, DFmode, i));
2802 fr_saved++;
2807 /* When generating PIC code it is necessary to save/restore the
2808 PIC register around each function call. We used to do this
2809 in the call patterns themselves, but that implementation
2810 made incorrect assumptions about using global variables to hold
2811 per-function rtl code generated in the backend.
2813 So instead, we copy the PIC register into a reserved callee saved
2814 register in the prologue. Then after each call we reload the PIC
2815 register from the callee saved register. We also reload the PIC
2816 register from the callee saved register in the epilogue ensure the
2817 PIC register is valid at function exit.
2819 This may (depending on the exact characteristics of the function)
2820 even be more efficient.
2822 Avoid this if the callee saved register wasn't used (these are
2823 leaf functions). */
2824 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2825 emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2826 gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
2830 void
2831 output_function_epilogue (file, size)
2832 FILE *file;
2833 int size;
2835 rtx insn = get_last_insn ();
2836 int i;
2838 /* hppa_expand_epilogue does the dirty work now. We just need
2839 to output the assembler directives which denote the end
2840 of a function.
2842 To make debuggers happy, emit a nop if the epilogue was completely
2843 eliminated due to a volatile call as the last insn in the
2844 current function. That way the return address (in %r2) will
2845 always point to a valid instruction in the current function. */
2847 /* Get the last real insn. */
2848 if (GET_CODE (insn) == NOTE)
2849 insn = prev_real_insn (insn);
2851 /* If it is a sequence, then look inside. */
2852 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2853 insn = XVECEXP (PATTERN (insn), 0, 0);
2855 /* If insn is a CALL_INSN, then it must be a call to a volatile
2856 function (otherwise there would be epilogue insns). */
2857 if (insn && GET_CODE (insn) == CALL_INSN)
2858 fputs ("\tnop\n", file);
2860 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2862 /* If we have deferred plabels, then we need to switch into the data
2863 section and align it to a 4 byte boundary before we output the
2864 deferred plabels. */
2865 if (n_deferred_plabels)
2867 data_section ();
2868 ASM_OUTPUT_ALIGN (file, 2);
2871 /* Now output the deferred plabels. */
2872 for (i = 0; i < n_deferred_plabels; i++)
2874 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
2875 assemble_integer (deferred_plabels[i].symbol, 4, 1);
2877 n_deferred_plabels = 0;
2880 void
2881 hppa_expand_epilogue ()
2883 rtx tmpreg;
2884 int offset,i;
2885 int merge_sp_adjust_with_load = 0;
2887 /* Handle out of line prologues and epilogues. */
2888 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2890 int saves = 0;
2891 rtx operands[2];
2893 /* Put the register save info into %r22. */
2894 for (i = 18; i >= 3; i--)
2895 if (regs_ever_live[i] && ! call_used_regs[i])
2897 saves = i;
2898 break;
2901 for (i = 66; i >= 48; i -= 2)
2902 if (regs_ever_live[i] || regs_ever_live[i + 1])
2904 saves |= ((i/2 - 12 ) << 16);
2905 break;
2908 emit_insn (gen_blockage ());
2910 /* Put the local_fisze into %r19. */
2911 operands[0] = gen_rtx (REG, SImode, 19);
2912 operands[1] = GEN_INT (local_fsize);
2913 emit_move_insn (operands[0], operands[1]);
2915 /* Put the stack size into %r21. */
2916 operands[0] = gen_rtx (REG, SImode, 21);
2917 operands[1] = GEN_INT (actual_fsize);
2918 emit_move_insn (operands[0], operands[1]);
2920 operands[0] = gen_rtx (REG, SImode, 22);
2921 operands[1] = GEN_INT (saves);
2922 emit_move_insn (operands[0], operands[1]);
2924 /* Now call the out-of-line epilogue. */
2925 emit_insn (gen_outline_epilogue_call ());
2926 return;
2929 /* We will use this often. */
2930 tmpreg = gen_rtx (REG, SImode, 1);
2932 /* Try to restore RP early to avoid load/use interlocks when
2933 RP gets used in the return (bv) instruction. This appears to still
2934 be necessary even when we schedule the prologue and epilogue. */
2935 if (frame_pointer_needed
2936 && (regs_ever_live [2] || profile_flag))
2937 load_reg (2, -20, FRAME_POINTER_REGNUM);
2939 /* No frame pointer, and stack is smaller than 8k. */
2940 else if (! frame_pointer_needed
2941 && VAL_14_BITS_P (actual_fsize + 20)
2942 && (regs_ever_live[2] || profile_flag))
2943 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2945 /* General register restores. */
2946 if (frame_pointer_needed)
2948 for (i = 18, offset = local_fsize; i >= 4; i--)
2949 if (regs_ever_live[i] && ! call_used_regs[i])
2951 load_reg (i, offset, FRAME_POINTER_REGNUM);
2952 offset += 4;
2955 else
2957 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2959 if (regs_ever_live[i] && ! call_used_regs[i])
2961 /* Only for the first load.
2962 merge_sp_adjust_with_load holds the register load
2963 with which we will merge the sp adjustment. */
2964 if (VAL_14_BITS_P (actual_fsize + 20)
2965 && local_fsize == 0
2966 && ! merge_sp_adjust_with_load)
2967 merge_sp_adjust_with_load = i;
2968 else
2969 load_reg (i, offset, STACK_POINTER_REGNUM);
2970 offset += 4;
2975 /* Align pointer properly (doubleword boundary). */
2976 offset = (offset + 7) & ~7;
2978 /* FP register restores. */
2979 if (save_fregs)
2981 /* Adjust the register to index off of. */
2982 if (frame_pointer_needed)
2983 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2984 else
2985 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2987 /* Actually do the restores now. */
2988 for (i = 66; i >= 48; i -= 2)
2990 if (regs_ever_live[i] || regs_ever_live[i + 1])
2992 emit_move_insn (gen_rtx (REG, DFmode, i),
2993 gen_rtx (MEM, DFmode,
2994 gen_rtx (POST_INC, DFmode, tmpreg)));
2999 /* Emit a blockage insn here to keep these insns from being moved to
3000 an earlier spot in the epilogue, or into the main instruction stream.
3002 This is necessary as we must not cut the stack back before all the
3003 restores are finished. */
3004 emit_insn (gen_blockage ());
3005 /* No frame pointer, but we have a stack greater than 8k. We restore
3006 %r2 very late in this case. (All other cases are restored as early
3007 as possible.) */
3008 if (! frame_pointer_needed
3009 && ! VAL_14_BITS_P (actual_fsize + 20)
3010 && (regs_ever_live[2] || profile_flag))
3012 set_reg_plus_d (STACK_POINTER_REGNUM,
3013 STACK_POINTER_REGNUM,
3014 - actual_fsize);
3016 /* This used to try and be clever by not depending on the value in
3017 %r30 and instead use the value held in %r1 (so that the 2nd insn
3018 which sets %r30 could be put in the delay slot of the return insn).
3020 That won't work since if the stack is exactly 8k set_reg_plus_d
3021 doesn't set %r1, just %r30. */
3022 load_reg (2, - 20, STACK_POINTER_REGNUM);
3025 /* Reset stack pointer (and possibly frame pointer). The stack
3026 pointer is initially set to fp + 64 to avoid a race condition. */
3027 else if (frame_pointer_needed)
3029 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3030 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3031 stack_pointer_rtx,
3032 GEN_INT (-64)));
3034 /* If we were deferring a callee register restore, do it now. */
3035 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3036 emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
3037 merge_sp_adjust_with_load),
3038 stack_pointer_rtx,
3039 GEN_INT (- actual_fsize)));
3040 else if (actual_fsize != 0)
3041 set_reg_plus_d (STACK_POINTER_REGNUM,
3042 STACK_POINTER_REGNUM,
3043 - actual_fsize);
3046 /* Fetch the return address for the frame COUNT steps up from
3047 the current frame, after the prologue. FRAMEADDR is the
3048 frame pointer of the COUNT frame.
3050 We want to ignore any export stub remnants here.
3052 The value returned is used in two different ways:
3054 1. To find a function's caller.
3056 2. To change the return address for a function.
3058 This function handles most instances of case 1; however, it will
3059 fail if there are two levels of stubs to execute on the return
3060 path. The only way I believe that can happen is if the return value
3061 needs a parameter relocation, which never happens for C code.
3063 This function handles most instances of case 2; however, it will
3064 fail if we did not originally have stub code on the return path
3065 but will need code on the new return path. This can happen if
3066 the caller & callee are both in the main program, but the new
3067 return location is in a shared library.
3069 To handle this correctly we need to set the return pointer at
3070 frame-20 to point to a return stub frame-24 to point to the
3071 location we wish to return to. */
3074 return_addr_rtx (count, frameaddr)
3075 int count;
3076 rtx frameaddr;
3078 rtx label;
3079 rtx saved_rp;
3080 rtx ins;
3082 saved_rp = gen_reg_rtx (Pmode);
3084 /* First, we start off with the normal return address pointer from
3085 -20[frameaddr]. */
3087 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3089 /* Get pointer to the instruction stream. We have to mask out the
3090 privilege level from the two low order bits of the return address
3091 pointer here so that ins will point to the start of the first
3092 instruction that would have been executed if we returned. */
3093 ins = copy_to_reg (gen_rtx (AND, Pmode,
3094 copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
3095 MASK_RETURN_ADDR));
3096 label = gen_label_rtx ();
3098 /* Check the instruction stream at the normal return address for the
3099 export stub:
3101 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3102 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3103 0x00011820 | stub+16: mtsp r1,sr0
3104 0xe0400002 | stub+20: be,n 0(sr0,rp)
3106 If it is an export stub, than our return address is really in
3107 -24[frameaddr]. */
3109 emit_cmp_insn (gen_rtx (MEM, SImode, ins),
3110 GEN_INT (0x4bc23fd1),
3111 NE, NULL_RTX, SImode, 1, 0);
3112 emit_jump_insn (gen_bne (label));
3114 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
3115 GEN_INT (0x004010a1),
3116 NE, NULL_RTX, SImode, 1, 0);
3117 emit_jump_insn (gen_bne (label));
3119 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
3120 GEN_INT (0x00011820),
3121 NE, NULL_RTX, SImode, 1, 0);
3122 emit_jump_insn (gen_bne (label));
3124 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
3125 GEN_INT (0xe0400002),
3126 NE, NULL_RTX, SImode, 1, 0);
3128 /* If there is no export stub then just use our initial guess of
3129 -20[frameaddr]. */
3131 emit_jump_insn (gen_bne (label));
3133 /* Here we know that our return address pointer points to an export
3134 stub. We don't want to return the address of the export stub,
3135 but rather the return address that leads back into user code.
3136 That return address is stored at -24[frameaddr]. */
3138 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3140 emit_label (label);
3141 return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp));
3144 /* This is only valid once reload has completed because it depends on
3145 knowing exactly how much (if any) frame there is and...
3147 It's only valid if there is no frame marker to de-allocate and...
3149 It's only valid if %r2 hasn't been saved into the caller's frame
3150 (we're not profiling and %r2 isn't live anywhere). */
3152 hppa_can_use_return_insn_p ()
3154 return (reload_completed
3155 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3156 && ! profile_flag
3157 && ! regs_ever_live[2]
3158 && ! frame_pointer_needed);
3161 void
3162 emit_bcond_fp (code, operand0)
3163 enum rtx_code code;
3164 rtx operand0;
3166 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3167 gen_rtx (IF_THEN_ELSE, VOIDmode,
3168 gen_rtx (code, VOIDmode,
3169 gen_rtx (REG, CCFPmode, 0),
3170 const0_rtx),
3171 gen_rtx (LABEL_REF, VOIDmode, operand0),
3172 pc_rtx)));
3177 gen_cmp_fp (code, operand0, operand1)
3178 enum rtx_code code;
3179 rtx operand0, operand1;
3181 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3182 gen_rtx (code, CCFPmode, operand0, operand1));
3185 /* Adjust the cost of a scheduling dependency. Return the new cost of
3186 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3189 pa_adjust_cost (insn, link, dep_insn, cost)
3190 rtx insn;
3191 rtx link;
3192 rtx dep_insn;
3193 int cost;
3195 if (! recog_memoized (insn))
3196 return 0;
3198 if (REG_NOTE_KIND (link) == 0)
3200 /* Data dependency; DEP_INSN writes a register that INSN reads some
3201 cycles later. */
3203 if (get_attr_type (insn) == TYPE_FPSTORE)
3205 rtx pat = PATTERN (insn);
3206 rtx dep_pat = PATTERN (dep_insn);
3207 if (GET_CODE (pat) == PARALLEL)
3209 /* This happens for the fstXs,mb patterns. */
3210 pat = XVECEXP (pat, 0, 0);
3212 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3213 /* If this happens, we have to extend this to schedule
3214 optimally. Return 0 for now. */
3215 return 0;
3217 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3219 if (! recog_memoized (dep_insn))
3220 return 0;
3221 /* DEP_INSN is writing its result to the register
3222 being stored in the fpstore INSN. */
3223 switch (get_attr_type (dep_insn))
3225 case TYPE_FPLOAD:
3226 /* This cost 3 cycles, not 2 as the md says for the
3227 700 and 7100. Note scaling of cost for 7100. */
3228 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3230 case TYPE_FPALU:
3231 case TYPE_FPMULSGL:
3232 case TYPE_FPMULDBL:
3233 case TYPE_FPDIVSGL:
3234 case TYPE_FPDIVDBL:
3235 case TYPE_FPSQRTSGL:
3236 case TYPE_FPSQRTDBL:
3237 /* In these important cases, we save one cycle compared to
3238 when flop instruction feed each other. */
3239 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3241 default:
3242 return cost;
3247 /* For other data dependencies, the default cost specified in the
3248 md is correct. */
3249 return cost;
3251 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3253 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3254 cycles later. */
3256 if (get_attr_type (insn) == TYPE_FPLOAD)
3258 rtx pat = PATTERN (insn);
3259 rtx dep_pat = PATTERN (dep_insn);
3260 if (GET_CODE (pat) == PARALLEL)
3262 /* This happens for the fldXs,mb patterns. */
3263 pat = XVECEXP (pat, 0, 0);
3265 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3266 /* If this happens, we have to extend this to schedule
3267 optimally. Return 0 for now. */
3268 return 0;
3270 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3272 if (! recog_memoized (dep_insn))
3273 return 0;
3274 switch (get_attr_type (dep_insn))
3276 case TYPE_FPALU:
3277 case TYPE_FPMULSGL:
3278 case TYPE_FPMULDBL:
3279 case TYPE_FPDIVSGL:
3280 case TYPE_FPDIVDBL:
3281 case TYPE_FPSQRTSGL:
3282 case TYPE_FPSQRTDBL:
3283 /* A fpload can't be issued until one cycle before a
3284 preceding arithmetic operation has finished if
3285 the target of the fpload is any of the sources
3286 (or destination) of the arithmetic operation. */
3287 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3289 default:
3290 return 0;
3294 else if (get_attr_type (insn) == TYPE_FPALU)
3296 rtx pat = PATTERN (insn);
3297 rtx dep_pat = PATTERN (dep_insn);
3298 if (GET_CODE (pat) == PARALLEL)
3300 /* This happens for the fldXs,mb patterns. */
3301 pat = XVECEXP (pat, 0, 0);
3303 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3304 /* If this happens, we have to extend this to schedule
3305 optimally. Return 0 for now. */
3306 return 0;
3308 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3310 if (! recog_memoized (dep_insn))
3311 return 0;
3312 switch (get_attr_type (dep_insn))
3314 case TYPE_FPDIVSGL:
3315 case TYPE_FPDIVDBL:
3316 case TYPE_FPSQRTSGL:
3317 case TYPE_FPSQRTDBL:
3318 /* An ALU flop can't be issued until two cycles before a
3319 preceding divide or sqrt operation has finished if
3320 the target of the ALU flop is any of the sources
3321 (or destination) of the divide or sqrt operation. */
3322 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3324 default:
3325 return 0;
3330 /* For other anti dependencies, the cost is 0. */
3331 return 0;
3333 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3335 /* Output dependency; DEP_INSN writes a register that INSN writes some
3336 cycles later. */
3337 if (get_attr_type (insn) == TYPE_FPLOAD)
3339 rtx pat = PATTERN (insn);
3340 rtx dep_pat = PATTERN (dep_insn);
3341 if (GET_CODE (pat) == PARALLEL)
3343 /* This happens for the fldXs,mb patterns. */
3344 pat = XVECEXP (pat, 0, 0);
3346 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3347 /* If this happens, we have to extend this to schedule
3348 optimally. Return 0 for now. */
3349 return 0;
3351 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3353 if (! recog_memoized (dep_insn))
3354 return 0;
3355 switch (get_attr_type (dep_insn))
3357 case TYPE_FPALU:
3358 case TYPE_FPMULSGL:
3359 case TYPE_FPMULDBL:
3360 case TYPE_FPDIVSGL:
3361 case TYPE_FPDIVDBL:
3362 case TYPE_FPSQRTSGL:
3363 case TYPE_FPSQRTDBL:
3364 /* A fpload can't be issued until one cycle before a
3365 preceding arithmetic operation has finished if
3366 the target of the fpload is the destination of the
3367 arithmetic operation. */
3368 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3370 default:
3371 return 0;
3375 else if (get_attr_type (insn) == TYPE_FPALU)
3377 rtx pat = PATTERN (insn);
3378 rtx dep_pat = PATTERN (dep_insn);
3379 if (GET_CODE (pat) == PARALLEL)
3381 /* This happens for the fldXs,mb patterns. */
3382 pat = XVECEXP (pat, 0, 0);
3384 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3385 /* If this happens, we have to extend this to schedule
3386 optimally. Return 0 for now. */
3387 return 0;
3389 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3391 if (! recog_memoized (dep_insn))
3392 return 0;
3393 switch (get_attr_type (dep_insn))
3395 case TYPE_FPDIVSGL:
3396 case TYPE_FPDIVDBL:
3397 case TYPE_FPSQRTSGL:
3398 case TYPE_FPSQRTDBL:
3399 /* An ALU flop can't be issued until two cycles before a
3400 preceding divide or sqrt operation has finished if
3401 the target of the ALU flop is also the target of
3402 of the divide or sqrt operation. */
3403 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3405 default:
3406 return 0;
3411 /* For other output dependencies, the cost is 0. */
3412 return 0;
3414 else
3415 abort ();
3418 /* Return any length adjustment needed by INSN which already has its length
3419 computed as LENGTH. Return zero if no adjustment is necessary.
3421 For the PA: function calls, millicode calls, and backwards short
3422 conditional branches with unfilled delay slots need an adjustment by +1
3423 (to account for the NOP which will be inserted into the instruction stream).
3425 Also compute the length of an inline block move here as it is too
3426 complicated to express as a length attribute in pa.md. */
3428 pa_adjust_insn_length (insn, length)
3429 rtx insn;
3430 int length;
3432 rtx pat = PATTERN (insn);
3434 /* Call insns which are *not* indirect and have unfilled delay slots. */
3435 if (GET_CODE (insn) == CALL_INSN)
3438 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3439 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3440 return 4;
3441 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3442 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3443 == SYMBOL_REF)
3444 return 4;
3445 else
3446 return 0;
3448 /* Jumps inside switch tables which have unfilled delay slots
3449 also need adjustment. */
3450 else if (GET_CODE (insn) == JUMP_INSN
3451 && simplejump_p (insn)
3452 && GET_MODE (PATTERN (insn)) == DImode)
3453 return 4;
3454 /* Millicode insn with an unfilled delay slot. */
3455 else if (GET_CODE (insn) == INSN
3456 && GET_CODE (pat) != SEQUENCE
3457 && GET_CODE (pat) != USE
3458 && GET_CODE (pat) != CLOBBER
3459 && get_attr_type (insn) == TYPE_MILLI)
3460 return 4;
3461 /* Block move pattern. */
3462 else if (GET_CODE (insn) == INSN
3463 && GET_CODE (pat) == PARALLEL
3464 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3465 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3466 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3467 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3468 return compute_movstrsi_length (insn) - 4;
3469 /* Conditional branch with an unfilled delay slot. */
3470 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3472 /* Adjust a short backwards conditional with an unfilled delay slot. */
3473 if (GET_CODE (pat) == SET
3474 && length == 4
3475 && ! forward_branch_p (insn))
3476 return 4;
3477 else if (GET_CODE (pat) == PARALLEL
3478 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3479 && length == 4)
3480 return 4;
3481 /* Adjust dbra insn with short backwards conditional branch with
3482 unfilled delay slot -- only for case where counter is in a
3483 general register register. */
3484 else if (GET_CODE (pat) == PARALLEL
3485 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3486 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3487 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3488 && length == 4
3489 && ! forward_branch_p (insn))
3490 return 4;
3491 else
3492 return 0;
3494 return 0;
3497 /* Print operand X (an rtx) in assembler syntax to file FILE.
3498 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3499 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3501 void
3502 print_operand (file, x, code)
3503 FILE *file;
3504 rtx x;
3505 int code;
3507 switch (code)
3509 case '#':
3510 /* Output a 'nop' if there's nothing for the delay slot. */
3511 if (dbr_sequence_length () == 0)
3512 fputs ("\n\tnop", file);
3513 return;
3514 case '*':
3515 /* Output an nullification completer if there's nothing for the */
3516 /* delay slot or nullification is requested. */
3517 if (dbr_sequence_length () == 0 ||
3518 (final_sequence &&
3519 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3520 fputs (",n", file);
3521 return;
3522 case 'R':
3523 /* Print out the second register name of a register pair.
3524 I.e., R (6) => 7. */
3525 fputs (reg_names[REGNO (x)+1], file);
3526 return;
3527 case 'r':
3528 /* A register or zero. */
3529 if (x == const0_rtx
3530 || (x == CONST0_RTX (DFmode))
3531 || (x == CONST0_RTX (SFmode)))
3533 fputs ("0", file);
3534 return;
3536 else
3537 break;
3538 case 'C': /* Plain (C)ondition */
3539 case 'X':
3540 switch (GET_CODE (x))
3542 case EQ:
3543 fputs ("=", file); break;
3544 case NE:
3545 fputs ("<>", file); break;
3546 case GT:
3547 fputs (">", file); break;
3548 case GE:
3549 fputs (">=", file); break;
3550 case GEU:
3551 fputs (">>=", file); break;
3552 case GTU:
3553 fputs (">>", file); break;
3554 case LT:
3555 fputs ("<", file); break;
3556 case LE:
3557 fputs ("<=", file); break;
3558 case LEU:
3559 fputs ("<<=", file); break;
3560 case LTU:
3561 fputs ("<<", file); break;
3562 default:
3563 abort ();
3565 return;
3566 case 'N': /* Condition, (N)egated */
3567 switch (GET_CODE (x))
3569 case EQ:
3570 fputs ("<>", file); break;
3571 case NE:
3572 fputs ("=", file); break;
3573 case GT:
3574 fputs ("<=", file); break;
3575 case GE:
3576 fputs ("<", file); break;
3577 case GEU:
3578 fputs ("<<", file); break;
3579 case GTU:
3580 fputs ("<<=", file); break;
3581 case LT:
3582 fputs (">=", file); break;
3583 case LE:
3584 fputs (">", file); break;
3585 case LEU:
3586 fputs (">>", file); break;
3587 case LTU:
3588 fputs (">>=", file); break;
3589 default:
3590 abort ();
3592 return;
3593 /* For floating point comparisons. Need special conditions to deal
3594 with NaNs properly. */
3595 case 'Y':
3596 switch (GET_CODE (x))
3598 case EQ:
3599 fputs ("!=", file); break;
3600 case NE:
3601 fputs ("=", file); break;
3602 case GT:
3603 fputs ("<=", file); break;
3604 case GE:
3605 fputs ("<", file); break;
3606 case LT:
3607 fputs (">=", file); break;
3608 case LE:
3609 fputs (">", file); break;
3610 default:
3611 abort ();
3613 return;
3614 case 'S': /* Condition, operands are (S)wapped. */
3615 switch (GET_CODE (x))
3617 case EQ:
3618 fputs ("=", file); break;
3619 case NE:
3620 fputs ("<>", file); break;
3621 case GT:
3622 fputs ("<", file); break;
3623 case GE:
3624 fputs ("<=", file); break;
3625 case GEU:
3626 fputs ("<<=", file); break;
3627 case GTU:
3628 fputs ("<<", file); break;
3629 case LT:
3630 fputs (">", file); break;
3631 case LE:
3632 fputs (">=", file); break;
3633 case LEU:
3634 fputs (">>=", file); break;
3635 case LTU:
3636 fputs (">>", file); break;
3637 default:
3638 abort ();
3640 return;
3641 case 'B': /* Condition, (B)oth swapped and negate. */
3642 switch (GET_CODE (x))
3644 case EQ:
3645 fputs ("<>", file); break;
3646 case NE:
3647 fputs ("=", file); break;
3648 case GT:
3649 fputs (">=", file); break;
3650 case GE:
3651 fputs (">", file); break;
3652 case GEU:
3653 fputs (">>", file); break;
3654 case GTU:
3655 fputs (">>=", file); break;
3656 case LT:
3657 fputs ("<=", file); break;
3658 case LE:
3659 fputs ("<", file); break;
3660 case LEU:
3661 fputs ("<<", file); break;
3662 case LTU:
3663 fputs ("<<=", file); break;
3664 default:
3665 abort ();
3667 return;
3668 case 'k':
3669 if (GET_CODE (x) == CONST_INT)
3671 fprintf (file, "%d", ~INTVAL (x));
3672 return;
3674 abort();
3675 case 'L':
3676 if (GET_CODE (x) == CONST_INT)
3678 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3679 return;
3681 abort();
3682 case 'O':
3683 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3685 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3686 return;
3688 abort();
3689 case 'P':
3690 if (GET_CODE (x) == CONST_INT)
3692 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3693 return;
3695 abort();
3696 case 'I':
3697 if (GET_CODE (x) == CONST_INT)
3698 fputs ("i", file);
3699 return;
3700 case 'M':
3701 case 'F':
3702 switch (GET_CODE (XEXP (x, 0)))
3704 case PRE_DEC:
3705 case PRE_INC:
3706 fputs ("s,mb", file);
3707 break;
3708 case POST_DEC:
3709 case POST_INC:
3710 fputs ("s,ma", file);
3711 break;
3712 case PLUS:
3713 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3714 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3715 fputs ("x,s", file);
3716 else if (code == 'F')
3717 fputs ("s", file);
3718 break;
3719 default:
3720 if (code == 'F')
3721 fputs ("s", file);
3722 break;
3724 return;
3725 case 'G':
3726 output_global_address (file, x, 0);
3727 return;
3728 case 'H':
3729 output_global_address (file, x, 1);
3730 return;
3731 case 0: /* Don't do anything special */
3732 break;
3733 case 'Z':
3735 unsigned op[3];
3736 compute_zdepi_operands (INTVAL (x), op);
3737 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3738 return;
3740 default:
3741 abort ();
3743 if (GET_CODE (x) == REG)
3745 fputs (reg_names [REGNO (x)], file);
3746 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3747 fputs ("L", file);
3749 else if (GET_CODE (x) == MEM)
3751 int size = GET_MODE_SIZE (GET_MODE (x));
3752 rtx base = XEXP (XEXP (x, 0), 0);
3753 switch (GET_CODE (XEXP (x, 0)))
3755 case PRE_DEC:
3756 case POST_DEC:
3757 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3758 break;
3759 case PRE_INC:
3760 case POST_INC:
3761 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3762 break;
3763 default:
3764 if (GET_CODE (XEXP (x, 0)) == PLUS
3765 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3766 fprintf (file, "%s(0,%s)",
3767 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3768 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3769 else if (GET_CODE (XEXP (x, 0)) == PLUS
3770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3771 fprintf (file, "%s(0,%s)",
3772 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3773 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3774 else
3775 output_address (XEXP (x, 0));
3776 break;
3779 else
3780 output_addr_const (file, x);
3783 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3785 void
3786 output_global_address (file, x, round_constant)
3787 FILE *file;
3788 rtx x;
3789 int round_constant;
3792 /* Imagine (high (const (plus ...))). */
3793 if (GET_CODE (x) == HIGH)
3794 x = XEXP (x, 0);
3796 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3797 assemble_name (file, XSTR (x, 0));
3798 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3800 assemble_name (file, XSTR (x, 0));
3801 fputs ("-$global$", file);
3803 else if (GET_CODE (x) == CONST)
3805 char *sep = "";
3806 int offset = 0; /* assembler wants -$global$ at end */
3807 rtx base;
3809 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3811 base = XEXP (XEXP (x, 0), 0);
3812 output_addr_const (file, base);
3814 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3815 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3816 else abort ();
3818 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3820 base = XEXP (XEXP (x, 0), 1);
3821 output_addr_const (file, base);
3823 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3824 offset = INTVAL (XEXP (XEXP (x, 0),1));
3825 else abort ();
3827 /* How bogus. The compiler is apparently responsible for
3828 rounding the constant if it uses an LR field selector.
3830 The linker and/or assembler seem a better place since
3831 they have to do this kind of thing already.
3833 If we fail to do this, HP's optimizing linker may eliminate
3834 an addil, but not update the ldw/stw/ldo instruction that
3835 uses the result of the addil. */
3836 if (round_constant)
3837 offset = ((offset + 0x1000) & ~0x1fff);
3839 if (GET_CODE (XEXP (x, 0)) == PLUS)
3841 if (offset < 0)
3843 offset = -offset;
3844 sep = "-";
3846 else
3847 sep = "+";
3849 else if (GET_CODE (XEXP (x, 0)) == MINUS
3850 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3851 sep = "-";
3852 else abort ();
3854 if (!read_only_operand (base) && !flag_pic)
3855 fputs ("-$global$", file);
3856 if (offset)
3857 fprintf (file,"%s%d", sep, offset);
3859 else
3860 output_addr_const (file, x);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes
{
  remI,
  remU,
  divI,
  divU,
  mulI,
  mulU,
  end1000
};

/* Nonzero once the .IMPORT for the corresponding routine was output.  */
static char imported[(int)end1000];

/* Routine names, indexed by enum millicodes; each is four characters.  */
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};

/* Template directive; the four dots are overwritten with the name.  */
static char import_string[] = ".IMPORT $$....,MILLICODE";

/* Offset of the first dot in import_string.  */
#define MILLI_START 10

/* Output the .IMPORT directive for millicode routine CODE, unless it
   has already been output.  */

static void
import_milli (code)
     enum millicodes code;
{
  char str[sizeof (import_string)];
  int which = (int)code;

  if (imported[which])
    return;

  imported[which] = 1;
  strcpy (str, import_string);
  /* Exactly four characters are copied so the rest of the template,
     including its terminator, is left intact.  */
  strncpy (str + MILLI_START, milli_names[which], 4);
  output_asm_insn (str, 0);
}
3887 /* The register constraints have put the operands and return value in
3888 the proper registers. */
3890 char *
3891 output_mul_insn (unsignedp, insn)
3892 int unsignedp;
3893 rtx insn;
3895 import_milli (mulI);
3896 return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
3899 /* Emit the rtl for doing a division by a constant. */
3901 /* Do magic division millicodes exist for this value? */
3902 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
3903 1, 1};
3905 /* We'll use an array to keep track of the magic millicodes and
3906 whether or not we've used them already. [n][0] is signed, [n][1] is
3907 unsigned. */
3909 static int div_milli[16][2];
3912 div_operand (op, mode)
3913 rtx op;
3914 enum machine_mode mode;
3916 return (mode == SImode
3917 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3918 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3919 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3923 emit_hpdiv_const (operands, unsignedp)
3924 rtx *operands;
3925 int unsignedp;
3927 if (GET_CODE (operands[2]) == CONST_INT
3928 && INTVAL (operands[2]) > 0
3929 && INTVAL (operands[2]) < 16
3930 && magic_milli[INTVAL (operands[2])])
3932 emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
3933 emit
3934 (gen_rtx
3935 (PARALLEL, VOIDmode,
3936 gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3937 gen_rtx (unsignedp ? UDIV : DIV, SImode,
3938 gen_rtx (REG, SImode, 26),
3939 operands[2])),
3940 gen_rtx (CLOBBER, VOIDmode, operands[3]),
3941 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3942 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3943 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3944 emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3945 return 1;
3947 return 0;
3950 char *
3951 output_div_insn (operands, unsignedp, insn)
3952 rtx *operands;
3953 int unsignedp;
3954 rtx insn;
3956 int divisor;
3958 /* If the divisor is a constant, try to use one of the special
3959 opcodes .*/
3960 if (GET_CODE (operands[0]) == CONST_INT)
3962 static char buf[100];
3963 divisor = INTVAL (operands[0]);
3964 if (!div_milli[divisor][unsignedp])
3966 div_milli[divisor][unsignedp] = 1;
3967 if (unsignedp)
3968 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
3969 else
3970 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
3972 if (unsignedp)
3974 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
3975 return output_millicode_call (insn,
3976 gen_rtx (SYMBOL_REF, SImode, buf));
3978 else
3980 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
3981 return output_millicode_call (insn,
3982 gen_rtx (SYMBOL_REF, SImode, buf));
3985 /* Divisor isn't a special constant. */
3986 else
3988 if (unsignedp)
3990 import_milli (divU);
3991 return output_millicode_call (insn,
3992 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
3994 else
3996 import_milli (divI);
3997 return output_millicode_call (insn,
3998 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
4003 /* Output a $$rem millicode to do mod. */
4005 char *
4006 output_mod_insn (unsignedp, insn)
4007 int unsignedp;
4008 rtx insn;
4010 if (unsignedp)
4012 import_milli (remU);
4013 return output_millicode_call (insn,
4014 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
4016 else
4018 import_milli (remI);
4019 return output_millicode_call (insn,
4020 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
4024 void
4025 output_arg_descriptor (call_insn)
4026 rtx call_insn;
4028 char *arg_regs[4];
4029 enum machine_mode arg_mode;
4030 rtx link;
4031 int i, output_flag = 0;
4032 int regno;
4034 for (i = 0; i < 4; i++)
4035 arg_regs[i] = 0;
4037 /* Specify explicitly that no argument relocations should take place
4038 if using the portable runtime calling conventions. */
4039 if (TARGET_PORTABLE_RUNTIME)
4041 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4042 asm_out_file);
4043 return;
4046 if (GET_CODE (call_insn) != CALL_INSN)
4047 abort ();
4048 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4050 rtx use = XEXP (link, 0);
4052 if (! (GET_CODE (use) == USE
4053 && GET_CODE (XEXP (use, 0)) == REG
4054 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4055 continue;
4057 arg_mode = GET_MODE (XEXP (use, 0));
4058 regno = REGNO (XEXP (use, 0));
4059 if (regno >= 23 && regno <= 26)
4061 arg_regs[26 - regno] = "GR";
4062 if (arg_mode == DImode)
4063 arg_regs[25 - regno] = "GR";
4065 else if (regno >= 32 && regno <= 39)
4067 if (arg_mode == SFmode)
4068 arg_regs[(regno - 32) / 2] = "FR";
4069 else
4071 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4072 arg_regs[(regno - 34) / 2] = "FR";
4073 arg_regs[(regno - 34) / 2 + 1] = "FU";
4074 #else
4075 arg_regs[(regno - 34) / 2] = "FU";
4076 arg_regs[(regno - 34) / 2 + 1] = "FR";
4077 #endif
4081 fputs ("\t.CALL ", asm_out_file);
4082 for (i = 0; i < 4; i++)
4084 if (arg_regs[i])
4086 if (output_flag++)
4087 fputc (',', asm_out_file);
4088 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4091 fputc ('\n', asm_out_file);
4094 /* Return the class of any secondary reload register that is needed to
4095 move IN into a register in class CLASS using mode MODE.
4097 Profiling has showed this routine and its descendants account for
4098 a significant amount of compile time (~7%). So it has been
4099 optimized to reduce redundant computations and eliminate useless
4100 function calls.
4102 It might be worthwhile to try and make this a leaf function too. */
4104 enum reg_class
4105 secondary_reload_class (class, mode, in)
4106 enum reg_class class;
4107 enum machine_mode mode;
4108 rtx in;
4110 int regno, is_symbolic;
4112 /* Trying to load a constant into a FP register during PIC code
4113 generation will require %r1 as a scratch register. */
4114 if (flag_pic == 2
4115 && GET_MODE_CLASS (mode) == MODE_INT
4116 && FP_REG_CLASS_P (class)
4117 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4118 return R1_REGS;
4120 /* Profiling showed the PA port spends about 1.3% of its compilation
4121 time in true_regnum from calls inside secondary_reload_class. */
4123 if (GET_CODE (in) == REG)
4125 regno = REGNO (in);
4126 if (regno >= FIRST_PSEUDO_REGISTER)
4127 regno = true_regnum (in);
4129 else if (GET_CODE (in) == SUBREG)
4130 regno = true_regnum (in);
4131 else
4132 regno = -1;
4134 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4135 && GET_MODE_CLASS (mode) == MODE_INT
4136 && FP_REG_CLASS_P (class))
4137 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4138 return GENERAL_REGS;
4140 if (GET_CODE (in) == HIGH)
4141 in = XEXP (in, 0);
4143 /* Profiling has showed GCC spends about 2.6% of its compilation
4144 time in symbolic_operand from calls inside secondary_reload_class.
4146 We use an inline copy and only compute its return value once to avoid
4147 useless work. */
4148 switch (GET_CODE (in))
4150 rtx tmp;
4152 case SYMBOL_REF:
4153 case LABEL_REF:
4154 is_symbolic = 1;
4155 break;
4156 case CONST:
4157 tmp = XEXP (in, 0);
4158 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4159 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4160 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4161 break;
4162 default:
4163 is_symbolic = 0;
4164 break;
4167 if (!flag_pic
4168 && is_symbolic
4169 && read_only_operand (in))
4170 return NO_REGS;
4172 if (class != R1_REGS && is_symbolic)
4173 return R1_REGS;
4175 return NO_REGS;
4178 enum direction
4179 function_arg_padding (mode, type)
4180 enum machine_mode mode;
4181 tree type;
4183 int size;
4185 if (mode == BLKmode)
4187 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4188 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4189 else
4190 return upward; /* Don't know if this is right, but */
4191 /* same as old definition. */
4193 else
4194 size = GET_MODE_BITSIZE (mode);
4195 if (size < PARM_BOUNDARY)
4196 return downward;
4197 else if (size % PARM_BOUNDARY)
4198 return upward;
4199 else
4200 return none;
4204 /* Do what is necessary for `va_start'. The argument is ignored;
4205 We look at the current function to determine if stdargs or varargs
4206 is used and fill in an initial va_list. A pointer to this constructor
4207 is returned. */
4209 struct rtx_def *
4210 hppa_builtin_saveregs (arglist)
4211 tree arglist;
4213 rtx offset;
4214 tree fntype = TREE_TYPE (current_function_decl);
4215 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4216 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4217 != void_type_node)))
4218 ? UNITS_PER_WORD : 0);
4220 if (argadj)
4221 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4222 else
4223 offset = current_function_arg_offset_rtx;
4225 /* Store general registers on the stack. */
4226 move_block_from_reg (23,
4227 gen_rtx (MEM, BLKmode,
4228 plus_constant
4229 (current_function_internal_arg_pointer, -16)),
4230 4, 4 * UNITS_PER_WORD);
4231 return copy_to_reg (expand_binop (Pmode, add_optab,
4232 current_function_internal_arg_pointer,
4233 offset, 0, 0, OPTAB_LIB_WIDEN));
4236 /* This routine handles all the normal conditional branch sequences we
4237 might need to generate. It handles compare immediate vs compare
4238 register, nullification of delay slots, varying length branches,
4239 negated branches, and all combinations of the above. It returns the
4240 output appropriate to emit the branch corresponding to all given
4241 parameters. */
4243 char *
4244 output_cbranch (operands, nullify, length, negated, insn)
4245 rtx *operands;
4246 int nullify, length, negated;
4247 rtx insn;
4249 static char buf[100];
4250 int useskip = 0;
4252 /* A conditional branch to the following instruction (eg the delay slot) is
4253 asking for a disaster. This can happen when not optimizing.
4255 In such cases it is safe to emit nothing. */
4257 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4258 return "";
4260 /* If this is a long branch with its delay slot unfilled, set `nullify'
4261 as it can nullify the delay slot and save a nop. */
4262 if (length == 8 && dbr_sequence_length () == 0)
4263 nullify = 1;
4265 /* If this is a short forward conditional branch which did not get
4266 its delay slot filled, the delay slot can still be nullified. */
4267 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4268 nullify = forward_branch_p (insn);
4270 /* A forward branch over a single nullified insn can be done with a
4271 comclr instruction. This avoids a single cycle penalty due to
4272 mis-predicted branch if we fall through (branch not taken). */
4273 if (length == 4
4274 && next_real_insn (insn) != 0
4275 && get_attr_length (next_real_insn (insn)) == 4
4276 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4277 && nullify)
4278 useskip = 1;
4280 switch (length)
4282 /* All short conditional branches except backwards with an unfilled
4283 delay slot. */
4284 case 4:
4285 if (useskip)
4286 strcpy (buf, "com%I2clr,");
4287 else
4288 strcpy (buf, "com%I2b,");
4289 if (negated)
4290 strcat (buf, "%B3");
4291 else
4292 strcat (buf, "%S3");
4293 if (useskip)
4294 strcat (buf, " %2,%1,0");
4295 else if (nullify)
4296 strcat (buf, ",n %2,%1,%0");
4297 else
4298 strcat (buf, " %2,%1,%0");
4299 break;
4301 /* All long conditionals. Note an short backward branch with an
4302 unfilled delay slot is treated just like a long backward branch
4303 with an unfilled delay slot. */
4304 case 8:
4305 /* Handle weird backwards branch with a filled delay slot
4306 with is nullified. */
4307 if (dbr_sequence_length () != 0
4308 && ! forward_branch_p (insn)
4309 && nullify)
4311 strcpy (buf, "com%I2b,");
4312 if (negated)
4313 strcat (buf, "%S3");
4314 else
4315 strcat (buf, "%B3");
4316 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4318 /* Handle short backwards branch with an unfilled delay slot.
4319 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4320 taken and untaken branches. */
4321 else if (dbr_sequence_length () == 0
4322 && ! forward_branch_p (insn)
4323 && insn_addresses
4324 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4325 - insn_addresses[INSN_UID (insn)] - 8))
4327 strcpy (buf, "com%I2b,");
4328 if (negated)
4329 strcat (buf, "%B3 %2,%1,%0%#");
4330 else
4331 strcat (buf, "%S3 %2,%1,%0%#");
4333 else
4335 strcpy (buf, "com%I2clr,");
4336 if (negated)
4337 strcat (buf, "%S3");
4338 else
4339 strcat (buf, "%B3");
4340 if (nullify)
4341 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4342 else
4343 strcat (buf, " %2,%1,0\n\tbl %0,0");
4345 break;
4347 default:
4348 abort();
4350 return buf;
4353 /* This routine handles all the branch-on-bit conditional branch sequences we
4354 might need to generate. It handles nullification of delay slots,
4355 varying length branches, negated branches and all combinations of the
4356 above. it returns the appropriate output template to emit the branch. */
4358 char *
4359 output_bb (operands, nullify, length, negated, insn, which)
4360 rtx *operands;
4361 int nullify, length, negated;
4362 rtx insn;
4363 int which;
4365 static char buf[100];
4366 int useskip = 0;
4368 /* A conditional branch to the following instruction (eg the delay slot) is
4369 asking for a disaster. I do not think this can happen as this pattern
4370 is only used when optimizing; jump optimization should eliminate the
4371 jump. But be prepared just in case. */
4373 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4374 return "";
4376 /* If this is a long branch with its delay slot unfilled, set `nullify'
4377 as it can nullify the delay slot and save a nop. */
4378 if (length == 8 && dbr_sequence_length () == 0)
4379 nullify = 1;
4381 /* If this is a short forward conditional branch which did not get
4382 its delay slot filled, the delay slot can still be nullified. */
4383 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4384 nullify = forward_branch_p (insn);
4386 /* A forward branch over a single nullified insn can be done with a
4387 extrs instruction. This avoids a single cycle penalty due to
4388 mis-predicted branch if we fall through (branch not taken). */
4390 if (length == 4
4391 && next_real_insn (insn) != 0
4392 && get_attr_length (next_real_insn (insn)) == 4
4393 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4394 && nullify)
4395 useskip = 1;
4397 switch (length)
4400 /* All short conditional branches except backwards with an unfilled
4401 delay slot. */
4402 case 4:
4403 if (useskip)
4404 strcpy (buf, "extrs,");
4405 else
4406 strcpy (buf, "bb,");
4407 if ((which == 0 && negated)
4408 || (which == 1 && ! negated))
4409 strcat (buf, ">=");
4410 else
4411 strcat (buf, "<");
4412 if (useskip)
4413 strcat (buf, " %0,%1,1,0");
4414 else if (nullify && negated)
4415 strcat (buf, ",n %0,%1,%3");
4416 else if (nullify && ! negated)
4417 strcat (buf, ",n %0,%1,%2");
4418 else if (! nullify && negated)
4419 strcat (buf, "%0,%1,%3");
4420 else if (! nullify && ! negated)
4421 strcat (buf, " %0,%1,%2");
4422 break;
4424 /* All long conditionals. Note an short backward branch with an
4425 unfilled delay slot is treated just like a long backward branch
4426 with an unfilled delay slot. */
4427 case 8:
4428 /* Handle weird backwards branch with a filled delay slot
4429 with is nullified. */
4430 if (dbr_sequence_length () != 0
4431 && ! forward_branch_p (insn)
4432 && nullify)
4434 strcpy (buf, "bb,");
4435 if ((which == 0 && negated)
4436 || (which == 1 && ! negated))
4437 strcat (buf, "<");
4438 else
4439 strcat (buf, ">=");
4440 if (negated)
4441 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4442 else
4443 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4445 /* Handle short backwards branch with an unfilled delay slot.
4446 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4447 taken and untaken branches. */
4448 else if (dbr_sequence_length () == 0
4449 && ! forward_branch_p (insn)
4450 && insn_addresses
4451 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4452 - insn_addresses[INSN_UID (insn)] - 8))
4454 strcpy (buf, "bb,");
4455 if ((which == 0 && negated)
4456 || (which == 1 && ! negated))
4457 strcat (buf, ">=");
4458 else
4459 strcat (buf, "<");
4460 if (negated)
4461 strcat (buf, " %0,%1,%3%#");
4462 else
4463 strcat (buf, " %0,%1,%2%#");
4465 else
4467 strcpy (buf, "extrs,");
4468 if ((which == 0 && negated)
4469 || (which == 1 && ! negated))
4470 strcat (buf, "<");
4471 else
4472 strcat (buf, ">=");
4473 if (nullify && negated)
4474 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4475 else if (nullify && ! negated)
4476 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4477 else if (negated)
4478 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4479 else
4480 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4482 break;
4484 default:
4485 abort();
4487 return buf;
4490 /* This routine handles all the branch-on-variable-bit conditional branch
4491 sequences we might need to generate. It handles nullification of delay
4492 slots, varying length branches, negated branches and all combinations
4493 of the above. it returns the appropriate output template to emit the
4494 branch. */
4496 char *
4497 output_bvb (operands, nullify, length, negated, insn, which)
4498 rtx *operands;
4499 int nullify, length, negated;
4500 rtx insn;
4501 int which;
4503 static char buf[100];
4504 int useskip = 0;
4506 /* A conditional branch to the following instruction (eg the delay slot) is
4507 asking for a disaster. I do not think this can happen as this pattern
4508 is only used when optimizing; jump optimization should eliminate the
4509 jump. But be prepared just in case. */
4511 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4512 return "";
4514 /* If this is a long branch with its delay slot unfilled, set `nullify'
4515 as it can nullify the delay slot and save a nop. */
4516 if (length == 8 && dbr_sequence_length () == 0)
4517 nullify = 1;
4519 /* If this is a short forward conditional branch which did not get
4520 its delay slot filled, the delay slot can still be nullified. */
4521 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4522 nullify = forward_branch_p (insn);
4524 /* A forward branch over a single nullified insn can be done with a
4525 extrs instruction. This avoids a single cycle penalty due to
4526 mis-predicted branch if we fall through (branch not taken). */
4528 if (length == 4
4529 && next_real_insn (insn) != 0
4530 && get_attr_length (next_real_insn (insn)) == 4
4531 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4532 && nullify)
4533 useskip = 1;
4535 switch (length)
4538 /* All short conditional branches except backwards with an unfilled
4539 delay slot. */
4540 case 4:
4541 if (useskip)
4542 strcpy (buf, "vextrs,");
4543 else
4544 strcpy (buf, "bvb,");
4545 if ((which == 0 && negated)
4546 || (which == 1 && ! negated))
4547 strcat (buf, ">=");
4548 else
4549 strcat (buf, "<");
4550 if (useskip)
4551 strcat (buf, " %0,1,0");
4552 else if (nullify && negated)
4553 strcat (buf, ",n %0,%3");
4554 else if (nullify && ! negated)
4555 strcat (buf, ",n %0,%2");
4556 else if (! nullify && negated)
4557 strcat (buf, "%0,%3");
4558 else if (! nullify && ! negated)
4559 strcat (buf, " %0,%2");
4560 break;
4562 /* All long conditionals. Note an short backward branch with an
4563 unfilled delay slot is treated just like a long backward branch
4564 with an unfilled delay slot. */
4565 case 8:
4566 /* Handle weird backwards branch with a filled delay slot
4567 with is nullified. */
4568 if (dbr_sequence_length () != 0
4569 && ! forward_branch_p (insn)
4570 && nullify)
4572 strcpy (buf, "bvb,");
4573 if ((which == 0 && negated)
4574 || (which == 1 && ! negated))
4575 strcat (buf, "<");
4576 else
4577 strcat (buf, ">=");
4578 if (negated)
4579 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4580 else
4581 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4583 /* Handle short backwards branch with an unfilled delay slot.
4584 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4585 taken and untaken branches. */
4586 else if (dbr_sequence_length () == 0
4587 && ! forward_branch_p (insn)
4588 && insn_addresses
4589 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4590 - insn_addresses[INSN_UID (insn)] - 8))
4592 strcpy (buf, "bvb,");
4593 if ((which == 0 && negated)
4594 || (which == 1 && ! negated))
4595 strcat (buf, ">=");
4596 else
4597 strcat (buf, "<");
4598 if (negated)
4599 strcat (buf, " %0,%3%#");
4600 else
4601 strcat (buf, " %0,%2%#");
4603 else
4605 strcpy (buf, "vextrs,");
4606 if ((which == 0 && negated)
4607 || (which == 1 && ! negated))
4608 strcat (buf, "<");
4609 else
4610 strcat (buf, ">=");
4611 if (nullify && negated)
4612 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4613 else if (nullify && ! negated)
4614 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4615 else if (negated)
4616 strcat (buf, " %0,1,0\n\tbl %3,0");
4617 else
4618 strcat (buf, " %0,1,0\n\tbl %2,0");
4620 break;
4622 default:
4623 abort();
4625 return buf;
4628 /* Return the output template for emitting a dbra type insn.
4630 Note it may perform some output operations on its own before
4631 returning the final output string. */
4632 char *
4633 output_dbra (operands, insn, which_alternative)
4634 rtx *operands;
4635 rtx insn;
4636 int which_alternative;
4639 /* A conditional branch to the following instruction (eg the delay slot) is
4640 asking for a disaster. Be prepared! */
4642 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4644 if (which_alternative == 0)
4645 return "ldo %1(%0),%0";
4646 else if (which_alternative == 1)
4648 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4649 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4650 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4651 return "fldws -16(0,%%r30),%0";
4653 else
4655 output_asm_insn ("ldw %0,%4", operands);
4656 return "ldo %1(%4),%4\n\tstw %4,%0";
4660 if (which_alternative == 0)
4662 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4663 int length = get_attr_length (insn);
4665 /* If this is a long branch with its delay slot unfilled, set `nullify'
4666 as it can nullify the delay slot and save a nop. */
4667 if (length == 8 && dbr_sequence_length () == 0)
4668 nullify = 1;
4670 /* If this is a short forward conditional branch which did not get
4671 its delay slot filled, the delay slot can still be nullified. */
4672 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4673 nullify = forward_branch_p (insn);
4675 /* Handle short versions first. */
4676 if (length == 4 && nullify)
4677 return "addib,%C2,n %1,%0,%3";
4678 else if (length == 4 && ! nullify)
4679 return "addib,%C2 %1,%0,%3";
4680 else if (length == 8)
4682 /* Handle weird backwards branch with a fulled delay slot
4683 which is nullified. */
4684 if (dbr_sequence_length () != 0
4685 && ! forward_branch_p (insn)
4686 && nullify)
4687 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4688 /* Handle short backwards branch with an unfilled delay slot.
4689 Using a addb;nop rather than addi;bl saves 1 cycle for both
4690 taken and untaken branches. */
4691 else if (dbr_sequence_length () == 0
4692 && ! forward_branch_p (insn)
4693 && insn_addresses
4694 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4695 - insn_addresses[INSN_UID (insn)] - 8))
4696 return "addib,%C2 %1,%0,%3%#";
4698 /* Handle normal cases. */
4699 if (nullify)
4700 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4701 else
4702 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4704 else
4705 abort();
4707 /* Deal with gross reload from FP register case. */
4708 else if (which_alternative == 1)
4710 /* Move loop counter from FP register to MEM then into a GR,
4711 increment the GR, store the GR into MEM, and finally reload
4712 the FP register from MEM from within the branch's delay slot. */
4713 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4714 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4715 if (get_attr_length (insn) == 24)
4716 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4717 else
4718 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4720 /* Deal with gross reload from memory case. */
4721 else
4723 /* Reload loop counter from memory, the store back to memory
4724 happens in the branch's delay slot. */
4725 output_asm_insn ("ldw %0,%4", operands);
4726 if (get_attr_length (insn) == 12)
4727 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4728 else
4729 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
4733 /* Return the output template for emitting a dbra type insn.
4735 Note it may perform some output operations on its own before
4736 returning the final output string. */
4737 char *
4738 output_movb (operands, insn, which_alternative, reverse_comparison)
4739 rtx *operands;
4740 rtx insn;
4741 int which_alternative;
4742 int reverse_comparison;
4745 /* A conditional branch to the following instruction (eg the delay slot) is
4746 asking for a disaster. Be prepared! */
4748 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4750 if (which_alternative == 0)
4751 return "copy %1,%0";
4752 else if (which_alternative == 1)
4754 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4755 return "fldws -16(0,%%r30),%0";
4757 else if (which_alternative == 2)
4758 return "stw %1,%0";
4759 else
4760 return "mtsar %r1";
4763 /* Support the second variant. */
4764 if (reverse_comparison)
4765 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4767 if (which_alternative == 0)
4769 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4770 int length = get_attr_length (insn);
4772 /* If this is a long branch with its delay slot unfilled, set `nullify'
4773 as it can nullify the delay slot and save a nop. */
4774 if (length == 8 && dbr_sequence_length () == 0)
4775 nullify = 1;
4777 /* If this is a short forward conditional branch which did not get
4778 its delay slot filled, the delay slot can still be nullified. */
4779 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4780 nullify = forward_branch_p (insn);
4782 /* Handle short versions first. */
4783 if (length == 4 && nullify)
4784 return "movb,%C2,n %1,%0,%3";
4785 else if (length == 4 && ! nullify)
4786 return "movb,%C2 %1,%0,%3";
4787 else if (length == 8)
4789 /* Handle weird backwards branch with a filled delay slot
4790 which is nullified. */
4791 if (dbr_sequence_length () != 0
4792 && ! forward_branch_p (insn)
4793 && nullify)
4794 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4796 /* Handle short backwards branch with an unfilled delay slot.
4797 Using a movb;nop rather than or;bl saves 1 cycle for both
4798 taken and untaken branches. */
4799 else if (dbr_sequence_length () == 0
4800 && ! forward_branch_p (insn)
4801 && insn_addresses
4802 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4803 - insn_addresses[INSN_UID (insn)] - 8))
4804 return "movb,%C2 %1,%0,%3%#";
4805 /* Handle normal cases. */
4806 if (nullify)
4807 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4808 else
4809 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4811 else
4812 abort();
4814 /* Deal with gross reload from FP register case. */
4815 else if (which_alternative == 1)
4817 /* Move loop counter from FP register to MEM then into a GR,
4818 increment the GR, store the GR into MEM, and finally reload
4819 the FP register from MEM from within the branch's delay slot. */
4820 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4821 if (get_attr_length (insn) == 12)
4822 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
4823 else
4824 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4826 /* Deal with gross reload from memory case. */
4827 else if (which_alternative == 2)
4829 /* Reload loop counter from memory, the store back to memory
4830 happens in the branch's delay slot. */
4831 if (get_attr_length (insn) == 8)
4832 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
4833 else
4834 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
4836 /* Handle SAR as a destination. */
4837 else
4839 if (get_attr_length (insn) == 8)
4840 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
4841 else
4842 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
4847 /* INSN is a millicode call. It may have an unconditional jump in its delay
4848 slot.
4850 CALL_DEST is the routine we are calling. */
4852 char *
4853 output_millicode_call (insn, call_dest)
4854 rtx insn;
4855 rtx call_dest;
4857 int distance;
4858 rtx xoperands[4];
4859 rtx seq_insn;
4861 /* Handle common case -- empty delay slot or no jump in the delay slot,
4862 and we're sure that the branch will reach the beginning of the $CODE$
4863 subspace. */
4864 if ((dbr_sequence_length () == 0
4865 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
4866 || (dbr_sequence_length () != 0
4867 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4868 && get_attr_length (insn) == 4))
4870 xoperands[0] = call_dest;
4871 output_asm_insn ("bl %0,%%r31%#", xoperands);
4872 return "";
4875 /* This call may not reach the beginning of the $CODE$ subspace. */
4876 if (get_attr_length (insn) > 4)
4878 int delay_insn_deleted = 0;
4879 rtx xoperands[2];
4880 rtx link;
4882 /* We need to emit an inline long-call branch. */
4883 if (dbr_sequence_length () != 0
4884 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
4886 /* A non-jump insn in the delay slot. By definition we can
4887 emit this insn before the call. */
4888 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
4890 /* Now delete the delay insn. */
4891 PUT_CODE (NEXT_INSN (insn), NOTE);
4892 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4893 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4894 delay_insn_deleted = 1;
4897 /* If we're allowed to use be/ble instructions, then this is the
4898 best sequence to use for a long millicode call. */
4899 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
4900 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
4902 xoperands[0] = call_dest;
4903 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
4904 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
4905 output_asm_insn ("nop", xoperands);
4907 /* Pure portable runtime doesn't allow be/ble; we also don't have
4908 PIC support int he assembler/linker, so this sequence is needed. */
4909 else if (TARGET_PORTABLE_RUNTIME)
4911 xoperands[0] = call_dest;
4912 /* Get the address of our target into %r29. */
4913 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
4914 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
4916 /* Get our return address into %r31. */
4917 output_asm_insn ("blr 0,%%r31", xoperands);
4919 /* Jump to our target address in %r29. */
4920 output_asm_insn ("bv,n 0(%%r29)", xoperands);
4922 /* Empty delay slot. Note this insn gets fetched twice and
4923 executed once. To be safe we use a nop. */
4924 output_asm_insn ("nop", xoperands);
4925 return "";
4927 /* PIC long millicode call sequence. */
4928 else
4930 xoperands[0] = call_dest;
4931 xoperands[1] = gen_label_rtx ();
4932 /* Get our address + 8 into %r1. */
4933 output_asm_insn ("bl .+8,%%r1", xoperands);
4935 /* Add %r1 to the offset of our target from the next insn. */
4936 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
4937 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4938 CODE_LABEL_NUMBER (xoperands[1]));
4939 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
4941 /* Get the return address into %r31. */
4942 output_asm_insn ("blr 0,%%r31", xoperands);
4944 /* Branch to our target which is in %r1. */
4945 output_asm_insn ("bv,n 0(%%r1)", xoperands);
4947 /* Empty delay slot. Note this insn gets fetched twice and
4948 executed once. To be safe we use a nop. */
4949 output_asm_insn ("nop", xoperands);
4952 /* If we had a jump in the call's delay slot, output it now. */
4953 if (dbr_sequence_length () != 0
4954 && !delay_insn_deleted)
4956 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4957 output_asm_insn ("b,n %0", xoperands);
4959 /* Now delete the delay insn. */
4960 PUT_CODE (NEXT_INSN (insn), NOTE);
4961 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4962 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4964 return "";
4967 /* This call has an unconditional jump in its delay slot and the
4968 call is known to reach its target or the beginning of the current
4969 subspace. */
4971 /* Use the containing sequence insn's address. */
4972 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
4974 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
4975 - insn_addresses[INSN_UID (seq_insn)] - 8;
4977 /* If the branch was too far away, emit a normal call followed
4978 by a nop, followed by the unconditional branch.
4980 If the branch is close, then adjust %r2 from within the
4981 call's delay slot. */
4983 xoperands[0] = call_dest;
4984 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4985 if (! VAL_14_BITS_P (distance))
4986 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
4987 else
4989 xoperands[3] = gen_label_rtx ();
4990 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
4991 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4992 CODE_LABEL_NUMBER (xoperands[3]));
4995 /* Delete the jump. */
4996 PUT_CODE (NEXT_INSN (insn), NOTE);
4997 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4998 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4999 return "";
5002 /* INSN is either a function call. It may have an unconditional jump
5003 in its delay slot.
5005 CALL_DEST is the routine we are calling. */
5007 char *
5008 output_call (insn, call_dest)
5009 rtx insn;
5010 rtx call_dest;
5012 int distance;
5013 rtx xoperands[4];
5014 rtx seq_insn;
5016 /* Handle common case -- empty delay slot or no jump in the delay slot,
5017 and we're sure that the branch will reach the beginning of the $CODE$
5018 subspace. */
5019 if ((dbr_sequence_length () == 0
5020 && get_attr_length (insn) == 8)
5021 || (dbr_sequence_length () != 0
5022 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5023 && get_attr_length (insn) == 4))
5025 xoperands[0] = call_dest;
5026 output_asm_insn ("bl %0,%%r2%#", xoperands);
5027 return "";
5030 /* This call may not reach the beginning of the $CODE$ subspace. */
5031 if (get_attr_length (insn) > 8)
5033 int delay_insn_deleted = 0;
5034 rtx xoperands[2];
5035 rtx link;
5037 /* We need to emit an inline long-call branch. Furthermore,
5038 because we're changing a named function call into an indirect
5039 function call well after the parameters have been set up, we
5040 need to make sure any FP args appear in both the integer
5041 and FP registers. Also, we need move any delay slot insn
5042 out of the delay slot. And finally, we can't rely on the linker
5043 being able to fix the call to $$dyncall! -- Yuk!. */
5044 if (dbr_sequence_length () != 0
5045 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5047 /* A non-jump insn in the delay slot. By definition we can
5048 emit this insn before the call (and in fact before argument
5049 relocating. */
5050 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5052 /* Now delete the delay insn. */
5053 PUT_CODE (NEXT_INSN (insn), NOTE);
5054 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5055 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5056 delay_insn_deleted = 1;
5059 /* Now copy any FP arguments into integer registers. */
5060 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5062 int arg_mode, regno;
5063 rtx use = XEXP (link, 0);
5064 if (! (GET_CODE (use) == USE
5065 && GET_CODE (XEXP (use, 0)) == REG
5066 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5067 continue;
5069 arg_mode = GET_MODE (XEXP (use, 0));
5070 regno = REGNO (XEXP (use, 0));
5071 /* Is it a floating point register? */
5072 if (regno >= 32 && regno <= 39)
5074 /* Copy from the FP register into an integer register
5075 (via memory). */
5076 if (arg_mode == SFmode)
5078 xoperands[0] = XEXP (use, 0);
5079 xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
5080 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
5081 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5083 else
5085 xoperands[0] = XEXP (use, 0);
5086 xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
5087 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
5088 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5089 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5095 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5096 we don't have any direct calls in that case. */
5097 if (flag_pic)
5099 /* We have to load the address of the function using a procedure
5100 label (plabel). The LP and RP relocs don't work reliably for PIC,
5101 so we make a plain 32 bit plabel in the data segment instead. We
5102 have to defer outputting it of course... Not pretty. */
5104 xoperands[0] = gen_label_rtx ();
5105 xoperands[1] = gen_label_rtx ();
5106 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5107 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5108 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5110 if (deferred_plabels == 0)
5111 deferred_plabels = (struct defer_plab *)
5112 xmalloc (1 * sizeof (struct defer_plab));
5113 else
5114 deferred_plabels = (struct defer_plab *)
5115 xrealloc (deferred_plabels,
5116 (n_deferred_plabels + 1) * sizeof (struct defer_plab));
5117 deferred_plabels[n_deferred_plabels].internal_label = xoperands[0];
5118 deferred_plabels[n_deferred_plabels].symbol = call_dest;
5119 n_deferred_plabels++;
5121 /* Get our address + 8 into %r1. */
5122 output_asm_insn ("bl .+8,%%r1", xoperands);
5124 /* Add %r1 to the offset of dyncall from the next insn. */
5125 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5126 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5127 CODE_LABEL_NUMBER (xoperands[1]));
5128 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5130 /* Get the return address into %r31. */
5131 output_asm_insn ("blr 0,%%r31", xoperands);
5133 /* Branch to our target which is in %r1. */
5134 output_asm_insn ("bv 0(%%r1)", xoperands);
5136 /* Copy the return address into %r2 also. */
5137 output_asm_insn ("copy %%r31,%%r2", xoperands);
5139 else
5141 /* No PIC stuff to worry about. We can use ldil;ble. */
5142 xoperands[0] = call_dest;
5144 /* Get the address of our target into %r22. */
5145 output_asm_insn ("ldil LP%%%0,%%r22", xoperands);
5146 output_asm_insn ("ldo RP%%%0(%%r22),%%r22", xoperands);
5148 /* Get the high part of the address of $dyncall into %r2, then
5149 add in the low part in the branch instruction. */
5150 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5151 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5153 /* Copy the return pointer into both %r31 and %r2. */
5154 output_asm_insn ("copy %%r31,%%r2", xoperands);
5157 /* If we had a jump in the call's delay slot, output it now. */
5158 if (dbr_sequence_length () != 0
5159 && !delay_insn_deleted)
5161 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5162 output_asm_insn ("b,n %0", xoperands);
5164 /* Now delete the delay insn. */
5165 PUT_CODE (NEXT_INSN (insn), NOTE);
5166 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5167 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5169 return "";
5172 /* This call has an unconditional jump in its delay slot and the
5173 call is known to reach its target or the beginning of the current
5174 subspace. */
5176 /* Use the containing sequence insn's address. */
5177 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5179 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5180 - insn_addresses[INSN_UID (seq_insn)] - 8;
5182 /* If the branch was too far away, emit a normal call followed
5183 by a nop, followed by the unconditional branch.
5185 If the branch is close, then adjust %r2 from within the
5186 call's delay slot. */
5188 xoperands[0] = call_dest;
5189 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5190 if (! VAL_14_BITS_P (distance))
5191 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5192 else
5194 xoperands[3] = gen_label_rtx ();
5195 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5196 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5197 CODE_LABEL_NUMBER (xoperands[3]));
5200 /* Delete the jump. */
5201 PUT_CODE (NEXT_INSN (insn), NOTE);
5202 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5203 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5204 return "";
5207 extern struct obstack permanent_obstack;
5208 extern struct obstack *saveable_obstack;
5210 /* In HPUX 8.0's shared library scheme, special relocations are needed
5211 for function labels if they might be passed to a function
5212 in a shared library (because shared libraries don't live in code
5213 space), and special magic is needed to construct their address.
5215 For reasons too disgusting to describe storage for the new name
5216 is allocated either on the saveable_obstack (released at function
5217 exit) or on the permanent_obstack for things that can never change
5218 (libcall names for example). */
5220 void
5221 hppa_encode_label (sym, permanent)
5222 rtx sym;
5223 int permanent;
5225 char *str = XSTR (sym, 0);
5226 int len = strlen (str);
5227 char *newstr;
5229 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5230 len + 2);
5232 if (str[0] == '*')
5233 *newstr++ = *str++;
5234 strcpy (newstr + 1, str);
5235 *newstr = '@';
5236 XSTR (sym,0) = newstr;
5240 function_label_operand (op, mode)
5241 rtx op;
5242 enum machine_mode mode;
5244 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5247 /* Returns 1 if OP is a function label involved in a simple addition
5248 with a constant. Used to keep certain patterns from matching
5249 during instruction combination. */
5251 is_function_label_plus_const (op)
5252 rtx op;
5254 /* Strip off any CONST. */
5255 if (GET_CODE (op) == CONST)
5256 op = XEXP (op, 0);
5258 return (GET_CODE (op) == PLUS
5259 && function_label_operand (XEXP (op, 0), Pmode)
5260 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5263 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5264 use in fmpyadd instructions. */
5266 fmpyaddoperands (operands)
5267 rtx *operands;
5269 enum machine_mode mode = GET_MODE (operands[0]);
5271 /* Must be a floating point mode. */
5272 if (mode != SFmode && mode != DFmode)
5273 return 0;
5275 /* All modes must be the same. */
5276 if (! (mode == GET_MODE (operands[1])
5277 && mode == GET_MODE (operands[2])
5278 && mode == GET_MODE (operands[3])
5279 && mode == GET_MODE (operands[4])
5280 && mode == GET_MODE (operands[5])))
5281 return 0;
5283 /* All operands must be registers. */
5284 if (! (GET_CODE (operands[1]) == REG
5285 && GET_CODE (operands[2]) == REG
5286 && GET_CODE (operands[3]) == REG
5287 && GET_CODE (operands[4]) == REG
5288 && GET_CODE (operands[5]) == REG))
5289 return 0;
5291 /* Only 2 real operands to the addition. One of the input operands must
5292 be the same as the output operand. */
5293 if (! rtx_equal_p (operands[3], operands[4])
5294 && ! rtx_equal_p (operands[3], operands[5]))
5295 return 0;
5297 /* Inout operand of add can not conflict with any operands from multiply. */
5298 if (rtx_equal_p (operands[3], operands[0])
5299 || rtx_equal_p (operands[3], operands[1])
5300 || rtx_equal_p (operands[3], operands[2]))
5301 return 0;
5303 /* multiply can not feed into addition operands. */
5304 if (rtx_equal_p (operands[4], operands[0])
5305 || rtx_equal_p (operands[5], operands[0]))
5306 return 0;
5308 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5309 if (mode == SFmode
5310 && (REGNO (operands[0]) < 57
5311 || REGNO (operands[1]) < 57
5312 || REGNO (operands[2]) < 57
5313 || REGNO (operands[3]) < 57
5314 || REGNO (operands[4]) < 57
5315 || REGNO (operands[5]) < 57))
5316 return 0;
5318 /* Passed. Operands are suitable for fmpyadd. */
5319 return 1;
5322 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5323 use in fmpysub instructions. */
5325 fmpysuboperands (operands)
5326 rtx *operands;
5328 enum machine_mode mode = GET_MODE (operands[0]);
5330 /* Must be a floating point mode. */
5331 if (mode != SFmode && mode != DFmode)
5332 return 0;
5334 /* All modes must be the same. */
5335 if (! (mode == GET_MODE (operands[1])
5336 && mode == GET_MODE (operands[2])
5337 && mode == GET_MODE (operands[3])
5338 && mode == GET_MODE (operands[4])
5339 && mode == GET_MODE (operands[5])))
5340 return 0;
5342 /* All operands must be registers. */
5343 if (! (GET_CODE (operands[1]) == REG
5344 && GET_CODE (operands[2]) == REG
5345 && GET_CODE (operands[3]) == REG
5346 && GET_CODE (operands[4]) == REG
5347 && GET_CODE (operands[5]) == REG))
5348 return 0;
5350 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5351 operation, so operands[4] must be the same as operand[3]. */
5352 if (! rtx_equal_p (operands[3], operands[4]))
5353 return 0;
5355 /* multiply can not feed into subtraction. */
5356 if (rtx_equal_p (operands[5], operands[0]))
5357 return 0;
5359 /* Inout operand of sub can not conflict with any operands from multiply. */
5360 if (rtx_equal_p (operands[3], operands[0])
5361 || rtx_equal_p (operands[3], operands[1])
5362 || rtx_equal_p (operands[3], operands[2]))
5363 return 0;
5365 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5366 if (mode == SFmode
5367 && (REGNO (operands[0]) < 57
5368 || REGNO (operands[1]) < 57
5369 || REGNO (operands[2]) < 57
5370 || REGNO (operands[3]) < 57
5371 || REGNO (operands[4]) < 57
5372 || REGNO (operands[5]) < 57))
5373 return 0;
5375 /* Passed. Operands are suitable for fmpysub. */
5376 return 1;
5380 plus_xor_ior_operator (op, mode)
5381 rtx op;
5382 enum machine_mode mode;
5384 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5385 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  Any other value yields 0.  */

int
shadd_constant_p (val)
     int val;
{
  return val == 2 || val == 4 || val == 8;
}
5400 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5401 the valid constant for shadd instructions. */
5403 shadd_operand (op, mode)
5404 rtx op;
5405 enum machine_mode mode;
5407 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5410 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5413 basereg_operand (op, mode)
5414 rtx op;
5415 enum machine_mode mode;
5417 /* cse will create some unscaled indexed addresses, however; it
5418 generally isn't a win on the PA, so avoid creating unscaled
5419 indexed addresses until after cse is finished. */
5420 if (!cse_not_expected)
5421 return 0;
5423 /* Once reload has started everything is considered valid. Reload should
5424 only create indexed addresses using the stack/frame pointer, and any
5425 others were checked for validity when created by the combine pass.
5427 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5428 we don't have to worry about the braindamaged implicit space register
5429 selection using the basereg only (rather than effective address)
5430 screwing us over. */
5431 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5432 return (GET_CODE (op) == REG);
5434 /* Stack is always OK for indexing. */
5435 if (op == stack_pointer_rtx)
5436 return 1;
5438 /* While it's always safe to index off the frame pointer, it's not
5439 always profitable, particularly when the frame pointer is being
5440 eliminated. */
5441 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5442 return 1;
5444 /* The only other valid OPs are pseudo registers with
5445 REGNO_POINTER_FLAG set. */
5446 if (GET_CODE (op) != REG
5447 || REGNO (op) < FIRST_PSEUDO_REGISTER
5448 || ! register_operand (op, mode))
5449 return 0;
5451 return REGNO_POINTER_FLAG (REGNO (op));
5454 /* Return 1 if this operand is anything other than a hard register. */
5457 non_hard_reg_operand (op, mode)
5458 rtx op;
5459 enum machine_mode mode;
5461 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5464 /* Return 1 if INSN branches forward. Should be using insn_addresses
5465 to avoid walking through all the insns... */
5467 forward_branch_p (insn)
5468 rtx insn;
5470 rtx label = JUMP_LABEL (insn);
5472 while (insn)
5474 if (insn == label)
5475 break;
5476 else
5477 insn = NEXT_INSN (insn);
5480 return (insn == label);
5483 /* Return 1 if OP is an equality comparison, else return 0. */
5485 eq_neq_comparison_operator (op, mode)
5486 rtx op;
5487 enum machine_mode mode;
5489 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5492 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5494 movb_comparison_operator (op, mode)
5495 rtx op;
5496 enum machine_mode mode;
5498 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5499 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5502 /* Return 1 if INSN is in the delay slot of a call instruction. */
5504 jump_in_call_delay (insn)
5505 rtx insn;
5508 if (GET_CODE (insn) != JUMP_INSN)
5509 return 0;
5511 if (PREV_INSN (insn)
5512 && PREV_INSN (PREV_INSN (insn))
5513 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5515 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5517 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5518 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5521 else
5522 return 0;
5525 /* Output an unconditional move and branch insn. */
5527 char *
5528 output_parallel_movb (operands, length)
5529 rtx *operands;
5530 int length;
5532 /* These are the cases in which we win. */
5533 if (length == 4)
5534 return "mov%I1b,tr %1,%0,%2";
5536 /* None of these cases wins, but they don't lose either. */
5537 if (dbr_sequence_length () == 0)
5539 /* Nothing in the delay slot, fake it by putting the combined
5540 insn (the copy or add) in the delay slot of a bl. */
5541 if (GET_CODE (operands[1]) == CONST_INT)
5542 return "bl %2,0\n\tldi %1,%0";
5543 else
5544 return "bl %2,0\n\tcopy %1,%0";
5546 else
5548 /* Something in the delay slot, but we've got a long branch. */
5549 if (GET_CODE (operands[1]) == CONST_INT)
5550 return "ldi %1,%0\n\tbl %2,0";
5551 else
5552 return "copy %1,%0\n\tbl %2,0";
5556 /* Output an unconditional add and branch insn. */
5558 char *
5559 output_parallel_addb (operands, length)
5560 rtx *operands;
5561 int length;
5563 /* To make life easy we want operand0 to be the shared input/output
5564 operand and operand1 to be the readonly operand. */
5565 if (operands[0] == operands[1])
5566 operands[1] = operands[2];
5568 /* These are the cases in which we win. */
5569 if (length == 4)
5570 return "add%I1b,tr %1,%0,%3";
5572 /* None of these cases win, but they don't lose either. */
5573 if (dbr_sequence_length () == 0)
5575 /* Nothing in the delay slot, fake it by putting the combined
5576 insn (the copy or add) in the delay slot of a bl. */
5577 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5579 else
5581 /* Something in the delay slot, but we've got a long branch. */
5582 return "add%I1 %1,%0,%0\n\tbl %3,0";
5586 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5587 is used to discourage creating parallel movb/addb insns since a jump
5588 which immediately follows a call can execute in the delay slot of the
5589 call. */
5591 following_call (insn)
5592 rtx insn;
5594 /* Find the previous real insn, skipping NOTEs. */
5595 insn = PREV_INSN (insn);
5596 while (insn && GET_CODE (insn) == NOTE)
5597 insn = PREV_INSN (insn);
5599 /* Check for CALL_INSNs and millicode calls. */
5600 if (insn
5601 && (GET_CODE (insn) == CALL_INSN
5602 || (GET_CODE (insn) == INSN
5603 && GET_CODE (PATTERN (insn)) != SEQUENCE
5604 && GET_CODE (PATTERN (insn)) != USE
5605 && GET_CODE (PATTERN (insn)) != CLOBBER
5606 && get_attr_type (insn) == TYPE_MILLI)))
5607 return 1;
5609 return 0;
5612 /* We use this hook to perform a PA specific optimization which is difficult
5613 to do in earlier passes.
5615 We want the delay slots of branches within jump tables to be filled.
5616 None of the compiler passes at the moment even has the notion that a
5617 PA jump table doesn't contain addresses, but instead contains actual
5618 instructions!
5620 Because we actually jump into the table, the addresses of each entry
5621 must stay constant in relation to the beginning of the table (which
5622 itself must stay constant relative to the instruction to jump into
5623 it). I don't believe we can guarantee earlier passes of the compiler
5624 will adhere to those rules.
5626 So, late in the compilation process we find all the jump tables, and
5627 expand them into real code -- e.g. each entry in the jump table vector
5628 will get an appropriate label followed by a jump to the final target.
5630 Reorg and the final jump pass can then optimize these branches and
5631 fill their delay slots. We end up with smaller, more efficient code.
5633 The jump instructions within the table are special; we must be able
5634 to identify them during assembly output (if the jumps don't get filled
5635 we need to emit a nop rather than nullifying the delay slot). We
5636 identify jumps in switch tables by marking the SET with DImode.

   Order of operations in the body:

   1. remove_useless_addtr_insns (INSNS, 1) and pa_combine_instructions
      are always called first.
   2. When optimize > 0, every JUMP_INSN whose pattern is an ADDR_VEC is
      replaced: for each vector element a jump built by gen_switch_jump
      is emitted, followed by a BARRIER and a fresh CODE_LABEL, and the
      ADDR_VEC itself is deleted.  With TARGET_GAS the expansion is
      bracketed by begin_brtab / end_brtab marker insns.
   3. Otherwise, with TARGET_GAS only, each ADDR_VEC is left in place
      and merely bracketed by the begin_brtab / end_brtab markers.

   INSNS is the head of the insn chain; it is reloaded from get_insns ()
   before either scan.  */
5638 pa_reorg (insns)
5639 rtx insns;
5641 rtx insn;
5643 remove_useless_addtr_insns (insns, 1);
5645 pa_combine_instructions (get_insns ());
5647 /* This is fairly cheap, so always run it if optimizing. */
5648 if (optimize > 0)
5650 /* Find and explode all ADDR_VEC insns. */
5651 insns = get_insns ();
5652 for (insn = insns; insn; insn = NEXT_INSN (insn))
5654 rtx pattern, tmp, location;
5655 unsigned int length, i;
5657 /* Find an ADDR_VEC insn to explode. */
5658 if (GET_CODE (insn) != JUMP_INSN
5659 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5660 continue;
5662 /* If needed, emit marker for the beginning of the branch table. */
5663 if (TARGET_GAS)
5664 emit_insn_before (gen_begin_brtab (), insn);
/* LOCATION is the insertion point: each new insn is emitted after it
   and LOCATION is then advanced onto the insn just emitted.  It starts
   at whatever now precedes the ADDR_VEC (the begin_brtab marker when
   one was emitted above).  */
5666 pattern = PATTERN (insn);
5667 location = PREV_INSN (insn);
5668 length = XVECLEN (pattern, 0);
5670 for (i = 0; i < length; i++)
5672 /* Emit the jump itself. */
/* Operand 0 of the vector element is used as the jump's target label;
   its use count is bumped because a new reference to it now exists.  */
5673 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
5674 tmp = emit_jump_insn_after (tmp, location);
5675 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
5676 LABEL_NUSES (JUMP_LABEL (tmp))++;
5678 /* Emit a BARRIER after the jump. */
5679 location = NEXT_INSN (location);
5680 emit_barrier_after (location);
5682 /* Put a CODE_LABEL before each so jump.c does not optimize
5683 the jumps away. */
/* The label goes after this entry's BARRIER, so it sits in front of
   the jump emitted for the next entry.  Its use count is forced to 1.  */
5684 location = NEXT_INSN (location);
5685 tmp = gen_label_rtx ();
5686 LABEL_NUSES (tmp) = 1;
5687 emit_label_after (tmp, location);
5688 location = NEXT_INSN (location);
5691 /* If needed, emit marker for the end of the branch table. */
/* Note the marker is placed before LOCATION, i.e. before the last insn
   the loop above advanced onto.  */
5692 if (TARGET_GAS)
5693 emit_insn_before (gen_end_brtab (), location);
5694 /* Delete the ADDR_VEC. */
/* NOTE(review): the enclosing loop then steps through NEXT_INSN of the
   insn just deleted; this presumably relies on delete_insn leaving that
   link usable -- confirm.  */
5695 delete_insn (insn);
5698 else if (TARGET_GAS)
5700 /* Still need an end_brtab insn. */
5701 insns = get_insns ();
5702 for (insn = insns; insn; insn = NEXT_INSN (insn))
5704 /* Find an ADDR_VEC insn. */
5705 if (GET_CODE (insn) != JUMP_INSN
5706 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5707 continue;
5709 /* Now generate markers for the beginning and end of the
5710 branch table. */
5711 emit_insn_before (gen_begin_brtab (), insn);
5712 emit_insn_after (gen_end_brtab (), insn);
5717 /* The PA has a number of odd instructions which can perform multiple
5718 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
5719 it may be profitable to combine two instructions into one instruction
5720 with two outputs. It's not profitable on PA2.0 machines because the
5721 two outputs would take two slots in the reorder buffers.
5723 This routine finds instructions which can be combined and combines
5724 them. We only support some of the potential combinations, and we
5725 only try common ways to find suitable instructions.
5727 * addb can add two registers or a register and a small integer
5728 and jump to a nearby (+-8k) location. Normally the jump to the
5729 nearby location is conditional on the result of the add, but by
5730 using the "true" condition we can make the jump unconditional.
5731 Thus addb can perform two independent operations in one insn.
5733 * movb is similar to addb in that it can perform a reg->reg
5734 or small immediate->reg copy and jump to a nearby (+-8k) location.
5736 * fmpyadd and fmpysub can perform a FP multiply and either an
5737 FP add or FP sub if the operands of the multiply and add/sub are
5738 independent (there are other minor restrictions). Note both
5739 the fmpy and fadd/fsub can in theory move to better spots according
5740 to data dependencies, but for now we require the fmpy stay at a
5741 fixed location.
5743 * Many of the memory operations can perform pre & post updates
5744 of index registers. GCC's pre/post increment/decrement addressing
5745 is far too simple to take advantage of all the possibilities. This
5746 pass may not be suitable since those insns may not be independent.
5748 * comclr can compare two ints or an int and a register, nullify
5749 the following instruction and zero some other register. This
5750 is more difficult to use as it's harder to find an insn which
5751 will generate a comclr than finding something like an unconditional
5752 branch. (conditional moves & long branches create comclr insns).
5754 * Most arithmetic operations can conditionally skip the next
5755 instruction. They can be viewed as "perform this operation
5756 and conditionally jump to this nearby location" (where nearby
5757 is an insn away). These are difficult to use due to the
5758 branch length restrictions.

   What the body below actually implements: fmpy paired with
   fadd/fsub (searching backwards, then forwards, from the anchor), and
   a non-forward unconditional branch paired with a preceding add/move
   (searching backwards only).  The INSNS argument is not consulted;
   the scan starts from get_insns ().  */
5760 pa_combine_instructions (insns)
5761 rtx insns;
5763 rtx anchor, new;
5765 /* This can get expensive since the basic algorithm is on the
5766 order of O(n^2) (or worse). Only do it for -O2 or higher
5767 levels of optimization. */
5768 if (optimize < 2)
5769 return;
5771 /* Walk down the list of insns looking for "anchor" insns which
5772 may be combined with "floating" insns. As the name implies,
5773 "anchor" instructions don't move, while "floating" insns may
5774 move around. */
/* NEW is a scratch insn wrapping a two-element PARALLEL.  It is handed
   to pa_can_combine_p on every trial, which overwrites both vector
   elements; it is not emitted into the chain by this function.  */
5775 new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
5776 new = make_insn_raw (new);
5778 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
5780 enum attr_pa_combine_type anchor_attr;
5781 enum attr_pa_combine_type floater_attr;
5783 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
5784 Also ignore any special USE insns. */
/* && binds tighter than ||, so the three code tests form one group and
   PATTERN (anchor) is only examined once ANCHOR is known to be an INSN,
   JUMP_INSN or CALL_INSN.  */
5785 if (GET_CODE (anchor) != INSN
5786 && GET_CODE (anchor) != JUMP_INSN
5787 && GET_CODE (anchor) != CALL_INSN
5788 || GET_CODE (PATTERN (anchor)) == USE
5789 || GET_CODE (PATTERN (anchor)) == CLOBBER
5790 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
5791 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
5792 continue;
5794 anchor_attr = get_attr_pa_combine_type (anchor);
5795 /* See if anchor is an insn suitable for combination. */
5796 if (anchor_attr == PA_COMBINE_TYPE_FMPY
5797 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
5798 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
5799 && ! forward_branch_p (anchor)))
5801 rtx floater;
/* Backward scan.  The loop leaves FLOATER pointing at the insn that
   passed pa_can_combine_p, or null if the scan was stopped or ran off
   the start of the chain.  */
5803 for (floater = PREV_INSN (anchor);
5804 floater;
5805 floater = PREV_INSN (floater))
/* NOTEs and USE/CLOBBER insns are stepped over.  */
5807 if (GET_CODE (floater) == NOTE
5808 || (GET_CODE (floater) == INSN
5809 && (GET_CODE (PATTERN (floater)) == USE
5810 || GET_CODE (PATTERN (floater)) == CLOBBER)))
5811 continue;
5813 /* Anything except a regular INSN will stop our search. */
5814 if (GET_CODE (floater) != INSN
5815 || GET_CODE (PATTERN (floater)) == ADDR_VEC
5816 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
5818 floater = NULL_RTX;
5819 break;
5822 /* See if FLOATER is suitable for combination with the
5823 anchor. */
5824 floater_attr = get_attr_pa_combine_type (floater);
5825 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
5826 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
5827 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5828 && floater_attr == PA_COMBINE_TYPE_FMPY))
5830 /* If ANCHOR and FLOATER can be combined, then we're
5831 done with this pass. */
/* The floater's destination and the two operands of its source
   expression are passed as the registers to check; REVERSED is 0
   because FLOATER precedes ANCHOR.  */
5832 if (pa_can_combine_p (new, anchor, floater, 0,
5833 SET_DEST (PATTERN (floater)),
5834 XEXP (SET_SRC (PATTERN (floater)), 0),
5835 XEXP (SET_SRC (PATTERN (floater)), 1)))
5836 break;
5839 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
5840 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
/* For an add, check both PLUS operands; otherwise the whole source is
   passed for both input slots.  */
5842 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
5844 if (pa_can_combine_p (new, anchor, floater, 0,
5845 SET_DEST (PATTERN (floater)),
5846 XEXP (SET_SRC (PATTERN (floater)), 0),
5847 XEXP (SET_SRC (PATTERN (floater)), 1)))
5848 break;
5850 else
5852 if (pa_can_combine_p (new, anchor, floater, 0,
5853 SET_DEST (PATTERN (floater)),
5854 SET_SRC (PATTERN (floater)),
5855 SET_SRC (PATTERN (floater))))
5856 break;
5861 /* If we didn't find anything on the backwards scan try forwards. */
/* Only the floating-point anchors get a forward scan.  Note the scan
   begins at ANCHOR itself rather than at its successor; ANCHOR cannot
   satisfy the type pairing below against itself, so it is passed over.  */
5862 if (!floater
5863 && (anchor_attr == PA_COMBINE_TYPE_FMPY
5864 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
5866 for (floater = anchor; floater; floater = NEXT_INSN (floater))
5868 if (GET_CODE (floater) == NOTE
5869 || (GET_CODE (floater) == INSN
5870 && (GET_CODE (PATTERN (floater)) == USE
5871 || GET_CODE (PATTERN (floater)) == CLOBBER)))
5873 continue;
5875 /* Anything except a regular INSN will stop our search. */
5876 if (GET_CODE (floater) != INSN
5877 || GET_CODE (PATTERN (floater)) == ADDR_VEC
5878 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
5880 floater = NULL_RTX;
5881 break;
5884 /* See if FLOATER is suitable for combination with the
5885 anchor. */
5886 floater_attr = get_attr_pa_combine_type (floater);
5887 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
5888 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
5889 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5890 && floater_attr == PA_COMBINE_TYPE_FMPY))
5892 /* If ANCHOR and FLOATER can be combined, then we're
5893 done with this pass. */
/* REVERSED is 1 here because FLOATER follows ANCHOR.  */
5894 if (pa_can_combine_p (new, anchor, floater, 1,
5895 SET_DEST (PATTERN (floater)),
5896 XEXP (SET_SRC (PATTERN(floater)),0),
5897 XEXP(SET_SRC(PATTERN(floater)),1)))
5898 break;
5903 /* FLOATER will be nonzero if we found a suitable floating
5904 insn for combination with ANCHOR. */
5905 if (floater
5906 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5907 || anchor_attr == PA_COMBINE_TYPE_FMPY))
5909 /* Emit the new instruction and delete the old anchor. */
/* The combined insn is a fresh PARALLEL of the two patterns placed
   before ANCHOR; ANCHOR itself is then turned in place into a
   NOTE_INSN_DELETED note.  */
5910 emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
5911 gen_rtvec (2, PATTERN (anchor),
5912 PATTERN (floater))),
5913 anchor);
5914 PUT_CODE (anchor, NOTE);
5915 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
5916 NOTE_SOURCE_FILE (anchor) = 0;
5918 /* Emit a special USE insn for FLOATER, then delete
5919 the floating insn. */
5920 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
5921 delete_insn (floater);
5923 continue;
5925 else if (floater
5926 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
5928 rtx temp;
5929 /* Emit the new_jump instruction and delete the old anchor. */
/* Same as above, except the combined insn is emitted as a jump insn
   and inherits ANCHOR's JUMP_LABEL.  */
5930 temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
5931 gen_rtvec (2, PATTERN (anchor),
5932 PATTERN (floater))),
5933 anchor);
5934 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
5935 PUT_CODE (anchor, NOTE);
5936 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
5937 NOTE_SOURCE_FILE (anchor) = 0;
5939 /* Emit a special USE insn for FLOATER, then delete
5940 the floating insn. */
5941 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
5942 delete_insn (floater);
5943 continue;
5950 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
5951 rtx new, anchor, floater;
5952 int reversed;
5953 rtx dest, src1, src2;
5955 int insn_code_number;
5956 rtx start, end;
5958 /* Create a PARALLEL with the patterns of ANCHOR and
5959 FLOATER, try to recognize it, then test constraints
5960 for the resulting pattern.
5962 If the pattern doesn't match or the constraints
5963 aren't met keep searching for a suitable floater
5964 insn. */
5965 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
5966 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
5967 INSN_CODE (new) = -1;
5968 insn_code_number = recog_memoized (new);
5969 if (insn_code_number < 0
5970 || !constrain_operands (insn_code_number, 1))
5971 return 0;
5973 if (reversed)
5975 start = anchor;
5976 end = floater;
5978 else
5980 start = floater;
5981 end = anchor;
5984 /* There's up to three operands to consider. One
5985 output and two inputs.
5987 The output must not be used between FLOATER & ANCHOR
5988 exclusive. The inputs must not be set between
5989 FLOATER and ANCHOR exclusive. */
5991 if (reg_used_between_p (dest, start, end))
5992 return 0;
5994 if (reg_set_between_p (src1, start, end))
5995 return 0;
5997 if (reg_set_between_p (src2, start, end))
5998 return 0;
6000 /* If we get here, then everything is good. */
6001 return 1;