* pa/pa.c (emit_move_sequence): Don't lose for a secondary reload
[official-gcc.git] / gcc / config / pa / pa.c
blob5f9416057dfe7a12fce4a1cceb3a8c211d74693d
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdio.h>
23 #include "config.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
char *pa_cpu_string;

/* Set by the FUNCTION_PROFILER macro.  */
int hp_profile_labelno;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Whether or not the current function uses an out-of-line prologue
   and epilogue.  */
static int out_of_line_prologue_epilogue;

/* Forward declaration; defined later in this file.  */
static rtx find_addr_reg ();

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct defer_plab
{
  rtx internal_label;
  rtx symbol;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;
80 void
81 override_options ()
83 /* Default to 7100 scheduling. If the 7100LC scheduling ever
84 gets reasonably tuned, it should be the default since that
85 what most PAs sold now are. */
86 if (pa_cpu_string == NULL
87 || ! strcmp (pa_cpu_string, "7100"))
89 pa_cpu_string = "7100";
90 pa_cpu = PROCESSOR_7100;
92 else if (! strcmp (pa_cpu_string, "700"))
94 pa_cpu_string = "700";
95 pa_cpu = PROCESSOR_700;
97 else if (! strcmp (pa_cpu_string, "7100LC"))
99 pa_cpu_string = "7100LC";
100 pa_cpu = PROCESSOR_7100LC;
102 else
104 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
107 if (flag_pic && TARGET_PORTABLE_RUNTIME)
109 warning ("PIC code generation is not supported in the portable runtime model\n");
112 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
114 warning ("PIC code generation is not compatable with fast indirect calls\n");
117 if (flag_pic && profile_flag)
119 warning ("PIC code generation is not compatable with profiling\n");
122 if (TARGET_SPACE && (flag_pic || profile_flag))
124 warning ("Out of line entry/exit sequences are not compatable\n");
125 warning ("with PIC or profiling\n");
128 if (! TARGET_GAS && write_symbols != NO_DEBUG)
130 warning ("-g is only supported when using GAS on this processor,");
131 warning ("-g option disabled.");
132 write_symbols = NO_DEBUG;
137 /* Return non-zero only if OP is a register of mode MODE,
138 or CONST0_RTX. */
140 reg_or_0_operand (op, mode)
141 rtx op;
142 enum machine_mode mode;
144 return (op == CONST0_RTX (mode) || register_operand (op, mode));
147 /* Return non-zero if OP is suitable for use in a call to a named
148 function.
150 (???) For 2.5 try to eliminate either call_operand_address or
151 function_label_operand, they perform very similar functions. */
153 call_operand_address (op, mode)
154 rtx op;
155 enum machine_mode mode;
157 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
160 /* Return 1 if X contains a symbolic expression. We know these
161 expressions will have one of a few well defined forms, so
162 we need only check those forms. */
164 symbolic_expression_p (x)
165 register rtx x;
168 /* Strip off any HIGH. */
169 if (GET_CODE (x) == HIGH)
170 x = XEXP (x, 0);
172 return (symbolic_operand (x, VOIDmode));
176 symbolic_operand (op, mode)
177 register rtx op;
178 enum machine_mode mode;
180 switch (GET_CODE (op))
182 case SYMBOL_REF:
183 case LABEL_REF:
184 return 1;
185 case CONST:
186 op = XEXP (op, 0);
187 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
188 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
189 && GET_CODE (XEXP (op, 1)) == CONST_INT);
190 default:
191 return 0;
195 /* Return truth value of statement that OP is a symbolic memory
196 operand of mode MODE. */
199 symbolic_memory_operand (op, mode)
200 rtx op;
201 enum machine_mode mode;
203 if (GET_CODE (op) == SUBREG)
204 op = SUBREG_REG (op);
205 if (GET_CODE (op) != MEM)
206 return 0;
207 op = XEXP (op, 0);
208 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
209 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
212 /* Return 1 if the operand is either a register or a memory operand that is
213 not symbolic. */
216 reg_or_nonsymb_mem_operand (op, mode)
217 register rtx op;
218 enum machine_mode mode;
220 if (register_operand (op, mode))
221 return 1;
223 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
224 return 1;
226 return 0;
229 /* Return 1 if the operand is either a register, zero, or a memory operand
230 that is not symbolic. */
233 reg_or_0_or_nonsymb_mem_operand (op, mode)
234 register rtx op;
235 enum machine_mode mode;
237 if (register_operand (op, mode))
238 return 1;
240 if (op == CONST0_RTX (mode))
241 return 1;
243 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
244 return 1;
246 return 0;
249 /* Accept any constant that can be moved in one instructions into a
250 general register. */
252 cint_ok_for_move (intval)
253 HOST_WIDE_INT intval;
255 /* OK if ldo, ldil, or zdepi, can be used. */
256 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
257 || zdepi_cint_p (intval));
/* Accept anything that can be moved in one instruction into a general
   register: a register, a one-instruction constant, or a memory
   reference whose address form the hardware can load directly.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  /* Anything else must be a MEM (possibly wrapped in a SUBREG).  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);
  /* (lo_sum reg constant) is the low-part half of a HIGH/LO_SUM
     address split; accept it when well-formed.  */
  if (GET_CODE (op) == LO_SUM)
    return (register_operand (XEXP (op, 0), Pmode)
	    && CONSTANT_P (XEXP (op, 1)));

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  Accept (plus (mult reg size) reg) with the
     scale equal to the mode size, in either operand order.  */
  if (GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  /* Fall back to the generic legitimate-address check.  */
  return memory_address_p (mode, op);
}
301 /* Accept REG and any CONST_INT that can be moved in one instruction into a
302 general register. */
304 reg_or_cint_move_operand (op, mode)
305 rtx op;
306 enum machine_mode mode;
308 if (register_operand (op, mode))
309 return 1;
311 if (GET_CODE (op) == CONST_INT)
312 return cint_ok_for_move (INTVAL (op));
314 return 0;
318 pic_label_operand (op, mode)
319 rtx op;
320 enum machine_mode mode;
322 if (!flag_pic)
323 return 0;
325 switch (GET_CODE (op))
327 case LABEL_REF:
328 return 1;
329 case CONST:
330 op = XEXP (op, 0);
331 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
332 && GET_CODE (XEXP (op, 1)) == CONST_INT);
333 default:
334 return 0;
339 fp_reg_operand (op, mode)
340 rtx op;
341 enum machine_mode mode;
343 return reg_renumber && FP_REG_P (op);
348 /* Return truth value of whether OP can be used as an operand in a
349 three operand arithmetic insn that accepts registers of mode MODE
350 or 14-bit signed integers. */
352 arith_operand (op, mode)
353 rtx op;
354 enum machine_mode mode;
356 return (register_operand (op, mode)
357 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
360 /* Return truth value of whether OP can be used as an operand in a
361 three operand arithmetic insn that accepts registers of mode MODE
362 or 11-bit signed integers. */
364 arith11_operand (op, mode)
365 rtx op;
366 enum machine_mode mode;
368 return (register_operand (op, mode)
369 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
372 /* A constant integer suitable for use in a PRE_MODIFY memory
373 reference. */
375 pre_cint_operand (op, mode)
376 rtx op;
377 enum machine_mode mode;
379 return (GET_CODE (op) == CONST_INT
380 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
383 /* A constant integer suitable for use in a POST_MODIFY memory
384 reference. */
386 post_cint_operand (op, mode)
387 rtx op;
388 enum machine_mode mode;
390 return (GET_CODE (op) == CONST_INT
391 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
395 arith_double_operand (op, mode)
396 rtx op;
397 enum machine_mode mode;
399 return (register_operand (op, mode)
400 || (GET_CODE (op) == CONST_DOUBLE
401 && GET_MODE (op) == mode
402 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
403 && (CONST_DOUBLE_HIGH (op) >= 0
404 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
407 /* Return truth value of whether OP is a integer which fits the
408 range constraining immediate operands in three-address insns, or
409 is an integer register. */
412 ireg_or_int5_operand (op, mode)
413 rtx op;
414 enum machine_mode mode;
416 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
417 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
420 /* Return truth value of whether OP is a integer which fits the
421 range constraining immediate operands in three-address insns. */
424 int5_operand (op, mode)
425 rtx op;
426 enum machine_mode mode;
428 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
432 uint5_operand (op, mode)
433 rtx op;
434 enum machine_mode mode;
436 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
440 int11_operand (op, mode)
441 rtx op;
442 enum machine_mode mode;
444 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
448 uint32_operand (op, mode)
449 rtx op;
450 enum machine_mode mode;
452 #if HOST_BITS_PER_WIDE_INT > 32
453 /* All allowed constants will fit a CONST_INT. */
454 return (GET_CODE (op) == CONST_INT
455 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
456 #else
457 return (GET_CODE (op) == CONST_INT
458 || (GET_CODE (op) == CONST_DOUBLE
459 && CONST_DOUBLE_HIGH (op) == 0));
460 #endif
464 arith5_operand (op, mode)
465 rtx op;
466 enum machine_mode mode;
468 return register_operand (op, mode) || int5_operand (op, mode);
471 /* True iff zdepi can be used to generate this CONST_INT. */
473 zdepi_cint_p (x)
474 unsigned HOST_WIDE_INT x;
476 unsigned HOST_WIDE_INT lsb_mask, t;
478 /* This might not be obvious, but it's at least fast.
479 This function is critical; we don't have the time loops would take. */
480 lsb_mask = x & -x;
481 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
482 /* Return true iff t is a power of two. */
483 return ((t & (t - 1)) == 0);
486 /* True iff depi or extru can be used to compute (reg & mask).
487 Accept bit pattern like these:
488 0....01....1
489 1....10....0
490 1..10..01..1 */
492 and_mask_p (mask)
493 unsigned HOST_WIDE_INT mask;
495 mask = ~mask;
496 mask += mask & -mask;
497 return (mask & (mask - 1)) == 0;
500 /* True iff depi or extru can be used to compute (reg & OP). */
502 and_operand (op, mode)
503 rtx op;
504 enum machine_mode mode;
506 return (register_operand (op, mode)
507 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
510 /* True iff depi can be used to compute (reg | MASK). */
512 ior_mask_p (mask)
513 unsigned HOST_WIDE_INT mask;
515 mask += mask & -mask;
516 return (mask & (mask - 1)) == 0;
519 /* True iff depi can be used to compute (reg | OP). */
521 ior_operand (op, mode)
522 rtx op;
523 enum machine_mode mode;
525 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
529 lhs_lshift_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
536 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
537 Such values can be the left hand side x in (x << r), using the zvdepi
538 instruction. */
540 lhs_lshift_cint_operand (op, mode)
541 rtx op;
542 enum machine_mode mode;
544 unsigned HOST_WIDE_INT x;
545 if (GET_CODE (op) != CONST_INT)
546 return 0;
547 x = INTVAL (op) >> 4;
548 return (x & (x + 1)) == 0;
552 arith32_operand (op, mode)
553 rtx op;
554 enum machine_mode mode;
556 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
560 pc_or_label_operand (op, mode)
561 rtx op;
562 enum machine_mode mode;
564 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */
rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig))
    {
      emit_insn (gen_pic_load_label (reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      /* A destination register is required for a SYMBOL_REF.  */
      if (reg == 0)
	abort ();

      if (flag_pic == 2)
	{
	  /* -fPIC: load via HIGH/LO_SUM of an UNSPEC-wrapped symbol.  */
	  emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
	  pic_ref = gen_rtx (MEM, Pmode,
			     gen_rtx (LO_SUM, Pmode, reg,
				      gen_rtx (UNSPEC, SImode,
					       gen_rtvec (1, orig), 0)));
	}
      else
	/* -fpic: a simple PIC-table-relative reference suffices.  */
	pic_ref = gen_rtx (MEM, Pmode,
			   gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
      current_function_uses_pic_offset_table = 1;
      /* The loaded address never changes; mark it so CSE can share it.  */
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      /* Already legitimized (pic_offset_table + something)?  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  /* Legitimize both halves of (const (plus X Y)) recursively;
	     reuse REG for the second half only if the first did not
	     land in it.  */
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  /* A small offset can be folded directly into the address.  */
	  if (INT_14_BITS (orig))
	    return plus_constant_for_output (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx (PLUS, Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }
  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx;
     enum machine_mode mode;
{
  rtx orig = x;

  /* PIC addresses get their own legitimization path.  */
  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST. */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (SImode, XEXP (x, 1));
      return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  /* Build symbol+newoffset via HIGH/LO_SUM.  */
	  rtx const_part = gen_rtx (CONST, VOIDmode,
				    gen_rtx (PLUS, Pmode,
					     XEXP (x, 0),
					     GEN_INT (newoffset)));
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx (HIGH, Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx (LO_SUM, Pmode,
				  tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx (PLUS, Pmode,
					force_reg (Pmode, XEXP (x, 0)),
					int_part));
	}
      /* The residual offset (offset - newoffset) fits a displacement.  */
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx (PLUS, Pmode,
					gen_rtx (MULT, Pmode,
						 reg2, GEN_INT (val)),
					reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
	 know it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */
      if (GET_CODE (reg1) == REG
	  && REGNO_POINTER_FLAG (REGNO (reg1)))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx (PLUS, Pmode,
			 gen_rtx (MULT, Pmode,
				  XEXP (XEXP (XEXP (x, 0), 0), 0),
				  XEXP (XEXP (XEXP (x, 0), 0), 1)),
			 XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REGNO_POINTER_FLAG (REGNO (reg2)))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      /* No pointer-flagged register found; leave the address alone.  */
      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return force_reg (Pmode, gen_rtx (PLUS, Pmode,
					    gen_rtx (MULT, Pmode, reg1,
						     XEXP (XEXP (idx, 0), 1)),
					    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx (PLUS, Pmode,
					    gen_rtx (MULT, Pmode,
						     reg2, GEN_INT (val)),
					    reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
					gen_rtx (MULT, Pmode, reg1,
						 XEXP (XEXP (idx, 0), 1)),
					reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode, gen_rtx (PLUS, Pmode,
						gen_rtx (MULT, Pmode,
							 reg2, GEN_INT (val)),
						reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      /* Fold (const_int / shadd_const) into the multiplied term
		 so the symbol itself can serve as the base.  */
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
						 regx2, regx1));
	      return force_reg (Pmode,
				gen_rtx (PLUS, Pmode,
					 gen_rtx (MULT, Pmode, regx2,
						  XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
	      return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
1020 /* For the HPPA, REG and REG+CONST is cost 0
1021 and addresses involving symbolic constants are cost 2.
1023 PIC addresses are very expensive.
1025 It is no coincidence that this has the same structure
1026 as GO_IF_LEGITIMATE_ADDRESS. */
1028 hppa_address_cost (X)
1029 rtx X;
1031 if (GET_CODE (X) == PLUS)
1032 return 1;
1033 else if (GET_CODE (X) == LO_SUM)
1034 return 1;
1035 else if (GET_CODE (X) == HIGH)
1036 return 2;
1037 return 4;
1040 /* Emit insns to move operands[1] into operands[0].
1042 Return 1 if we have written out everything that needs to be done to
1043 do the move. Otherwise, return 0 and the caller will emit the move
1044 normally. */
1047 emit_move_sequence (operands, mode, scratch_reg)
1048 rtx *operands;
1049 enum machine_mode mode;
1050 rtx scratch_reg;
1052 register rtx operand0 = operands[0];
1053 register rtx operand1 = operands[1];
1055 if (reload_in_progress && GET_CODE (operand0) == REG
1056 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1057 operand0 = reg_equiv_mem[REGNO (operand0)];
1058 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1059 && GET_CODE (SUBREG_REG (operand0)) == REG
1060 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1062 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1063 operand0 = alter_subreg (operand0);
1066 if (reload_in_progress && GET_CODE (operand1) == REG
1067 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1068 operand1 = reg_equiv_mem[REGNO (operand1)];
1069 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1070 && GET_CODE (SUBREG_REG (operand1)) == REG
1071 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1073 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1074 operand1 = alter_subreg (operand1);
1077 /* Handle secondary reloads for loads/stores of FP registers from
1078 REG+D addresses where D does not fit in 5 bits, including
1079 (subreg (mem (addr))) cases. */
1080 if (fp_reg_operand (operand0, mode)
1081 && ((GET_CODE (operand1) == MEM
1082 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1083 || ((GET_CODE (operand1) == SUBREG
1084 && GET_CODE (XEXP (operand1, 0)) == MEM
1085 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1086 && scratch_reg)
1088 if (GET_CODE (operand1) == SUBREG)
1089 operand1 = XEXP (operand1, 0);
1091 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1093 /* D might not fit in 14 bits either; for such cases load D into
1094 scratch reg. */
1095 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1097 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1098 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1099 SImode,
1100 XEXP (XEXP (operand1, 0), 0),
1101 scratch_reg));
1103 else
1104 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1105 emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
1106 scratch_reg)));
1107 return 1;
1109 else if (fp_reg_operand (operand1, mode)
1110 && ((GET_CODE (operand0) == MEM
1111 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1112 || ((GET_CODE (operand0) == SUBREG)
1113 && GET_CODE (XEXP (operand0, 0)) == MEM
1114 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1115 && scratch_reg)
1117 if (GET_CODE (operand0) == SUBREG)
1118 operand0 = XEXP (operand0, 0);
1120 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1121 /* D might not fit in 14 bits either; for such cases load D into
1122 scratch reg. */
1123 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1125 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1126 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1127 SImode,
1128 XEXP (XEXP (operand0, 0), 0),
1129 scratch_reg));
1131 else
1132 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1133 emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
1134 operand1));
1135 return 1;
1137 /* Handle secondary reloads for loads of FP registers from constant
1138 expressions by forcing the constant into memory.
1140 use scratch_reg to hold the address of the memory location.
1142 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1143 NO_REGS when presented with a const_int and an register class
1144 containing only FP registers. Doing so unfortunately creates
1145 more problems than it solves. Fix this for 2.5. */
1146 else if (fp_reg_operand (operand0, mode)
1147 && CONSTANT_P (operand1)
1148 && scratch_reg)
1150 rtx xoperands[2];
1152 /* Force the constant into memory and put the address of the
1153 memory location into scratch_reg. */
1154 xoperands[0] = scratch_reg;
1155 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1156 emit_move_sequence (xoperands, Pmode, 0);
1158 /* Now load the destination register. */
1159 emit_insn (gen_rtx (SET, mode, operand0,
1160 gen_rtx (MEM, mode, scratch_reg)));
1161 return 1;
1163 /* Handle secondary reloads for SAR. These occur when trying to load
1164 the SAR from memory a FP register, or with a constant. */
1165 else if (GET_CODE (operand0) == REG
1166 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1167 && (GET_CODE (operand1) == MEM
1168 || GET_CODE (operand1) == CONST_INT
1169 || (GET_CODE (operand1) == REG
1170 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1171 && scratch_reg)
1173 /* D might not fit in 14 bits either; for such cases load D into
1174 scratch reg. */
1175 if (GET_CODE (operand1) == MEM
1176 && !memory_address_p (SImode, XEXP (operand1, 0)))
1178 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1179 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1180 SImode,
1181 XEXP (XEXP (operand1, 0), 0),
1182 scratch_reg));
1183 emit_move_insn (scratch_reg, gen_rtx (MEM, GET_MODE (operand1),
1184 scratch_reg));
1186 else
1187 emit_move_insn (scratch_reg, operand1);
1188 emit_move_insn (operand0, scratch_reg);
1189 return 1;
1191 /* Handle most common case: storing into a register. */
1192 else if (register_operand (operand0, mode))
1194 if (register_operand (operand1, mode)
1195 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1196 || (operand1 == CONST0_RTX (mode))
1197 || (GET_CODE (operand1) == HIGH
1198 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1199 /* Only `general_operands' can come here, so MEM is ok. */
1200 || GET_CODE (operand1) == MEM)
1202 /* Run this case quickly. */
1203 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1204 return 1;
1207 else if (GET_CODE (operand0) == MEM)
1209 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1210 && !(reload_in_progress || reload_completed))
1212 rtx temp = gen_reg_rtx (DFmode);
1214 emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
1215 emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
1216 return 1;
1218 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1220 /* Run this case quickly. */
1221 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1222 return 1;
1224 if (! (reload_in_progress || reload_completed))
1226 operands[0] = validize_mem (operand0);
1227 operands[1] = operand1 = force_reg (mode, operand1);
1231 /* Simplify the source if we need to. */
1232 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1233 || (GET_CODE (operand1) == HIGH
1234 && symbolic_operand (XEXP (operand1, 0), mode)))
1236 int ishighonly = 0;
1238 if (GET_CODE (operand1) == HIGH)
1240 ishighonly = 1;
1241 operand1 = XEXP (operand1, 0);
1243 if (symbolic_operand (operand1, mode))
1245 rtx const_part = NULL;
1247 /* Argh. The assembler and linker can't handle arithmetic
1248 involving plabels. We'll have to split up operand1 here
1249 if it's a function label involved in an arithmetic
1250 expression. Luckily, this only happens with addition
1251 of constants to plabels, which simplifies the test.
1253 We add the constant back in just before returning to
1254 our caller. */
1255 if (GET_CODE (operand1) == CONST
1256 && GET_CODE (XEXP (operand1, 0)) == PLUS
1257 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1259 /* Save away the constant part of the expression. */
1260 const_part = XEXP (XEXP (operand1, 0), 1);
1261 if (GET_CODE (const_part) != CONST_INT)
1262 abort ();
1264 /* Set operand1 to just the SYMBOL_REF. */
1265 operand1 = XEXP (XEXP (operand1, 0), 0);
1268 if (flag_pic)
1270 rtx temp;
1272 if (reload_in_progress || reload_completed)
1273 temp = scratch_reg ? scratch_reg : operand0;
1274 else
1275 temp = gen_reg_rtx (Pmode);
1277 /* If operand1 is a function label, then we've got to
1278 force it to memory, then load op0 from memory. */
1279 if (function_label_operand (operand1, mode))
1281 operands[1] = force_const_mem (mode, operand1);
1282 emit_move_sequence (operands, mode, temp);
1284 /* Likewise for (const (plus (symbol) (const_int))) when
1285 generating pic code during or after reload and const_int
1286 will not fit in 14 bits. */
1287 else if (GET_CODE (operand1) == CONST
1288 && GET_CODE (XEXP (operand1, 0)) == PLUS
1289 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1290 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1291 && (reload_completed || reload_in_progress)
1292 && flag_pic)
1294 operands[1] = force_const_mem (mode, operand1);
1295 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1296 mode, temp);
1297 emit_move_sequence (operands, mode, temp);
1299 else
1301 operands[1] = legitimize_pic_address (operand1, mode, temp);
1302 emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
1305 /* On the HPPA, references to data space are supposed to use dp,
1306 register 27, but showing it in the RTL inhibits various cse
1307 and loop optimizations. */
1308 else
1310 rtx temp, set;
1312 if (reload_in_progress || reload_completed)
1313 temp = scratch_reg ? scratch_reg : operand0;
1314 else
1315 temp = gen_reg_rtx (mode);
1317 /* Loading a SYMBOL_REF into a register makes that register
1318 safe to be used as the base in an indexed address.
1320 Don't mark hard registers though. That loses. */
1321 if (GET_CODE (operand0) == REG
1322 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1323 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1324 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1325 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1326 if (ishighonly)
1327 set = gen_rtx (SET, mode, operand0, temp);
1328 else
1329 set = gen_rtx (SET, VOIDmode,
1330 operand0,
1331 gen_rtx (LO_SUM, mode, temp, operand1));
1333 emit_insn (gen_rtx (SET, VOIDmode,
1334 temp,
1335 gen_rtx (HIGH, mode, operand1)));
1336 emit_insn (set);
1340 /* Add back in the constant part if needed. */
1341 if (const_part != NULL)
1342 expand_inc (operand0, const_part);
1343 return 1;
1345 else if (GET_CODE (operand1) != CONST_INT
1346 || ! cint_ok_for_move (INTVAL (operand1)))
1348 rtx temp;
1350 if (reload_in_progress || reload_completed)
1351 temp = operand0;
1352 else
1353 temp = gen_reg_rtx (mode);
1355 emit_insn (gen_rtx (SET, VOIDmode, temp,
1356 gen_rtx (HIGH, mode, operand1)));
1357 operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1360 /* Now have insn-emit do whatever it normally does. */
1361 return 0;
1364 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1365 it will need a link/runtime reloc). */
1368 reloc_needed (exp)
1369 tree exp;
1371 int reloc = 0;
1373 switch (TREE_CODE (exp))
1375 case ADDR_EXPR:
1376 return 1;
1378 case PLUS_EXPR:
1379 case MINUS_EXPR:
1380 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1381 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1382 break;
1384 case NOP_EXPR:
1385 case CONVERT_EXPR:
1386 case NON_LVALUE_EXPR:
1387 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1388 break;
1390 case CONSTRUCTOR:
1392 register tree link;
1393 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1394 if (TREE_VALUE (link) != 0)
1395 reloc |= reloc_needed (TREE_VALUE (link));
1397 break;
1399 case ERROR_MARK:
1400 break;
1402 return reloc;
1405 /* Does operand (which is a symbolic_operand) live in text space? If
1406 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1409 read_only_operand (operand)
1410 rtx operand;
1412 if (GET_CODE (operand) == CONST)
1413 operand = XEXP (XEXP (operand, 0), 0);
1414 if (flag_pic)
1416 if (GET_CODE (operand) == SYMBOL_REF)
1417 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1419 else
1421 if (GET_CODE (operand) == SYMBOL_REF)
1422 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1424 return 1;
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  /* Memory destination: the source must already be a register.  */
  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  /* Memory source: plain word load.  */
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      /* Only single-precision float constants are expected here;
	 doubleword constants are handled by output_move_double.  */
      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);
      /* Note: operands[1] is deliberately replaced so the code below
	 (and the caller's template expansion) sees the integer image.  */
      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      /* Choose the cheapest immediate-load sequence available.  */
      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "zdepi %Z1,%0";
      else
	/* General case: two insns to build the full 32-bit constant.  */
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  /* Register to register.  */
  return "copy %1,%0";
}
1473 /* Compute position (in OP[1]) and width (in OP[2])
1474 useful for copying IMM to a register using the zdepi
1475 instructions. Store the immediate value to insert in OP[0]. */
1476 void
1477 compute_zdepi_operands (imm, op)
1478 unsigned HOST_WIDE_INT imm;
1479 unsigned *op;
1481 int lsb, len;
1483 /* Find the least significant set bit in IMM. */
1484 for (lsb = 0; lsb < 32; lsb++)
1486 if ((imm & 1) != 0)
1487 break;
1488 imm >>= 1;
1491 /* Choose variants based on *sign* of the 5-bit field. */
1492 if ((imm & 0x10) == 0)
1493 len = (lsb <= 28) ? 4 : 32 - lsb;
1494 else
1496 /* Find the width of the bitstring in IMM. */
1497 for (len = 5; len < 32; len++)
1499 if ((imm & (1 << len)) == 0)
1500 break;
1503 /* Sign extend IMM as a 5-bit value. */
1504 imm = (imm & 0xf) - 0x10;
1507 op[0] = imm;
1508 op[1] = 31 - lsb;
1509 op[2] = len;
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  Returns an asm template string (possibly
   after emitting some insns directly via output_asm_insn).  */

char *
output_move_double (operands)
     rtx *operands;
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  Abort if we get one,
     because generating code for these cases is painful.  */

  if (optype0 != REGOP && optype1 != REGOP)
    abort ();

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  /* high_reg is the high (first) word of the source pair.  */
	  rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
	      return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
	    }
	  else
	    /* Storing a register pair through an address it also
	       occupies is not supported.  */
	    abort();
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
	      return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
	    }
	  else
	    abort();
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
	      return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
	      return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
	      return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
	      return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
	    }
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adj_offsettable_operand (operands[0], 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adj_offsettable_operand (operands[1], 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
			    operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
1761 char *
1762 output_fp_move_double (operands)
1763 rtx *operands;
1765 if (FP_REG_P (operands[0]))
1767 if (FP_REG_P (operands[1])
1768 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1769 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1770 else
1771 output_asm_insn ("fldd%F1 %1,%0", operands);
1773 else if (FP_REG_P (operands[1]))
1775 output_asm_insn ("fstd%F0 %1,%0", operands);
1777 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1779 if (GET_CODE (operands[0]) == REG)
1781 rtx xoperands[2];
1782 xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1783 xoperands[0] = operands[0];
1784 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1786 /* This is a pain. You have to be prepared to deal with an
1787 arbitrary address here including pre/post increment/decrement.
1789 so avoid this in the MD. */
1790 else
1791 abort ();
1793 else abort ();
1794 return "";
1797 /* Return a REG that occurs in ADDR with coefficient 1.
1798 ADDR can be effectively incremented by incrementing REG. */
1800 static rtx
1801 find_addr_reg (addr)
1802 rtx addr;
1804 while (GET_CODE (addr) == PLUS)
1806 if (GET_CODE (XEXP (addr, 0)) == REG)
1807 addr = XEXP (addr, 0);
1808 else if (GET_CODE (XEXP (addr, 1)) == REG)
1809 addr = XEXP (addr, 1);
1810 else if (CONSTANT_P (XEXP (addr, 0)))
1811 addr = XEXP (addr, 1);
1812 else if (CONSTANT_P (XEXP (addr, 1)))
1813 addr = XEXP (addr, 0);
1814 else
1815 abort ();
1817 if (GET_CODE (addr) == REG)
1818 return addr;
1819 abort ();
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

char *
output_block_move (operands, size_is_constant)
     rtx *operands;
     int size_is_constant;
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than four bytes at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > 4)
    align = 4;

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  Two words are moved per iteration; the stores
	 interleave with the counter update to avoid stalls.  */
      output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
      output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
      output_asm_insn ("stws,ma %3,4(0,%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("stws,ma %6,4(0,%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  operands[4] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("ldw 0(0,%1),%6", operands);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("stws,ma %3,4(0,%0)", operands);
	  if (n_bytes % 4 != 0)
	    /* stbys,e stores only the leftmost (n_bytes % 4) bytes.  */
	    output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  Two halfwords per iteration.  */
      output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
      output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
      output_asm_insn ("sths,ma %3,2(0,%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("sths,ma %6,2(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("ldb 0(0,%1),%6", operands);
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("sths,ma %3,2(0,%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %6,0(0,%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  Two bytes per iteration.  */
      output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
      output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
      output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("stbs,ma %6,1(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
	{
	  output_asm_insn ("ldb 0(0,%1),%3", operands);
	  output_asm_insn ("stb %3,0(0,%0)", operands);
	}
      return "";

    default:
      abort ();
    }
}
1928 /* Count the number of insns necessary to handle this block move.
1930 Basic structure is the same as emit_block_move, except that we
1931 count insns rather than emit them. */
1934 compute_movstrsi_length (insn)
1935 rtx insn;
1937 rtx pat = PATTERN (insn);
1938 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1939 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1940 unsigned int n_insns = 0;
1942 /* We can't move more than four bytes at a time because the PA
1943 has no longer integer move insns. (Could use fp mem ops?) */
1944 if (align > 4)
1945 align = 4;
1947 /* The basic opying loop. */
1948 n_insns = 6;
1950 /* Residuals. */
1951 if (n_bytes % (2 * align) != 0)
1953 /* Any residual caused by unrolling the copy loop. */
1954 if (n_bytes % (2 * align) > align)
1955 n_insns += 1;
1957 /* Any residual because the number of bytes was not a
1958 multiple of the alignment. */
1959 if (n_bytes % align != 0)
1960 n_insns += 1;
1963 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
1964 return n_insns * 4;
1968 char *
1969 output_and (operands)
1970 rtx *operands;
1972 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1974 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
1975 int ls0, ls1, ms0, p, len;
1977 for (ls0 = 0; ls0 < 32; ls0++)
1978 if ((mask & (1 << ls0)) == 0)
1979 break;
1981 for (ls1 = ls0; ls1 < 32; ls1++)
1982 if ((mask & (1 << ls1)) != 0)
1983 break;
1985 for (ms0 = ls1; ms0 < 32; ms0++)
1986 if ((mask & (1 << ms0)) == 0)
1987 break;
1989 if (ms0 != 32)
1990 abort();
1992 if (ls1 == 32)
1994 len = ls0;
1996 if (len == 0)
1997 abort ();
1999 operands[2] = GEN_INT (len);
2000 return "extru %1,31,%2,%0";
2002 else
2004 /* We could use this `depi' for the case above as well, but `depi'
2005 requires one more register file access than an `extru'. */
2007 p = 31 - ls0;
2008 len = ls1 - ls0;
2010 operands[2] = GEN_INT (p);
2011 operands[3] = GEN_INT (len);
2012 return "depi 0,%2,%3,%0";
2015 else
2016 return "and %1,%2,%0";
2019 char *
2020 output_ior (operands)
2021 rtx *operands;
2023 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2024 int bs0, bs1, p, len;
2026 if (INTVAL (operands[2]) == 0)
2027 return "copy %1,%0";
2029 for (bs0 = 0; bs0 < 32; bs0++)
2030 if ((mask & (1 << bs0)) != 0)
2031 break;
2033 for (bs1 = bs0; bs1 < 32; bs1++)
2034 if ((mask & (1 << bs1)) == 0)
2035 break;
2037 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2038 abort();
2040 p = 31 - bs0;
2041 len = bs1 - bs0;
2043 operands[2] = GEN_INT (p);
2044 operands[3] = GEN_INT (len);
2045 return "depi -1,%2,%3,%0";
/* Output an ascii string as one or more .STRING directives.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  const char *hexdig = "0123456789abcdef";
  int i;
  int line_chars = 0;
  unsigned char group[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */
  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      int limit = size - i;
      int n = 0;
      int j;

      if (limit > 4)
	limit = 4;

      /* Escape up to four source characters into GROUP.  */
      for (j = 0; j < limit; j++)
	{
	  unsigned int c = p[i + j];

	  /* Quote and backslash need a protecting backslash.  */
	  if (c == '\"' || c == '\\')
	    group[n++] = '\\';

	  if (c >= ' ' && c < 0177)
	    group[n++] = c;
	  else
	    {
	      /* Unprintable: emit a \xNN escape, lower case hex.  */
	      group[n++] = '\\';
	      group[n++] = 'x';
	      group[n++] = hexdig[(c >> 4) & 0xf];
	      group[n++] = hexdig[c & 0xf];
	    }
	}

      /* Break the directive before the assembler's input line
	 length limit is reached.  */
      if (line_chars + n > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  line_chars = 0;
	}

      fwrite (group, 1, n, file);
      line_chars += n;
    }
  fputs ("\"\n", file);
}
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
void
remove_useless_addtr_insns (insns, check_notes)
     rtx insns;
     int check_notes;
{
  rtx insn;
  int all;		/* NOTE(review): appears unused here.  */
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP, then we've found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction, bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}

      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = insns; insn; insn = next_insn (insn))
	{
	  rtx tmp, next;

	  /* Ignore anything that isn't an INSN.  */
	  if (GET_CODE (insn) != INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (GET_CODE (next) == JUMP_INSN
		  || GET_CODE (next) == CALL_INSN
		  || GET_CODE (next) == CODE_LABEL)
		break;

	      /* As does another fcmp insn.  */
	      if (GET_CODE (next) == INSN
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next
	      && GET_CODE (next) == JUMP_INSN)
	    {
	      rtx pattern = PATTERN (next);

	      /* If it a reversed fp conditional branch (eg uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch: swap its two jump targets.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  /* Force re-recognition of the modified insn.  */
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    reverse_condition (GET_CODE (XEXP (tmp, 1))));
		}
	    }
	}
    }

  pass = !pass;
}
2258 /* You may have trouble believing this, but this is the HP-PA stack
2259 layout. Wow.
2261 Offset Contents
2263 Variable arguments (optional; any number may be allocated)
2265 SP-(4*(N+9)) arg word N
2267 SP-56 arg word 5
2268 SP-52 arg word 4
2270 Fixed arguments (must be allocated; may remain unused)
2272 SP-48 arg word 3
2273 SP-44 arg word 2
2274 SP-40 arg word 1
2275 SP-36 arg word 0
2277 Frame Marker
2279 SP-32 External Data Pointer (DP)
2280 SP-28 External sr4
2281 SP-24 External/stub RP (RP')
2282 SP-20 Current RP
2283 SP-16 Static Link
2284 SP-12 Clean up
2285 SP-8 Calling Stub RP (RP'')
2286 SP-4 Previous SP
2288 Top of Frame
2290 SP-0 Stack Pointer (points to next available address)
2294 /* This function saves registers as follows. Registers marked with ' are
2295 this function's registers (as opposed to the previous function's).
2296 If a frame_pointer isn't needed, r4 is saved as a general register;
2297 the space for the frame pointer is still allocated, though, to keep
2298 things simple.
2301 Top of Frame
2303 SP (FP') Previous FP
2304 SP + 4 Alignment filler (sigh)
2305 SP + 8 Space for locals reserved here.
2309 SP + n All call saved register used.
2313 SP + o All call saved fp registers used.
2317 SP + p (SP') points to next available address.
2321 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2322 Handle case where DISP > 8k by using the add_high_const pattern.
2324 Note in DISP > 8k case, we will leave the high part of the address
2325 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2326 static void
2327 store_reg (reg, disp, base)
2328 int reg, disp, base;
2330 if (VAL_14_BITS_P (disp))
2332 emit_move_insn (gen_rtx (MEM, SImode,
2333 gen_rtx (PLUS, SImode,
2334 gen_rtx (REG, SImode, base),
2335 GEN_INT (disp))),
2336 gen_rtx (REG, SImode, reg));
2338 else
2340 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2341 gen_rtx (REG, SImode, base),
2342 GEN_INT (disp)));
2343 emit_move_insn (gen_rtx (MEM, SImode,
2344 gen_rtx (LO_SUM, SImode,
2345 gen_rtx (REG, SImode, 1),
2346 GEN_INT (disp))),
2347 gen_rtx (REG, SImode, reg));
2351 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2352 Handle case where DISP > 8k by using the add_high_const pattern.
2354 Note in DISP > 8k case, we will leave the high part of the address
2355 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2356 static void
2357 load_reg (reg, disp, base)
2358 int reg, disp, base;
2360 if (VAL_14_BITS_P (disp))
2362 emit_move_insn (gen_rtx (REG, SImode, reg),
2363 gen_rtx (MEM, SImode,
2364 gen_rtx (PLUS, SImode,
2365 gen_rtx (REG, SImode, base),
2366 GEN_INT (disp))));
2368 else
2370 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2371 gen_rtx (REG, SImode, base),
2372 GEN_INT (disp)));
2373 emit_move_insn (gen_rtx (REG, SImode, reg),
2374 gen_rtx (MEM, SImode,
2375 gen_rtx (LO_SUM, SImode,
2376 gen_rtx (REG, SImode, 1),
2377 GEN_INT (disp))));
2381 /* Emit RTL to set REG to the value specified by BASE+DISP.
2382 Handle case where DISP > 8k by using the add_high_const pattern.
2384 Note in DISP > 8k case, we will leave the high part of the address
2385 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2386 static void
2387 set_reg_plus_d(reg, base, disp)
2388 int reg, base, disp;
2390 if (VAL_14_BITS_P (disp))
2392 emit_move_insn (gen_rtx (REG, SImode, reg),
2393 gen_rtx (PLUS, SImode,
2394 gen_rtx (REG, SImode, base),
2395 GEN_INT (disp)));
2397 else
2399 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2400 gen_rtx (REG, SImode, base),
2401 GEN_INT (disp)));
2402 emit_move_insn (gen_rtx (REG, SImode, reg),
2403 gen_rtx (LO_SUM, SImode,
2404 gen_rtx (REG, SImode, 1),
2405 GEN_INT (disp)));
2409 /* Global variables set by FUNCTION_PROLOGUE. */
2410 /* Size of frame. Need to know this to emit return insns from
2411 leaf procedures. */
2412 static int actual_fsize;
2413 static int local_fsize, save_fregs;
2416 compute_frame_size (size, fregs_live)
2417 int size;
2418 int *fregs_live;
2420 extern int current_function_outgoing_args_size;
2421 int i, fsize;
2423 /* 8 is space for frame pointer + filler. If any frame is allocated
2424 we need to add this in because of STARTING_FRAME_OFFSET. */
2425 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2427 /* We must leave enough space for all the callee saved registers
2428 from 3 .. highest used callee save register since we don't
2429 know if we're going to have an inline or out of line prologue
2430 and epilogue. */
2431 for (i = 18; i >= 3; i--)
2432 if (regs_ever_live[i])
2434 fsize += 4 * (i - 2);
2435 break;
2438 /* Round the stack. */
2439 fsize = (fsize + 7) & ~7;
2441 /* We must leave enough space for all the callee saved registers
2442 from 3 .. highest used callee save register since we don't
2443 know if we're going to have an inline or out of line prologue
2444 and epilogue. */
2445 for (i = 66; i >= 48; i -= 2)
2446 if (regs_ever_live[i] || regs_ever_live[i + 1])
2448 if (fregs_live)
2449 *fregs_live = 1;
2451 fsize += 4 * (i - 46);
2452 break;
2455 fsize += current_function_outgoing_args_size;
2456 if (! leaf_function_p () || fsize)
2457 fsize += 32;
2458 return (fsize + 63) & ~63;
2461 rtx hp_profile_label_rtx;
2462 static char hp_profile_label_name[8];
2463 void
2464 output_function_prologue (file, size)
2465 FILE *file;
2466 int size;
2468 /* The function's label and associated .PROC must never be
2469 separated and must be output *after* any profiling declarations
2470 to avoid changing spaces/subspaces within a procedure. */
2471 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2472 fputs ("\t.PROC\n", file);
2474 /* hppa_expand_prologue does the dirty work now. We just need
2475 to output the assembler directives which denote the start
2476 of a function. */
2477 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2478 if (regs_ever_live[2] || profile_flag)
2479 fputs (",CALLS,SAVE_RP", file);
2480 else
2481 fputs (",NO_CALLS", file);
2483 if (frame_pointer_needed)
2484 fputs (",SAVE_SP", file);
2486 /* Pass on information about the number of callee register saves
2487 performed in the prologue.
2489 The compiler is supposed to pass the highest register number
2490 saved, the assembler then has to adjust that number before
2491 entering it into the unwind descriptor (to account for any
2492 caller saved registers with lower register numbers than the
2493 first callee saved register). */
2494 if (gr_saved)
2495 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2497 if (fr_saved)
2498 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2500 fputs ("\n\t.ENTRY\n", file);
2502 /* Horrid hack. emit_function_prologue will modify this RTL in
2503 place to get the expected results. */
2504 if (profile_flag)
2505 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2506 hp_profile_labelno);
2508 /* If we're using GAS and not using the portable runtime model, then
2509 we don't need to accumulate the total number of code bytes. */
2510 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2511 total_code_bytes = 0;
2512 else if (insn_addresses)
2514 unsigned int old_total = total_code_bytes;
2516 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2517 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2519 /* Be prepared to handle overflows. */
2520 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2522 else
2523 total_code_bytes = -1;
2525 remove_useless_addtr_insns (get_insns (), 0);
2528 void
2529 hppa_expand_prologue()
2531 extern char call_used_regs[];
2532 int size = get_frame_size ();
2533 int merge_sp_adjust_with_store = 0;
2534 int i, offset;
2535 rtx tmpreg, size_rtx;
2537 gr_saved = 0;
2538 fr_saved = 0;
2539 save_fregs = 0;
2540 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2541 actual_fsize = compute_frame_size (size, &save_fregs);
2543 /* Compute a few things we will use often. */
2544 tmpreg = gen_rtx (REG, SImode, 1);
2545 size_rtx = GEN_INT (actual_fsize);
2547 /* Handle out of line prologues and epilogues. */
2548 if (TARGET_SPACE)
2550 rtx operands[2];
2551 int saves = 0;
2552 int outline_insn_count = 0;
2553 int inline_insn_count = 0;
2555 /* Count the number of insns for the inline and out of line
2556 variants so we can choose one appropriately.
2558 No need to screw with counting actual_fsize operations -- they're
2559 done for both inline and out of line prologues. */
2560 if (regs_ever_live[2])
2561 inline_insn_count += 1;
2563 if (! cint_ok_for_move (local_fsize))
2564 outline_insn_count += 2;
2565 else
2566 outline_insn_count += 1;
2568 /* Put the register save info into %r22. */
2569 for (i = 18; i >= 3; i--)
2570 if (regs_ever_live[i] && ! call_used_regs[i])
2572 /* -1 because the stack adjustment is normally done in
2573 the same insn as a register save. */
2574 inline_insn_count += (i - 2) - 1;
2575 saves = i;
2576 break;
2579 for (i = 66; i >= 48; i -= 2)
2580 if (regs_ever_live[i] || regs_ever_live[i + 1])
2582 /* +1 needed as we load %r1 with the start of the freg
2583 save area. */
2584 inline_insn_count += (i/2 - 23) + 1;
2585 saves |= ((i/2 - 12 ) << 16);
2586 break;
2589 if (frame_pointer_needed)
2590 inline_insn_count += 3;
2592 if (! cint_ok_for_move (saves))
2593 outline_insn_count += 2;
2594 else
2595 outline_insn_count += 1;
2597 if (TARGET_PORTABLE_RUNTIME)
2598 outline_insn_count += 2;
2599 else
2600 outline_insn_count += 1;
2602 /* If there's a lot of insns in the prologue, then do it as
2603 an out-of-line sequence. */
2604 if (inline_insn_count > outline_insn_count)
2606 /* Put the local_fisze into %r19. */
2607 operands[0] = gen_rtx (REG, SImode, 19);
2608 operands[1] = GEN_INT (local_fsize);
2609 emit_move_insn (operands[0], operands[1]);
2611 /* Put the stack size into %r21. */
2612 operands[0] = gen_rtx (REG, SImode, 21);
2613 operands[1] = size_rtx;
2614 emit_move_insn (operands[0], operands[1]);
2616 operands[0] = gen_rtx (REG, SImode, 22);
2617 operands[1] = GEN_INT (saves);
2618 emit_move_insn (operands[0], operands[1]);
2620 /* Now call the out-of-line prologue. */
2621 emit_insn (gen_outline_prologue_call ());
2622 emit_insn (gen_blockage ());
2624 /* Note that we're using an out-of-line prologue. */
2625 out_of_line_prologue_epilogue = 1;
2626 return;
2630 out_of_line_prologue_epilogue = 0;
2632 /* Save RP first. The calling conventions manual states RP will
2633 always be stored into the caller's frame at sp-20. */
2634 if (regs_ever_live[2] || profile_flag)
2635 store_reg (2, -20, STACK_POINTER_REGNUM);
2637 /* Allocate the local frame and set up the frame pointer if needed. */
2638 if (actual_fsize)
2639 if (frame_pointer_needed)
2641 /* Copy the old frame pointer temporarily into %r1. Set up the
2642 new stack pointer, then store away the saved old frame pointer
2643 into the stack at sp+actual_fsize and at the same time update
2644 the stack pointer by actual_fsize bytes. Two versions, first
2645 handles small (<8k) frames. The second handles large (>8k)
2646 frames. */
2647 emit_move_insn (tmpreg, frame_pointer_rtx);
2648 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2649 if (VAL_14_BITS_P (actual_fsize))
2650 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2651 else
2653 /* It is incorrect to store the saved frame pointer at *sp,
2654 then increment sp (writes beyond the current stack boundary).
2656 So instead use stwm to store at *sp and post-increment the
2657 stack pointer as an atomic operation. Then increment sp to
2658 finish allocating the new frame. */
2659 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2660 set_reg_plus_d (STACK_POINTER_REGNUM,
2661 STACK_POINTER_REGNUM,
2662 actual_fsize - 64);
2665 /* no frame pointer needed. */
2666 else
2668 /* In some cases we can perform the first callee register save
2669 and allocating the stack frame at the same time. If so, just
2670 make a note of it and defer allocating the frame until saving
2671 the callee registers. */
2672 if (VAL_14_BITS_P (-actual_fsize)
2673 && local_fsize == 0
2674 && ! profile_flag
2675 && ! flag_pic)
2676 merge_sp_adjust_with_store = 1;
2677 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2678 else if (actual_fsize != 0)
2679 set_reg_plus_d (STACK_POINTER_REGNUM,
2680 STACK_POINTER_REGNUM,
2681 actual_fsize);
2683 /* The hppa calling conventions say that that %r19, the pic offset
2684 register, is saved at sp - 32 (in this function's frame) when
2685 generating PIC code. FIXME: What is the correct thing to do
2686 for functions which make no calls and allocate no frame? Do
2687 we need to allocate a frame, or can we just omit the save? For
2688 now we'll just omit the save. */
2689 if (actual_fsize != 0 && flag_pic)
2690 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2692 /* Profiling code.
2694 Instead of taking one argument, the counter label, as most normal
2695 mcounts do, _mcount appears to behave differently on the HPPA. It
2696 takes the return address of the caller, the address of this routine,
2697 and the address of the label. Also, it isn't magic, so
2698 argument registers have to be preserved. */
2699 if (profile_flag)
2701 int pc_offset, i, arg_offset, basereg, offsetadj;
2703 pc_offset = 4 + (frame_pointer_needed
2704 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2705 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2707 /* When the function has a frame pointer, use it as the base
2708 register for saving/restore registers. Else use the stack
2709 pointer. Adjust the offset according to the frame size if
2710 this function does not have a frame pointer. */
2712 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2713 : STACK_POINTER_REGNUM;
2714 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2716 /* Horrid hack. emit_function_prologue will modify this RTL in
2717 place to get the expected results. sprintf here is just to
2718 put something in the name. */
2719 sprintf(hp_profile_label_name, "LP$%04d", -1);
2720 hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2721 hp_profile_label_name);
2722 if (current_function_returns_struct)
2723 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2725 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2726 if (regs_ever_live [i])
2728 store_reg (i, arg_offset, basereg);
2729 /* Deal with arg_offset not fitting in 14 bits. */
2730 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2733 emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2734 emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2735 emit_move_insn (gen_rtx (REG, SImode, 24),
2736 gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2737 /* %r25 is set from within the output pattern. */
2738 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2740 /* Restore argument registers. */
2741 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2742 if (regs_ever_live [i])
2743 load_reg (i, arg_offset, basereg);
2745 if (current_function_returns_struct)
2746 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2750 /* Normal register save.
2752 Do not save the frame pointer in the frame_pointer_needed case. It
2753 was done earlier. */
2754 if (frame_pointer_needed)
2756 for (i = 18, offset = local_fsize; i >= 4; i--)
2757 if (regs_ever_live[i] && ! call_used_regs[i])
2759 store_reg (i, offset, FRAME_POINTER_REGNUM);
2760 offset += 4;
2761 gr_saved++;
2763 /* Account for %r3 which is saved in a special place. */
2764 gr_saved++;
2766 /* No frame pointer needed. */
2767 else
2769 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2770 if (regs_ever_live[i] && ! call_used_regs[i])
2772 /* If merge_sp_adjust_with_store is nonzero, then we can
2773 optimize the first GR save. */
2774 if (merge_sp_adjust_with_store)
2776 merge_sp_adjust_with_store = 0;
2777 emit_insn (gen_post_stwm (stack_pointer_rtx,
2778 gen_rtx (REG, SImode, i),
2779 GEN_INT (-offset)));
2781 else
2782 store_reg (i, offset, STACK_POINTER_REGNUM);
2783 offset += 4;
2784 gr_saved++;
2787 /* If we wanted to merge the SP adjustment with a GR save, but we never
2788 did any GR saves, then just emit the adjustment here. */
2789 if (merge_sp_adjust_with_store)
2790 set_reg_plus_d (STACK_POINTER_REGNUM,
2791 STACK_POINTER_REGNUM,
2792 actual_fsize);
2795 /* Align pointer properly (doubleword boundary). */
2796 offset = (offset + 7) & ~7;
2798 /* Floating point register store. */
2799 if (save_fregs)
2801 /* First get the frame or stack pointer to the start of the FP register
2802 save area. */
2803 if (frame_pointer_needed)
2804 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2805 else
2806 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2808 /* Now actually save the FP registers. */
2809 for (i = 66; i >= 48; i -= 2)
2811 if (regs_ever_live[i] || regs_ever_live[i + 1])
2813 emit_move_insn (gen_rtx (MEM, DFmode,
2814 gen_rtx (POST_INC, DFmode, tmpreg)),
2815 gen_rtx (REG, DFmode, i));
2816 fr_saved++;
2821 /* When generating PIC code it is necessary to save/restore the
2822 PIC register around each function call. We used to do this
2823 in the call patterns themselves, but that implementation
2824 made incorrect assumptions about using global variables to hold
2825 per-function rtl code generated in the backend.
2827 So instead, we copy the PIC register into a reserved callee saved
2828 register in the prologue. Then after each call we reload the PIC
2829 register from the callee saved register. We also reload the PIC
2830 register from the callee saved register in the epilogue ensure the
2831 PIC register is valid at function exit.
2833 This may (depending on the exact characteristics of the function)
2834 even be more efficient.
2836 Avoid this if the callee saved register wasn't used (these are
2837 leaf functions). */
2838 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2839 emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2840 gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
2844 void
2845 output_function_epilogue (file, size)
2846 FILE *file;
2847 int size;
2849 rtx insn = get_last_insn ();
2850 int i;
2852 /* hppa_expand_epilogue does the dirty work now. We just need
2853 to output the assembler directives which denote the end
2854 of a function.
2856 To make debuggers happy, emit a nop if the epilogue was completely
2857 eliminated due to a volatile call as the last insn in the
2858 current function. That way the return address (in %r2) will
2859 always point to a valid instruction in the current function. */
2861 /* Get the last real insn. */
2862 if (GET_CODE (insn) == NOTE)
2863 insn = prev_real_insn (insn);
2865 /* If it is a sequence, then look inside. */
2866 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2867 insn = XVECEXP (PATTERN (insn), 0, 0);
2869 /* If insn is a CALL_INSN, then it must be a call to a volatile
2870 function (otherwise there would be epilogue insns). */
2871 if (insn && GET_CODE (insn) == CALL_INSN)
2872 fputs ("\tnop\n", file);
2874 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2876 /* If we have deferred plabels, then we need to switch into the data
2877 section and align it to a 4 byte boundary before we output the
2878 deferred plabels. */
2879 if (n_deferred_plabels)
2881 data_section ();
2882 ASM_OUTPUT_ALIGN (file, 2);
2885 /* Now output the deferred plabels. */
2886 for (i = 0; i < n_deferred_plabels; i++)
2888 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
2889 assemble_integer (deferred_plabels[i].symbol, 4, 1);
2891 n_deferred_plabels = 0;
2894 void
2895 hppa_expand_epilogue ()
2897 rtx tmpreg;
2898 int offset,i;
2899 int merge_sp_adjust_with_load = 0;
2901 /* Handle out of line prologues and epilogues. */
2902 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2904 int saves = 0;
2905 rtx operands[2];
2907 /* Put the register save info into %r22. */
2908 for (i = 18; i >= 3; i--)
2909 if (regs_ever_live[i] && ! call_used_regs[i])
2911 saves = i;
2912 break;
2915 for (i = 66; i >= 48; i -= 2)
2916 if (regs_ever_live[i] || regs_ever_live[i + 1])
2918 saves |= ((i/2 - 12 ) << 16);
2919 break;
2922 emit_insn (gen_blockage ());
2924 /* Put the local_fisze into %r19. */
2925 operands[0] = gen_rtx (REG, SImode, 19);
2926 operands[1] = GEN_INT (local_fsize);
2927 emit_move_insn (operands[0], operands[1]);
2929 /* Put the stack size into %r21. */
2930 operands[0] = gen_rtx (REG, SImode, 21);
2931 operands[1] = GEN_INT (actual_fsize);
2932 emit_move_insn (operands[0], operands[1]);
2934 operands[0] = gen_rtx (REG, SImode, 22);
2935 operands[1] = GEN_INT (saves);
2936 emit_move_insn (operands[0], operands[1]);
2938 /* Now call the out-of-line epilogue. */
2939 emit_insn (gen_outline_epilogue_call ());
2940 return;
2943 /* We will use this often. */
2944 tmpreg = gen_rtx (REG, SImode, 1);
2946 /* Try to restore RP early to avoid load/use interlocks when
2947 RP gets used in the return (bv) instruction. This appears to still
2948 be necessary even when we schedule the prologue and epilogue. */
2949 if (frame_pointer_needed
2950 && (regs_ever_live [2] || profile_flag))
2951 load_reg (2, -20, FRAME_POINTER_REGNUM);
2953 /* No frame pointer, and stack is smaller than 8k. */
2954 else if (! frame_pointer_needed
2955 && VAL_14_BITS_P (actual_fsize + 20)
2956 && (regs_ever_live[2] || profile_flag))
2957 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2959 /* General register restores. */
2960 if (frame_pointer_needed)
2962 for (i = 18, offset = local_fsize; i >= 4; i--)
2963 if (regs_ever_live[i] && ! call_used_regs[i])
2965 load_reg (i, offset, FRAME_POINTER_REGNUM);
2966 offset += 4;
2969 else
2971 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2973 if (regs_ever_live[i] && ! call_used_regs[i])
2975 /* Only for the first load.
2976 merge_sp_adjust_with_load holds the register load
2977 with which we will merge the sp adjustment. */
2978 if (VAL_14_BITS_P (actual_fsize + 20)
2979 && local_fsize == 0
2980 && ! merge_sp_adjust_with_load)
2981 merge_sp_adjust_with_load = i;
2982 else
2983 load_reg (i, offset, STACK_POINTER_REGNUM);
2984 offset += 4;
2989 /* Align pointer properly (doubleword boundary). */
2990 offset = (offset + 7) & ~7;
2992 /* FP register restores. */
2993 if (save_fregs)
2995 /* Adjust the register to index off of. */
2996 if (frame_pointer_needed)
2997 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2998 else
2999 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3001 /* Actually do the restores now. */
3002 for (i = 66; i >= 48; i -= 2)
3004 if (regs_ever_live[i] || regs_ever_live[i + 1])
3006 emit_move_insn (gen_rtx (REG, DFmode, i),
3007 gen_rtx (MEM, DFmode,
3008 gen_rtx (POST_INC, DFmode, tmpreg)));
3013 /* Emit a blockage insn here to keep these insns from being moved to
3014 an earlier spot in the epilogue, or into the main instruction stream.
3016 This is necessary as we must not cut the stack back before all the
3017 restores are finished. */
3018 emit_insn (gen_blockage ());
3019 /* No frame pointer, but we have a stack greater than 8k. We restore
3020 %r2 very late in this case. (All other cases are restored as early
3021 as possible.) */
3022 if (! frame_pointer_needed
3023 && ! VAL_14_BITS_P (actual_fsize + 20)
3024 && (regs_ever_live[2] || profile_flag))
3026 set_reg_plus_d (STACK_POINTER_REGNUM,
3027 STACK_POINTER_REGNUM,
3028 - actual_fsize);
3030 /* This used to try and be clever by not depending on the value in
3031 %r30 and instead use the value held in %r1 (so that the 2nd insn
3032 which sets %r30 could be put in the delay slot of the return insn).
3034 That won't work since if the stack is exactly 8k set_reg_plus_d
3035 doesn't set %r1, just %r30. */
3036 load_reg (2, - 20, STACK_POINTER_REGNUM);
3039 /* Reset stack pointer (and possibly frame pointer). The stack
3040 pointer is initially set to fp + 64 to avoid a race condition. */
3041 else if (frame_pointer_needed)
3043 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3044 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3045 stack_pointer_rtx,
3046 GEN_INT (-64)));
3048 /* If we were deferring a callee register restore, do it now. */
3049 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3050 emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
3051 merge_sp_adjust_with_load),
3052 stack_pointer_rtx,
3053 GEN_INT (- actual_fsize)));
3054 else if (actual_fsize != 0)
3055 set_reg_plus_d (STACK_POINTER_REGNUM,
3056 STACK_POINTER_REGNUM,
3057 - actual_fsize);
3060 /* Fetch the return address for the frame COUNT steps up from
3061 the current frame, after the prologue. FRAMEADDR is the
3062 frame pointer of the COUNT frame.
3064 We want to ignore any export stub remnants here.
3066 The value returned is used in two different ways:
3068 1. To find a function's caller.
3070 2. To change the return address for a function.
3072 This function handles most instances of case 1; however, it will
3073 fail if there are two levels of stubs to execute on the return
3074 path. The only way I believe that can happen is if the return value
3075 needs a parameter relocation, which never happens for C code.
3077 This function handles most instances of case 2; however, it will
3078 fail if we did not originally have stub code on the return path
3079 but will need code on the new return path. This can happen if
3080 the caller & callee are both in the main program, but the new
3081 return location is in a shared library.
   To handle this correctly we need to set the return pointer at
   frame-20 to point to a return stub, and set frame-24 to point to
   the location we wish to return to.  */
3088 return_addr_rtx (count, frameaddr)
3089 int count;
3090 rtx frameaddr;
3092 rtx label;
3093 rtx saved_rp;
3094 rtx ins;
3096 saved_rp = gen_reg_rtx (Pmode);
3098 /* First, we start off with the normal return address pointer from
3099 -20[frameaddr]. */
3101 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3103 /* Get pointer to the instruction stream. We have to mask out the
3104 privilege level from the two low order bits of the return address
3105 pointer here so that ins will point to the start of the first
3106 instruction that would have been executed if we returned. */
3107 ins = copy_to_reg (gen_rtx (AND, Pmode,
3108 copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
3109 MASK_RETURN_ADDR));
3110 label = gen_label_rtx ();
3112 /* Check the instruction stream at the normal return address for the
3113 export stub:
3115 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3116 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3117 0x00011820 | stub+16: mtsp r1,sr0
3118 0xe0400002 | stub+20: be,n 0(sr0,rp)
3120 If it is an export stub, than our return address is really in
3121 -24[frameaddr]. */
3123 emit_cmp_insn (gen_rtx (MEM, SImode, ins),
3124 GEN_INT (0x4bc23fd1),
3125 NE, NULL_RTX, SImode, 1, 0);
3126 emit_jump_insn (gen_bne (label));
3128 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
3129 GEN_INT (0x004010a1),
3130 NE, NULL_RTX, SImode, 1, 0);
3131 emit_jump_insn (gen_bne (label));
3133 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
3134 GEN_INT (0x00011820),
3135 NE, NULL_RTX, SImode, 1, 0);
3136 emit_jump_insn (gen_bne (label));
3138 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
3139 GEN_INT (0xe0400002),
3140 NE, NULL_RTX, SImode, 1, 0);
3142 /* If there is no export stub then just use our initial guess of
3143 -20[frameaddr]. */
3145 emit_jump_insn (gen_bne (label));
3147 /* Here we know that our return address pointer points to an export
3148 stub. We don't want to return the address of the export stub,
3149 but rather the return address that leads back into user code.
3150 That return address is stored at -24[frameaddr]. */
3152 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3154 emit_label (label);
3155 return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp));
3158 /* This is only valid once reload has completed because it depends on
3159 knowing exactly how much (if any) frame there is and...
3161 It's only valid if there is no frame marker to de-allocate and...
3163 It's only valid if %r2 hasn't been saved into the caller's frame
3164 (we're not profiling and %r2 isn't live anywhere). */
3166 hppa_can_use_return_insn_p ()
3168 return (reload_completed
3169 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3170 && ! profile_flag
3171 && ! regs_ever_live[2]
3172 && ! frame_pointer_needed);
3175 void
3176 emit_bcond_fp (code, operand0)
3177 enum rtx_code code;
3178 rtx operand0;
3180 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3181 gen_rtx (IF_THEN_ELSE, VOIDmode,
3182 gen_rtx (code, VOIDmode,
3183 gen_rtx (REG, CCFPmode, 0),
3184 const0_rtx),
3185 gen_rtx (LABEL_REF, VOIDmode, operand0),
3186 pc_rtx)));
3191 gen_cmp_fp (code, operand0, operand1)
3192 enum rtx_code code;
3193 rtx operand0, operand1;
3195 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3196 gen_rtx (code, CCFPmode, operand0, operand1));
3199 /* Adjust the cost of a scheduling dependency. Return the new cost of
3200 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3203 pa_adjust_cost (insn, link, dep_insn, cost)
3204 rtx insn;
3205 rtx link;
3206 rtx dep_insn;
3207 int cost;
3209 if (! recog_memoized (insn))
3210 return 0;
3212 if (REG_NOTE_KIND (link) == 0)
3214 /* Data dependency; DEP_INSN writes a register that INSN reads some
3215 cycles later. */
3217 if (get_attr_type (insn) == TYPE_FPSTORE)
3219 rtx pat = PATTERN (insn);
3220 rtx dep_pat = PATTERN (dep_insn);
3221 if (GET_CODE (pat) == PARALLEL)
3223 /* This happens for the fstXs,mb patterns. */
3224 pat = XVECEXP (pat, 0, 0);
3226 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3227 /* If this happens, we have to extend this to schedule
3228 optimally. Return 0 for now. */
3229 return 0;
3231 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3233 if (! recog_memoized (dep_insn))
3234 return 0;
3235 /* DEP_INSN is writing its result to the register
3236 being stored in the fpstore INSN. */
3237 switch (get_attr_type (dep_insn))
3239 case TYPE_FPLOAD:
3240 /* This cost 3 cycles, not 2 as the md says for the
3241 700 and 7100. Note scaling of cost for 7100. */
3242 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3244 case TYPE_FPALU:
3245 case TYPE_FPMULSGL:
3246 case TYPE_FPMULDBL:
3247 case TYPE_FPDIVSGL:
3248 case TYPE_FPDIVDBL:
3249 case TYPE_FPSQRTSGL:
3250 case TYPE_FPSQRTDBL:
3251 /* In these important cases, we save one cycle compared to
3252 when flop instruction feed each other. */
3253 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3255 default:
3256 return cost;
3261 /* For other data dependencies, the default cost specified in the
3262 md is correct. */
3263 return cost;
3265 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3267 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3268 cycles later. */
3270 if (get_attr_type (insn) == TYPE_FPLOAD)
3272 rtx pat = PATTERN (insn);
3273 rtx dep_pat = PATTERN (dep_insn);
3274 if (GET_CODE (pat) == PARALLEL)
3276 /* This happens for the fldXs,mb patterns. */
3277 pat = XVECEXP (pat, 0, 0);
3279 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3280 /* If this happens, we have to extend this to schedule
3281 optimally. Return 0 for now. */
3282 return 0;
3284 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3286 if (! recog_memoized (dep_insn))
3287 return 0;
3288 switch (get_attr_type (dep_insn))
3290 case TYPE_FPALU:
3291 case TYPE_FPMULSGL:
3292 case TYPE_FPMULDBL:
3293 case TYPE_FPDIVSGL:
3294 case TYPE_FPDIVDBL:
3295 case TYPE_FPSQRTSGL:
3296 case TYPE_FPSQRTDBL:
3297 /* A fpload can't be issued until one cycle before a
3298 preceding arithmetic operation has finished if
3299 the target of the fpload is any of the sources
3300 (or destination) of the arithmetic operation. */
3301 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3303 default:
3304 return 0;
3308 else if (get_attr_type (insn) == TYPE_FPALU)
3310 rtx pat = PATTERN (insn);
3311 rtx dep_pat = PATTERN (dep_insn);
3312 if (GET_CODE (pat) == PARALLEL)
3314 /* This happens for the fldXs,mb patterns. */
3315 pat = XVECEXP (pat, 0, 0);
3317 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3318 /* If this happens, we have to extend this to schedule
3319 optimally. Return 0 for now. */
3320 return 0;
3322 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3324 if (! recog_memoized (dep_insn))
3325 return 0;
3326 switch (get_attr_type (dep_insn))
3328 case TYPE_FPDIVSGL:
3329 case TYPE_FPDIVDBL:
3330 case TYPE_FPSQRTSGL:
3331 case TYPE_FPSQRTDBL:
3332 /* An ALU flop can't be issued until two cycles before a
3333 preceding divide or sqrt operation has finished if
3334 the target of the ALU flop is any of the sources
3335 (or destination) of the divide or sqrt operation. */
3336 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3338 default:
3339 return 0;
3344 /* For other anti dependencies, the cost is 0. */
3345 return 0;
3347 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3349 /* Output dependency; DEP_INSN writes a register that INSN writes some
3350 cycles later. */
3351 if (get_attr_type (insn) == TYPE_FPLOAD)
3353 rtx pat = PATTERN (insn);
3354 rtx dep_pat = PATTERN (dep_insn);
3355 if (GET_CODE (pat) == PARALLEL)
3357 /* This happens for the fldXs,mb patterns. */
3358 pat = XVECEXP (pat, 0, 0);
3360 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3361 /* If this happens, we have to extend this to schedule
3362 optimally. Return 0 for now. */
3363 return 0;
3365 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3367 if (! recog_memoized (dep_insn))
3368 return 0;
3369 switch (get_attr_type (dep_insn))
3371 case TYPE_FPALU:
3372 case TYPE_FPMULSGL:
3373 case TYPE_FPMULDBL:
3374 case TYPE_FPDIVSGL:
3375 case TYPE_FPDIVDBL:
3376 case TYPE_FPSQRTSGL:
3377 case TYPE_FPSQRTDBL:
3378 /* A fpload can't be issued until one cycle before a
3379 preceding arithmetic operation has finished if
3380 the target of the fpload is the destination of the
3381 arithmetic operation. */
3382 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3384 default:
3385 return 0;
3389 else if (get_attr_type (insn) == TYPE_FPALU)
3391 rtx pat = PATTERN (insn);
3392 rtx dep_pat = PATTERN (dep_insn);
3393 if (GET_CODE (pat) == PARALLEL)
3395 /* This happens for the fldXs,mb patterns. */
3396 pat = XVECEXP (pat, 0, 0);
3398 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3399 /* If this happens, we have to extend this to schedule
3400 optimally. Return 0 for now. */
3401 return 0;
3403 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3405 if (! recog_memoized (dep_insn))
3406 return 0;
3407 switch (get_attr_type (dep_insn))
3409 case TYPE_FPDIVSGL:
3410 case TYPE_FPDIVDBL:
3411 case TYPE_FPSQRTSGL:
3412 case TYPE_FPSQRTDBL:
3413 /* An ALU flop can't be issued until two cycles before a
3414 preceding divide or sqrt operation has finished if
3415 the target of the ALU flop is also the target of
3416 of the divide or sqrt operation. */
3417 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3419 default:
3420 return 0;
3425 /* For other output dependencies, the cost is 0. */
3426 return 0;
3428 else
3429 abort ();
3432 /* Return any length adjustment needed by INSN which already has its length
3433 computed as LENGTH. Return zero if no adjustment is necessary.
3435 For the PA: function calls, millicode calls, and backwards short
3436 conditional branches with unfilled delay slots need an adjustment by +1
3437 (to account for the NOP which will be inserted into the instruction stream).
3439 Also compute the length of an inline block move here as it is too
3440 complicated to express as a length attribute in pa.md. */
3442 pa_adjust_insn_length (insn, length)
3443 rtx insn;
3444 int length;
3446 rtx pat = PATTERN (insn);
3448 /* Call insns which are *not* indirect and have unfilled delay slots. */
3449 if (GET_CODE (insn) == CALL_INSN)
3452 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3453 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3454 return 4;
3455 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3456 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3457 == SYMBOL_REF)
3458 return 4;
3459 else
3460 return 0;
3462 /* Jumps inside switch tables which have unfilled delay slots
3463 also need adjustment. */
3464 else if (GET_CODE (insn) == JUMP_INSN
3465 && simplejump_p (insn)
3466 && GET_MODE (PATTERN (insn)) == DImode)
3467 return 4;
3468 /* Millicode insn with an unfilled delay slot. */
3469 else if (GET_CODE (insn) == INSN
3470 && GET_CODE (pat) != SEQUENCE
3471 && GET_CODE (pat) != USE
3472 && GET_CODE (pat) != CLOBBER
3473 && get_attr_type (insn) == TYPE_MILLI)
3474 return 4;
3475 /* Block move pattern. */
3476 else if (GET_CODE (insn) == INSN
3477 && GET_CODE (pat) == PARALLEL
3478 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3479 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3480 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3481 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3482 return compute_movstrsi_length (insn) - 4;
3483 /* Conditional branch with an unfilled delay slot. */
3484 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3486 /* Adjust a short backwards conditional with an unfilled delay slot. */
3487 if (GET_CODE (pat) == SET
3488 && length == 4
3489 && ! forward_branch_p (insn))
3490 return 4;
3491 else if (GET_CODE (pat) == PARALLEL
3492 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3493 && length == 4)
3494 return 4;
3495 /* Adjust dbra insn with short backwards conditional branch with
3496 unfilled delay slot -- only for case where counter is in a
3497 general register register. */
3498 else if (GET_CODE (pat) == PARALLEL
3499 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3500 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3501 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3502 && length == 4
3503 && ! forward_branch_p (insn))
3504 return 4;
3505 else
3506 return 0;
3508 return 0;
3511 /* Print operand X (an rtx) in assembler syntax to file FILE.
3512 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3513 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3515 void
3516 print_operand (file, x, code)
3517 FILE *file;
3518 rtx x;
3519 int code;
3521 switch (code)
3523 case '#':
3524 /* Output a 'nop' if there's nothing for the delay slot. */
3525 if (dbr_sequence_length () == 0)
3526 fputs ("\n\tnop", file);
3527 return;
3528 case '*':
3529 /* Output an nullification completer if there's nothing for the */
3530 /* delay slot or nullification is requested. */
3531 if (dbr_sequence_length () == 0 ||
3532 (final_sequence &&
3533 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3534 fputs (",n", file);
3535 return;
3536 case 'R':
3537 /* Print out the second register name of a register pair.
3538 I.e., R (6) => 7. */
3539 fputs (reg_names[REGNO (x)+1], file);
3540 return;
3541 case 'r':
3542 /* A register or zero. */
3543 if (x == const0_rtx
3544 || (x == CONST0_RTX (DFmode))
3545 || (x == CONST0_RTX (SFmode)))
3547 fputs ("0", file);
3548 return;
3550 else
3551 break;
3552 case 'C': /* Plain (C)ondition */
3553 case 'X':
3554 switch (GET_CODE (x))
3556 case EQ:
3557 fputs ("=", file); break;
3558 case NE:
3559 fputs ("<>", file); break;
3560 case GT:
3561 fputs (">", file); break;
3562 case GE:
3563 fputs (">=", file); break;
3564 case GEU:
3565 fputs (">>=", file); break;
3566 case GTU:
3567 fputs (">>", file); break;
3568 case LT:
3569 fputs ("<", file); break;
3570 case LE:
3571 fputs ("<=", file); break;
3572 case LEU:
3573 fputs ("<<=", file); break;
3574 case LTU:
3575 fputs ("<<", file); break;
3576 default:
3577 abort ();
3579 return;
3580 case 'N': /* Condition, (N)egated */
3581 switch (GET_CODE (x))
3583 case EQ:
3584 fputs ("<>", file); break;
3585 case NE:
3586 fputs ("=", file); break;
3587 case GT:
3588 fputs ("<=", file); break;
3589 case GE:
3590 fputs ("<", file); break;
3591 case GEU:
3592 fputs ("<<", file); break;
3593 case GTU:
3594 fputs ("<<=", file); break;
3595 case LT:
3596 fputs (">=", file); break;
3597 case LE:
3598 fputs (">", file); break;
3599 case LEU:
3600 fputs (">>", file); break;
3601 case LTU:
3602 fputs (">>=", file); break;
3603 default:
3604 abort ();
3606 return;
3607 /* For floating point comparisons. Need special conditions to deal
3608 with NaNs properly. */
3609 case 'Y':
3610 switch (GET_CODE (x))
3612 case EQ:
3613 fputs ("!=", file); break;
3614 case NE:
3615 fputs ("=", file); break;
3616 case GT:
3617 fputs ("<=", file); break;
3618 case GE:
3619 fputs ("<", file); break;
3620 case LT:
3621 fputs (">=", file); break;
3622 case LE:
3623 fputs (">", file); break;
3624 default:
3625 abort ();
3627 return;
3628 case 'S': /* Condition, operands are (S)wapped. */
3629 switch (GET_CODE (x))
3631 case EQ:
3632 fputs ("=", file); break;
3633 case NE:
3634 fputs ("<>", file); break;
3635 case GT:
3636 fputs ("<", file); break;
3637 case GE:
3638 fputs ("<=", file); break;
3639 case GEU:
3640 fputs ("<<=", file); break;
3641 case GTU:
3642 fputs ("<<", file); break;
3643 case LT:
3644 fputs (">", file); break;
3645 case LE:
3646 fputs (">=", file); break;
3647 case LEU:
3648 fputs (">>=", file); break;
3649 case LTU:
3650 fputs (">>", file); break;
3651 default:
3652 abort ();
3654 return;
3655 case 'B': /* Condition, (B)oth swapped and negate. */
3656 switch (GET_CODE (x))
3658 case EQ:
3659 fputs ("<>", file); break;
3660 case NE:
3661 fputs ("=", file); break;
3662 case GT:
3663 fputs (">=", file); break;
3664 case GE:
3665 fputs (">", file); break;
3666 case GEU:
3667 fputs (">>", file); break;
3668 case GTU:
3669 fputs (">>=", file); break;
3670 case LT:
3671 fputs ("<=", file); break;
3672 case LE:
3673 fputs ("<", file); break;
3674 case LEU:
3675 fputs ("<<", file); break;
3676 case LTU:
3677 fputs ("<<=", file); break;
3678 default:
3679 abort ();
3681 return;
3682 case 'k':
3683 if (GET_CODE (x) == CONST_INT)
3685 fprintf (file, "%d", ~INTVAL (x));
3686 return;
3688 abort();
3689 case 'L':
3690 if (GET_CODE (x) == CONST_INT)
3692 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3693 return;
3695 abort();
3696 case 'O':
3697 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3699 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3700 return;
3702 abort();
3703 case 'P':
3704 if (GET_CODE (x) == CONST_INT)
3706 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3707 return;
3709 abort();
3710 case 'I':
3711 if (GET_CODE (x) == CONST_INT)
3712 fputs ("i", file);
3713 return;
3714 case 'M':
3715 case 'F':
3716 switch (GET_CODE (XEXP (x, 0)))
3718 case PRE_DEC:
3719 case PRE_INC:
3720 fputs ("s,mb", file);
3721 break;
3722 case POST_DEC:
3723 case POST_INC:
3724 fputs ("s,ma", file);
3725 break;
3726 case PLUS:
3727 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3728 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3729 fputs ("x,s", file);
3730 else if (code == 'F')
3731 fputs ("s", file);
3732 break;
3733 default:
3734 if (code == 'F')
3735 fputs ("s", file);
3736 break;
3738 return;
3739 case 'G':
3740 output_global_address (file, x, 0);
3741 return;
3742 case 'H':
3743 output_global_address (file, x, 1);
3744 return;
3745 case 0: /* Don't do anything special */
3746 break;
3747 case 'Z':
3749 unsigned op[3];
3750 compute_zdepi_operands (INTVAL (x), op);
3751 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3752 return;
3754 default:
3755 abort ();
3757 if (GET_CODE (x) == REG)
3759 fputs (reg_names [REGNO (x)], file);
3760 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3761 fputs ("L", file);
3763 else if (GET_CODE (x) == MEM)
3765 int size = GET_MODE_SIZE (GET_MODE (x));
3766 rtx base = XEXP (XEXP (x, 0), 0);
3767 switch (GET_CODE (XEXP (x, 0)))
3769 case PRE_DEC:
3770 case POST_DEC:
3771 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3772 break;
3773 case PRE_INC:
3774 case POST_INC:
3775 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3776 break;
3777 default:
3778 if (GET_CODE (XEXP (x, 0)) == PLUS
3779 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3780 fprintf (file, "%s(0,%s)",
3781 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3782 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3783 else if (GET_CODE (XEXP (x, 0)) == PLUS
3784 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3785 fprintf (file, "%s(0,%s)",
3786 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3787 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3788 else
3789 output_address (XEXP (x, 0));
3790 break;
3793 else
3794 output_addr_const (file, x);
3797 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3799 void
3800 output_global_address (file, x, round_constant)
3801 FILE *file;
3802 rtx x;
3803 int round_constant;
3806 /* Imagine (high (const (plus ...))). */
3807 if (GET_CODE (x) == HIGH)
3808 x = XEXP (x, 0);
3810 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3811 assemble_name (file, XSTR (x, 0));
3812 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3814 assemble_name (file, XSTR (x, 0));
3815 fputs ("-$global$", file);
3817 else if (GET_CODE (x) == CONST)
3819 char *sep = "";
3820 int offset = 0; /* assembler wants -$global$ at end */
3821 rtx base;
3823 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3825 base = XEXP (XEXP (x, 0), 0);
3826 output_addr_const (file, base);
3828 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3829 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3830 else abort ();
3832 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3834 base = XEXP (XEXP (x, 0), 1);
3835 output_addr_const (file, base);
3837 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3838 offset = INTVAL (XEXP (XEXP (x, 0),1));
3839 else abort ();
3841 /* How bogus. The compiler is apparently responsible for
3842 rounding the constant if it uses an LR field selector.
3844 The linker and/or assembler seem a better place since
3845 they have to do this kind of thing already.
3847 If we fail to do this, HP's optimizing linker may eliminate
3848 an addil, but not update the ldw/stw/ldo instruction that
3849 uses the result of the addil. */
3850 if (round_constant)
3851 offset = ((offset + 0x1000) & ~0x1fff);
3853 if (GET_CODE (XEXP (x, 0)) == PLUS)
3855 if (offset < 0)
3857 offset = -offset;
3858 sep = "-";
3860 else
3861 sep = "+";
3863 else if (GET_CODE (XEXP (x, 0)) == MINUS
3864 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3865 sep = "-";
3866 else abort ();
3868 if (!read_only_operand (base) && !flag_pic)
3869 fputs ("-$global$", file);
3870 if (offset)
3871 fprintf (file,"%s%d", sep, offset);
3873 else
3874 output_addr_const (file, x);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };

/* One flag per millicode routine: nonzero once its .IMPORT directive
   has been emitted.  */
static char imported[(int) end1000];

/* Assembler-level names, indexed by enum millicodes.  */
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};

/* Directive template; the "...." placeholder (4 chars, at offset
   MILLI_START) is overwritten with the routine name by import_milli.  */
static char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
3886 static void
3887 import_milli (code)
3888 enum millicodes code;
3890 char str[sizeof (import_string)];
3892 if (!imported[(int)code])
3894 imported[(int)code] = 1;
3895 strcpy (str, import_string);
3896 strncpy (str + MILLI_START, milli_names[(int)code], 4);
3897 output_asm_insn (str, 0);
3901 /* The register constraints have put the operands and return value in
3902 the proper registers. */
3904 char *
3905 output_mul_insn (unsignedp, insn)
3906 int unsignedp;
3907 rtx insn;
3909 import_milli (mulI);
3910 return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Index is the
   divisor (1..15); nonzero means $$divI_n / $$divU_n exist.  */
static int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
			    1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already.  [n][0] is signed, [n][1] is
   unsigned.  */
static int div_milli[16][2];
3926 div_operand (op, mode)
3927 rtx op;
3928 enum machine_mode mode;
3930 return (mode == SImode
3931 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3932 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3933 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3937 emit_hpdiv_const (operands, unsignedp)
3938 rtx *operands;
3939 int unsignedp;
3941 if (GET_CODE (operands[2]) == CONST_INT
3942 && INTVAL (operands[2]) > 0
3943 && INTVAL (operands[2]) < 16
3944 && magic_milli[INTVAL (operands[2])])
3946 emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
3947 emit
3948 (gen_rtx
3949 (PARALLEL, VOIDmode,
3950 gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3951 gen_rtx (unsignedp ? UDIV : DIV, SImode,
3952 gen_rtx (REG, SImode, 26),
3953 operands[2])),
3954 gen_rtx (CLOBBER, VOIDmode, operands[3]),
3955 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3956 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3957 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3958 emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3959 return 1;
3961 return 0;
3964 char *
3965 output_div_insn (operands, unsignedp, insn)
3966 rtx *operands;
3967 int unsignedp;
3968 rtx insn;
3970 int divisor;
3972 /* If the divisor is a constant, try to use one of the special
3973 opcodes .*/
3974 if (GET_CODE (operands[0]) == CONST_INT)
3976 static char buf[100];
3977 divisor = INTVAL (operands[0]);
3978 if (!div_milli[divisor][unsignedp])
3980 div_milli[divisor][unsignedp] = 1;
3981 if (unsignedp)
3982 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
3983 else
3984 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
3986 if (unsignedp)
3988 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
3989 return output_millicode_call (insn,
3990 gen_rtx (SYMBOL_REF, SImode, buf));
3992 else
3994 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
3995 return output_millicode_call (insn,
3996 gen_rtx (SYMBOL_REF, SImode, buf));
3999 /* Divisor isn't a special constant. */
4000 else
4002 if (unsignedp)
4004 import_milli (divU);
4005 return output_millicode_call (insn,
4006 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
4008 else
4010 import_milli (divI);
4011 return output_millicode_call (insn,
4012 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
4017 /* Output a $$rem millicode to do mod. */
4019 char *
4020 output_mod_insn (unsignedp, insn)
4021 int unsignedp;
4022 rtx insn;
4024 if (unsignedp)
4026 import_milli (remU);
4027 return output_millicode_call (insn,
4028 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
4030 else
4032 import_milli (remI);
4033 return output_millicode_call (insn,
4034 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
4038 void
4039 output_arg_descriptor (call_insn)
4040 rtx call_insn;
4042 char *arg_regs[4];
4043 enum machine_mode arg_mode;
4044 rtx link;
4045 int i, output_flag = 0;
4046 int regno;
4048 for (i = 0; i < 4; i++)
4049 arg_regs[i] = 0;
4051 /* Specify explicitly that no argument relocations should take place
4052 if using the portable runtime calling conventions. */
4053 if (TARGET_PORTABLE_RUNTIME)
4055 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4056 asm_out_file);
4057 return;
4060 if (GET_CODE (call_insn) != CALL_INSN)
4061 abort ();
4062 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4064 rtx use = XEXP (link, 0);
4066 if (! (GET_CODE (use) == USE
4067 && GET_CODE (XEXP (use, 0)) == REG
4068 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4069 continue;
4071 arg_mode = GET_MODE (XEXP (use, 0));
4072 regno = REGNO (XEXP (use, 0));
4073 if (regno >= 23 && regno <= 26)
4075 arg_regs[26 - regno] = "GR";
4076 if (arg_mode == DImode)
4077 arg_regs[25 - regno] = "GR";
4079 else if (regno >= 32 && regno <= 39)
4081 if (arg_mode == SFmode)
4082 arg_regs[(regno - 32) / 2] = "FR";
4083 else
4085 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4086 arg_regs[(regno - 34) / 2] = "FR";
4087 arg_regs[(regno - 34) / 2 + 1] = "FU";
4088 #else
4089 arg_regs[(regno - 34) / 2] = "FU";
4090 arg_regs[(regno - 34) / 2 + 1] = "FR";
4091 #endif
4095 fputs ("\t.CALL ", asm_out_file);
4096 for (i = 0; i < 4; i++)
4098 if (arg_regs[i])
4100 if (output_flag++)
4101 fputc (',', asm_out_file);
4102 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4105 fputc ('\n', asm_out_file);
4108 /* Return the class of any secondary reload register that is needed to
4109 move IN into a register in class CLASS using mode MODE.
4111 Profiling has showed this routine and its descendants account for
4112 a significant amount of compile time (~7%). So it has been
4113 optimized to reduce redundant computations and eliminate useless
4114 function calls.
4116 It might be worthwhile to try and make this a leaf function too. */
4118 enum reg_class
4119 secondary_reload_class (class, mode, in)
4120 enum reg_class class;
4121 enum machine_mode mode;
4122 rtx in;
4124 int regno, is_symbolic;
4126 /* Trying to load a constant into a FP register during PIC code
4127 generation will require %r1 as a scratch register. */
4128 if (flag_pic == 2
4129 && GET_MODE_CLASS (mode) == MODE_INT
4130 && FP_REG_CLASS_P (class)
4131 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4132 return R1_REGS;
4134 /* Profiling showed the PA port spends about 1.3% of its compilation
4135 time in true_regnum from calls inside secondary_reload_class. */
4137 if (GET_CODE (in) == REG)
4139 regno = REGNO (in);
4140 if (regno >= FIRST_PSEUDO_REGISTER)
4141 regno = true_regnum (in);
4143 else if (GET_CODE (in) == SUBREG)
4144 regno = true_regnum (in);
4145 else
4146 regno = -1;
4148 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4149 && GET_MODE_CLASS (mode) == MODE_INT
4150 && FP_REG_CLASS_P (class))
4151 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4152 return GENERAL_REGS;
4154 if (GET_CODE (in) == HIGH)
4155 in = XEXP (in, 0);
4157 /* Profiling has showed GCC spends about 2.6% of its compilation
4158 time in symbolic_operand from calls inside secondary_reload_class.
4160 We use an inline copy and only compute its return value once to avoid
4161 useless work. */
4162 switch (GET_CODE (in))
4164 rtx tmp;
4166 case SYMBOL_REF:
4167 case LABEL_REF:
4168 is_symbolic = 1;
4169 break;
4170 case CONST:
4171 tmp = XEXP (in, 0);
4172 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4173 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4174 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4175 break;
4176 default:
4177 is_symbolic = 0;
4178 break;
4181 if (!flag_pic
4182 && is_symbolic
4183 && read_only_operand (in))
4184 return NO_REGS;
4186 if (class != R1_REGS && is_symbolic)
4187 return R1_REGS;
4189 return NO_REGS;
4192 enum direction
4193 function_arg_padding (mode, type)
4194 enum machine_mode mode;
4195 tree type;
4197 int size;
4199 if (mode == BLKmode)
4201 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4202 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4203 else
4204 return upward; /* Don't know if this is right, but */
4205 /* same as old definition. */
4207 else
4208 size = GET_MODE_BITSIZE (mode);
4209 if (size < PARM_BOUNDARY)
4210 return downward;
4211 else if (size % PARM_BOUNDARY)
4212 return upward;
4213 else
4214 return none;
4218 /* Do what is necessary for `va_start'. The argument is ignored;
4219 We look at the current function to determine if stdargs or varargs
4220 is used and fill in an initial va_list. A pointer to this constructor
4221 is returned. */
4223 struct rtx_def *
4224 hppa_builtin_saveregs (arglist)
4225 tree arglist;
4227 rtx offset;
4228 tree fntype = TREE_TYPE (current_function_decl);
4229 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4230 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4231 != void_type_node)))
4232 ? UNITS_PER_WORD : 0);
4234 if (argadj)
4235 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4236 else
4237 offset = current_function_arg_offset_rtx;
4239 /* Store general registers on the stack. */
4240 move_block_from_reg (23,
4241 gen_rtx (MEM, BLKmode,
4242 plus_constant
4243 (current_function_internal_arg_pointer, -16)),
4244 4, 4 * UNITS_PER_WORD);
4245 return copy_to_reg (expand_binop (Pmode, add_optab,
4246 current_function_internal_arg_pointer,
4247 offset, 0, 0, OPTAB_LIB_WIDEN));
4250 /* This routine handles all the normal conditional branch sequences we
4251 might need to generate. It handles compare immediate vs compare
4252 register, nullification of delay slots, varying length branches,
4253 negated branches, and all combinations of the above. It returns the
4254 output appropriate to emit the branch corresponding to all given
4255 parameters. */
4257 char *
4258 output_cbranch (operands, nullify, length, negated, insn)
4259 rtx *operands;
4260 int nullify, length, negated;
4261 rtx insn;
4263 static char buf[100];
4264 int useskip = 0;
4266 /* A conditional branch to the following instruction (eg the delay slot) is
4267 asking for a disaster. This can happen when not optimizing.
4269 In such cases it is safe to emit nothing. */
4271 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4272 return "";
4274 /* If this is a long branch with its delay slot unfilled, set `nullify'
4275 as it can nullify the delay slot and save a nop. */
4276 if (length == 8 && dbr_sequence_length () == 0)
4277 nullify = 1;
4279 /* If this is a short forward conditional branch which did not get
4280 its delay slot filled, the delay slot can still be nullified. */
4281 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4282 nullify = forward_branch_p (insn);
4284 /* A forward branch over a single nullified insn can be done with a
4285 comclr instruction. This avoids a single cycle penalty due to
4286 mis-predicted branch if we fall through (branch not taken). */
4287 if (length == 4
4288 && next_real_insn (insn) != 0
4289 && get_attr_length (next_real_insn (insn)) == 4
4290 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4291 && nullify)
4292 useskip = 1;
4294 switch (length)
4296 /* All short conditional branches except backwards with an unfilled
4297 delay slot. */
4298 case 4:
4299 if (useskip)
4300 strcpy (buf, "com%I2clr,");
4301 else
4302 strcpy (buf, "com%I2b,");
4303 if (negated)
4304 strcat (buf, "%B3");
4305 else
4306 strcat (buf, "%S3");
4307 if (useskip)
4308 strcat (buf, " %2,%1,0");
4309 else if (nullify)
4310 strcat (buf, ",n %2,%1,%0");
4311 else
4312 strcat (buf, " %2,%1,%0");
4313 break;
4315 /* All long conditionals. Note an short backward branch with an
4316 unfilled delay slot is treated just like a long backward branch
4317 with an unfilled delay slot. */
4318 case 8:
4319 /* Handle weird backwards branch with a filled delay slot
4320 with is nullified. */
4321 if (dbr_sequence_length () != 0
4322 && ! forward_branch_p (insn)
4323 && nullify)
4325 strcpy (buf, "com%I2b,");
4326 if (negated)
4327 strcat (buf, "%S3");
4328 else
4329 strcat (buf, "%B3");
4330 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4332 /* Handle short backwards branch with an unfilled delay slot.
4333 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4334 taken and untaken branches. */
4335 else if (dbr_sequence_length () == 0
4336 && ! forward_branch_p (insn)
4337 && insn_addresses
4338 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4339 - insn_addresses[INSN_UID (insn)] - 8))
4341 strcpy (buf, "com%I2b,");
4342 if (negated)
4343 strcat (buf, "%B3 %2,%1,%0%#");
4344 else
4345 strcat (buf, "%S3 %2,%1,%0%#");
4347 else
4349 strcpy (buf, "com%I2clr,");
4350 if (negated)
4351 strcat (buf, "%S3");
4352 else
4353 strcat (buf, "%B3");
4354 if (nullify)
4355 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4356 else
4357 strcat (buf, " %2,%1,0\n\tbl %0,0");
4359 break;
4361 default:
4362 abort();
4364 return buf;
4367 /* This routine handles all the branch-on-bit conditional branch sequences we
4368 might need to generate. It handles nullification of delay slots,
4369 varying length branches, negated branches and all combinations of the
4370 above. it returns the appropriate output template to emit the branch. */
4372 char *
4373 output_bb (operands, nullify, length, negated, insn, which)
4374 rtx *operands;
4375 int nullify, length, negated;
4376 rtx insn;
4377 int which;
4379 static char buf[100];
4380 int useskip = 0;
4382 /* A conditional branch to the following instruction (eg the delay slot) is
4383 asking for a disaster. I do not think this can happen as this pattern
4384 is only used when optimizing; jump optimization should eliminate the
4385 jump. But be prepared just in case. */
4387 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4388 return "";
4390 /* If this is a long branch with its delay slot unfilled, set `nullify'
4391 as it can nullify the delay slot and save a nop. */
4392 if (length == 8 && dbr_sequence_length () == 0)
4393 nullify = 1;
4395 /* If this is a short forward conditional branch which did not get
4396 its delay slot filled, the delay slot can still be nullified. */
4397 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4398 nullify = forward_branch_p (insn);
4400 /* A forward branch over a single nullified insn can be done with a
4401 extrs instruction. This avoids a single cycle penalty due to
4402 mis-predicted branch if we fall through (branch not taken). */
4404 if (length == 4
4405 && next_real_insn (insn) != 0
4406 && get_attr_length (next_real_insn (insn)) == 4
4407 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4408 && nullify)
4409 useskip = 1;
4411 switch (length)
4414 /* All short conditional branches except backwards with an unfilled
4415 delay slot. */
4416 case 4:
4417 if (useskip)
4418 strcpy (buf, "extrs,");
4419 else
4420 strcpy (buf, "bb,");
4421 if ((which == 0 && negated)
4422 || (which == 1 && ! negated))
4423 strcat (buf, ">=");
4424 else
4425 strcat (buf, "<");
4426 if (useskip)
4427 strcat (buf, " %0,%1,1,0");
4428 else if (nullify && negated)
4429 strcat (buf, ",n %0,%1,%3");
4430 else if (nullify && ! negated)
4431 strcat (buf, ",n %0,%1,%2");
4432 else if (! nullify && negated)
4433 strcat (buf, "%0,%1,%3");
4434 else if (! nullify && ! negated)
4435 strcat (buf, " %0,%1,%2");
4436 break;
4438 /* All long conditionals. Note an short backward branch with an
4439 unfilled delay slot is treated just like a long backward branch
4440 with an unfilled delay slot. */
4441 case 8:
4442 /* Handle weird backwards branch with a filled delay slot
4443 with is nullified. */
4444 if (dbr_sequence_length () != 0
4445 && ! forward_branch_p (insn)
4446 && nullify)
4448 strcpy (buf, "bb,");
4449 if ((which == 0 && negated)
4450 || (which == 1 && ! negated))
4451 strcat (buf, "<");
4452 else
4453 strcat (buf, ">=");
4454 if (negated)
4455 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4456 else
4457 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4459 /* Handle short backwards branch with an unfilled delay slot.
4460 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4461 taken and untaken branches. */
4462 else if (dbr_sequence_length () == 0
4463 && ! forward_branch_p (insn)
4464 && insn_addresses
4465 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4466 - insn_addresses[INSN_UID (insn)] - 8))
4468 strcpy (buf, "bb,");
4469 if ((which == 0 && negated)
4470 || (which == 1 && ! negated))
4471 strcat (buf, ">=");
4472 else
4473 strcat (buf, "<");
4474 if (negated)
4475 strcat (buf, " %0,%1,%3%#");
4476 else
4477 strcat (buf, " %0,%1,%2%#");
4479 else
4481 strcpy (buf, "extrs,");
4482 if ((which == 0 && negated)
4483 || (which == 1 && ! negated))
4484 strcat (buf, "<");
4485 else
4486 strcat (buf, ">=");
4487 if (nullify && negated)
4488 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4489 else if (nullify && ! negated)
4490 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4491 else if (negated)
4492 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4493 else
4494 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4496 break;
4498 default:
4499 abort();
4501 return buf;
4504 /* This routine handles all the branch-on-variable-bit conditional branch
4505 sequences we might need to generate. It handles nullification of delay
4506 slots, varying length branches, negated branches and all combinations
4507 of the above. it returns the appropriate output template to emit the
4508 branch. */
4510 char *
4511 output_bvb (operands, nullify, length, negated, insn, which)
4512 rtx *operands;
4513 int nullify, length, negated;
4514 rtx insn;
4515 int which;
4517 static char buf[100];
4518 int useskip = 0;
4520 /* A conditional branch to the following instruction (eg the delay slot) is
4521 asking for a disaster. I do not think this can happen as this pattern
4522 is only used when optimizing; jump optimization should eliminate the
4523 jump. But be prepared just in case. */
4525 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4526 return "";
4528 /* If this is a long branch with its delay slot unfilled, set `nullify'
4529 as it can nullify the delay slot and save a nop. */
4530 if (length == 8 && dbr_sequence_length () == 0)
4531 nullify = 1;
4533 /* If this is a short forward conditional branch which did not get
4534 its delay slot filled, the delay slot can still be nullified. */
4535 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4536 nullify = forward_branch_p (insn);
4538 /* A forward branch over a single nullified insn can be done with a
4539 extrs instruction. This avoids a single cycle penalty due to
4540 mis-predicted branch if we fall through (branch not taken). */
4542 if (length == 4
4543 && next_real_insn (insn) != 0
4544 && get_attr_length (next_real_insn (insn)) == 4
4545 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4546 && nullify)
4547 useskip = 1;
4549 switch (length)
4552 /* All short conditional branches except backwards with an unfilled
4553 delay slot. */
4554 case 4:
4555 if (useskip)
4556 strcpy (buf, "vextrs,");
4557 else
4558 strcpy (buf, "bvb,");
4559 if ((which == 0 && negated)
4560 || (which == 1 && ! negated))
4561 strcat (buf, ">=");
4562 else
4563 strcat (buf, "<");
4564 if (useskip)
4565 strcat (buf, " %0,1,0");
4566 else if (nullify && negated)
4567 strcat (buf, ",n %0,%3");
4568 else if (nullify && ! negated)
4569 strcat (buf, ",n %0,%2");
4570 else if (! nullify && negated)
4571 strcat (buf, "%0,%3");
4572 else if (! nullify && ! negated)
4573 strcat (buf, " %0,%2");
4574 break;
4576 /* All long conditionals. Note an short backward branch with an
4577 unfilled delay slot is treated just like a long backward branch
4578 with an unfilled delay slot. */
4579 case 8:
4580 /* Handle weird backwards branch with a filled delay slot
4581 with is nullified. */
4582 if (dbr_sequence_length () != 0
4583 && ! forward_branch_p (insn)
4584 && nullify)
4586 strcpy (buf, "bvb,");
4587 if ((which == 0 && negated)
4588 || (which == 1 && ! negated))
4589 strcat (buf, "<");
4590 else
4591 strcat (buf, ">=");
4592 if (negated)
4593 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4594 else
4595 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4597 /* Handle short backwards branch with an unfilled delay slot.
4598 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4599 taken and untaken branches. */
4600 else if (dbr_sequence_length () == 0
4601 && ! forward_branch_p (insn)
4602 && insn_addresses
4603 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4604 - insn_addresses[INSN_UID (insn)] - 8))
4606 strcpy (buf, "bvb,");
4607 if ((which == 0 && negated)
4608 || (which == 1 && ! negated))
4609 strcat (buf, ">=");
4610 else
4611 strcat (buf, "<");
4612 if (negated)
4613 strcat (buf, " %0,%3%#");
4614 else
4615 strcat (buf, " %0,%2%#");
4617 else
4619 strcpy (buf, "vextrs,");
4620 if ((which == 0 && negated)
4621 || (which == 1 && ! negated))
4622 strcat (buf, "<");
4623 else
4624 strcat (buf, ">=");
4625 if (nullify && negated)
4626 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4627 else if (nullify && ! negated)
4628 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4629 else if (negated)
4630 strcat (buf, " %0,1,0\n\tbl %3,0");
4631 else
4632 strcat (buf, " %0,1,0\n\tbl %2,0");
4634 break;
4636 default:
4637 abort();
4639 return buf;
/* Return the output template for emitting a dbra type insn.

   OPERANDS: %0 loop counter, %1 increment, %2 comparison code,
   %3 branch target, %4 scratch (reload alternatives only).
   WHICH_ALTERNATIVE: 0 = counter in a GR, 1 = counter in an FP register,
   2 = counter in memory.

   Note it may perform some output operations on its own before
   returning the final output string.  */
char *
output_dbra (operands, insn, which_alternative)
  rtx *operands;
  rtx insn;
  int which_alternative;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  In that case only perform the
     decrement (no branch is needed to reach the next insn).  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  /* FP counter: bounce through the stack slot at -16(%r30) to
	     increment it in a GR, then reload the FP register.  */
	  output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
	  output_asm_insn ("ldw -16(0,%%r30),%4",operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else
	{
	  /* Memory counter: load, increment, store back.  */
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "addib,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "addib,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				     - insn_addresses[INSN_UID (insn)] - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
	  else
	    return "addi,%N2 %1,%0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
      if (get_attr_length (insn) == 24)
	return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (get_attr_length (insn) == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else
	return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
    }
}
/* Return the output template for emitting a movb type insn
   (conditional move-and-branch).

   OPERANDS: %0 destination, %1 source, %2 comparison code, %3 branch target.
   WHICH_ALTERNATIVE: 0 = destination in a GR, 1 = destination in an FP
   register, 2 = destination in memory, 3 = destination is the SAR.
   REVERSE_COMPARISON nonzero means invert the condition in operands[2]
   before emitting.

   Note it may perform some output operations on its own before
   returning the final output string.  */
char *
output_movb (operands, insn, which_alternative, reverse_comparison)
  rtx *operands;
  rtx insn;
  int which_alternative;
  int reverse_comparison;
{

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  In that case just perform the
     move unconditionally -- no branch is needed to reach the next insn.  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  /* FP destination: go through the stack slot at -16(%r30).  */
	  output_asm_insn ("stw %1,-16(0,%%r30)",operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  Note this mutates operands[2] in place.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "movb,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "movb,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				     - insn_addresses[INSN_UID (insn)] - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("stw %1,-16(0,%%r30)",operands);
      if (get_attr_length (insn) == 12)
	return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tstw %1,%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tmtsar %r1";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
    }
}
/* INSN is a millicode call.  It may have an unconditional jump in its delay
   slot.

   CALL_DEST is the routine we are calling.

   Returns the residual output template (often "" since most output is
   emitted directly via output_asm_insn).  May delete a delay-slot insn
   by turning it into a NOTE after emitting it inline.  */

char *
output_millicode_call (insn, call_dest)
  rtx insn;
  rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r31%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 4)
    {
      int delay_insn_deleted = 0;
      /* Shadows the outer xoperands; only indices 0 and 1 are used here.  */
      rtx xoperands[2];
      rtx link;

      /* We need to emit an inline long-call branch.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call.  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* If we're allowed to use be/ble instructions, then this is the
	 best sequence to use for a long millicode call.  */
      if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
	  || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
	{
	  xoperands[0] = call_dest;
	  output_asm_insn ("ldil L%%%0,%%r31", xoperands);
	  output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
	  output_asm_insn ("nop", xoperands);
	}
      /* Pure portable runtime doesn't allow be/ble; we also don't have
	 PIC support in the assembler/linker, so this sequence is needed.  */
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  xoperands[0] = call_dest;
	  /* Get the address of our target into %r29.  */
	  output_asm_insn ("ldil L%%%0,%%r29", xoperands);
	  output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Jump to our target address in %r29.  */
	  output_asm_insn ("bv,n 0(%%r29)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	  /* NOTE(review): this branch returns here and so skips the
	     jump-in-delay-slot re-emission below, unlike the other two
	     branches -- confirm a filled delay slot cannot occur on the
	     portable-runtime path.  */
	  return "";
	}
      /* PIC long millicode call sequence.  */
      else
	{
	  xoperands[0] = call_dest;
	  xoperands[1] = gen_label_rtx ();
	  /* Get our address + 8 into %r1.  */
	  output_asm_insn ("bl .+8,%%r1", xoperands);

	  /* Add %r1 to the offset of our target from the next insn.  */
	  output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);

	  /* Get the return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Branch to our target which is in %r1.  */
	  output_asm_insn ("bv,n 0(%%r1)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	}

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	     - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Returns the residual output template (often "" since most output is
   emitted directly via output_asm_insn).  For long calls this may copy FP
   argument registers into GRs, defer a plabel for later output, and delete
   a delay-slot insn by turning it into a NOTE.  */

char *
output_call (insn, call_dest)
  rtx insn;
  rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && get_attr_length (insn) == 8)
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r2%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 8)
    {
      int delay_insn_deleted = 0;
      /* Shadows the outer xoperands; only indices 0 and 1 are used here.  */
      rtx xoperands[2];
      rtx link;

      /* We need to emit an inline long-call branch.  Furthermore,
	 because we're changing a named function call into an indirect
	 function call well after the parameters have been set up, we
	 need to make sure any FP args appear in both the integer
	 and FP registers.  Also, we need move any delay slot insn
	 out of the delay slot.  And finally, we can't rely on the linker
	 being able to fix the call to $$dyncall!  -- Yuk!.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call (and in fact before argument
	     relocating.  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* Now copy any FP arguments into integer registers.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
	{
	  int arg_mode, regno;
	  rtx use = XEXP (link, 0);
	  if (! (GET_CODE (use) == USE
		 && GET_CODE (XEXP (use, 0)) == REG
		 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	    continue;

	  arg_mode = GET_MODE (XEXP (use, 0));
	  regno = REGNO (XEXP (use, 0));
	  /* Is it a floating point register?  */
	  if (regno >= 32 && regno <= 39)
	    {
	      /* Copy from the FP register into an integer register
		 (via memory).  */
	      if (arg_mode == SFmode)
		{
		  xoperands[0] = XEXP (use, 0);
		  /* Map FP arg reg to its corresponding GR arg reg.  */
		  xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
		  output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	      else
		{
		  xoperands[0] = XEXP (use, 0);
		  /* DFmode arg occupies a GR pair; copy both halves.  */
		  xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
		  output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	    }
	}

      /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
	 we don't have any direct calls in that case.  */
      if (flag_pic)
	{
	  /* We have to load the address of the function using a procedure
	     label (plabel).  The LP and RP relocs don't work reliably for PIC,
	     so we make a plain 32 bit plabel in the data segment instead.  We
	     have to defer outputting it of course...  Not pretty.  */

	  xoperands[0] = gen_label_rtx ();
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil LT%%%0,%%r19", xoperands);
	  output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);

	  /* Record the plabel so it gets output with the data section
	     later; grow the array one element at a time.  */
	  if (deferred_plabels == 0)
	    deferred_plabels = (struct defer_plab *)
	      xmalloc (1 * sizeof (struct defer_plab));
	  else
	    deferred_plabels = (struct defer_plab *)
	      xrealloc (deferred_plabels,
			(n_deferred_plabels + 1) * sizeof (struct defer_plab));
	  deferred_plabels[n_deferred_plabels].internal_label = xoperands[0];
	  deferred_plabels[n_deferred_plabels].symbol = call_dest;
	  n_deferred_plabels++;

	  /* Get our address + 8 into %r1.  */
	  output_asm_insn ("bl .+8,%%r1", xoperands);

	  /* Add %r1 to the offset of dyncall from the next insn.  */
	  output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);

	  /* Get the return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Branch to our target which is in %r1.  */
	  output_asm_insn ("bv 0(%%r1)", xoperands);

	  /* Copy the return address into %r2 also.  */
	  output_asm_insn ("copy %%r31,%%r2", xoperands);
	}
      else
	{
	  /* No PIC stuff to worry about.  We can use ldil;ble.  */
	  xoperands[0] = call_dest;

	  /* Get the address of our target into %r22.  */
	  output_asm_insn ("ldil LP%%%0,%%r22", xoperands);
	  output_asm_insn ("ldo RP%%%0(%%r22),%%r22", xoperands);

	  /* Get the high part of the address of $dyncall into %r2, then
	     add in the low part in the branch instruction.  */
	  output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
	  output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);

	  /* Copy the return pointer into both %r31 and %r2.  */
	  output_asm_insn ("copy %%r31,%%r2", xoperands);
	}

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	     - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
5221 extern struct obstack permanent_obstack;
5222 extern struct obstack *saveable_obstack;
/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.

   SYM is the SYMBOL_REF to rename; PERMANENT nonzero means the new name
   must outlive the current function.

   For reasons too disgusting to describe storage for the new name
   is allocated either on the saveable_obstack (released at function
   exit) or on the permanent_obstack for things that can never change
   (libcall names for example).  */

void
hppa_encode_label (sym, permanent)
  rtx sym;
  int permanent;
{
  char *str = XSTR (sym, 0);
  int len = strlen (str);
  char *newstr;

  /* len + 2: one byte for the '@' marker, one for the trailing NUL.  */
  newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
			  len + 2);

  /* Preserve a leading '*' (assembler-name marker) in the buffer.
     NOTE(review): since newstr is advanced past it, the pointer stored
     into XSTR below begins at the '@', so the '*' is not part of the
     visible name -- confirm this is intended.  */
  if (str[0] == '*')
    *newstr++ = *str++;
  strcpy (newstr + 1, str);
  *newstr = '@';
  XSTR (sym,0) = newstr;
}
5254 function_label_operand (op, mode)
5255 rtx op;
5256 enum machine_mode mode;
5258 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5261 /* Returns 1 if OP is a function label involved in a simple addition
5262 with a constant. Used to keep certain patterns from matching
5263 during instruction combination. */
5265 is_function_label_plus_const (op)
5266 rtx op;
5268 /* Strip off any CONST. */
5269 if (GET_CODE (op) == CONST)
5270 op = XEXP (op, 0);
5272 return (GET_CODE (op) == PLUS
5273 && function_label_operand (XEXP (op, 0), Pmode)
5274 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5277 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5278 use in fmpyadd instructions. */
5280 fmpyaddoperands (operands)
5281 rtx *operands;
5283 enum machine_mode mode = GET_MODE (operands[0]);
5285 /* Must be a floating point mode. */
5286 if (mode != SFmode && mode != DFmode)
5287 return 0;
5289 /* All modes must be the same. */
5290 if (! (mode == GET_MODE (operands[1])
5291 && mode == GET_MODE (operands[2])
5292 && mode == GET_MODE (operands[3])
5293 && mode == GET_MODE (operands[4])
5294 && mode == GET_MODE (operands[5])))
5295 return 0;
5297 /* All operands must be registers. */
5298 if (! (GET_CODE (operands[1]) == REG
5299 && GET_CODE (operands[2]) == REG
5300 && GET_CODE (operands[3]) == REG
5301 && GET_CODE (operands[4]) == REG
5302 && GET_CODE (operands[5]) == REG))
5303 return 0;
5305 /* Only 2 real operands to the addition. One of the input operands must
5306 be the same as the output operand. */
5307 if (! rtx_equal_p (operands[3], operands[4])
5308 && ! rtx_equal_p (operands[3], operands[5]))
5309 return 0;
5311 /* Inout operand of add can not conflict with any operands from multiply. */
5312 if (rtx_equal_p (operands[3], operands[0])
5313 || rtx_equal_p (operands[3], operands[1])
5314 || rtx_equal_p (operands[3], operands[2]))
5315 return 0;
5317 /* multiply can not feed into addition operands. */
5318 if (rtx_equal_p (operands[4], operands[0])
5319 || rtx_equal_p (operands[5], operands[0]))
5320 return 0;
5322 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5323 if (mode == SFmode
5324 && (REGNO (operands[0]) < 57
5325 || REGNO (operands[1]) < 57
5326 || REGNO (operands[2]) < 57
5327 || REGNO (operands[3]) < 57
5328 || REGNO (operands[4]) < 57
5329 || REGNO (operands[5]) < 57))
5330 return 0;
5332 /* Passed. Operands are suitable for fmpyadd. */
5333 return 1;
5336 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5337 use in fmpysub instructions. */
5339 fmpysuboperands (operands)
5340 rtx *operands;
5342 enum machine_mode mode = GET_MODE (operands[0]);
5344 /* Must be a floating point mode. */
5345 if (mode != SFmode && mode != DFmode)
5346 return 0;
5348 /* All modes must be the same. */
5349 if (! (mode == GET_MODE (operands[1])
5350 && mode == GET_MODE (operands[2])
5351 && mode == GET_MODE (operands[3])
5352 && mode == GET_MODE (operands[4])
5353 && mode == GET_MODE (operands[5])))
5354 return 0;
5356 /* All operands must be registers. */
5357 if (! (GET_CODE (operands[1]) == REG
5358 && GET_CODE (operands[2]) == REG
5359 && GET_CODE (operands[3]) == REG
5360 && GET_CODE (operands[4]) == REG
5361 && GET_CODE (operands[5]) == REG))
5362 return 0;
5364 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5365 operation, so operands[4] must be the same as operand[3]. */
5366 if (! rtx_equal_p (operands[3], operands[4]))
5367 return 0;
5369 /* multiply can not feed into subtraction. */
5370 if (rtx_equal_p (operands[5], operands[0]))
5371 return 0;
5373 /* Inout operand of sub can not conflict with any operands from multiply. */
5374 if (rtx_equal_p (operands[3], operands[0])
5375 || rtx_equal_p (operands[3], operands[1])
5376 || rtx_equal_p (operands[3], operands[2]))
5377 return 0;
5379 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5380 if (mode == SFmode
5381 && (REGNO (operands[0]) < 57
5382 || REGNO (operands[1]) < 57
5383 || REGNO (operands[2]) < 57
5384 || REGNO (operands[3]) < 57
5385 || REGNO (operands[4]) < 57
5386 || REGNO (operands[5]) < 57))
5387 return 0;
5389 /* Passed. Operands are suitable for fmpysub. */
5390 return 1;
5394 plus_xor_ior_operator (op, mode)
5395 rtx op;
5396 enum machine_mode mode;
5398 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5399 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions (shift amounts 1, 2 and 3).  */
int
shadd_constant_p (val)
  int val;
{
  return (val == 2 || val == 4 || val == 8);
}
5414 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5415 the valid constant for shadd instructions. */
5417 shadd_operand (op, mode)
5418 rtx op;
5419 enum machine_mode mode;
5421 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
/* Return 1 if OP is valid as a base register in a reg + reg address.
   MODE is the mode expected by register_operand for the pseudo case.  */

basereg_operand (op, mode)
  rtx op;
  enum machine_mode mode;
{
  /* cse will create some unscaled indexed addresses, however; it
     generally isn't a win on the PA, so avoid creating unscaled
     indexed addresses until after cse is finished.  */
  if (!cse_not_expected)
    return 0;

  /* Once reload has started everything is considered valid.  Reload should
     only create indexed addresses using the stack/frame pointer, and any
     others were checked for validity when created by the combine pass.

     Also allow any register when TARGET_NO_SPACE_REGS is in effect since
     we don't have to worry about the braindamaged implicit space register
     selection using the basereg only (rather than effective address)
     screwing us over.  */
  if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
    return (GET_CODE (op) == REG);

  /* Stack is always OK for indexing.  */
  if (op == stack_pointer_rtx)
    return 1;

  /* While it's always safe to index off the frame pointer, it's not
     always profitable, particularly when the frame pointer is being
     eliminated.  */
  if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
    return 1;

  /* The only other valid OPs are pseudo registers with
     REGNO_POINTER_FLAG set (i.e. registers known to hold pointers).  */
  if (GET_CODE (op) != REG
      || REGNO (op) < FIRST_PSEUDO_REGISTER
      || ! register_operand (op, mode))
    return 0;

  return REGNO_POINTER_FLAG (REGNO (op));
}
5468 /* Return 1 if this operand is anything other than a hard register. */
5471 non_hard_reg_operand (op, mode)
5472 rtx op;
5473 enum machine_mode mode;
5475 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5478 /* Return 1 if INSN branches forward. Should be using insn_addresses
5479 to avoid walking through all the insns... */
5481 forward_branch_p (insn)
5482 rtx insn;
5484 rtx label = JUMP_LABEL (insn);
5486 while (insn)
5488 if (insn == label)
5489 break;
5490 else
5491 insn = NEXT_INSN (insn);
5494 return (insn == label);
5497 /* Return 1 if OP is an equality comparison, else return 0. */
5499 eq_neq_comparison_operator (op, mode)
5500 rtx op;
5501 enum machine_mode mode;
5503 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5506 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5508 movb_comparison_operator (op, mode)
5509 rtx op;
5510 enum machine_mode mode;
5512 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5513 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5516 /* Return 1 if INSN is in the delay slot of a call instruction. */
5518 jump_in_call_delay (insn)
5519 rtx insn;
5522 if (GET_CODE (insn) != JUMP_INSN)
5523 return 0;
5525 if (PREV_INSN (insn)
5526 && PREV_INSN (PREV_INSN (insn))
5527 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5529 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5531 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5532 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5535 else
5536 return 0;
5539 /* Output an unconditional move and branch insn. */
5541 char *
5542 output_parallel_movb (operands, length)
5543 rtx *operands;
5544 int length;
5546 /* These are the cases in which we win. */
5547 if (length == 4)
5548 return "mov%I1b,tr %1,%0,%2";
5550 /* None of these cases wins, but they don't lose either. */
5551 if (dbr_sequence_length () == 0)
5553 /* Nothing in the delay slot, fake it by putting the combined
5554 insn (the copy or add) in the delay slot of a bl. */
5555 if (GET_CODE (operands[1]) == CONST_INT)
5556 return "bl %2,0\n\tldi %1,%0";
5557 else
5558 return "bl %2,0\n\tcopy %1,%0";
5560 else
5562 /* Something in the delay slot, but we've got a long branch. */
5563 if (GET_CODE (operands[1]) == CONST_INT)
5564 return "ldi %1,%0\n\tbl %2,0";
5565 else
5566 return "copy %1,%0\n\tbl %2,0";
5570 /* Output an unconditional add and branch insn. */
5572 char *
5573 output_parallel_addb (operands, length)
5574 rtx *operands;
5575 int length;
5577 /* To make life easy we want operand0 to be the shared input/output
5578 operand and operand1 to be the readonly operand. */
5579 if (operands[0] == operands[1])
5580 operands[1] = operands[2];
5582 /* These are the cases in which we win. */
5583 if (length == 4)
5584 return "add%I1b,tr %1,%0,%3";
5586 /* None of these cases win, but they don't lose either. */
5587 if (dbr_sequence_length () == 0)
5589 /* Nothing in the delay slot, fake it by putting the combined
5590 insn (the copy or add) in the delay slot of a bl. */
5591 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5593 else
5595 /* Something in the delay slot, but we've got a long branch. */
5596 return "add%I1 %1,%0,%0\n\tbl %3,0";
5600 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5601 is used to discourage creating parallel movb/addb insns since a jump
5602 which immediately follows a call can execute in the delay slot of the
5603 call. */
5605 following_call (insn)
5606 rtx insn;
5608 /* Find the previous real insn, skipping NOTEs. */
5609 insn = PREV_INSN (insn);
5610 while (insn && GET_CODE (insn) == NOTE)
5611 insn = PREV_INSN (insn);
5613 /* Check for CALL_INSNs and millicode calls. */
5614 if (insn
5615 && (GET_CODE (insn) == CALL_INSN
5616 || (GET_CODE (insn) == INSN
5617 && GET_CODE (PATTERN (insn)) != SEQUENCE
5618 && GET_CODE (PATTERN (insn)) != USE
5619 && GET_CODE (PATTERN (insn)) != CLOBBER
5620 && get_attr_type (insn) == TYPE_MILLI)))
5621 return 1;
5623 return 0;
5626 /* We use this hook to perform a PA specific optimization which is difficult
5627 to do in earlier passes.
5629 We want the delay slots of branches within jump tables to be filled.
5630 None of the compiler passes at the moment even has the notion that a
5631 PA jump table doesn't contain addresses, but instead contains actual
5632 instructions!
5634 Because we actually jump into the table, the addresses of each entry
5635 must stay constant in relation to the beginning of the table (which
5636 itself must stay constant relative to the instruction to jump into
5637 it). I don't believe we can guarantee earlier passes of the compiler
5638 will adhere to those rules.
5640 So, late in the compilation process we find all the jump tables, and
5641 expand them into real code -- eg each entry in the jump table vector
5642 will get an appropriate label followed by a jump to the final target.
5644 Reorg and the final jump pass can then optimize these branches and
5645 fill their delay slots. We end up with smaller, more efficient code.
5647 The jump instructions within the table are special; we must be able
5648 to identify them during assembly output (if the jumps don't get filled
5649 we need to emit a nop rather than nullifying the delay slot)). We
5650 identify jumps in switch tables by marking the SET with DImode. */
5652 pa_reorg (insns)
5653 rtx insns;
5655 rtx insn;
5657 remove_useless_addtr_insns (insns, 1);
5659 pa_combine_instructions (get_insns ());
5661 /* This is fairly cheap, so always run it if optimizing. */
5662 if (optimize > 0)
5664 /* Find and explode all ADDR_VEC insns. */
5665 insns = get_insns ();
5666 for (insn = insns; insn; insn = NEXT_INSN (insn))
5668 rtx pattern, tmp, location;
5669 unsigned int length, i;
5671 /* Find an ADDR_VEC insn to explode. */
5672 if (GET_CODE (insn) != JUMP_INSN
5673 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5674 continue;
5676 /* If needed, emit marker for the beginning of the branch table. */
5677 if (TARGET_GAS)
5678 emit_insn_before (gen_begin_brtab (), insn);
5680 pattern = PATTERN (insn);
5681 location = PREV_INSN (insn);
5682 length = XVECLEN (pattern, 0);
5684 for (i = 0; i < length; i++)
5686 /* Emit the jump itself. */
5687 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
5688 tmp = emit_jump_insn_after (tmp, location);
5689 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
5690 LABEL_NUSES (JUMP_LABEL (tmp))++;
5692 /* Emit a BARRIER after the jump. */
5693 location = NEXT_INSN (location);
5694 emit_barrier_after (location);
5696 /* Put a CODE_LABEL before each so jump.c does not optimize
5697 the jumps away. */
5698 location = NEXT_INSN (location);
5699 tmp = gen_label_rtx ();
5700 LABEL_NUSES (tmp) = 1;
5701 emit_label_after (tmp, location);
5702 location = NEXT_INSN (location);
5705 /* If needed, emit marker for the end of the branch table. */
5706 if (TARGET_GAS)
5707 emit_insn_before (gen_end_brtab (), location);
5708 /* Delete the ADDR_VEC. */
5709 delete_insn (insn);
5712 else if (TARGET_GAS)
5714 /* Sill need an end_brtab insn. */
5715 insns = get_insns ();
5716 for (insn = insns; insn; insn = NEXT_INSN (insn))
5718 /* Find an ADDR_VEC insn. */
5719 if (GET_CODE (insn) != JUMP_INSN
5720 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5721 continue;
5723 /* Now generate markers for the beginning and end of the
5724 branc table. */
5725 emit_insn_before (gen_begin_brtab (), insn);
5726 emit_insn_after (gen_end_brtab (), insn);
5731 /* The PA has a number of odd instructions which can perform multiple
5732 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
5733 it may be profitable to combine two instructions into one instruction
5734 with two outputs. It's not profitable PA2.0 machines because the
5735 two outputs would take two slots in the reorder buffers.
5737 This routine finds instructions which can be combined and combines
5738 them. We only support some of the potential combinations, and we
5739 only try common ways to find suitable instructions.
5741 * addb can add two registers or a register and a small integer
5742 and jump to a nearby (+-8k) location. Normally the jump to the
5743 nearby location is conditional on the result of the add, but by
5744 using the "true" condition we can make the jump unconditional.
5745 Thus addb can perform two independent operations in one insn.
5747 * movb is similar to addb in that it can perform a reg->reg
5748 or small immediate->reg copy and jump to a nearby (+-8k location).
5750 * fmpyadd and fmpysub can perform a FP multiply and either an
5751 FP add or FP sub if the operands of the multiply and add/sub are
5752 independent (there are other minor restrictions). Note both
5753 the fmpy and fadd/fsub can in theory move to better spots according
5754 to data dependencies, but for now we require the fmpy stay at a
5755 fixed location.
5757 * Many of the memory operations can perform pre & post updates
5758 of index registers. GCC's pre/post increment/decrement addressing
5759 is far too simple to take advantage of all the possibilities. This
5760 pass may not be suitable since those insns may not be independent.
5762 * comclr can compare two ints or an int and a register, nullify
5763 the following instruction and zero some other register. This
5764 is more difficult to use as it's harder to find an insn which
5765 will generate a comclr than finding something like an unconditional
5766 branch. (conditional moves & long branches create comclr insns).
5768 * Most arithmetic operations can conditionally skip the next
5769 instruction. They can be viewed as "perform this operation
5770 and conditionally jump to this nearby location" (where nearby
5771 is an insns away). These are difficult to use due to the
5772 branch length restrictions. */
5774 pa_combine_instructions (insns)
5775 rtx insns;
5777 rtx anchor, new;
5779 /* This can get expensive since the basic algorithm is on the
5780 order of O(n^2) (or worse). Only do it for -O2 or higher
5781 levels of optimizaton. */
5782 if (optimize < 2)
5783 return;
5785 /* Walk down the list of insns looking for "anchor" insns which
5786 may be combined with "floating" insns. As the name implies,
5787 "anchor" instructions don't move, while "floating" insns may
5788 move around. */
5789 new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
5790 new = make_insn_raw (new);
5792 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
5794 enum attr_pa_combine_type anchor_attr;
5795 enum attr_pa_combine_type floater_attr;
5797 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
5798 Also ignore any special USE insns. */
5799 if (GET_CODE (anchor) != INSN
5800 && GET_CODE (anchor) != JUMP_INSN
5801 && GET_CODE (anchor) != CALL_INSN
5802 || GET_CODE (PATTERN (anchor)) == USE
5803 || GET_CODE (PATTERN (anchor)) == CLOBBER
5804 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
5805 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
5806 continue;
5808 anchor_attr = get_attr_pa_combine_type (anchor);
5809 /* See if anchor is an insn suitable for combination. */
5810 if (anchor_attr == PA_COMBINE_TYPE_FMPY
5811 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
5812 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
5813 && ! forward_branch_p (anchor)))
5815 rtx floater;
5817 for (floater = PREV_INSN (anchor);
5818 floater;
5819 floater = PREV_INSN (floater))
5821 if (GET_CODE (floater) == NOTE
5822 || (GET_CODE (floater) == INSN
5823 && (GET_CODE (PATTERN (floater)) == USE
5824 || GET_CODE (PATTERN (floater)) == CLOBBER)))
5825 continue;
5827 /* Anything except a regular INSN will stop our search. */
5828 if (GET_CODE (floater) != INSN
5829 || GET_CODE (PATTERN (floater)) == ADDR_VEC
5830 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
5832 floater = NULL_RTX;
5833 break;
5836 /* See if FLOATER is suitable for combination with the
5837 anchor. */
5838 floater_attr = get_attr_pa_combine_type (floater);
5839 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
5840 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
5841 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5842 && floater_attr == PA_COMBINE_TYPE_FMPY))
5844 /* If ANCHOR and FLOATER can be combined, then we're
5845 done with this pass. */
5846 if (pa_can_combine_p (new, anchor, floater, 0,
5847 SET_DEST (PATTERN (floater)),
5848 XEXP (SET_SRC (PATTERN (floater)), 0),
5849 XEXP (SET_SRC (PATTERN (floater)), 1)))
5850 break;
5853 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
5854 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
5856 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
5858 if (pa_can_combine_p (new, anchor, floater, 0,
5859 SET_DEST (PATTERN (floater)),
5860 XEXP (SET_SRC (PATTERN (floater)), 0),
5861 XEXP (SET_SRC (PATTERN (floater)), 1)))
5862 break;
5864 else
5866 if (pa_can_combine_p (new, anchor, floater, 0,
5867 SET_DEST (PATTERN (floater)),
5868 SET_SRC (PATTERN (floater)),
5869 SET_SRC (PATTERN (floater))))
5870 break;
5875 /* If we didn't find anything on the backwards scan try forwards. */
5876 if (!floater
5877 && (anchor_attr == PA_COMBINE_TYPE_FMPY
5878 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
5880 for (floater = anchor; floater; floater = NEXT_INSN (floater))
5882 if (GET_CODE (floater) == NOTE
5883 || (GET_CODE (floater) == INSN
5884 && (GET_CODE (PATTERN (floater)) == USE
5885 || GET_CODE (PATTERN (floater)) == CLOBBER)))
5887 continue;
5889 /* Anything except a regular INSN will stop our search. */
5890 if (GET_CODE (floater) != INSN
5891 || GET_CODE (PATTERN (floater)) == ADDR_VEC
5892 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
5894 floater = NULL_RTX;
5895 break;
5898 /* See if FLOATER is suitable for combination with the
5899 anchor. */
5900 floater_attr = get_attr_pa_combine_type (floater);
5901 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
5902 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
5903 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5904 && floater_attr == PA_COMBINE_TYPE_FMPY))
5906 /* If ANCHOR and FLOATER can be combined, then we're
5907 done with this pass. */
5908 if (pa_can_combine_p (new, anchor, floater, 1,
5909 SET_DEST (PATTERN (floater)),
5910 XEXP (SET_SRC (PATTERN(floater)),0),
5911 XEXP(SET_SRC(PATTERN(floater)),1)))
5912 break;
5917 /* FLOATER will be nonzero if we found a suitable floating
5918 insn for combination with ANCHOR. */
5919 if (floater
5920 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
5921 || anchor_attr == PA_COMBINE_TYPE_FMPY))
5923 /* Emit the new instruction and delete the old anchor. */
5924 emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
5925 gen_rtvec (2, PATTERN (anchor),
5926 PATTERN (floater))),
5927 anchor);
5928 PUT_CODE (anchor, NOTE);
5929 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
5930 NOTE_SOURCE_FILE (anchor) = 0;
5932 /* Emit a special USE insn for FLOATER, then delete
5933 the floating insn. */
5934 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
5935 delete_insn (floater);
5937 continue;
5939 else if (floater
5940 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
5942 rtx temp;
5943 /* Emit the new_jump instruction and delete the old anchor. */
5944 temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
5945 gen_rtvec (2, PATTERN (anchor),
5946 PATTERN (floater))),
5947 anchor);
5948 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
5949 PUT_CODE (anchor, NOTE);
5950 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
5951 NOTE_SOURCE_FILE (anchor) = 0;
5953 /* Emit a special USE insn for FLOATER, then delete
5954 the floating insn. */
5955 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
5956 delete_insn (floater);
5957 continue;
5964 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
5965 rtx new, anchor, floater;
5966 int reversed;
5967 rtx dest, src1, src2;
5969 int insn_code_number;
5970 rtx start, end;
5972 /* Create a PARALLEL with the patterns of ANCHOR and
5973 FLOATER, try to recognize it, then test constraints
5974 for the resulting pattern.
5976 If the pattern doesn't match or the constraints
5977 aren't met keep searching for a suitable floater
5978 insn. */
5979 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
5980 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
5981 INSN_CODE (new) = -1;
5982 insn_code_number = recog_memoized (new);
5983 if (insn_code_number < 0
5984 || !constrain_operands (insn_code_number, 1))
5985 return 0;
5987 if (reversed)
5989 start = anchor;
5990 end = floater;
5992 else
5994 start = floater;
5995 end = anchor;
5998 /* There's up to three operands to consider. One
5999 output and two inputs.
6001 The output must not be used between FLOATER & ANCHOR
6002 exclusive. The inputs must not be set between
6003 FLOATER and ANCHOR exclusive. */
6005 if (reg_used_between_p (dest, start, end))
6006 return 0;
6008 if (reg_set_between_p (src1, start, end))
6009 return 0;
6011 if (reg_set_between_p (src2, start, end))
6012 return 0;
6014 /* If we get here, then everything is good. */
6015 return 1;