Import final gcc2 snapshot (990109)
[official-gcc.git] / gcc / config / pa / pa.c
blob98d481ca6f06890b75cf043bf8b0232744bf7b71
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 93, 94, 95, 96, 97, 1998 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
/* Forward declarations for static helpers defined later in this file.  */
static void restore_unscaled_index_insn_codes PROTO((rtx));
static void record_unscaled_index_insn_codes PROTO((rtx));

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for
   (set from the -mschedule= option; see override_options).  */
char *pa_cpu_string;

/* Set by the FUNCTION_PROFILER macro. */
int hp_profile_labelno;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Whether or not the current function uses an out-of-line prologue
   and epilogue.  */
static int out_of_line_prologue_epilogue;

/* Old-style (unprototyped) forward declaration, K&R convention.  */
static rtx find_addr_reg ();

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;

/* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which
   uses an unscaled indexed address before delay slot scheduling.  */
static int *unscaled_index_insn_codes;

/* Upper bound for the array.  */
static int max_unscaled_index_insn_codes_uid;
/* Validate the -mschedule= option, select the processor model to
   schedule for, and diagnose unsupported combinations of target
   options (PIC vs. portable runtime / fast indirect calls / profiling,
   out-of-line prologues, -g without GAS).  */
void
override_options ()
{
  /* Default to 7100 scheduling.  If the 7100LC scheduling ever
     gets reasonably tuned, it should be the default since that
     what most PAs sold now are.  */
  if (pa_cpu_string == NULL
      || ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else
    {
      warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (flag_pic && profile_flag)
    {
      warning ("PIC code generation is not compatible with profiling\n");
    }

  if (TARGET_SPACE && (flag_pic || profile_flag))
    {
      warning ("Out of line entry/exit sequences are not compatible\n");
      warning ("with PIC or profiling\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled.");
      write_symbols = NO_DEBUG;
    }
}
147 /* Return non-zero only if OP is a register of mode MODE,
148 or CONST0_RTX. */
150 reg_or_0_operand (op, mode)
151 rtx op;
152 enum machine_mode mode;
154 return (op == CONST0_RTX (mode) || register_operand (op, mode));
157 /* Return non-zero if OP is suitable for use in a call to a named
158 function.
160 (???) For 2.5 try to eliminate either call_operand_address or
161 function_label_operand, they perform very similar functions. */
163 call_operand_address (op, mode)
164 rtx op;
165 enum machine_mode mode;
167 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Return 1 if OP is a SYMBOL_REF, a LABEL_REF, or a CONST of the form
   (const (plus (symbol_ref|label_ref) (const_int))).  MODE is ignored.  */
int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      /* Only accept symbol/label plus an integer offset.  */
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  A SUBREG of a MEM is looked through;
   the address is tested for the symbolic forms this port emits
   (SYMBOL_REF, CONST, HIGH, LABEL_REF).  */
int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}
222 /* Return 1 if the operand is either a register or a memory operand that is
223 not symbolic. */
226 reg_or_nonsymb_mem_operand (op, mode)
227 register rtx op;
228 enum machine_mode mode;
230 if (register_operand (op, mode))
231 return 1;
233 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
234 return 1;
236 return 0;
239 /* Return 1 if the operand is either a register, zero, or a memory operand
240 that is not symbolic. */
243 reg_or_0_or_nonsymb_mem_operand (op, mode)
244 register rtx op;
245 enum machine_mode mode;
247 if (register_operand (op, mode))
248 return 1;
250 if (op == CONST0_RTX (mode))
251 return 1;
253 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
254 return 1;
256 return 0;
/* Accept any constant that can be moved in one instruction into a
   general register: a 14-bit signed immediate (ldo), a left-justified
   21-bit value (ldil, low 11 bits zero), or a zdepi-encodable value.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
	  || zdepi_cint_p (intval));
}
/* Accept anything that can be moved in one instruction into a general
   register: a register, a one-insn constant (see cint_ok_for_move),
   a LO_SUM of a register and a constant, a scaled-indexed address
   (MULT of a register by the mode size, plus a register), or any
   other legitimate memory address.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);
  if (GET_CODE (op) == LO_SUM)
    return (register_operand (XEXP (op, 0), Pmode)
	    && CONSTANT_P (XEXP (op, 1)));

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  The MULT may appear on either side of the
     PLUS; the scale must equal the mode size.  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}
312 /* Accept REG and any CONST_INT that can be moved in one instruction into a
313 general register. */
315 reg_or_cint_move_operand (op, mode)
316 rtx op;
317 enum machine_mode mode;
319 if (register_operand (op, mode))
320 return 1;
322 if (GET_CODE (op) == CONST_INT)
323 return cint_ok_for_move (INTVAL (op));
325 return 0;
/* Return 1 if OP is a label reference usable in PIC code: a LABEL_REF,
   or a CONST of the form (label_ref + const_int).  Always 0 when not
   generating PIC.  MODE is ignored.  */
int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
/* Return 1 if OP is a hard floating-point register.  The reg_renumber
   test means this is only meaningful after register allocation has
   run (reg_renumber is null before then).  MODE is ignored.  */
int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return reg_renumber && FP_REG_P (op);
}
359 /* Return truth value of whether OP can be used as an operand in a
360 three operand arithmetic insn that accepts registers of mode MODE
361 or 14-bit signed integers. */
363 arith_operand (op, mode)
364 rtx op;
365 enum machine_mode mode;
367 return (register_operand (op, mode)
368 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
371 /* Return truth value of whether OP can be used as an operand in a
372 three operand arithmetic insn that accepts registers of mode MODE
373 or 11-bit signed integers. */
375 arith11_operand (op, mode)
376 rtx op;
377 enum machine_mode mode;
379 return (register_operand (op, mode)
380 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
/* A constant integer suitable for use in a PRE_MODIFY memory
   reference: a 14-bit negative-to-small-positive displacement
   [-0x2000, 0x10).  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}
/* A constant integer suitable for use in a POST_MODIFY memory
   reference: the mirror image of pre_cint_operand's range,
   [-0x10, 0x2000).  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}
/* Return 1 if OP is a register of mode MODE, or a CONST_DOUBLE whose
   low word fits in 14 signed bits and whose high word is the correct
   sign extension of it.  Note the precedence: `>=' binds tighter than
   `==', so the last clause compares (HIGH >= 0) against
   ((LOW & 0x1000) == 0) -- i.e. the sign of the high word must match
   the sign bit of the 14-bit low value.  */
int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) >= 0
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register (general registers 1..31; register 0 reads
   as hardwired zero and is excluded).  */
int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}
431 /* Return truth value of whether OP is a integer which fits the
432 range constraining immediate operands in three-address insns. */
435 int5_operand (op, mode)
436 rtx op;
437 enum machine_mode mode;
439 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
443 uint5_operand (op, mode)
444 rtx op;
445 enum machine_mode mode;
447 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
451 int11_operand (op, mode)
452 rtx op;
453 enum machine_mode mode;
455 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
/* Return 1 if OP is a constant representable as an unsigned 32-bit
   value.  On hosts whose HOST_WIDE_INT is wider than 32 bits every
   such value fits in a CONST_INT; on 32-bit hosts a CONST_DOUBLE
   with a zero high word is also accepted.  MODE is ignored.  */
int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}
475 arith5_operand (op, mode)
476 rtx op;
477 enum machine_mode mode;
479 return register_operand (op, mode) || int5_operand (op, mode);
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi deposits a (sign-extended) 5-bit immediate into a field, so
   acceptable values are a contiguous run of ones, possibly preceded
   (within 4 bits) by more ones -- i.e. x is lsb_mask-extended.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.
     lsb_mask isolates the lowest set bit; adding it to x >> 4 and
     masking clears the run of ones -- the result must be a power
     of two (or zero) for zdepi to work.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
497 /* True iff depi or extru can be used to compute (reg & mask).
498 Accept bit pattern like these:
499 0....01....1
500 1....10....0
501 1..10..01..1 */
503 and_mask_p (mask)
504 unsigned HOST_WIDE_INT mask;
506 mask = ~mask;
507 mask += mask & -mask;
508 return (mask & (mask - 1)) == 0;
511 /* True iff depi or extru can be used to compute (reg & OP). */
513 and_operand (op, mode)
514 rtx op;
515 enum machine_mode mode;
517 return (register_operand (op, mode)
518 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
/* True iff depi can be used to compute (reg | MASK); i.e. MASK is a
   single contiguous run of one bits (adding the lowest set bit must
   clear the whole run, leaving a power of two or zero).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
/* True iff depi can be used to compute (reg | OP).  Note that unlike
   and_operand, a plain register is NOT accepted here.  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}
540 lhs_lshift_operand (op, mode)
541 rtx op;
542 enum machine_mode mode;
544 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  After discarding the low 4 bits, what remains must be
   a run of ones starting at bit 0 (x & (x + 1) == 0).  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
563 arith32_operand (op, mode)
564 rtx op;
565 enum machine_mode mode;
567 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
571 pc_or_label_operand (op, mode)
572 rtx op;
573 enum machine_mode mode;
575 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
578 /* Legitimize PIC addresses. If the address is already
579 position-independent, we return ORIG. Newly generated
580 position-independent addresses go to REG. If we need more
581 than one register, we lose. */
584 legitimize_pic_address (orig, mode, reg)
585 rtx orig, reg;
586 enum machine_mode mode;
588 rtx pic_ref = orig;
590 /* Labels need special handling. */
591 if (pic_label_operand (orig))
593 emit_insn (gen_pic_load_label (reg, orig));
594 current_function_uses_pic_offset_table = 1;
595 return reg;
597 if (GET_CODE (orig) == SYMBOL_REF)
599 if (reg == 0)
600 abort ();
602 if (flag_pic == 2)
604 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
605 pic_ref
606 = gen_rtx_MEM (Pmode,
607 gen_rtx_LO_SUM (Pmode, reg,
608 gen_rtx_UNSPEC (SImode,
609 gen_rtvec (1, orig),
610 0)));
612 else
613 pic_ref = gen_rtx_MEM (Pmode,
614 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
615 orig));
617 current_function_uses_pic_offset_table = 1;
618 RTX_UNCHANGING_P (pic_ref) = 1;
619 emit_move_insn (reg, pic_ref);
620 return reg;
622 else if (GET_CODE (orig) == CONST)
624 rtx base;
626 if (GET_CODE (XEXP (orig, 0)) == PLUS
627 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
628 return orig;
630 if (reg == 0)
631 abort ();
633 if (GET_CODE (XEXP (orig, 0)) == PLUS)
635 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
636 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
637 base == reg ? 0 : reg);
639 else abort ();
640 if (GET_CODE (orig) == CONST_INT)
642 if (INT_14_BITS (orig))
643 return plus_constant_for_output (base, INTVAL (orig));
644 orig = force_reg (Pmode, orig);
646 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
647 /* Likewise, should we set special REG_NOTEs here? */
649 return pic_ref;
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx;
     enum machine_mode mode;
{
  rtx orig = x;

  /* PIC addresses have their own legitimizer.  */
  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST. */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (SImode, XEXP (x, 1));
      return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
	 know it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REGNO_POINTER_FLAG (REGNO (reg1)))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REGNO_POINTER_FLAG (REGNO (reg2)))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floatint point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      /* Preserve PLUS vs. MINUS from the original expression.  */
	      regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
						 regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
1041 /* For the HPPA, REG and REG+CONST is cost 0
1042 and addresses involving symbolic constants are cost 2.
1044 PIC addresses are very expensive.
1046 It is no coincidence that this has the same structure
1047 as GO_IF_LEGITIMATE_ADDRESS. */
1049 hppa_address_cost (X)
1050 rtx X;
1052 if (GET_CODE (X) == PLUS)
1053 return 1;
1054 else if (GET_CODE (X) == LO_SUM)
1055 return 1;
1056 else if (GET_CODE (X) == HIGH)
1057 return 2;
1058 return 4;
1061 /* Emit insns to move operands[1] into operands[0].
1063 Return 1 if we have written out everything that needs to be done to
1064 do the move. Otherwise, return 0 and the caller will emit the move
1065 normally. */
1068 emit_move_sequence (operands, mode, scratch_reg)
1069 rtx *operands;
1070 enum machine_mode mode;
1071 rtx scratch_reg;
1073 register rtx operand0 = operands[0];
1074 register rtx operand1 = operands[1];
1075 register rtx tem;
1077 if (reload_in_progress && GET_CODE (operand0) == REG
1078 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1079 operand0 = reg_equiv_mem[REGNO (operand0)];
1080 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1081 && GET_CODE (SUBREG_REG (operand0)) == REG
1082 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1084 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1085 operand0 = alter_subreg (operand0);
1088 if (reload_in_progress && GET_CODE (operand1) == REG
1089 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1090 operand1 = reg_equiv_mem[REGNO (operand1)];
1091 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1092 && GET_CODE (SUBREG_REG (operand1)) == REG
1093 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1095 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1096 operand1 = alter_subreg (operand1);
1099 if (reload_in_progress && GET_CODE (operand0) == MEM
1100 && ((tem = find_replacement (&XEXP (operand0, 0)))
1101 != XEXP (operand0, 0)))
1102 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1103 if (reload_in_progress && GET_CODE (operand1) == MEM
1104 && ((tem = find_replacement (&XEXP (operand1, 0)))
1105 != XEXP (operand1, 0)))
1106 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1108 /* Handle secondary reloads for loads/stores of FP registers from
1109 REG+D addresses where D does not fit in 5 bits, including
1110 (subreg (mem (addr))) cases. */
1111 if (fp_reg_operand (operand0, mode)
1112 && ((GET_CODE (operand1) == MEM
1113 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1114 || ((GET_CODE (operand1) == SUBREG
1115 && GET_CODE (XEXP (operand1, 0)) == MEM
1116 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1117 && scratch_reg)
1119 if (GET_CODE (operand1) == SUBREG)
1120 operand1 = XEXP (operand1, 0);
1122 scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
1124 /* D might not fit in 14 bits either; for such cases load D into
1125 scratch reg. */
1126 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1128 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1129 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1130 SImode,
1131 XEXP (XEXP (operand1, 0), 0),
1132 scratch_reg));
1134 else
1135 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1136 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1137 gen_rtx_MEM (mode, scratch_reg)));
1138 return 1;
1140 else if (fp_reg_operand (operand1, mode)
1141 && ((GET_CODE (operand0) == MEM
1142 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1143 || ((GET_CODE (operand0) == SUBREG)
1144 && GET_CODE (XEXP (operand0, 0)) == MEM
1145 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1146 && scratch_reg)
1148 if (GET_CODE (operand0) == SUBREG)
1149 operand0 = XEXP (operand0, 0);
1151 scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
1152 /* D might not fit in 14 bits either; for such cases load D into
1153 scratch reg. */
1154 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1156 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1157 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1158 SImode,
1159 XEXP (XEXP (operand0, 0), 0),
1160 scratch_reg));
1162 else
1163 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1164 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1165 operand1));
1166 return 1;
1168 /* Handle secondary reloads for loads of FP registers from constant
1169 expressions by forcing the constant into memory.
1171 use scratch_reg to hold the address of the memory location.
1173 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1174 NO_REGS when presented with a const_int and an register class
1175 containing only FP registers. Doing so unfortunately creates
1176 more problems than it solves. Fix this for 2.5. */
1177 else if (fp_reg_operand (operand0, mode)
1178 && CONSTANT_P (operand1)
1179 && scratch_reg)
1181 rtx xoperands[2];
1183 /* Force the constant into memory and put the address of the
1184 memory location into scratch_reg. */
1185 xoperands[0] = scratch_reg;
1186 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1187 emit_move_sequence (xoperands, Pmode, 0);
1189 /* Now load the destination register. */
1190 emit_insn (gen_rtx_SET (mode, operand0,
1191 gen_rtx_MEM (mode, scratch_reg)));
1192 return 1;
1194 /* Handle secondary reloads for SAR. These occur when trying to load
1195 the SAR from memory a FP register, or with a constant. */
1196 else if (GET_CODE (operand0) == REG
1197 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1198 && (GET_CODE (operand1) == MEM
1199 || GET_CODE (operand1) == CONST_INT
1200 || (GET_CODE (operand1) == REG
1201 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1202 && scratch_reg)
1204 /* D might not fit in 14 bits either; for such cases load D into
1205 scratch reg. */
1206 if (GET_CODE (operand1) == MEM
1207 && !memory_address_p (SImode, XEXP (operand1, 0)))
1209 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1210 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1211 SImode,
1212 XEXP (XEXP (operand1, 0), 0),
1213 scratch_reg));
1214 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1),
1215 scratch_reg));
1217 else
1218 emit_move_insn (scratch_reg, operand1);
1219 emit_move_insn (operand0, scratch_reg);
1220 return 1;
1222 /* Handle most common case: storing into a register. */
1223 else if (register_operand (operand0, mode))
1225 if (register_operand (operand1, mode)
1226 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1227 || (operand1 == CONST0_RTX (mode))
1228 || (GET_CODE (operand1) == HIGH
1229 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1230 /* Only `general_operands' can come here, so MEM is ok. */
1231 || GET_CODE (operand1) == MEM)
1233 /* Run this case quickly. */
1234 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1235 return 1;
1238 else if (GET_CODE (operand0) == MEM)
1240 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1241 && !(reload_in_progress || reload_completed))
1243 rtx temp = gen_reg_rtx (DFmode);
1245 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1246 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1247 return 1;
1249 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1251 /* Run this case quickly. */
1252 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1253 return 1;
1255 if (! (reload_in_progress || reload_completed))
1257 operands[0] = validize_mem (operand0);
1258 operands[1] = operand1 = force_reg (mode, operand1);
1262 /* Simplify the source if we need to. */
1263 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1264 || (GET_CODE (operand1) == HIGH
1265 && symbolic_operand (XEXP (operand1, 0), mode)))
1267 int ishighonly = 0;
1269 if (GET_CODE (operand1) == HIGH)
1271 ishighonly = 1;
1272 operand1 = XEXP (operand1, 0);
1274 if (symbolic_operand (operand1, mode))
1276 /* Argh. The assembler and linker can't handle arithmetic
1277 involving plabels.
1279 So we force the plabel into memory, load operand0 from
1280 the memory location, then add in the constant part. */
1281 if (GET_CODE (operand1) == CONST
1282 && GET_CODE (XEXP (operand1, 0)) == PLUS
1283 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1285 rtx temp, const_part;
1287 /* Figure out what (if any) scratch register to use. */
1288 if (reload_in_progress || reload_completed)
1289 scratch_reg = scratch_reg ? scratch_reg : operand0;
1290 else if (flag_pic)
1291 scratch_reg = gen_reg_rtx (Pmode);
1293 /* Save away the constant part of the expression. */
1294 const_part = XEXP (XEXP (operand1, 0), 1);
1295 if (GET_CODE (const_part) != CONST_INT)
1296 abort ();
1298 /* Force the function label into memory. */
1299 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1301 /* Get the address of the memory location. PIC-ify it if
1302 necessary. */
1303 temp = XEXP (temp, 0);
1304 if (flag_pic)
1305 temp = legitimize_pic_address (temp, mode, scratch_reg);
1307 /* Put the address of the memory location into our destination
1308 register. */
1309 operands[1] = temp;
1310 emit_move_sequence (operands, mode, scratch_reg);
1312 /* Now load from the memory location into our destination
1313 register. */
1314 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1315 emit_move_sequence (operands, mode, scratch_reg);
1317 /* And add back in the constant part. */
1318 expand_inc (operand0, const_part);
1320 return 1;
1323 if (flag_pic)
1325 rtx temp;
1327 if (reload_in_progress || reload_completed)
1328 temp = scratch_reg ? scratch_reg : operand0;
1329 else
1330 temp = gen_reg_rtx (Pmode);
1332 /* (const (plus (symbol) (const_int))) must be forced to
1333 memory during/after reload if the const_int will not fit
1334 in 14 bits. */
1335 if (GET_CODE (operand1) == CONST
1336 && GET_CODE (XEXP (operand1, 0)) == PLUS
1337 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1338 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1339 && (reload_completed || reload_in_progress)
1340 && flag_pic)
1342 operands[1] = force_const_mem (mode, operand1);
1343 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1344 mode, temp);
1345 emit_move_sequence (operands, mode, temp);
1347 else
1349 operands[1] = legitimize_pic_address (operand1, mode, temp);
1350 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1353 /* On the HPPA, references to data space are supposed to use dp,
1354 register 27, but showing it in the RTL inhibits various cse
1355 and loop optimizations. */
1356 else
1358 rtx temp, set;
1360 if (reload_in_progress || reload_completed)
1361 temp = scratch_reg ? scratch_reg : operand0;
1362 else
1363 temp = gen_reg_rtx (mode);
1365 /* Loading a SYMBOL_REF into a register makes that register
1366 safe to be used as the base in an indexed address.
1368 Don't mark hard registers though. That loses. */
1369 if (GET_CODE (operand0) == REG
1370 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1371 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1372 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1373 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1374 if (ishighonly)
1375 set = gen_rtx_SET (mode, operand0, temp);
1376 else
1377 set = gen_rtx_SET (VOIDmode,
1378 operand0,
1379 gen_rtx_LO_SUM (mode, temp, operand1));
1381 emit_insn (gen_rtx_SET (VOIDmode,
1382 temp,
1383 gen_rtx_HIGH (mode, operand1)));
1384 emit_insn (set);
1387 return 1;
1389 else if (GET_CODE (operand1) != CONST_INT
1390 || ! cint_ok_for_move (INTVAL (operand1)))
1392 rtx temp;
1394 if (reload_in_progress || reload_completed)
1395 temp = operand0;
1396 else
1397 temp = gen_reg_rtx (mode);
1399 emit_insn (gen_rtx_SET (VOIDmode, temp,
1400 gen_rtx_HIGH (mode, operand1)));
1401 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1404 /* Now have insn-emit do whatever it normally does. */
1405 return 0;
1408 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1409 it will need a link/runtime reloc). */
1412 reloc_needed (exp)
1413 tree exp;
1415 int reloc = 0;
1417 switch (TREE_CODE (exp))
1419 case ADDR_EXPR:
1420 return 1;
1422 case PLUS_EXPR:
1423 case MINUS_EXPR:
1424 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1425 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1426 break;
1428 case NOP_EXPR:
1429 case CONVERT_EXPR:
1430 case NON_LVALUE_EXPR:
1431 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1432 break;
1434 case CONSTRUCTOR:
1436 register tree link;
1437 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1438 if (TREE_VALUE (link) != 0)
1439 reloc |= reloc_needed (TREE_VALUE (link));
1441 break;
1443 case ERROR_MARK:
1444 break;
1446 return reloc;
1449 /* Does operand (which is a symbolic_operand) live in text space? If
1450 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1453 read_only_operand (operand)
1454 rtx operand;
1456 if (GET_CODE (operand) == CONST)
1457 operand = XEXP (XEXP (operand, 0), 0);
1458 if (flag_pic)
1460 if (GET_CODE (operand) == SYMBOL_REF)
1461 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1463 else
1465 if (GET_CODE (operand) == SYMBOL_REF)
1466 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1468 return 1;
1472 /* Return the best assembler insn template
1473 for moving operands[1] into operands[0] as a fullword. */
1474 char *
1475 singlemove_string (operands)
1476 rtx *operands;
1478 HOST_WIDE_INT intval;
1480 if (GET_CODE (operands[0]) == MEM)
1481 return "stw %r1,%0";
1482 if (GET_CODE (operands[1]) == MEM)
1483 return "ldw %1,%0";
1484 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1486 long i;
1487 REAL_VALUE_TYPE d;
1489 if (GET_MODE (operands[1]) != SFmode)
1490 abort ();
1492 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1493 bit pattern. */
1494 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1495 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1497 operands[1] = GEN_INT (i);
1498 /* Fall through to CONST_INT case. */
1500 if (GET_CODE (operands[1]) == CONST_INT)
1502 intval = INTVAL (operands[1]);
1504 if (VAL_14_BITS_P (intval))
1505 return "ldi %1,%0";
1506 else if ((intval & 0x7ff) == 0)
1507 return "ldil L'%1,%0";
1508 else if (zdepi_cint_p (intval))
1509 return "zdepi %Z1,%0";
1510 else
1511 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1513 return "copy %1,%0";
1517 /* Compute position (in OP[1]) and width (in OP[2])
1518 useful for copying IMM to a register using the zdepi
1519 instructions. Store the immediate value to insert in OP[0]. */
1520 void
1521 compute_zdepi_operands (imm, op)
1522 unsigned HOST_WIDE_INT imm;
1523 unsigned *op;
1525 int lsb, len;
1527 /* Find the least significant set bit in IMM. */
1528 for (lsb = 0; lsb < 32; lsb++)
1530 if ((imm & 1) != 0)
1531 break;
1532 imm >>= 1;
1535 /* Choose variants based on *sign* of the 5-bit field. */
1536 if ((imm & 0x10) == 0)
1537 len = (lsb <= 28) ? 4 : 32 - lsb;
1538 else
1540 /* Find the width of the bitstring in IMM. */
1541 for (len = 5; len < 32; len++)
1543 if ((imm & (1 << len)) == 0)
1544 break;
1547 /* Sign extend IMM as a 5-bit value. */
1548 imm = (imm & 0xf) - 0x10;
1551 op[0] = imm;
1552 op[1] = 31 - lsb;
1553 op[2] = len;
1556 /* Output assembler code to perform a doubleword move insn
1557 with operands OPERANDS. */
1559 char *
1560 output_move_double (operands)
1561 rtx *operands;
1563 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1564 rtx latehalf[2];
1565 rtx addreg0 = 0, addreg1 = 0;
1567 /* First classify both operands. */
1569 if (REG_P (operands[0]))
1570 optype0 = REGOP;
1571 else if (offsettable_memref_p (operands[0]))
1572 optype0 = OFFSOP;
1573 else if (GET_CODE (operands[0]) == MEM)
1574 optype0 = MEMOP;
1575 else
1576 optype0 = RNDOP;
1578 if (REG_P (operands[1]))
1579 optype1 = REGOP;
1580 else if (CONSTANT_P (operands[1]))
1581 optype1 = CNSTOP;
1582 else if (offsettable_memref_p (operands[1]))
1583 optype1 = OFFSOP;
1584 else if (GET_CODE (operands[1]) == MEM)
1585 optype1 = MEMOP;
1586 else
1587 optype1 = RNDOP;
1589 /* Check for the cases that the operand constraints are not
1590 supposed to allow to happen. Abort if we get one,
1591 because generating code for these cases is painful. */
1593 if (optype0 != REGOP && optype1 != REGOP)
1594 abort ();
1596 /* Handle auto decrementing and incrementing loads and stores
1597 specifically, since the structure of the function doesn't work
1598 for them without major modification. Do it better when we learn
1599 this port about the general inc/dec addressing of PA.
1600 (This was written by tege. Chide him if it doesn't work.) */
1602 if (optype0 == MEMOP)
1604 /* We have to output the address syntax ourselves, since print_operand
1605 doesn't deal with the addresses we want to use. Fix this later. */
1607 rtx addr = XEXP (operands[0], 0);
1608 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1610 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1612 operands[0] = XEXP (addr, 0);
1613 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1614 abort ();
1616 if (!reg_overlap_mentioned_p (high_reg, addr))
1618 /* No overlap between high target register and address
1619 register. (We do this in a non-obvious way to
1620 save a register file writeback) */
1621 if (GET_CODE (addr) == POST_INC)
1622 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1623 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1625 else
1626 abort();
1628 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1630 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1632 operands[0] = XEXP (addr, 0);
1633 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1634 abort ();
1636 if (!reg_overlap_mentioned_p (high_reg, addr))
1638 /* No overlap between high target register and address
1639 register. (We do this in a non-obvious way to
1640 save a register file writeback) */
1641 if (GET_CODE (addr) == PRE_INC)
1642 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1643 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1645 else
1646 abort();
1649 if (optype1 == MEMOP)
1651 /* We have to output the address syntax ourselves, since print_operand
1652 doesn't deal with the addresses we want to use. Fix this later. */
1654 rtx addr = XEXP (operands[1], 0);
1655 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1657 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1659 operands[1] = XEXP (addr, 0);
1660 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1661 abort ();
1663 if (!reg_overlap_mentioned_p (high_reg, addr))
1665 /* No overlap between high target register and address
1666 register. (We do this in a non-obvious way to
1667 save a register file writeback) */
1668 if (GET_CODE (addr) == POST_INC)
1669 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1670 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1672 else
1674 /* This is an undefined situation. We should load into the
1675 address register *and* update that register. Probably
1676 we don't need to handle this at all. */
1677 if (GET_CODE (addr) == POST_INC)
1678 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1679 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1682 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1684 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1686 operands[1] = XEXP (addr, 0);
1687 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1688 abort ();
1690 if (!reg_overlap_mentioned_p (high_reg, addr))
1692 /* No overlap between high target register and address
1693 register. (We do this in a non-obvious way to
1694 save a register file writeback) */
1695 if (GET_CODE (addr) == PRE_INC)
1696 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1697 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1699 else
1701 /* This is an undefined situation. We should load into the
1702 address register *and* update that register. Probably
1703 we don't need to handle this at all. */
1704 if (GET_CODE (addr) == PRE_INC)
1705 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1706 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1709 else if (GET_CODE (addr) == PLUS
1710 && GET_CODE (XEXP (addr, 0)) == MULT)
1712 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1714 if (!reg_overlap_mentioned_p (high_reg, addr))
1716 rtx xoperands[3];
1718 xoperands[0] = high_reg;
1719 xoperands[1] = XEXP (addr, 1);
1720 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1721 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1722 output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
1723 return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
1725 else
1727 rtx xoperands[3];
1729 xoperands[0] = high_reg;
1730 xoperands[1] = XEXP (addr, 1);
1731 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1732 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1733 output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
1734 return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
1740 /* If an operand is an unoffsettable memory ref, find a register
1741 we can increment temporarily to make it refer to the second word. */
1743 if (optype0 == MEMOP)
1744 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1746 if (optype1 == MEMOP)
1747 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1749 /* Ok, we can do one word at a time.
1750 Normally we do the low-numbered word first.
1752 In either case, set up in LATEHALF the operands to use
1753 for the high-numbered word and in some cases alter the
1754 operands in OPERANDS to be suitable for the low-numbered word. */
1756 if (optype0 == REGOP)
1757 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
1758 else if (optype0 == OFFSOP)
1759 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1760 else
1761 latehalf[0] = operands[0];
1763 if (optype1 == REGOP)
1764 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
1765 else if (optype1 == OFFSOP)
1766 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1767 else if (optype1 == CNSTOP)
1768 split_double (operands[1], &operands[1], &latehalf[1]);
1769 else
1770 latehalf[1] = operands[1];
1772 /* If the first move would clobber the source of the second one,
1773 do them in the other order.
1775 This can happen in two cases:
1777 mem -> register where the first half of the destination register
1778 is the same register used in the memory's address. Reload
1779 can create such insns.
1781 mem in this case will be either register indirect or register
1782 indirect plus a valid offset.
1784 register -> register move where REGNO(dst) == REGNO(src + 1)
1785 someone (Tim/Tege?) claimed this can happen for parameter loads.
1787 Handle mem -> register case first. */
1788 if (optype0 == REGOP
1789 && (optype1 == MEMOP || optype1 == OFFSOP)
1790 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1791 operands[1], 0))
1793 /* Do the late half first. */
1794 if (addreg1)
1795 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1796 output_asm_insn (singlemove_string (latehalf), latehalf);
1798 /* Then clobber. */
1799 if (addreg1)
1800 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1801 return singlemove_string (operands);
1804 /* Now handle register -> register case. */
1805 if (optype0 == REGOP && optype1 == REGOP
1806 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1808 output_asm_insn (singlemove_string (latehalf), latehalf);
1809 return singlemove_string (operands);
1812 /* Normal case: do the two words, low-numbered first. */
1814 output_asm_insn (singlemove_string (operands), operands);
1816 /* Make any unoffsettable addresses point at high-numbered word. */
1817 if (addreg0)
1818 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1819 if (addreg1)
1820 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1822 /* Do that word. */
1823 output_asm_insn (singlemove_string (latehalf), latehalf);
1825 /* Undo the adds we just did. */
1826 if (addreg0)
1827 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1828 if (addreg1)
1829 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1831 return "";
1834 char *
1835 output_fp_move_double (operands)
1836 rtx *operands;
1838 if (FP_REG_P (operands[0]))
1840 if (FP_REG_P (operands[1])
1841 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1842 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1843 else
1844 output_asm_insn ("fldd%F1 %1,%0", operands);
1846 else if (FP_REG_P (operands[1]))
1848 output_asm_insn ("fstd%F0 %1,%0", operands);
1850 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1852 if (GET_CODE (operands[0]) == REG)
1854 rtx xoperands[2];
1855 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
1856 xoperands[0] = operands[0];
1857 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1859 /* This is a pain. You have to be prepared to deal with an
1860 arbitrary address here including pre/post increment/decrement.
1862 so avoid this in the MD. */
1863 else
1864 abort ();
1866 else abort ();
1867 return "";
1870 /* Return a REG that occurs in ADDR with coefficient 1.
1871 ADDR can be effectively incremented by incrementing REG. */
1873 static rtx
1874 find_addr_reg (addr)
1875 rtx addr;
1877 while (GET_CODE (addr) == PLUS)
1879 if (GET_CODE (XEXP (addr, 0)) == REG)
1880 addr = XEXP (addr, 0);
1881 else if (GET_CODE (XEXP (addr, 1)) == REG)
1882 addr = XEXP (addr, 1);
1883 else if (CONSTANT_P (XEXP (addr, 0)))
1884 addr = XEXP (addr, 1);
1885 else if (CONSTANT_P (XEXP (addr, 1)))
1886 addr = XEXP (addr, 0);
1887 else
1888 abort ();
1890 if (GET_CODE (addr) == REG)
1891 return addr;
1892 abort ();
1895 /* Emit code to perform a block move.
1897 OPERANDS[0] is the destination pointer as a REG, clobbered.
1898 OPERANDS[1] is the source pointer as a REG, clobbered.
1899 OPERANDS[2] is a register for temporary storage.
1900 OPERANDS[4] is the size as a CONST_INT
1901 OPERANDS[3] is a register for temporary storage.
1902 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
1903 OPERANDS[6] is another temporary register. */
1905 char *
1906 output_block_move (operands, size_is_constant)
1907 rtx *operands;
1908 int size_is_constant;
1910 int align = INTVAL (operands[5]);
1911 unsigned long n_bytes = INTVAL (operands[4]);
1913 /* We can't move more than four bytes at a time because the PA
1914 has no longer integer move insns. (Could use fp mem ops?) */
1915 if (align > 4)
1916 align = 4;
1918 /* Note that we know each loop below will execute at least twice
1919 (else we would have open-coded the copy). */
1920 switch (align)
1922 case 4:
1923 /* Pre-adjust the loop counter. */
1924 operands[4] = GEN_INT (n_bytes - 8);
1925 output_asm_insn ("ldi %4,%2", operands);
1927 /* Copying loop. */
1928 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1929 output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
1930 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1931 output_asm_insn ("addib,>= -8,%2,.-12", operands);
1932 output_asm_insn ("stws,ma %6,4(0,%0)", operands);
1934 /* Handle the residual. There could be up to 7 bytes of
1935 residual to copy! */
1936 if (n_bytes % 8 != 0)
1938 operands[4] = GEN_INT (n_bytes % 4);
1939 if (n_bytes % 8 >= 4)
1940 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1941 if (n_bytes % 4 != 0)
1942 output_asm_insn ("ldw 0(0,%1),%6", operands);
1943 if (n_bytes % 8 >= 4)
1944 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1945 if (n_bytes % 4 != 0)
1946 output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
1948 return "";
1950 case 2:
1951 /* Pre-adjust the loop counter. */
1952 operands[4] = GEN_INT (n_bytes - 4);
1953 output_asm_insn ("ldi %4,%2", operands);
1955 /* Copying loop. */
1956 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1957 output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
1958 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1959 output_asm_insn ("addib,>= -4,%2,.-12", operands);
1960 output_asm_insn ("sths,ma %6,2(0,%0)", operands);
1962 /* Handle the residual. */
1963 if (n_bytes % 4 != 0)
1965 if (n_bytes % 4 >= 2)
1966 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1967 if (n_bytes % 2 != 0)
1968 output_asm_insn ("ldb 0(0,%1),%6", operands);
1969 if (n_bytes % 4 >= 2)
1970 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1971 if (n_bytes % 2 != 0)
1972 output_asm_insn ("stb %6,0(0,%0)", operands);
1974 return "";
1976 case 1:
1977 /* Pre-adjust the loop counter. */
1978 operands[4] = GEN_INT (n_bytes - 2);
1979 output_asm_insn ("ldi %4,%2", operands);
1981 /* Copying loop. */
1982 output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1983 output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
1984 output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
1985 output_asm_insn ("addib,>= -2,%2,.-12", operands);
1986 output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
1988 /* Handle the residual. */
1989 if (n_bytes % 2 != 0)
1991 output_asm_insn ("ldb 0(0,%1),%3", operands);
1992 output_asm_insn ("stb %3,0(0,%0)", operands);
1994 return "";
1996 default:
1997 abort ();
2001 /* Count the number of insns necessary to handle this block move.
2003 Basic structure is the same as emit_block_move, except that we
2004 count insns rather than emit them. */
2007 compute_movstrsi_length (insn)
2008 rtx insn;
2010 rtx pat = PATTERN (insn);
2011 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2012 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
2013 unsigned int n_insns = 0;
2015 /* We can't move more than four bytes at a time because the PA
2016 has no longer integer move insns. (Could use fp mem ops?) */
2017 if (align > 4)
2018 align = 4;
2020 /* The basic copying loop. */
2021 n_insns = 6;
2023 /* Residuals. */
2024 if (n_bytes % (2 * align) != 0)
2026 if ((n_bytes % (2 * align)) >= align)
2027 n_insns += 2;
2029 if ((n_bytes % align) != 0)
2030 n_insns += 2;
2033 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2034 return n_insns * 4;
2038 char *
2039 output_and (operands)
2040 rtx *operands;
2042 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2044 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2045 int ls0, ls1, ms0, p, len;
2047 for (ls0 = 0; ls0 < 32; ls0++)
2048 if ((mask & (1 << ls0)) == 0)
2049 break;
2051 for (ls1 = ls0; ls1 < 32; ls1++)
2052 if ((mask & (1 << ls1)) != 0)
2053 break;
2055 for (ms0 = ls1; ms0 < 32; ms0++)
2056 if ((mask & (1 << ms0)) == 0)
2057 break;
2059 if (ms0 != 32)
2060 abort();
2062 if (ls1 == 32)
2064 len = ls0;
2066 if (len == 0)
2067 abort ();
2069 operands[2] = GEN_INT (len);
2070 return "extru %1,31,%2,%0";
2072 else
2074 /* We could use this `depi' for the case above as well, but `depi'
2075 requires one more register file access than an `extru'. */
2077 p = 31 - ls0;
2078 len = ls1 - ls0;
2080 operands[2] = GEN_INT (p);
2081 operands[3] = GEN_INT (len);
2082 return "depi 0,%2,%3,%0";
2085 else
2086 return "and %1,%2,%0";
2089 char *
2090 output_ior (operands)
2091 rtx *operands;
2093 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2094 int bs0, bs1, p, len;
2096 if (INTVAL (operands[2]) == 0)
2097 return "copy %1,%0";
2099 for (bs0 = 0; bs0 < 32; bs0++)
2100 if ((mask & (1 << bs0)) != 0)
2101 break;
2103 for (bs1 = bs0; bs1 < 32; bs1++)
2104 if ((mask & (1 << bs1)) == 0)
2105 break;
2107 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2108 abort();
2110 p = 31 - bs0;
2111 len = bs1 - bs0;
2113 operands[2] = GEN_INT (p);
2114 operands[3] = GEN_INT (len);
2115 return "depi -1,%2,%3,%0";
/* Output an ascii string.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  int i;
  int chars_output = 0;
  unsigned char chunk[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      int limit = size - i < 4 ? size - i : 4;
      int n = 0;
      int j;

      /* Encode up to four source bytes into CHUNK.  */
      for (j = 0; j < limit; j++)
	{
	  unsigned int c = p[i + j];

	  /* Quote and backslash get a protecting backslash and are
	     then emitted literally by the printable case below.  */
	  if (c == '\"' || c == '\\')
	    chunk[n++] = '\\';
	  if (c >= ' ' && c < 0177)
	    chunk[n++] = c;
	  else
	    {
	      /* Non-printable: emit as \xNN.  */
	      unsigned int nib;

	      chunk[n++] = '\\';
	      chunk[n++] = 'x';
	      nib = c / 16 + '0';
	      if (nib > '9')
		nib += 'a' - '9' - 1;
	      chunk[n++] = nib;
	      nib = c % 16 + '0';
	      if (nib > '9')
		nib += 'a' - '9' - 1;
	      chunk[n++] = nib;
	    }
	}

      /* Start a fresh .STRING directive before the line gets too
	 long for the HP assembler.  */
      if (chars_output + n > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (chunk, 1, n, file);
      chars_output += n;
    }

  fputs ("\"\n", file);
}
2177 /* Try to rewrite floating point comparisons & branches to avoid
2178 useless add,tr insns.
2180 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2181 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2182 first attempt to remove useless add,tr insns. It is zero
2183 for the second pass as reorg sometimes leaves bogus REG_DEAD
2184 notes lying around.
2186 When CHECK_NOTES is zero we can only eliminate add,tr insns
2187 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2188 instructions. */
2189 void
2190 remove_useless_addtr_insns (insns, check_notes)
2191 rtx insns;
2192 int check_notes;
2194 rtx insn;
2195 int all;
2196 static int pass = 0;
2198 /* This is fairly cheap, so always run it when optimizing. */
2199 if (optimize > 0)
2201 int fcmp_count = 0;
2202 int fbranch_count = 0;
2204 /* Walk all the insns in this function looking for fcmp & fbranch
2205 instructions. Keep track of how many of each we find. */
2206 insns = get_insns ();
2207 for (insn = insns; insn; insn = next_insn (insn))
2209 rtx tmp;
2211 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2212 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2213 continue;
2215 tmp = PATTERN (insn);
2217 /* It must be a set. */
2218 if (GET_CODE (tmp) != SET)
2219 continue;
2221 /* If the destination is CCFP, then we've found an fcmp insn. */
2222 tmp = SET_DEST (tmp);
2223 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2225 fcmp_count++;
2226 continue;
2229 tmp = PATTERN (insn);
2230 /* If this is an fbranch instruction, bump the fbranch counter. */
2231 if (GET_CODE (tmp) == SET
2232 && SET_DEST (tmp) == pc_rtx
2233 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2234 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2235 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2236 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2238 fbranch_count++;
2239 continue;
2244 /* Find all floating point compare + branch insns. If possible,
2245 reverse the comparison & the branch to avoid add,tr insns. */
2246 for (insn = insns; insn; insn = next_insn (insn))
2248 rtx tmp, next;
2250 /* Ignore anything that isn't an INSN. */
2251 if (GET_CODE (insn) != INSN)
2252 continue;
2254 tmp = PATTERN (insn);
2256 /* It must be a set. */
2257 if (GET_CODE (tmp) != SET)
2258 continue;
2260 /* The destination must be CCFP, which is register zero. */
2261 tmp = SET_DEST (tmp);
2262 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2263 continue;
2265 /* INSN should be a set of CCFP.
2267 See if the result of this insn is used in a reversed FP
2268 conditional branch. If so, reverse our condition and
2269 the branch. Doing so avoids useless add,tr insns. */
2270 next = next_insn (insn);
2271 while (next)
2273 /* Jumps, calls and labels stop our search. */
2274 if (GET_CODE (next) == JUMP_INSN
2275 || GET_CODE (next) == CALL_INSN
2276 || GET_CODE (next) == CODE_LABEL)
2277 break;
2279 /* As does another fcmp insn. */
2280 if (GET_CODE (next) == INSN
2281 && GET_CODE (PATTERN (next)) == SET
2282 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2283 && REGNO (SET_DEST (PATTERN (next))) == 0)
2284 break;
2286 next = next_insn (next);
2289 /* Is NEXT_INSN a branch? */
2290 if (next
2291 && GET_CODE (next) == JUMP_INSN)
2293 rtx pattern = PATTERN (next);
2295 /* If it a reversed fp conditional branch (eg uses add,tr)
2296 and CCFP dies, then reverse our conditional and the branch
2297 to avoid the add,tr. */
2298 if (GET_CODE (pattern) == SET
2299 && SET_DEST (pattern) == pc_rtx
2300 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2301 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2302 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2303 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2304 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2305 && (fcmp_count == fbranch_count
2306 || (check_notes
2307 && find_regno_note (next, REG_DEAD, 0))))
2309 /* Reverse the branch. */
2310 tmp = XEXP (SET_SRC (pattern), 1);
2311 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2312 XEXP (SET_SRC (pattern), 2) = tmp;
2313 INSN_CODE (next) = -1;
2315 /* Reverse our condition. */
2316 tmp = PATTERN (insn);
2317 PUT_CODE (XEXP (tmp, 1),
2318 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2324 pass = !pass;
2328 /* You may have trouble believing this, but this is the HP-PA stack
2329 layout. Wow.
2331 Offset Contents
2333 Variable arguments (optional; any number may be allocated)
2335 SP-(4*(N+9)) arg word N
2337 SP-56 arg word 5
2338 SP-52 arg word 4
2340 Fixed arguments (must be allocated; may remain unused)
2342 SP-48 arg word 3
2343 SP-44 arg word 2
2344 SP-40 arg word 1
2345 SP-36 arg word 0
2347 Frame Marker
2349 SP-32 External Data Pointer (DP)
2350 SP-28 External sr4
2351 SP-24 External/stub RP (RP')
2352 SP-20 Current RP
2353 SP-16 Static Link
2354 SP-12 Clean up
2355 SP-8 Calling Stub RP (RP'')
2356 SP-4 Previous SP
2358 Top of Frame
2360 SP-0 Stack Pointer (points to next available address)
2364 /* This function saves registers as follows. Registers marked with ' are
2365 this function's registers (as opposed to the previous function's).
2366 If a frame_pointer isn't needed, r4 is saved as a general register;
2367 the space for the frame pointer is still allocated, though, to keep
2368 things simple.
2371 Top of Frame
2373 SP (FP') Previous FP
2374 SP + 4 Alignment filler (sigh)
2375 SP + 8 Space for locals reserved here.
2379 SP + n All call saved register used.
2383 SP + o All call saved fp registers used.
2387 SP + p (SP') points to next available address.
2391 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2392 Handle case where DISP > 8k by using the add_high_const pattern.
2394 Note in DISP > 8k case, we will leave the high part of the address
2395 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2397 static void
2398 store_reg (reg, disp, base)
2399 int reg, disp, base;
2401 if (VAL_14_BITS_P (disp))
2402 emit_move_insn (gen_rtx_MEM (SImode,
2403 plus_constant (gen_rtx_REG (SImode, base),
2404 disp)),
2405 gen_rtx_REG (SImode, reg));
2406 else
2408 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2409 gen_rtx_REG (SImode, base),
2410 GEN_INT (disp)));
2411 emit_move_insn (gen_rtx_MEM (SImode,
2412 gen_rtx_LO_SUM (SImode,
2413 gen_rtx_REG (SImode, 1),
2414 GEN_INT (disp))),
2415 gen_rtx_REG (SImode, reg));
2419 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2420 Handle case where DISP > 8k by using the add_high_const pattern.
2422 Note in DISP > 8k case, we will leave the high part of the address
2423 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2425 static void
2426 load_reg (reg, disp, base)
2427 int reg, disp, base;
2429 if (VAL_14_BITS_P (disp))
2430 emit_move_insn (gen_rtx_REG (SImode, reg),
2431 gen_rtx_MEM (SImode,
2432 plus_constant (gen_rtx_REG (SImode, base),
2433 disp)));
2434 else
2436 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2437 gen_rtx_REG (SImode, base),
2438 GEN_INT (disp)));
2439 emit_move_insn (gen_rtx_REG (SImode, reg),
2440 gen_rtx_MEM (SImode,
2441 gen_rtx_LO_SUM (SImode,
2442 gen_rtx_REG (SImode, 1),
2443 GEN_INT (disp))));
2447 /* Emit RTL to set REG to the value specified by BASE+DISP.
2448 Handle case where DISP > 8k by using the add_high_const pattern.
2450 Note in DISP > 8k case, we will leave the high part of the address
2451 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2453 static void
2454 set_reg_plus_d (reg, base, disp)
2455 int reg, base, disp;
2457 if (VAL_14_BITS_P (disp))
2458 emit_move_insn (gen_rtx_REG (SImode, reg),
2459 plus_constant (gen_rtx_REG (SImode, base), disp));
2460 else
2462 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2463 gen_rtx_REG (SImode, base),
2464 GEN_INT (disp)));
2465 emit_move_insn (gen_rtx_REG (SImode, reg),
2466 gen_rtx_LO_SUM (SImode,
2467 gen_rtx_REG (SImode, 1),
2468 GEN_INT (disp)));
2472 /* Global variables set by FUNCTION_PROLOGUE. */
2473 /* Size of frame. Need to know this to emit return insns from
2474 leaf procedures. */
2475 static int actual_fsize;
2476 static int local_fsize, save_fregs;
2479 compute_frame_size (size, fregs_live)
2480 int size;
2481 int *fregs_live;
2483 extern int current_function_outgoing_args_size;
2484 int i, fsize;
2486 /* 8 is space for frame pointer + filler. If any frame is allocated
2487 we need to add this in because of STARTING_FRAME_OFFSET. */
2488 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2490 /* We must leave enough space for all the callee saved registers
2491 from 3 .. highest used callee save register since we don't
2492 know if we're going to have an inline or out of line prologue
2493 and epilogue. */
2494 for (i = 18; i >= 3; i--)
2495 if (regs_ever_live[i])
2497 fsize += 4 * (i - 2);
2498 break;
2501 /* Round the stack. */
2502 fsize = (fsize + 7) & ~7;
2504 /* We must leave enough space for all the callee saved registers
2505 from 3 .. highest used callee save register since we don't
2506 know if we're going to have an inline or out of line prologue
2507 and epilogue. */
2508 for (i = 66; i >= 48; i -= 2)
2509 if (regs_ever_live[i] || regs_ever_live[i + 1])
2511 if (fregs_live)
2512 *fregs_live = 1;
2514 fsize += 4 * (i - 46);
2515 break;
2518 fsize += current_function_outgoing_args_size;
2519 if (! leaf_function_p () || fsize)
2520 fsize += 32;
2521 return (fsize + 63) & ~63;
2524 rtx hp_profile_label_rtx;
2525 static char hp_profile_label_name[8];
2526 void
2527 output_function_prologue (file, size)
2528 FILE *file;
2529 int size;
2531 /* The function's label and associated .PROC must never be
2532 separated and must be output *after* any profiling declarations
2533 to avoid changing spaces/subspaces within a procedure. */
2534 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2535 fputs ("\t.PROC\n", file);
2537 /* hppa_expand_prologue does the dirty work now. We just need
2538 to output the assembler directives which denote the start
2539 of a function. */
2540 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2541 if (regs_ever_live[2] || profile_flag)
2542 fputs (",CALLS,SAVE_RP", file);
2543 else
2544 fputs (",NO_CALLS", file);
2546 if (frame_pointer_needed)
2547 fputs (",SAVE_SP", file);
2549 /* Pass on information about the number of callee register saves
2550 performed in the prologue.
2552 The compiler is supposed to pass the highest register number
2553 saved, the assembler then has to adjust that number before
2554 entering it into the unwind descriptor (to account for any
2555 caller saved registers with lower register numbers than the
2556 first callee saved register). */
2557 if (gr_saved)
2558 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2560 if (fr_saved)
2561 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2563 fputs ("\n\t.ENTRY\n", file);
2565 /* Horrid hack. emit_function_prologue will modify this RTL in
2566 place to get the expected results. */
2567 if (profile_flag)
2568 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2569 hp_profile_labelno);
2571 /* If we're using GAS and not using the portable runtime model, then
2572 we don't need to accumulate the total number of code bytes. */
2573 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2574 total_code_bytes = 0;
2575 else if (insn_addresses)
2577 unsigned int old_total = total_code_bytes;
2579 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2580 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2582 /* Be prepared to handle overflows. */
2583 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2585 else
2586 total_code_bytes = -1;
2588 remove_useless_addtr_insns (get_insns (), 0);
2590 /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */
2591 restore_unscaled_index_insn_codes (get_insns ());
/* Expand the RTL prologue for the current function: save the return
   pointer, allocate the frame (possibly via the out-of-line millicode
   sequence when -mspace), save callee-saved general and FP registers,
   emit the -p profiling glue, and copy the PIC register into its
   callee-saved home.  Sets the file-statics gr_saved, fr_saved,
   save_fregs, local_fsize and actual_fsize which are later read by
   output_function_prologue and hppa_expand_epilogue.  */
void
hppa_expand_prologue()
{
  extern char call_used_regs[];
  int size = get_frame_size ();
  int merge_sp_adjust_with_store = 0;
  int i, offset;
  rtx tmpreg, size_rtx;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;
  /* 8 bytes = frame pointer + filler; see compute_frame_size.  */
  local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (SImode, 1);
  size_rtx = GEN_INT (actual_fsize);

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE)
    {
      rtx operands[2];
      int saves = 0;
      int outline_insn_count = 0;
      int inline_insn_count = 0;

      /* Count the number of insns for the inline and out of line
	 variants so we can choose one appropriately.

	 No need to screw with counting actual_fsize operations -- they're
	 done for both inline and out of line prologues.  */
      if (regs_ever_live[2])
	inline_insn_count += 1;

      if (! cint_ok_for_move (local_fsize))
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      /* Put the register save info into %r22.  The low half holds the
	 highest callee-saved GR, the high half encodes the FP saves.  */
      for (i = 18; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* -1 because the stack adjustment is normally done in
	       the same insn as a register save.  */
	    inline_insn_count += (i - 2) - 1;
	    saves = i;
	    break;
	  }

      for (i = 66; i >= 48; i -= 2)
	if (regs_ever_live[i] || regs_ever_live[i + 1])
	  {
	    /* +1 needed as we load %r1 with the start of the freg
	       save area.  */
	    inline_insn_count += (i/2 - 23) + 1;
	    saves |= ((i/2 - 12 ) << 16);
	    break;
	  }

      if (frame_pointer_needed)
	inline_insn_count += 3;

      if (! cint_ok_for_move (saves))
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      if (TARGET_PORTABLE_RUNTIME)
	outline_insn_count += 2;
      else
	outline_insn_count += 1;

      /* If there's a lot of insns in the prologue, then do it as
	 an out-of-line sequence.  */
      if (inline_insn_count > outline_insn_count)
	{
	  /* Put the local_fsize into %r19.  */
	  operands[0] = gen_rtx_REG (SImode, 19);
	  operands[1] = GEN_INT (local_fsize);
	  emit_move_insn (operands[0], operands[1]);

	  /* Put the stack size into %r21.  */
	  operands[0] = gen_rtx_REG (SImode, 21);
	  operands[1] = size_rtx;
	  emit_move_insn (operands[0], operands[1]);

	  /* Put the register save info into %r22.  */
	  operands[0] = gen_rtx_REG (SImode, 22);
	  operands[1] = GEN_INT (saves);
	  emit_move_insn (operands[0], operands[1]);

	  /* Now call the out-of-line prologue.  */
	  emit_insn (gen_outline_prologue_call ());
	  emit_insn (gen_blockage ());

	  /* Note that we're using an out-of-line prologue.  */
	  out_of_line_prologue_epilogue = 1;
	  return;
	}
    }

  out_of_line_prologue_epilogue = 0;

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp-20.  */
  if (regs_ever_live[2] || profile_flag)
    store_reg (2, -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize)
    if (frame_pointer_needed)
      {
	/* Copy the old frame pointer temporarily into %r1.  Set up the
	   new stack pointer, then store away the saved old frame pointer
	   into the stack at sp+actual_fsize and at the same time update
	   the stack pointer by actual_fsize bytes.  Two versions, first
	   handles small (<8k) frames.  The second handles large (>8k)
	   frames.  */
	emit_move_insn (tmpreg, frame_pointer_rtx);
	emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
	if (VAL_14_BITS_P (actual_fsize))
	  emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
	else
	  {
	    /* It is incorrect to store the saved frame pointer at *sp,
	       then increment sp (writes beyond the current stack boundary).

	       So instead use stwm to store at *sp and post-increment the
	       stack pointer as an atomic operation.  Then increment sp to
	       finish allocating the new frame.  */
	    emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
	    set_reg_plus_d (STACK_POINTER_REGNUM,
			    STACK_POINTER_REGNUM,
			    actual_fsize - 64);
	  }
      }
    /* no frame pointer needed.  */
    else
      {
	/* In some cases we can perform the first callee register save
	   and allocating the stack frame at the same time.   If so, just
	   make a note of it and defer allocating the frame until saving
	   the callee registers.  */
	if (VAL_14_BITS_P (-actual_fsize)
	    && local_fsize == 0
	    && ! profile_flag
	    && ! flag_pic)
	  merge_sp_adjust_with_store = 1;
	/* Can not optimize.  Adjust the stack frame by actual_fsize bytes.  */
	else if (actual_fsize != 0)
	  set_reg_plus_d (STACK_POINTER_REGNUM,
			  STACK_POINTER_REGNUM,
			  actual_fsize);
      }

  /* The hppa calling conventions say that that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame) when
     generating PIC code.  FIXME:  What is the correct thing to do
     for functions which make no calls and allocate no frame?  Do
     we need to allocate a frame, or can we just omit the save?   For
     now we'll just omit the save.  */
  if (actual_fsize != 0 && flag_pic)
    store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);

  /* Profiling code.

     Instead of taking one argument, the counter label, as most normal
     mcounts do, _mcount appears to behave differently on the HPPA.  It
     takes the return address of the caller, the address of this routine,
     and the address of the label.  Also, it isn't magic, so
     argument registers have to be preserved.  */
  if (profile_flag)
    {
      int pc_offset, i, arg_offset, basereg, offsetadj;

      /* Byte offset from the function label to the point where
	 control resumes after the profiler call; depends on how many
	 frame-setup insns precede this point.  */
      pc_offset = 4 + (frame_pointer_needed
		       ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
		       : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));

      /* When the function has a frame pointer, use it as the base
	 register for saving/restore registers.  Else use the stack
	 pointer.  Adjust the offset according to the frame size if
	 this function does not have a frame pointer.  */
      basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
				     : STACK_POINTER_REGNUM;
      offsetadj = frame_pointer_needed ? 0 : actual_fsize;

      /* Horrid hack.  emit_function_prologue will modify this RTL in
	 place to get the expected results.   sprintf here is just to
	 put something in the name.  */
      sprintf(hp_profile_label_name, "LP$%04d", -1);
      hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode,
						 hp_profile_label_name);
      if (current_function_returns_struct)
	store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);

      /* Save the argument registers %r23..%r26 around the _mcount
	 call since _mcount does not preserve them.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
	if (regs_ever_live [i])
	  {
	    store_reg (i, arg_offset, basereg);
	    /* Deal with arg_offset not fitting in 14 bits.  */
	    pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
	  }

      emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2));
      emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx));
      emit_move_insn (gen_rtx_REG (SImode, 24),
		      gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx));
      /* %r25 is set from within the output pattern.  */
      emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));

      /* Restore argument registers.  */
      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
	if (regs_ever_live [i])
	  load_reg (i, arg_offset, basereg);

      if (current_function_returns_struct)
	load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    store_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += 4;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		merge_sp_adjust_with_store = 0;
		emit_insn (gen_post_stwm (stack_pointer_rtx,
					  gen_rtx_REG (SImode, i),
					  GEN_INT (-offset)));
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += 4;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM,
			STACK_POINTER_REGNUM,
			actual_fsize);
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Now actually save the FP registers; %r1 (tmpreg) is
	 post-incremented by each store.  */
      for (i = 66; i >= 48; i -= 2)
	{
	  if (regs_ever_live[i] || regs_ever_live[i + 1])
	    {
	      emit_move_insn (gen_rtx_MEM (DFmode,
					   gen_rtx_POST_INC (DFmode, tmpreg)),
			      gen_rtx_REG (DFmode, i));
	      fr_saved++;
	    }
	}
    }

  /* When generating PIC code it is necessary to save/restore the
     PIC register around each function call.  We used to do this
     in the call patterns themselves, but that implementation
     made incorrect assumptions about using global variables to hold
     per-function rtl code generated in the backend.

     So instead, we copy the PIC register into a reserved callee saved
     register in the prologue.  Then after each call we reload the PIC
     register from the callee saved register.  We also reload the PIC
     register from the callee saved register in the epilogue ensure the
     PIC register is valid at function exit.

     This may (depending on the exact characteristics of the function)
     even be more efficient.

     Avoid this if the callee saved register wasn't used (these are
     leaf functions).  */
  if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
    emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
		    gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM));
}
2910 void
2911 output_function_epilogue (file, size)
2912 FILE *file;
2913 int size;
2915 rtx insn = get_last_insn ();
2916 int i;
2918 /* hppa_expand_epilogue does the dirty work now. We just need
2919 to output the assembler directives which denote the end
2920 of a function.
2922 To make debuggers happy, emit a nop if the epilogue was completely
2923 eliminated due to a volatile call as the last insn in the
2924 current function. That way the return address (in %r2) will
2925 always point to a valid instruction in the current function. */
2927 /* Get the last real insn. */
2928 if (GET_CODE (insn) == NOTE)
2929 insn = prev_real_insn (insn);
2931 /* If it is a sequence, then look inside. */
2932 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2933 insn = XVECEXP (PATTERN (insn), 0, 0);
2935 /* If insn is a CALL_INSN, then it must be a call to a volatile
2936 function (otherwise there would be epilogue insns). */
2937 if (insn && GET_CODE (insn) == CALL_INSN)
2938 fputs ("\tnop\n", file);
2940 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2942 /* Free up stuff we don't need anymore. */
2943 if (unscaled_index_insn_codes)
2944 free (unscaled_index_insn_codes);
2945 max_unscaled_index_insn_codes_uid = 0;
/* Expand the RTL epilogue for the current function, mirroring
   hppa_expand_prologue: restore the return pointer and the
   callee-saved general and FP registers, then deallocate the frame
   (or delegate everything to the out-of-line millicode epilogue when
   it was selected by the prologue).  Reads the file-statics
   local_fsize, actual_fsize and save_fregs set by the prologue.  */
void
hppa_expand_epilogue ()
{
  rtx tmpreg;
  int offset,i;
  int merge_sp_adjust_with_load  = 0;

  /* Handle out of line prologues and epilogues.  */
  if (TARGET_SPACE && out_of_line_prologue_epilogue)
    {
      int saves = 0;
      rtx operands[2];

      /* Put the register save info into %r22.  This mirrors the
	 encoding used by the prologue: highest saved GR in the low
	 half, FP save info in the high half.  */
      for (i = 18; i >= 3; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    saves = i;
	    break;
	  }

      for (i = 66; i >= 48; i -= 2)
	if (regs_ever_live[i] || regs_ever_live[i + 1])
	  {
	    saves |= ((i/2 - 12 ) << 16);
	    break;
	  }

      emit_insn (gen_blockage ());

      /* Put the local_fsize into %r19.  */
      operands[0] = gen_rtx_REG (SImode, 19);
      operands[1] = GEN_INT (local_fsize);
      emit_move_insn (operands[0], operands[1]);

      /* Put the stack size into %r21.  */
      operands[0] = gen_rtx_REG (SImode, 21);
      operands[1] = GEN_INT (actual_fsize);
      emit_move_insn (operands[0], operands[1]);

      operands[0] = gen_rtx_REG (SImode, 22);
      operands[1] = GEN_INT (saves);
      emit_move_insn (operands[0], operands[1]);

      /* Now call the out-of-line epilogue.  */
      emit_insn (gen_outline_epilogue_call ());
      return;
    }

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (SImode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (frame_pointer_needed
      && (regs_ever_live [2] || profile_flag))
    load_reg (2, -20, FRAME_POINTER_REGNUM);

  /* No frame pointer, and stack is smaller than 8k.  */
  else if (! frame_pointer_needed
	   && VAL_14_BITS_P (actual_fsize + 20)
	   && (regs_ever_live[2] || profile_flag))
    load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      for (i = 18, offset = local_fsize; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    load_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += 4;
	  }
    }
  else
    {
      for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (VAL_14_BITS_P (actual_fsize + 20)
		  && local_fsize == 0
		  && ! merge_sp_adjust_with_load)
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += 4;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);

      /* Actually do the restores now; %r1 (tmpreg) is post-incremented
	 by each load.  */
      for (i = 66; i >= 48; i -= 2)
	{
	  if (regs_ever_live[i] || regs_ever_live[i + 1])
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, i),
			      gen_rtx_MEM (DFmode,
					   gen_rtx_POST_INC (DFmode, tmpreg)));
	    }
	}
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());
  /* No frame pointer, but we have a stack greater than 8k.  We restore
     %r2 very late in this case.  (All other cases are restored as early
     as possible.)  */
  if (! frame_pointer_needed
      && ! VAL_14_BITS_P (actual_fsize + 20)
      && (regs_ever_live[2] || profile_flag))
    {
      set_reg_plus_d (STACK_POINTER_REGNUM,
		      STACK_POINTER_REGNUM,
		      - actual_fsize);

      /* This used to try and be clever by not depending on the value in
	 %r30 and instead use the value held in %r1 (so that the 2nd insn
	 which sets %r30 could be put in the delay slot of the return insn).

	 That won't work since if the stack is exactly 8k set_reg_plus_d
	 doesn't set %r1, just %r30.  */
      load_reg (2, - 20, STACK_POINTER_REGNUM);
    }

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  else if (frame_pointer_needed)
    {
      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
      emit_insn (gen_pre_ldwm (frame_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (-64)));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (! frame_pointer_needed && merge_sp_adjust_with_load)
    emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode,
					  merge_sp_adjust_with_load),
			     stack_pointer_rtx,
			     GEN_INT (- actual_fsize)));
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM,
		    STACK_POINTER_REGNUM,
		    - actual_fsize);
}
/* Fetch the return address for the frame COUNT steps up from
   the current frame, after the prologue.  FRAMEADDR is the
   frame pointer of the COUNT frame.

   We want to ignore any export stub remnants here.

   The value returned is used in two different ways:

	1. To find a function's caller.

	2. To change the return address for a function.

   This function handles most instances of case 1; however, it will
   fail if there are two levels of stubs to execute on the return
   path.  The only way I believe that can happen is if the return value
   needs a parameter relocation, which never happens for C code.

   This function handles most instances of case 2; however, it will
   fail if we did not originally have stub code on the return path
   but will need code on the new return path.  This can happen if
   the caller & callee are both in the main program, but the new
   return location is in a shared library.

   To handle this correctly we need to set the return pointer at
   frame-20 to point to a return stub, and frame-24 to point to the
   location we wish to return to.  */

rtx
return_addr_rtx (count, frameaddr)
     int count;
     rtx frameaddr;
{
  rtx label;
  rtx saved_rp;
  rtx ins;

  /* NOTE(review): COUNT is ignored by this implementation — it appears
     to only handle the COUNT == 0 frame; confirm against callers.  */
  saved_rp = gen_reg_rtx (Pmode);

  /* First, we start off with the normal return address pointer from
     -20[frameaddr].  */

  emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode,
				  copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)),
				  MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     If it is an export stub, than our return address is really in
     -24[frameaddr].  Any mismatch below branches to LABEL, keeping
     the initial -20[frameaddr] guess.  */

  emit_cmp_insn (gen_rtx_MEM (SImode, ins),
		 GEN_INT (0x4bc23fd1),
		 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
		 GEN_INT (0x004010a1),
		 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
		 GEN_INT (0x00011820),
		 NE, NULL_RTX, SImode, 1, 0);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
		 GEN_INT (0xe0400002),
		 NE, NULL_RTX, SImode, 1, 0);

  /* If there is no export stub then just use our initial guess of
     -20[frameaddr].  */

  emit_jump_insn (gen_bne (label));

  /* Here we know that our return address pointer points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address that leads back into user code.
     That return address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));

  emit_label (label);
  return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
}
3212 /* This is only valid once reload has completed because it depends on
3213 knowing exactly how much (if any) frame there is and...
3215 It's only valid if there is no frame marker to de-allocate and...
3217 It's only valid if %r2 hasn't been saved into the caller's frame
3218 (we're not profiling and %r2 isn't live anywhere). */
3220 hppa_can_use_return_insn_p ()
3222 return (reload_completed
3223 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3224 && ! profile_flag
3225 && ! regs_ever_live[2]
3226 && ! frame_pointer_needed);
3229 void
3230 emit_bcond_fp (code, operand0)
3231 enum rtx_code code;
3232 rtx operand0;
3234 emit_jump_insn (gen_rtx_SET
3235 (VOIDmode, pc_rtx,
3236 gen_rtx_IF_THEN_ELSE (VOIDmode,
3237 gen_rtx (code, VOIDmode,
3238 gen_rtx_REG (CCFPmode, 0),
3239 const0_rtx),
3240 gen_rtx_LABEL_REF (VOIDmode,
3241 operand0),
3242 pc_rtx)));
3247 gen_cmp_fp (code, operand0, operand1)
3248 enum rtx_code code;
3249 rtx operand0, operand1;
3251 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3252 gen_rtx (code, CCFPmode, operand0, operand1));
3255 /* Adjust the cost of a scheduling dependency. Return the new cost of
3256 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3259 pa_adjust_cost (insn, link, dep_insn, cost)
3260 rtx insn;
3261 rtx link;
3262 rtx dep_insn;
3263 int cost;
3265 if (! recog_memoized (insn))
3266 return 0;
3268 if (REG_NOTE_KIND (link) == 0)
3270 /* Data dependency; DEP_INSN writes a register that INSN reads some
3271 cycles later. */
3273 if (get_attr_type (insn) == TYPE_FPSTORE)
3275 rtx pat = PATTERN (insn);
3276 rtx dep_pat = PATTERN (dep_insn);
3277 if (GET_CODE (pat) == PARALLEL)
3279 /* This happens for the fstXs,mb patterns. */
3280 pat = XVECEXP (pat, 0, 0);
3282 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3283 /* If this happens, we have to extend this to schedule
3284 optimally. Return 0 for now. */
3285 return 0;
3287 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3289 if (! recog_memoized (dep_insn))
3290 return 0;
3291 /* DEP_INSN is writing its result to the register
3292 being stored in the fpstore INSN. */
3293 switch (get_attr_type (dep_insn))
3295 case TYPE_FPLOAD:
3296 /* This cost 3 cycles, not 2 as the md says for the
3297 700 and 7100. Note scaling of cost for 7100. */
3298 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3300 case TYPE_FPALU:
3301 case TYPE_FPMULSGL:
3302 case TYPE_FPMULDBL:
3303 case TYPE_FPDIVSGL:
3304 case TYPE_FPDIVDBL:
3305 case TYPE_FPSQRTSGL:
3306 case TYPE_FPSQRTDBL:
3307 /* In these important cases, we save one cycle compared to
3308 when flop instruction feed each other. */
3309 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3311 default:
3312 return cost;
3317 /* For other data dependencies, the default cost specified in the
3318 md is correct. */
3319 return cost;
3321 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3323 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3324 cycles later. */
3326 if (get_attr_type (insn) == TYPE_FPLOAD)
3328 rtx pat = PATTERN (insn);
3329 rtx dep_pat = PATTERN (dep_insn);
3330 if (GET_CODE (pat) == PARALLEL)
3332 /* This happens for the fldXs,mb patterns. */
3333 pat = XVECEXP (pat, 0, 0);
3335 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3336 /* If this happens, we have to extend this to schedule
3337 optimally. Return 0 for now. */
3338 return 0;
3340 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3342 if (! recog_memoized (dep_insn))
3343 return 0;
3344 switch (get_attr_type (dep_insn))
3346 case TYPE_FPALU:
3347 case TYPE_FPMULSGL:
3348 case TYPE_FPMULDBL:
3349 case TYPE_FPDIVSGL:
3350 case TYPE_FPDIVDBL:
3351 case TYPE_FPSQRTSGL:
3352 case TYPE_FPSQRTDBL:
3353 /* A fpload can't be issued until one cycle before a
3354 preceding arithmetic operation has finished if
3355 the target of the fpload is any of the sources
3356 (or destination) of the arithmetic operation. */
3357 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3359 default:
3360 return 0;
3364 else if (get_attr_type (insn) == TYPE_FPALU)
3366 rtx pat = PATTERN (insn);
3367 rtx dep_pat = PATTERN (dep_insn);
3368 if (GET_CODE (pat) == PARALLEL)
3370 /* This happens for the fldXs,mb patterns. */
3371 pat = XVECEXP (pat, 0, 0);
3373 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3374 /* If this happens, we have to extend this to schedule
3375 optimally. Return 0 for now. */
3376 return 0;
3378 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3380 if (! recog_memoized (dep_insn))
3381 return 0;
3382 switch (get_attr_type (dep_insn))
3384 case TYPE_FPDIVSGL:
3385 case TYPE_FPDIVDBL:
3386 case TYPE_FPSQRTSGL:
3387 case TYPE_FPSQRTDBL:
3388 /* An ALU flop can't be issued until two cycles before a
3389 preceding divide or sqrt operation has finished if
3390 the target of the ALU flop is any of the sources
3391 (or destination) of the divide or sqrt operation. */
3392 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3394 default:
3395 return 0;
3400 /* For other anti dependencies, the cost is 0. */
3401 return 0;
3403 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3405 /* Output dependency; DEP_INSN writes a register that INSN writes some
3406 cycles later. */
3407 if (get_attr_type (insn) == TYPE_FPLOAD)
3409 rtx pat = PATTERN (insn);
3410 rtx dep_pat = PATTERN (dep_insn);
3411 if (GET_CODE (pat) == PARALLEL)
3413 /* This happens for the fldXs,mb patterns. */
3414 pat = XVECEXP (pat, 0, 0);
3416 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3417 /* If this happens, we have to extend this to schedule
3418 optimally. Return 0 for now. */
3419 return 0;
3421 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3423 if (! recog_memoized (dep_insn))
3424 return 0;
3425 switch (get_attr_type (dep_insn))
3427 case TYPE_FPALU:
3428 case TYPE_FPMULSGL:
3429 case TYPE_FPMULDBL:
3430 case TYPE_FPDIVSGL:
3431 case TYPE_FPDIVDBL:
3432 case TYPE_FPSQRTSGL:
3433 case TYPE_FPSQRTDBL:
3434 /* A fpload can't be issued until one cycle before a
3435 preceding arithmetic operation has finished if
3436 the target of the fpload is the destination of the
3437 arithmetic operation. */
3438 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3440 default:
3441 return 0;
3445 else if (get_attr_type (insn) == TYPE_FPALU)
3447 rtx pat = PATTERN (insn);
3448 rtx dep_pat = PATTERN (dep_insn);
3449 if (GET_CODE (pat) == PARALLEL)
3451 /* This happens for the fldXs,mb patterns. */
3452 pat = XVECEXP (pat, 0, 0);
3454 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3455 /* If this happens, we have to extend this to schedule
3456 optimally. Return 0 for now. */
3457 return 0;
3459 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3461 if (! recog_memoized (dep_insn))
3462 return 0;
3463 switch (get_attr_type (dep_insn))
3465 case TYPE_FPDIVSGL:
3466 case TYPE_FPDIVDBL:
3467 case TYPE_FPSQRTSGL:
3468 case TYPE_FPSQRTDBL:
3469 /* An ALU flop can't be issued until two cycles before a
3470 preceding divide or sqrt operation has finished if
3471 the target of the ALU flop is also the target of
3472 of the divide or sqrt operation. */
3473 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3475 default:
3476 return 0;
3481 /* For other output dependencies, the cost is 0. */
3482 return 0;
3484 else
3485 abort ();
/* Return any length adjustment needed by INSN which already has its length
   computed as LENGTH.   Return zero if no adjustment is necessary.

   For the PA: function calls, millicode calls, and backwards short
   conditional branches with unfilled delay slots need an adjustment by +1
   (to account for the NOP which will be inserted into the instruction stream).

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */

int
pa_adjust_insn_length (insn, length)
    rtx insn;
    int length;
{
  rtx pat = PATTERN (insn);

  /* Call insns which are *not* indirect and have unfilled delay slots.
     Both direct-call shapes are tested: a bare CALL and a SET whose
     source contains the call (call with value).  */
  if (GET_CODE (insn) == CALL_INSN)
    {
      if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
	  && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
	return 4;
      else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
	       && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
		  == SYMBOL_REF)
	return 4;
      else
	return 0;
    }
  /* Jumps inside switch tables which have unfilled delay slots
     also need adjustment.  (DImode on the pattern marks a table jump
     here -- NOTE(review): relies on pa.md using that mode; confirm.)  */
  else if (GET_CODE (insn) == JUMP_INSN
	   && simplejump_p (insn)
	   && GET_MODE (PATTERN (insn)) == DImode)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) != SEQUENCE
	   && GET_CODE (pat) != USE
	   && GET_CODE (pat) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern: a PARALLEL whose first element is a MEM-to-MEM
     set in BLKmode.  Its true length comes from compute_movstrsi_length;
     4 is subtracted since LENGTH already counts one word for the insn.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movstrsi_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && ! forward_branch_p (insn))
	return 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	return 4;
      else
	return 0;
    }
  return 0;
}
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.

   Condition codes: 'C' plain, 'N' negated, 'S' swapped, 'B' both,
   'Y' floating-point (NaN-aware).  Constant transforms: 'k' bitwise
   complement, 'L' 32 - (n & 31), 'O' log2(n), 'P' 31 - (n & 31),
   'Z' zdepi operand triple.  Addressing: 'M'/'F' emit ld/st completers,
   'G'/'H' emit global addresses.  Anything that falls out of the switch
   is printed as a register, memory address, or constant below.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output an nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("0", file);
	  return;
	}
      else
	break;			/* Fall through to the generic REG code.  */
    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  abort ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  abort ();
	}
      return;
    /* For floating point comparisons.  Need special conditions to deal
       with NaNs properly.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	default:
	  abort ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  abort ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  abort ();
	}
      return;
    case 'k':
      /* Bitwise complement of a constant.
	 NOTE(review): %d with INTVAL assumes HOST_WIDE_INT is int-sized
	 here; would be a format mismatch on 64-bit hosts -- confirm.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", ~INTVAL (x));
	  return;
	}
      abort();
    case 'L':
      /* 32 minus the low 5 bits of the constant (shift complement).  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", 32 - (INTVAL (x) & 31));
	  return;
	}
      abort();
    case 'O':
      /* Exact log2 of a power-of-two constant.  */
      if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
	{
	  fprintf (file, "%d", exact_log2 (INTVAL (x)));
	  return;
	}
      abort();
    case 'P':
      /* 31 minus the low 5 bits of the constant (bit position).  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", 31 - (INTVAL (x) & 31));
	  return;
	}
      abort();
    case 'I':
      /* Emit the immediate-form completer when the operand is constant.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Load/store completer derived from the address form; 'F' also
	 emits a plain "s" for simple addresses.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  fputs ("s,mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  fputs ("s,ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fputs ("x,s", file);
	  else if (code == 'F')
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F')
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      output_global_address (file, x, 0);
      return;
    case 'H':
      output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	unsigned op[3];
	compute_zdepi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    default:
      abort ();
    }

  /* Generic operand output: register, memory reference, or constant.  */
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      /* Single-word value in an even FP register pair: name its
	 left half explicitly.  */
      if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = XEXP (XEXP (x, 0), 0);
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
	  break;
	default:
	  /* Indexed (reg + reg*scale) addresses print index(0,base);
	     everything else goes through the generic address printer.  */
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(0,%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(0,%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else
	    output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.

   ROUND_CONSTANT nonzero means round the constant offset for an LR
   field selector (see the comment below).  Non-read-only, non-PIC
   symbols are emitted relative to $global$.  */

void
output_global_address (file, x, round_constant)
     FILE *file;
     rtx x;
     int round_constant;
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
    assemble_name (file, XSTR (x, 0));
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      assemble_name (file, XSTR (x, 0));
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base;

      /* Decompose (const (plus/minus op0 op1)): one operand is the
	 symbol/label, the other the integer offset.
	 NOTE(review): if both operands were CONST_INTs, BASE would be
	 used uninitialized below; presumably canonical RTL rules this
	 out -- confirm.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0), 0));
      else abort ();

      if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0),1));
      else abort ();

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      if (GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	}
      else if (GET_CODE (XEXP (x, 0)) == MINUS
	       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
	sep = "-";
      else abort ();

      if (!read_only_operand (base) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file,"%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
3933 void
3934 output_deferred_plabels (file)
3935 FILE *file;
3937 int i;
3938 /* If we have deferred plabels, then we need to switch into the data
3939 section and align it to a 4 byte boundary before we output the
3940 deferred plabels. */
3941 if (n_deferred_plabels)
3943 data_section ();
3944 ASM_OUTPUT_ALIGN (file, 2);
3947 /* Now output the deferred plabels. */
3948 for (i = 0; i < n_deferred_plabels; i++)
3950 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
3951 assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode,
3952 deferred_plabels[i].name), 4, 1);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
/* One flag per millicode routine: nonzero once its .IMPORT directive
   has been emitted (see import_milli).  */
static char imported[(int)end1000];
/* Routine names, indexed by enum millicodes; each is 4 characters.  */
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
/* Template directive; the "...." field is overwritten with a name.  */
static char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." name field inside IMPORT_STRING.  */
#define MILLI_START 10
3965 static void
3966 import_milli (code)
3967 enum millicodes code;
3969 char str[sizeof (import_string)];
3971 if (!imported[(int)code])
3973 imported[(int)code] = 1;
3974 strcpy (str, import_string);
3975 strncpy (str + MILLI_START, milli_names[(int)code], 4);
3976 output_asm_insn (str, 0);
3980 /* The register constraints have put the operands and return value in
3981 the proper registers. */
3983 char *
3984 output_mul_insn (unsignedp, insn)
3985 int unsignedp;
3986 rtx insn;
3988 import_milli (mulI);
3989 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI"));
/* Emit the rtl for doing a division by a constant. */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor value (valid for 0..15); nonzero means a $$divI_n/$$divU_n
   routine exists for that divisor.  */
static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
			   1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
4005 div_operand (op, mode)
4006 rtx op;
4007 enum machine_mode mode;
4009 return (mode == SImode
4010 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4011 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4012 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4016 emit_hpdiv_const (operands, unsignedp)
4017 rtx *operands;
4018 int unsignedp;
4020 if (GET_CODE (operands[2]) == CONST_INT
4021 && INTVAL (operands[2]) > 0
4022 && INTVAL (operands[2]) < 16
4023 && magic_milli[INTVAL (operands[2])])
4025 emit_move_insn ( gen_rtx_REG (SImode, 26), operands[1]);
4026 emit
4027 (gen_rtx_PARALLEL (VOIDmode,
4028 gen_rtvec (5, gen_rtx_SET (VOIDmode,
4029 gen_rtx (REG, SImode, 29),
4030 gen_rtx (unsignedp ? UDIV : DIV, SImode,
4031 gen_rtx_REG (SImode, 26),
4032 operands[2])),
4033 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4034 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4035 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
4037 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4038 return 1;
4040 return 0;
/* Output a millicode call for a division.  OPERANDS[0] is the divisor
   (a constant here selects a magic $$divI_n/$$divU_n routine),
   UNSIGNEDP selects the unsigned variant, INSN is the call insn.
   Returns the assembler template from output_millicode_call.  */

char *
output_div_insn (operands, unsignedp, insn)
     rtx *operands;
     int unsignedp;
     rtx insn;
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes .*/
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      /* Static: the returned template must outlive this call.  */
      static char buf[100];
      divisor = INTVAL (operands[0]);
      /* Emit the .IMPORT for this specific magic routine only once,
	 tracked per divisor and signedness in div_milli.
	 NOTE(review): DIVISOR indexes div_milli[16][2]; presumably the
	 div_operand predicate guarantees 0 < divisor < 16 -- confirm.  */
      if (!div_milli[divisor][unsignedp])
	{
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      /* NOTE(review): %d with INTVAL assumes int-sized HOST_WIDE_INT;
	 a format mismatch on hosts where it is wider -- confirm.  */
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, buf));
	}
    }
  /* Divisor isn't a special constant. */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
	}
    }
}
4096 /* Output a $$rem millicode to do mod. */
4098 char *
4099 output_mod_insn (unsignedp, insn)
4100 int unsignedp;
4101 rtx insn;
4103 if (unsignedp)
4105 import_milli (remU);
4106 return output_millicode_call (insn,
4107 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4109 else
4111 import_milli (remI);
4112 return output_millicode_call (insn,
4113 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the .CALL argument-descriptor pseudo-op for CALL_INSN,
   describing which argument words are passed in general ("GR") vs
   floating ("FR"/"FU") registers, derived from the USEs recorded in
   CALL_INSN_FUNCTION_USAGE.  */

void
output_arg_descriptor (call_insn)
     rtx call_insn;
{
  /* Descriptor string for each of the four argument words, or 0.  */
  char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  if (GET_CODE (call_insn) != CALL_INSN)
    abort ();
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      /* Only (use (reg argreg)) entries matter here.  */
      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  /* General argument registers 26..23 map to words 0..3;
	     a DImode argument occupies the following word too.  */
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  /* Floating argument registers; a single precision value
	     fills one word, double precision fills a pair whose
	     half ordering depends on HP_FP_ARG_DESCRIPTOR_REVERSED.  */
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
/* Return the class of any secondary reload register that is needed to
   move IN into a register in class CLASS using mode MODE.

   Profiling has showed this routine and its descendants account for
   a significant amount of compile time (~7%).  So it has been
   optimized to reduce redundant computations and eliminate useless
   function calls.

   It might be worthwhile to try and make this a leaf function too.

   (Note: the statement order below is deliberate -- cheap tests first,
   true_regnum and the inlined symbolic_operand computed at most once.)  */

enum reg_class
secondary_reload_class (class, mode, in)
     enum reg_class class;
     enum machine_mode mode;
     rtx in;
{
  int regno, is_symbolic;

  /* Trying to load a constant into a FP register during PIC code
     generation will require %r1 as a scratch register.  */
  if (flag_pic == 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (class)
      && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
    return R1_REGS;

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside secondary_reload_class.  */

  /* Resolve IN to a hard register number when possible; -1 means
     "not a register".  */
  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = true_regnum (in);
    }
  else if (GET_CODE (in) == SUBREG)
    regno = true_regnum (in);
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (in) == MEM
      && GET_CODE (XEXP (in, 0)) == MEM)
    return NO_REGS;

  /* Handle out of range displacement for integer mode loads/stores of
     FP registers.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    return GENERAL_REGS;

  if (GET_CODE (in) == HIGH)
    in = XEXP (in, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside secondary_reload_class.

     We use an inline copy and only compute its return value once to avoid
     useless work.  */
  switch (GET_CODE (in))
    {
      rtx tmp;

    case SYMBOL_REF:
    case LABEL_REF:
      is_symbolic = 1;
      break;
    case CONST:
      tmp = XEXP (in, 0);
      is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
      break;

    default:
      is_symbolic = 0;
      break;
    }

  /* Read-only symbolic addresses need no scratch outside of PIC.  */
  if (!flag_pic
      && is_symbolic
      && read_only_operand (in))
    return NO_REGS;

  /* Other symbolic loads need %r1 as a scratch.  */
  if (class != R1_REGS && is_symbolic)
    return R1_REGS;

  return NO_REGS;
}
4281 enum direction
4282 function_arg_padding (mode, type)
4283 enum machine_mode mode;
4284 tree type;
4286 int size;
4288 if (mode == BLKmode)
4290 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4291 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4292 else
4293 return upward; /* Don't know if this is right, but */
4294 /* same as old definition. */
4296 else
4297 size = GET_MODE_BITSIZE (mode);
4298 if (size < PARM_BOUNDARY)
4299 return downward;
4300 else if (size % PARM_BOUNDARY)
4301 return upward;
4302 else
4303 return none;
/* Do what is necessary for `va_start'.  The argument is ignored;
   We look at the current function to determine if stdargs or varargs
   is used and fill in an initial va_list.  A pointer to this constructor
   is returned.  */

struct rtx_def *
hppa_builtin_saveregs (arglist)
     tree arglist;
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* Adjust by one word when the function's argument list is not
     terminated by void_type_node (i.e. old-style varargs).  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  /* Store general registers on the stack.  The four argument
     registers starting at 23 are saved below the arg pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (current_function_internal_arg_pointer,
				     -16));
  move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  if (current_function_check_memory_usage)
    emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
		       dest, ptr_mode,
		       GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype),
		       GEN_INT (MEMORY_USE_RW),
		       TYPE_MODE (integer_type_node));

  /* Return arg pointer + offset, copied into a fresh register.  */
  return copy_to_reg (expand_binop (Pmode, add_optab,
				    current_function_internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   NULLIFY nonzero requests a nullified delay slot; LENGTH is the
   branch length in bytes (4, 8, 20, or 28); NEGATED inverts the
   condition.  The returned template lives in a static buffer.  */

char *
output_cbranch (operands, nullify, length, negated, insn)
  rtx *operands;
  int nullify, length, negated;
  rtx insn;
{
  static char buf[100];
  int useskip = 0;

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  This can happen when not optimizing.

     In such cases it is safe to emit nothing.  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    return "";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "com%I2clr,");
      else
	strcpy (buf, "com%I2b,");
      if (negated)
	strcat (buf, "%B3");
      else
	strcat (buf, "%S3");
      if (useskip)
	strcat (buf, " %2,%1,0");
      else if (nullify)
	strcat (buf, ",n %2,%1,%0");
      else
	strcat (buf, " %2,%1,%0");
      break;

      /* All long conditionals.  Note an short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 with is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && insn_addresses
	       && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				  - insn_addresses[INSN_UID (insn)] - 8))
	{
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%B3 %2,%1,%0%#");
	  else
	    strcat (buf, "%S3 %2,%1,%0%#");
	}
      /* Otherwise: skip-over compare followed by an unconditional
	 branch to the target.  */
      else
	{
	  strcpy (buf, "com%I2clr,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  if (nullify)
	    strcat (buf, " %2,%1,0\n\tbl,n %0,0");
	  else
	    strcat (buf, " %2,%1,0\n\tbl %0,0");
	}
      break;

    case 20:
      /* Very long branch.  Right now we only handle these when not
	 optimizing.  See "jump" pattern in pa.md for details.  */
      if (optimize)
	abort ();

      /* Create a reversed conditional branch which branches around
	 the following insns.  */
      if (negated)
	strcpy (buf, "com%I2b,%S3,n %2,%1,.+20");
      else
	strcpy (buf, "com%I2b,%B3,n %2,%1,.+20");
      output_asm_insn (buf, operands);

      /* Output an insn to save %r1.  */
      output_asm_insn ("stw %%r1,-16(%%r30)", operands);

      /* Now output a very long branch to the original target.  */
      output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);

      /* Now restore the value of %r1 in the delay slot.  We're not
	 optimizing so we know nothing else can be in the delay slot.  */
      return "ldw -16(%%r30),%%r1";

    case 28:
      /* Very long branch when generating PIC code.  Right now we only
	 handle these when not optimizing.  See "jump" pattern in pa.md
	 for details.  */
      if (optimize)
	abort ();

      /* Create a reversed conditional branch which branches around
	 the following insns.  */
      if (negated)
	strcpy (buf, "com%I2b,%S3,n %2,%1,.+28");
      else
	strcpy (buf, "com%I2b,%B3,n %2,%1,.+28");
      output_asm_insn (buf, operands);

      /* Output an insn to save %r1.  */
      output_asm_insn ("stw %%r1,-16(%%r30)", operands);

      /* Now output a very long PIC branch to the original target.  */
      {
	rtx xoperands[5];

	xoperands[0] = operands[0];
	xoperands[1] = operands[1];
	xoperands[2] = operands[2];
	xoperands[3] = operands[3];
	xoperands[4] = gen_label_rtx ();

	/* PC-relative sequence: materialize the target address in %r1
	   relative to a freshly emitted local label, then branch.  */
	output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands);
	ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				   CODE_LABEL_NUMBER (xoperands[4]));
	output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands);
      }

      /* Now restore the value of %r1 in the delay slot.  We're not
	 optimizing so we know nothing else can be in the delay slot.  */
      return "ldw -16(%%r30),%%r1";

    default:
      abort();
    }
  return buf;
}
4536 /* This routine handles all the branch-on-bit conditional branch sequences we
4537 might need to generate. It handles nullification of delay slots,
4538 varying length branches, negated branches and all combinations of the
4539 above. it returns the appropriate output template to emit the branch. */
4541 char *
4542 output_bb (operands, nullify, length, negated, insn, which)
4543 rtx *operands;
4544 int nullify, length, negated;
4545 rtx insn;
4546 int which;
4548 static char buf[100];
4549 int useskip = 0;
4551 /* A conditional branch to the following instruction (eg the delay slot) is
4552 asking for a disaster. I do not think this can happen as this pattern
4553 is only used when optimizing; jump optimization should eliminate the
4554 jump. But be prepared just in case. */
4556 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4557 return "";
4559 /* If this is a long branch with its delay slot unfilled, set `nullify'
4560 as it can nullify the delay slot and save a nop. */
4561 if (length == 8 && dbr_sequence_length () == 0)
4562 nullify = 1;
4564 /* If this is a short forward conditional branch which did not get
4565 its delay slot filled, the delay slot can still be nullified. */
4566 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4567 nullify = forward_branch_p (insn);
4569 /* A forward branch over a single nullified insn can be done with a
4570 extrs instruction. This avoids a single cycle penalty due to
4571 mis-predicted branch if we fall through (branch not taken). */
4573 if (length == 4
4574 && next_real_insn (insn) != 0
4575 && get_attr_length (next_real_insn (insn)) == 4
4576 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4577 && nullify)
4578 useskip = 1;
4580 switch (length)
4583 /* All short conditional branches except backwards with an unfilled
4584 delay slot. */
4585 case 4:
4586 if (useskip)
4587 strcpy (buf, "extrs,");
4588 else
4589 strcpy (buf, "bb,");
4590 if ((which == 0 && negated)
4591 || (which == 1 && ! negated))
4592 strcat (buf, ">=");
4593 else
4594 strcat (buf, "<");
4595 if (useskip)
4596 strcat (buf, " %0,%1,1,0");
4597 else if (nullify && negated)
4598 strcat (buf, ",n %0,%1,%3");
4599 else if (nullify && ! negated)
4600 strcat (buf, ",n %0,%1,%2");
4601 else if (! nullify && negated)
4602 strcat (buf, "%0,%1,%3");
4603 else if (! nullify && ! negated)
4604 strcat (buf, " %0,%1,%2");
4605 break;
4607 /* All long conditionals. Note an short backward branch with an
4608 unfilled delay slot is treated just like a long backward branch
4609 with an unfilled delay slot. */
4610 case 8:
4611 /* Handle weird backwards branch with a filled delay slot
4612 with is nullified. */
4613 if (dbr_sequence_length () != 0
4614 && ! forward_branch_p (insn)
4615 && nullify)
4617 strcpy (buf, "bb,");
4618 if ((which == 0 && negated)
4619 || (which == 1 && ! negated))
4620 strcat (buf, "<");
4621 else
4622 strcat (buf, ">=");
4623 if (negated)
4624 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4625 else
4626 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4628 /* Handle short backwards branch with an unfilled delay slot.
4629 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4630 taken and untaken branches. */
4631 else if (dbr_sequence_length () == 0
4632 && ! forward_branch_p (insn)
4633 && insn_addresses
4634 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4635 - insn_addresses[INSN_UID (insn)] - 8))
4637 strcpy (buf, "bb,");
4638 if ((which == 0 && negated)
4639 || (which == 1 && ! negated))
4640 strcat (buf, ">=");
4641 else
4642 strcat (buf, "<");
4643 if (negated)
4644 strcat (buf, " %0,%1,%3%#");
4645 else
4646 strcat (buf, " %0,%1,%2%#");
4648 else
4650 strcpy (buf, "extrs,");
4651 if ((which == 0 && negated)
4652 || (which == 1 && ! negated))
4653 strcat (buf, "<");
4654 else
4655 strcat (buf, ">=");
4656 if (nullify && negated)
4657 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4658 else if (nullify && ! negated)
4659 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4660 else if (negated)
4661 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4662 else
4663 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4665 break;
4667 default:
4668 abort();
4670 return buf;
4673 /* This routine handles all the branch-on-variable-bit conditional branch
4674 sequences we might need to generate. It handles nullification of delay
4675 slots, varying length branches, negated branches and all combinations
4676 of the above. it returns the appropriate output template to emit the
4677 branch. */
4679 char *
4680 output_bvb (operands, nullify, length, negated, insn, which)
4681 rtx *operands;
4682 int nullify, length, negated;
4683 rtx insn;
4684 int which;
4686 static char buf[100];
4687 int useskip = 0;
4689 /* A conditional branch to the following instruction (eg the delay slot) is
4690 asking for a disaster. I do not think this can happen as this pattern
4691 is only used when optimizing; jump optimization should eliminate the
4692 jump. But be prepared just in case. */
4694 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4695 return "";
4697 /* If this is a long branch with its delay slot unfilled, set `nullify'
4698 as it can nullify the delay slot and save a nop. */
4699 if (length == 8 && dbr_sequence_length () == 0)
4700 nullify = 1;
4702 /* If this is a short forward conditional branch which did not get
4703 its delay slot filled, the delay slot can still be nullified. */
4704 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4705 nullify = forward_branch_p (insn);
4707 /* A forward branch over a single nullified insn can be done with a
4708 extrs instruction. This avoids a single cycle penalty due to
4709 mis-predicted branch if we fall through (branch not taken). */
4711 if (length == 4
4712 && next_real_insn (insn) != 0
4713 && get_attr_length (next_real_insn (insn)) == 4
4714 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4715 && nullify)
4716 useskip = 1;
4718 switch (length)
4721 /* All short conditional branches except backwards with an unfilled
4722 delay slot. */
4723 case 4:
4724 if (useskip)
4725 strcpy (buf, "vextrs,");
4726 else
4727 strcpy (buf, "bvb,");
4728 if ((which == 0 && negated)
4729 || (which == 1 && ! negated))
4730 strcat (buf, ">=");
4731 else
4732 strcat (buf, "<");
4733 if (useskip)
4734 strcat (buf, " %0,1,0");
4735 else if (nullify && negated)
4736 strcat (buf, ",n %0,%3");
4737 else if (nullify && ! negated)
4738 strcat (buf, ",n %0,%2");
4739 else if (! nullify && negated)
4740 strcat (buf, "%0,%3");
4741 else if (! nullify && ! negated)
4742 strcat (buf, " %0,%2");
4743 break;
4745 /* All long conditionals. Note an short backward branch with an
4746 unfilled delay slot is treated just like a long backward branch
4747 with an unfilled delay slot. */
4748 case 8:
4749 /* Handle weird backwards branch with a filled delay slot
4750 with is nullified. */
4751 if (dbr_sequence_length () != 0
4752 && ! forward_branch_p (insn)
4753 && nullify)
4755 strcpy (buf, "bvb,");
4756 if ((which == 0 && negated)
4757 || (which == 1 && ! negated))
4758 strcat (buf, "<");
4759 else
4760 strcat (buf, ">=");
4761 if (negated)
4762 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4763 else
4764 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4766 /* Handle short backwards branch with an unfilled delay slot.
4767 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4768 taken and untaken branches. */
4769 else if (dbr_sequence_length () == 0
4770 && ! forward_branch_p (insn)
4771 && insn_addresses
4772 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4773 - insn_addresses[INSN_UID (insn)] - 8))
4775 strcpy (buf, "bvb,");
4776 if ((which == 0 && negated)
4777 || (which == 1 && ! negated))
4778 strcat (buf, ">=");
4779 else
4780 strcat (buf, "<");
4781 if (negated)
4782 strcat (buf, " %0,%3%#");
4783 else
4784 strcat (buf, " %0,%2%#");
4786 else
4788 strcpy (buf, "vextrs,");
4789 if ((which == 0 && negated)
4790 || (which == 1 && ! negated))
4791 strcat (buf, "<");
4792 else
4793 strcat (buf, ">=");
4794 if (nullify && negated)
4795 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4796 else if (nullify && ! negated)
4797 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4798 else if (negated)
4799 strcat (buf, " %0,1,0\n\tbl %3,0");
4800 else
4801 strcat (buf, " %0,1,0\n\tbl %2,0");
4803 break;
4805 default:
4806 abort();
4808 return buf;
4811 /* Return the output template for emitting a dbra type insn.
4813 Note it may perform some output operations on its own before
4814 returning the final output string. */
4815 char *
4816 output_dbra (operands, insn, which_alternative)
4817 rtx *operands;
4818 rtx insn;
4819 int which_alternative;
4822 /* A conditional branch to the following instruction (eg the delay slot) is
4823 asking for a disaster. Be prepared! */
4825 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4827 if (which_alternative == 0)
4828 return "ldo %1(%0),%0";
4829 else if (which_alternative == 1)
4831 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4832 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4833 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4834 return "fldws -16(0,%%r30),%0";
4836 else
4838 output_asm_insn ("ldw %0,%4", operands);
4839 return "ldo %1(%4),%4\n\tstw %4,%0";
4843 if (which_alternative == 0)
4845 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4846 int length = get_attr_length (insn);
4848 /* If this is a long branch with its delay slot unfilled, set `nullify'
4849 as it can nullify the delay slot and save a nop. */
4850 if (length == 8 && dbr_sequence_length () == 0)
4851 nullify = 1;
4853 /* If this is a short forward conditional branch which did not get
4854 its delay slot filled, the delay slot can still be nullified. */
4855 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4856 nullify = forward_branch_p (insn);
4858 /* Handle short versions first. */
4859 if (length == 4 && nullify)
4860 return "addib,%C2,n %1,%0,%3";
4861 else if (length == 4 && ! nullify)
4862 return "addib,%C2 %1,%0,%3";
4863 else if (length == 8)
4865 /* Handle weird backwards branch with a fulled delay slot
4866 which is nullified. */
4867 if (dbr_sequence_length () != 0
4868 && ! forward_branch_p (insn)
4869 && nullify)
4870 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4871 /* Handle short backwards branch with an unfilled delay slot.
4872 Using a addb;nop rather than addi;bl saves 1 cycle for both
4873 taken and untaken branches. */
4874 else if (dbr_sequence_length () == 0
4875 && ! forward_branch_p (insn)
4876 && insn_addresses
4877 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4878 - insn_addresses[INSN_UID (insn)] - 8))
4879 return "addib,%C2 %1,%0,%3%#";
4881 /* Handle normal cases. */
4882 if (nullify)
4883 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4884 else
4885 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4887 else
4888 abort();
4890 /* Deal with gross reload from FP register case. */
4891 else if (which_alternative == 1)
4893 /* Move loop counter from FP register to MEM then into a GR,
4894 increment the GR, store the GR into MEM, and finally reload
4895 the FP register from MEM from within the branch's delay slot. */
4896 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4897 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4898 if (get_attr_length (insn) == 24)
4899 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4900 else
4901 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4903 /* Deal with gross reload from memory case. */
4904 else
4906 /* Reload loop counter from memory, the store back to memory
4907 happens in the branch's delay slot. */
4908 output_asm_insn ("ldw %0,%4", operands);
4909 if (get_attr_length (insn) == 12)
4910 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4911 else
4912 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
4916 /* Return the output template for emitting a dbra type insn.
4918 Note it may perform some output operations on its own before
4919 returning the final output string. */
4920 char *
4921 output_movb (operands, insn, which_alternative, reverse_comparison)
4922 rtx *operands;
4923 rtx insn;
4924 int which_alternative;
4925 int reverse_comparison;
4928 /* A conditional branch to the following instruction (eg the delay slot) is
4929 asking for a disaster. Be prepared! */
4931 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4933 if (which_alternative == 0)
4934 return "copy %1,%0";
4935 else if (which_alternative == 1)
4937 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4938 return "fldws -16(0,%%r30),%0";
4940 else if (which_alternative == 2)
4941 return "stw %1,%0";
4942 else
4943 return "mtsar %r1";
4946 /* Support the second variant. */
4947 if (reverse_comparison)
4948 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4950 if (which_alternative == 0)
4952 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4953 int length = get_attr_length (insn);
4955 /* If this is a long branch with its delay slot unfilled, set `nullify'
4956 as it can nullify the delay slot and save a nop. */
4957 if (length == 8 && dbr_sequence_length () == 0)
4958 nullify = 1;
4960 /* If this is a short forward conditional branch which did not get
4961 its delay slot filled, the delay slot can still be nullified. */
4962 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4963 nullify = forward_branch_p (insn);
4965 /* Handle short versions first. */
4966 if (length == 4 && nullify)
4967 return "movb,%C2,n %1,%0,%3";
4968 else if (length == 4 && ! nullify)
4969 return "movb,%C2 %1,%0,%3";
4970 else if (length == 8)
4972 /* Handle weird backwards branch with a filled delay slot
4973 which is nullified. */
4974 if (dbr_sequence_length () != 0
4975 && ! forward_branch_p (insn)
4976 && nullify)
4977 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4979 /* Handle short backwards branch with an unfilled delay slot.
4980 Using a movb;nop rather than or;bl saves 1 cycle for both
4981 taken and untaken branches. */
4982 else if (dbr_sequence_length () == 0
4983 && ! forward_branch_p (insn)
4984 && insn_addresses
4985 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4986 - insn_addresses[INSN_UID (insn)] - 8))
4987 return "movb,%C2 %1,%0,%3%#";
4988 /* Handle normal cases. */
4989 if (nullify)
4990 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4991 else
4992 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4994 else
4995 abort();
4997 /* Deal with gross reload from FP register case. */
4998 else if (which_alternative == 1)
5000 /* Move loop counter from FP register to MEM then into a GR,
5001 increment the GR, store the GR into MEM, and finally reload
5002 the FP register from MEM from within the branch's delay slot. */
5003 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
5004 if (get_attr_length (insn) == 12)
5005 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
5006 else
5007 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
5009 /* Deal with gross reload from memory case. */
5010 else if (which_alternative == 2)
5012 /* Reload loop counter from memory, the store back to memory
5013 happens in the branch's delay slot. */
5014 if (get_attr_length (insn) == 8)
5015 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
5016 else
5017 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
5019 /* Handle SAR as a destination. */
5020 else
5022 if (get_attr_length (insn) == 8)
5023 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
5024 else
5025 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
5030 /* INSN is a millicode call. It may have an unconditional jump in its delay
5031 slot.
5033 CALL_DEST is the routine we are calling. */
5035 char *
5036 output_millicode_call (insn, call_dest)
5037 rtx insn;
5038 rtx call_dest;
5040 int distance;
5041 rtx xoperands[4];
5042 rtx seq_insn;
5044 /* Handle common case -- empty delay slot or no jump in the delay slot,
5045 and we're sure that the branch will reach the beginning of the $CODE$
5046 subspace. */
5047 if ((dbr_sequence_length () == 0
5048 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
5049 || (dbr_sequence_length () != 0
5050 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5051 && get_attr_length (insn) == 4))
5053 xoperands[0] = call_dest;
5054 output_asm_insn ("bl %0,%%r31%#", xoperands);
5055 return "";
5058 /* This call may not reach the beginning of the $CODE$ subspace. */
5059 if (get_attr_length (insn) > 4)
5061 int delay_insn_deleted = 0;
5062 rtx xoperands[2];
5063 rtx link;
5065 /* We need to emit an inline long-call branch. */
5066 if (dbr_sequence_length () != 0
5067 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5069 /* A non-jump insn in the delay slot. By definition we can
5070 emit this insn before the call. */
5071 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5073 /* Now delete the delay insn. */
5074 PUT_CODE (NEXT_INSN (insn), NOTE);
5075 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5076 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5077 delay_insn_deleted = 1;
5080 /* If we're allowed to use be/ble instructions, then this is the
5081 best sequence to use for a long millicode call. */
5082 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
5083 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
5085 xoperands[0] = call_dest;
5086 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
5087 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
5088 output_asm_insn ("nop", xoperands);
5090 /* Pure portable runtime doesn't allow be/ble; we also don't have
5091 PIC support int he assembler/linker, so this sequence is needed. */
5092 else if (TARGET_PORTABLE_RUNTIME)
5094 xoperands[0] = call_dest;
5095 /* Get the address of our target into %r29. */
5096 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
5097 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
5099 /* Get our return address into %r31. */
5100 output_asm_insn ("blr 0,%%r31", xoperands);
5102 /* Jump to our target address in %r29. */
5103 output_asm_insn ("bv,n 0(%%r29)", xoperands);
5105 /* Empty delay slot. Note this insn gets fetched twice and
5106 executed once. To be safe we use a nop. */
5107 output_asm_insn ("nop", xoperands);
5108 return "";
5110 /* PIC long millicode call sequence. */
5111 else
5113 xoperands[0] = call_dest;
5114 xoperands[1] = gen_label_rtx ();
5115 /* Get our address + 8 into %r1. */
5116 output_asm_insn ("bl .+8,%%r1", xoperands);
5118 /* Add %r1 to the offset of our target from the next insn. */
5119 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
5120 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5121 CODE_LABEL_NUMBER (xoperands[1]));
5122 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
5124 /* Get the return address into %r31. */
5125 output_asm_insn ("blr 0,%%r31", xoperands);
5127 /* Branch to our target which is in %r1. */
5128 output_asm_insn ("bv,n 0(%%r1)", xoperands);
5130 /* Empty delay slot. Note this insn gets fetched twice and
5131 executed once. To be safe we use a nop. */
5132 output_asm_insn ("nop", xoperands);
5135 /* If we had a jump in the call's delay slot, output it now. */
5136 if (dbr_sequence_length () != 0
5137 && !delay_insn_deleted)
5139 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5140 output_asm_insn ("b,n %0", xoperands);
5142 /* Now delete the delay insn. */
5143 PUT_CODE (NEXT_INSN (insn), NOTE);
5144 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5145 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5147 return "";
5150 /* This call has an unconditional jump in its delay slot and the
5151 call is known to reach its target or the beginning of the current
5152 subspace. */
5154 /* Use the containing sequence insn's address. */
5155 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5157 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5158 - insn_addresses[INSN_UID (seq_insn)] - 8;
5160 /* If the branch was too far away, emit a normal call followed
5161 by a nop, followed by the unconditional branch.
5163 If the branch is close, then adjust %r2 from within the
5164 call's delay slot. */
5166 xoperands[0] = call_dest;
5167 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5168 if (! VAL_14_BITS_P (distance))
5169 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
5170 else
5172 xoperands[3] = gen_label_rtx ();
5173 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
5174 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5175 CODE_LABEL_NUMBER (xoperands[3]));
5178 /* Delete the jump. */
5179 PUT_CODE (NEXT_INSN (insn), NOTE);
5180 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5181 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5182 return "";
5185 extern struct obstack permanent_obstack;
5186 extern struct obstack *saveable_obstack;
5187 extern struct obstack *rtl_obstack;
5188 extern struct obstack *current_obstack;
5190 /* INSN is either a function call. It may have an unconditional jump
5191 in its delay slot.
5193 CALL_DEST is the routine we are calling. */
5195 char *
5196 output_call (insn, call_dest)
5197 rtx insn;
5198 rtx call_dest;
5200 int distance;
5201 rtx xoperands[4];
5202 rtx seq_insn;
5204 /* Handle common case -- empty delay slot or no jump in the delay slot,
5205 and we're sure that the branch will reach the beginning of the $CODE$
5206 subspace. */
5207 if ((dbr_sequence_length () == 0
5208 && get_attr_length (insn) == 8)
5209 || (dbr_sequence_length () != 0
5210 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5211 && get_attr_length (insn) == 4))
5213 xoperands[0] = call_dest;
5214 output_asm_insn ("bl %0,%%r2%#", xoperands);
5215 return "";
5218 /* This call may not reach the beginning of the $CODE$ subspace. */
5219 if (get_attr_length (insn) > 8)
5221 int delay_insn_deleted = 0;
5222 rtx xoperands[2];
5223 rtx link;
5225 /* We need to emit an inline long-call branch. Furthermore,
5226 because we're changing a named function call into an indirect
5227 function call well after the parameters have been set up, we
5228 need to make sure any FP args appear in both the integer
5229 and FP registers. Also, we need move any delay slot insn
5230 out of the delay slot. And finally, we can't rely on the linker
5231 being able to fix the call to $$dyncall! -- Yuk!. */
5232 if (dbr_sequence_length () != 0
5233 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5235 /* A non-jump insn in the delay slot. By definition we can
5236 emit this insn before the call (and in fact before argument
5237 relocating. */
5238 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5240 /* Now delete the delay insn. */
5241 PUT_CODE (NEXT_INSN (insn), NOTE);
5242 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5243 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5244 delay_insn_deleted = 1;
5247 /* Now copy any FP arguments into integer registers. */
5248 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5250 int arg_mode, regno;
5251 rtx use = XEXP (link, 0);
5252 if (! (GET_CODE (use) == USE
5253 && GET_CODE (XEXP (use, 0)) == REG
5254 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5255 continue;
5257 arg_mode = GET_MODE (XEXP (use, 0));
5258 regno = REGNO (XEXP (use, 0));
5259 /* Is it a floating point register? */
5260 if (regno >= 32 && regno <= 39)
5262 /* Copy from the FP register into an integer register
5263 (via memory). */
5264 if (arg_mode == SFmode)
5266 xoperands[0] = XEXP (use, 0);
5267 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
5268 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
5269 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5271 else
5273 xoperands[0] = XEXP (use, 0);
5274 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
5275 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
5276 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5277 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5282 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5283 we don't have any direct calls in that case. */
5285 int i;
5286 char *name = XSTR (call_dest, 0);
5288 /* See if we have already put this function on the list
5289 of deferred plabels. This list is generally small,
5290 so a liner search is not too ugly. If it proves too
5291 slow replace it with something faster. */
5292 for (i = 0; i < n_deferred_plabels; i++)
5293 if (strcmp (name, deferred_plabels[i].name) == 0)
5294 break;
5296 /* If the deferred plabel list is empty, or this entry was
5297 not found on the list, create a new entry on the list. */
5298 if (deferred_plabels == NULL || i == n_deferred_plabels)
5300 struct obstack *ambient_obstack = current_obstack;
5301 struct obstack *ambient_rtl_obstack = rtl_obstack;
5302 char *real_name;
5304 /* Any RTL we create here needs to live until the end of
5305 the compilation unit and therefore must live on the
5306 permanent obstack. */
5307 current_obstack = &permanent_obstack;
5308 rtl_obstack = &permanent_obstack;
5310 if (deferred_plabels == 0)
5311 deferred_plabels = (struct deferred_plabel *)
5312 xmalloc (1 * sizeof (struct deferred_plabel));
5313 else
5314 deferred_plabels = (struct deferred_plabel *)
5315 xrealloc (deferred_plabels,
5316 ((n_deferred_plabels + 1)
5317 * sizeof (struct deferred_plabel)));
5319 i = n_deferred_plabels++;
5320 deferred_plabels[i].internal_label = gen_label_rtx ();
5321 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
5322 strlen (name) + 1);
5323 strcpy (deferred_plabels[i].name, name);
5325 /* Switch back to normal obstack allocation. */
5326 current_obstack = ambient_obstack;
5327 rtl_obstack = ambient_rtl_obstack;
5329 /* Gross. We have just implicitly taken the address of this
5330 function, mark it as such. */
5331 STRIP_NAME_ENCODING (real_name, name);
5332 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5335 /* We have to load the address of the function using a procedure
5336 label (plabel). Inline plabels can lose for PIC and other
5337 cases, so avoid them by creating a 32bit plabel in the data
5338 segment. */
5339 if (flag_pic)
5341 xoperands[0] = deferred_plabels[i].internal_label;
5342 xoperands[1] = gen_label_rtx ();
5344 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5345 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5346 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5348 /* Get our address + 8 into %r1. */
5349 output_asm_insn ("bl .+8,%%r1", xoperands);
5351 /* Add %r1 to the offset of dyncall from the next insn. */
5352 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5353 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5354 CODE_LABEL_NUMBER (xoperands[1]));
5355 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5357 /* Get the return address into %r31. */
5358 output_asm_insn ("blr 0,%%r31", xoperands);
5360 /* Branch to our target which is in %r1. */
5361 output_asm_insn ("bv 0(%%r1)", xoperands);
5363 /* Copy the return address into %r2 also. */
5364 output_asm_insn ("copy %%r31,%%r2", xoperands);
5366 else
5368 xoperands[0] = deferred_plabels[i].internal_label;
5370 /* Get the address of our target into %r22. */
5371 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
5372 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
5374 /* Get the high part of the address of $dyncall into %r2, then
5375 add in the low part in the branch instruction. */
5376 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5377 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5379 /* Copy the return pointer into both %r31 and %r2. */
5380 output_asm_insn ("copy %%r31,%%r2", xoperands);
5384 /* If we had a jump in the call's delay slot, output it now. */
5385 if (dbr_sequence_length () != 0
5386 && !delay_insn_deleted)
5388 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5389 output_asm_insn ("b,n %0", xoperands);
5391 /* Now delete the delay insn. */
5392 PUT_CODE (NEXT_INSN (insn), NOTE);
5393 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5394 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5396 return "";
5399 /* This call has an unconditional jump in its delay slot and the
5400 call is known to reach its target or the beginning of the current
5401 subspace. */
5403 /* Use the containing sequence insn's address. */
5404 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5406 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5407 - insn_addresses[INSN_UID (seq_insn)] - 8;
5409 /* If the branch was too far away, emit a normal call followed
5410 by a nop, followed by the unconditional branch.
5412 If the branch is close, then adjust %r2 from within the
5413 call's delay slot. */
5415 xoperands[0] = call_dest;
5416 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5417 if (! VAL_14_BITS_P (distance))
5418 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5419 else
5421 xoperands[3] = gen_label_rtx ();
5422 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5423 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5424 CODE_LABEL_NUMBER (xoperands[3]));
5427 /* Delete the jump. */
5428 PUT_CODE (NEXT_INSN (insn), NOTE);
5429 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5430 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5431 return "";
5434 /* In HPUX 8.0's shared library scheme, special relocations are needed
5435 for function labels if they might be passed to a function
5436 in a shared library (because shared libraries don't live in code
5437 space), and special magic is needed to construct their address.
5439 For reasons too disgusting to describe storage for the new name
5440 is allocated either on the saveable_obstack (released at function
5441 exit) or on the permanent_obstack for things that can never change
5442 (libcall names for example). */
5444 void
5445 hppa_encode_label (sym, permanent)
5446 rtx sym;
5447 int permanent;
5449 char *str = XSTR (sym, 0);
5450 int len = strlen (str);
5451 char *newstr;
5453 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5454 len + 2);
5456 if (str[0] == '*')
5457 *newstr++ = *str++;
5458 strcpy (newstr + 1, str);
5459 *newstr = '@';
5460 XSTR (sym,0) = newstr;
5464 function_label_operand (op, mode)
5465 rtx op;
5466 enum machine_mode mode;
5468 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5471 /* Returns 1 if OP is a function label involved in a simple addition
5472 with a constant. Used to keep certain patterns from matching
5473 during instruction combination. */
5475 is_function_label_plus_const (op)
5476 rtx op;
5478 /* Strip off any CONST. */
5479 if (GET_CODE (op) == CONST)
5480 op = XEXP (op, 0);
5482 return (GET_CODE (op) == PLUS
5483 && function_label_operand (XEXP (op, 0), Pmode)
5484 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5487 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5488 use in fmpyadd instructions. */
5490 fmpyaddoperands (operands)
5491 rtx *operands;
5493 enum machine_mode mode = GET_MODE (operands[0]);
5495 /* Must be a floating point mode. */
5496 if (mode != SFmode && mode != DFmode)
5497 return 0;
5499 /* All modes must be the same. */
5500 if (! (mode == GET_MODE (operands[1])
5501 && mode == GET_MODE (operands[2])
5502 && mode == GET_MODE (operands[3])
5503 && mode == GET_MODE (operands[4])
5504 && mode == GET_MODE (operands[5])))
5505 return 0;
5507 /* All operands must be registers. */
5508 if (! (GET_CODE (operands[1]) == REG
5509 && GET_CODE (operands[2]) == REG
5510 && GET_CODE (operands[3]) == REG
5511 && GET_CODE (operands[4]) == REG
5512 && GET_CODE (operands[5]) == REG))
5513 return 0;
5515 /* Only 2 real operands to the addition. One of the input operands must
5516 be the same as the output operand. */
5517 if (! rtx_equal_p (operands[3], operands[4])
5518 && ! rtx_equal_p (operands[3], operands[5]))
5519 return 0;
5521 /* Inout operand of add can not conflict with any operands from multiply. */
5522 if (rtx_equal_p (operands[3], operands[0])
5523 || rtx_equal_p (operands[3], operands[1])
5524 || rtx_equal_p (operands[3], operands[2]))
5525 return 0;
5527 /* multiply can not feed into addition operands. */
5528 if (rtx_equal_p (operands[4], operands[0])
5529 || rtx_equal_p (operands[5], operands[0]))
5530 return 0;
5532 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5533 if (mode == SFmode
5534 && (REGNO (operands[0]) < 57
5535 || REGNO (operands[1]) < 57
5536 || REGNO (operands[2]) < 57
5537 || REGNO (operands[3]) < 57
5538 || REGNO (operands[4]) < 57
5539 || REGNO (operands[5]) < 57))
5540 return 0;
5542 /* Passed. Operands are suitable for fmpyadd. */
5543 return 1;
5546 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5547 use in fmpysub instructions. */
5549 fmpysuboperands (operands)
5550 rtx *operands;
5552 enum machine_mode mode = GET_MODE (operands[0]);
5554 /* Must be a floating point mode. */
5555 if (mode != SFmode && mode != DFmode)
5556 return 0;
5558 /* All modes must be the same. */
5559 if (! (mode == GET_MODE (operands[1])
5560 && mode == GET_MODE (operands[2])
5561 && mode == GET_MODE (operands[3])
5562 && mode == GET_MODE (operands[4])
5563 && mode == GET_MODE (operands[5])))
5564 return 0;
5566 /* All operands must be registers. */
5567 if (! (GET_CODE (operands[1]) == REG
5568 && GET_CODE (operands[2]) == REG
5569 && GET_CODE (operands[3]) == REG
5570 && GET_CODE (operands[4]) == REG
5571 && GET_CODE (operands[5]) == REG))
5572 return 0;
5574 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5575 operation, so operands[4] must be the same as operand[3]. */
5576 if (! rtx_equal_p (operands[3], operands[4]))
5577 return 0;
5579 /* multiply can not feed into subtraction. */
5580 if (rtx_equal_p (operands[5], operands[0]))
5581 return 0;
5583 /* Inout operand of sub can not conflict with any operands from multiply. */
5584 if (rtx_equal_p (operands[3], operands[0])
5585 || rtx_equal_p (operands[3], operands[1])
5586 || rtx_equal_p (operands[3], operands[2]))
5587 return 0;
5589 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5590 if (mode == SFmode
5591 && (REGNO (operands[0]) < 57
5592 || REGNO (operands[1]) < 57
5593 || REGNO (operands[2]) < 57
5594 || REGNO (operands[3]) < 57
5595 || REGNO (operands[4]) < 57
5596 || REGNO (operands[5]) < 57))
5597 return 0;
5599 /* Passed. Operands are suitable for fmpysub. */
5600 return 1;
5604 plus_xor_ior_operator (op, mode)
5605 rtx op;
5606 enum machine_mode mode;
5608 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5609 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   shift-and-add multipliers for shadd instructions (shifts of 1, 2,
   or 3 bits).  Returns 0 for any other value.  */
int
shadd_constant_p (val)
     int val;
{
  /* An explicit `int' return type replaces the old implicit-int form
     (removed from the language in C99); the == comparisons already
     yield exactly 0 or 1, so no if/else chain is needed.  */
  return val == 2 || val == 4 || val == 8;
}
5624 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5625 the valid constant for shadd instructions. */
5627 shadd_operand (op, mode)
5628 rtx op;
5629 enum machine_mode mode;
5631 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
/* Return 1 if OP is valid as a base register in a reg + reg address.
   MODE is only consulted by the final register_operand check.  The
   guard clauses below are ordered from cheapest/most-general to most
   specific, and their order matters (reload handling must come before
   the pseudo-register check).  */

basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* cse will create some unscaled indexed addresses, however; it
     generally isn't a win on the PA, so avoid creating unscaled
     indexed addresses until after cse is finished.  */
  if (!cse_not_expected)
    return 0;

  /* Once reload has started everything is considered valid.  Reload should
     only create indexed addresses using the stack/frame pointer, and any
     others were checked for validity when created by the combine pass.

     Also allow any register when TARGET_NO_SPACE_REGS is in effect since
     we don't have to worry about the braindamaged implicit space register
     selection using the basereg only (rather than effective address)
     screwing us over.  */
  if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
    return (GET_CODE (op) == REG);

  /* Stack is always OK for indexing.  */
  if (op == stack_pointer_rtx)
    return 1;

  /* While it's always safe to index off the frame pointer, it's not
     always profitable, particularly when the frame pointer is being
     eliminated.  */
  if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
    return 1;

  /* The only other valid OPs are pseudo registers with
     REGNO_POINTER_FLAG set.  */
  if (GET_CODE (op) != REG
      || REGNO (op) < FIRST_PSEUDO_REGISTER
      || ! register_operand (op, mode))
    return 0;

  /* REGNO_POINTER_FLAG is nonzero when the pseudo is known to hold a
     pointer value, making it an acceptable base.  */
  return REGNO_POINTER_FLAG (REGNO (op));
}
5678 /* Return 1 if this operand is anything other than a hard register. */
5681 non_hard_reg_operand (op, mode)
5682 rtx op;
5683 enum machine_mode mode;
5685 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5688 /* Return 1 if INSN branches forward. Should be using insn_addresses
5689 to avoid walking through all the insns... */
5691 forward_branch_p (insn)
5692 rtx insn;
5694 rtx label = JUMP_LABEL (insn);
5696 while (insn)
5698 if (insn == label)
5699 break;
5700 else
5701 insn = NEXT_INSN (insn);
5704 return (insn == label);
5707 /* Return 1 if OP is an equality comparison, else return 0. */
5709 eq_neq_comparison_operator (op, mode)
5710 rtx op;
5711 enum machine_mode mode;
5713 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5716 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5718 movb_comparison_operator (op, mode)
5719 rtx op;
5720 enum machine_mode mode;
5722 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5723 || GET_CODE (op) == LT || GET_CODE (op) == GE);
/* Return 1 if INSN is in the delay slot of a call instruction.  */

jump_in_call_delay (insn)
     rtx insn;
{
  /* Only jumps can sit in a call's delay slot for our purposes.  */
  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  /* NOTE(review): a filled delay slot is represented as a SEQUENCE
     whose element 0 is the branch/call and element 1 the slot insn.
     The two PREV_INSN hops presumably step from the slot insn back
     over the SEQUENCE members to the insn before it, so that
     next_active_insn lands on the SEQUENCE itself -- confirm against
     how reorg links insns inside a SEQUENCE.  */
  if (PREV_INSN (insn)
      && PREV_INSN (PREV_INSN (insn))
      && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
    {
      rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));

      /* INSN is in a call delay slot iff it is the second element of
	 the SEQUENCE we just located.  */
      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
    }
  else
    return 0;
}
/* Output an unconditional move and branch insn.

   OPERANDS[0] is the move destination, OPERANDS[1] the source
   (register or small constant), OPERANDS[2] the branch target.
   LENGTH is the insn length in bytes computed by the length
   attribute; 4 means the whole thing fits in a single movb.
   Returns the assembler template to emit.  */

char *
output_parallel_movb (operands, length)
     rtx *operands;
     int length;
{
  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of these cases wins, but they don't lose either.  */
  if (dbr_sequence_length () == 0)
    {
      /* Nothing in the delay slot, fake it by putting the combined
	 insn (the copy or add) in the delay slot of a bl.  */
      if (GET_CODE (operands[1]) == CONST_INT)
	return "bl %2,0\n\tldi %1,%0";
      else
	return "bl %2,0\n\tcopy %1,%0";
    }
  else
    {
      /* Something in the delay slot, but we've got a long branch.
	 Do the move first, then branch (its own delay slot is
	 already occupied).  */
      if (GET_CODE (operands[1]) == CONST_INT)
	return "ldi %1,%0\n\tbl %2,0";
      else
	return "copy %1,%0\n\tbl %2,0";
    }
}
/* Output an unconditional add and branch insn.

   OPERANDS[0..2] describe the addition, OPERANDS[3] the branch
   target.  LENGTH is the insn length in bytes from the length
   attribute; 4 means a single addb suffices.  Returns the assembler
   template to emit.  */

char *
output_parallel_addb (operands, length)
     rtx *operands;
     int length;
{
  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of these cases win, but they don't lose either.  */
  if (dbr_sequence_length () == 0)
    {
      /* Nothing in the delay slot, fake it by putting the combined
	 insn (the copy or add) in the delay slot of a bl.  */
      return "bl %3,0\n\tadd%I1 %1,%0,%0";
    }
  else
    {
      /* Something in the delay slot, but we've got a long branch.
	 Do the add first, then branch (its own delay slot is
	 already occupied).  */
      return "add%I1 %1,%0,%0\n\tbl %3,0";
    }
}
/* Return nonzero if INSN (a jump insn) immediately follows a call to
   a named function.  This is used to discourage creating parallel movb/addb
   insns since a jump which immediately follows a call can execute in the
   delay slot of the call.

   It is also used to avoid filling the delay slot of a jump which
   immediately follows a call since the jump can usually be eliminated
   completely by modifying RP in the delay slot of the call.  */

following_call (insn)
     rtx insn;
{
  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  TYPE_DYNCALL calls are
     excluded -- they are indirect, i.e. not calls to a *named*
     function.  NOTE(review): millicode calls apparently appear as
     plain INSNs carrying the TYPE_MILLI attribute rather than as
     CALL_INSNs; the SEQUENCE/USE/CLOBBER exclusions keep
     get_attr_type from being asked about patterns it can't handle --
     confirm against pa.md's attribute definitions.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
	   && get_attr_type (insn) != TYPE_DYNCALL)
	  || (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) != SEQUENCE
	      && GET_CODE (PATTERN (insn)) != USE
	      && GET_CODE (PATTERN (insn)) != CLOBBER
	      && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}
5841 /* Restore any INSN_CODEs for insns with unscaled indexed addresses since
5842 the INSN_CODE might be clobberd by rerecognition triggered by reorg. */
5844 static void
5845 restore_unscaled_index_insn_codes (insns)
5846 rtx insns;
5848 rtx insn;
5850 for (insn = insns; insn; insn = NEXT_INSN (insn))
5852 if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid
5853 && unscaled_index_insn_codes[INSN_UID (insn)] != -1)
5854 INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)];
/* Severe braindamage:

   On the PA, address computations within MEM expressions are not
   commutative because of the implicit space register selection
   from the base register (instead of the entire effective address).

   Because of this mis-feature we have to know which register in a reg+reg
   address is the base and which is the index.

   Before reload, the base can be identified by REGNO_POINTER_FLAG.  We use
   this to force base + index addresses to match a different insn than
   index + base addresses.

   We assume that no pass during or after reload creates new unscaled indexed
   addresses, so any unscaled indexed address we find after reload must have
   at one time been recognized as a base + index or index + base and we accept
   any register as a base register.

   This scheme assumes that no pass during/after reload will rerecognize an
   insn with an unscaled indexed address.  This failed due to a reorg call
   to rerecognize certain insns.

   So, we record if an insn uses an unscaled indexed address and which
   register is the base (via recording of the INSN_CODE for such insns).

   Just before we output code for the function, we make sure all the insns
   using unscaled indexed addresses have the same INSN_CODE as they did
   immediately before delay slot scheduling.

   This is extremely gross.  Long term, I'd like to be able to look at
   REG_POINTER_FLAG to handle these kinds of problems.  */

static void
record_unscaled_index_insn_codes (insns)
     rtx insns;
{
  rtx insn;

  /* Size the table by the highest insn uid currently in use, and fill
     it with -1 ("nothing recorded"): memset with -1 writes 0xff into
     every byte, which reads back as -1 in each int slot.  */
  max_unscaled_index_insn_codes_uid = get_max_uid ();
  unscaled_index_insn_codes
    = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int));
  memset (unscaled_index_insn_codes, -1,
	  max_unscaled_index_insn_codes_uid * sizeof (int));

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      rtx set = single_set (insn);
      rtx mem = NULL_RTX;

      /* Ignore anything that isn't a normal SET.  */
      if (set == NULL_RTX)
	continue;

      /* No insns can have more than one MEM.  */
      if (GET_CODE (SET_SRC (set)) == MEM)
	mem = SET_SRC (set);

      if (GET_CODE (SET_DEST (set)) == MEM)
	mem = SET_DEST (set);

      /* If neither operand is a mem, then there's nothing to do.  */
      if (mem == NULL_RTX)
	continue;

      /* Only reg+reg (unscaled indexed) addresses are of interest;
	 they appear as a PLUS of two registers.  */
      if (GET_CODE (XEXP (mem, 0)) != PLUS)
	continue;

      /* If both are REGs (or SUBREGs), then record the insn code for
	 this insn.  */
      if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1)))
	unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn);
    }
}
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- eg each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot)).  We
   identify jumps in switch tables by marking the SET with DImode.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes, first it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */

pa_reorg (insns)
     rtx insns;
{
  rtx insn;

  /* Keep track of which insns have unscaled indexed addresses, and which
     register is the base address in such insns.  */
  record_unscaled_index_insn_codes (insns);

  remove_useless_addtr_insns (insns, 1);

  pa_combine_instructions (get_insns ());

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, tmp, location;
	  unsigned int length, i;

	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Emit marker for the beginning of the branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);

	  /* For an ADDR_DIFF_VEC the label vector lives in element 1
	     of the pattern, for an ADDR_VEC in element 0 -- hence the
	     boolean second argument to XVECLEN below.  */
	  pattern = PATTERN (insn);
	  location = PREV_INSN (insn);
	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

	  for (i = 0; i < length; i++)
	    {
	      /* Emit a label before each jump to keep jump.c from
		 removing this code.  */
	      tmp = gen_label_rtx ();
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);

	      if (GET_CODE (pattern) == ADDR_VEC)
		{
		  /* Emit the jump itself.  */
		  tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
		  tmp = emit_jump_insn_after (tmp, location);
		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
		  LABEL_NUSES (JUMP_LABEL (tmp))++;
		  location = NEXT_INSN (location);
		}
	      else
		{
		  /* Emit the jump itself.  */
		  tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 1, i), 0));
		  tmp = emit_jump_insn_after (tmp, location);
		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
		  LABEL_NUSES (JUMP_LABEL (tmp))++;
		  location = NEXT_INSN (location);
		}

	      /* Emit a BARRIER after the jump.  */
	      emit_barrier_after (location);
	      location = NEXT_INSN (location);
	    }

	  /* Emit marker for the end of the branch table.  */
	  emit_insn_before (gen_end_brtab (), location);
	  location = NEXT_INSN (location);
	  emit_barrier_after (location);

	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
	  delete_insn (insn);
	}
    }
  else
    {
      /* Still need an end_brtab insn.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	{
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
	}
    }
}
/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

   * addb can add two registers or a register and a small integer
   and jump to a nearby (+-8k) location.  Normally the jump to the
   nearby location is conditional on the result of the add, but by
   using the "true" condition we can make the jump unconditional.
   Thus addb can perform two independent operations in one insn.

   * movb is similar to addb in that it can perform a reg->reg
   or small immediate->reg copy and jump to a nearby (+-8k location).

   * fmpyadd and fmpysub can perform a FP multiply and either an
   FP add or FP sub if the operands of the multiply and add/sub are
   independent (there are other minor restrictions).  Note both
   the fmpy and fadd/fsub can in theory move to better spots according
   to data dependencies, but for now we require the fmpy stay at a
   fixed location.

   * Many of the memory operations can perform pre & post updates
   of index registers.  GCC's pre/post increment/decrement addressing
   is far too simple to take advantage of all the possibilities.  This
   pass may not be suitable since those insns may not be independent.

   * comclr can compare two ints or an int and a register, nullify
   the following instruction and zero some other register.  This
   is more difficult to use as it's harder to find an insn which
   will generate a comclr than finding something like an unconditional
   branch.  (conditional moves & long branches create comclr insns).

   * Most arithmetic operations can conditionally skip the next
   instruction.  They can be viewed as "perform this operation
   and conditionally jump to this nearby location" (where nearby
   is an insns away).  These are difficult to use due to the
   branch length restrictions.  */

pa_combine_instructions (insns)
     rtx insns;
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.

     NEW is a scratch insn holding a two-element PARALLEL; it is
     reused by pa_can_combine_p to test whether each candidate pair
     is recognizable.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  (&& binds tighter than ||
	 here, so non-insns are rejected before PATTERN is consulted.)  */
      if (GET_CODE (anchor) != INSN
	  && GET_CODE (anchor) != JUMP_INSN
	  && GET_CODE (anchor) != CALL_INSN
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx floater;

	  /* First scan backwards from the anchor for a floater.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		{
		  floater = NULL_RTX;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  /* An ADDMOVE floater is either an add (PLUS source,
		     two inputs) or a plain copy (single input).  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		    {
		      floater = NULL_RTX;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  (reversed == 1 since the
			 floater follows the anchor here.)  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN(floater)),0),
					    XEXP(SET_SRC(PATTERN(floater)),1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx_PARALLEL
				(VOIDmode,
				 gen_rtvec (2, PATTERN (anchor),
					    PATTERN (floater))),
				anchor);

	      /* Turn the anchor into a deleted note rather than
		 removing it from the chain outright.  */
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      rtx temp;
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp
		= emit_jump_insn_before (gen_rtx_PARALLEL
					 (VOIDmode,
					  gen_rtvec (2, PATTERN (anchor),
						     PATTERN (floater))),
					 anchor);

	      /* Preserve the jump target on the combined insn.  */
	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
/* Return 1 if ANCHOR and FLOATER may be combined into a single
   two-output insn, else 0.

   NEW is a scratch insn (a two-element PARALLEL) provided by the
   caller so we don't allocate on every query.  REVERSED is nonzero
   when FLOATER follows ANCHOR in the insn chain.  DEST is FLOATER's
   output; SRC1/SRC2 are its inputs (possibly the same rtx).  */

pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
     rtx new, anchor, floater;
     int reversed;
     rtx dest, src1, src2;
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  /* Force rerecognition; NEW's cached code is stale from the
     previous query.  */
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || !constrain_operands (insn_code_number, 1))
    return 0;

  /* Orient the range scan so START precedes END in the insn chain.  */
  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}