* tcoff.h (USER_LABEL_PREFIX): Make it empty to match coff.h.
[official-gcc.git] / gcc / config / pa / pa.c
blob1100e29dd2acb62384323d274f5f5302ebdcf748
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 93, 94, 95, 96, 97, 1998 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
25 #include "rtl.h"
26 #include "regs.h"
27 #include "hard-reg-set.h"
28 #include "real.h"
29 #include "insn-config.h"
30 #include "conditions.h"
31 #include "insn-flags.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "reload.h"
37 #include "c-tree.h"
38 #include "expr.h"
39 #include "obstack.h"
40 #include "toplev.h"
/* Forward declarations for helpers defined later in this file.  */
42 static void restore_unscaled_index_insn_codes PROTO((rtx));
43 static void record_unscaled_index_insn_codes PROTO((rtx));
44 static void pa_combine_instructions PROTO((rtx));
45 static int pa_can_combine_p PROTO((rtx, rtx, rtx, int, rtx, rtx, rtx));
46 static int forward_branch_p PROTO((rtx));
47 static int shadd_constant_p PROTO((int));
49 /* Save the operands last given to a compare for use when we
50 generate a scc or bcc insn. */
52 rtx hppa_compare_op0, hppa_compare_op1;
53 enum cmp_type hppa_branch_type;
55 /* Which cpu we are scheduling for. */
56 enum processor_type pa_cpu;
58 /* String to hold which cpu we are scheduling for. */
59 char *pa_cpu_string;
61 /* Set by the FUNCTION_PROFILER macro. */
62 int hp_profile_labelno;
64 /* Counts for the number of callee-saved general and floating point
65 registers which were saved by the current function's prologue. */
66 static int gr_saved, fr_saved;
68 /* Whether or not the current function uses an out-of-line prologue
69 and epilogue. */
70 static int out_of_line_prologue_epilogue;
/* Old-style (unprototyped) forward declaration; defined later in this file.  */
72 static rtx find_addr_reg ();
74 /* Keep track of the number of bytes we have output in the CODE subspaces
75 during this compilation so we'll know when to emit inline long-calls. */
77 unsigned int total_code_bytes;
79 /* Variables to handle plabels that we discover are necessary at assembly
80 output time.  They are output after the current function. */
/* NOTE(review): the opening brace of this struct (original line 83) appears to
   have been dropped by the copy that produced this file -- restore it when
   merging.  */
82 struct deferred_plabel
84   rtx internal_label;
85   char *name;
86 } *deferred_plabels = 0;
87 int n_deferred_plabels = 0;
89 /* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which
90 uses an unscaled indexed address before delay slot scheduling. */
91 static int *unscaled_index_insn_codes;
93 /* Upper bound for the array. */
94 static int max_unscaled_index_insn_codes_uid;
96 void
97 override_options ()
99 /* Default to 7100 scheduling. If the 7100LC scheduling ever
100 gets reasonably tuned, it should be the default since that
101 what most PAs sold now are. */
102 if (pa_cpu_string == NULL
103 || ! strcmp (pa_cpu_string, "7100"))
105 pa_cpu_string = "7100";
106 pa_cpu = PROCESSOR_7100;
108 else if (! strcmp (pa_cpu_string, "700"))
110 pa_cpu_string = "700";
111 pa_cpu = PROCESSOR_700;
113 else if (! strcmp (pa_cpu_string, "7100LC"))
115 pa_cpu_string = "7100LC";
116 pa_cpu = PROCESSOR_7100LC;
118 else if (! strcmp (pa_cpu_string, "7200"))
120 pa_cpu_string = "7200";
121 pa_cpu = PROCESSOR_7200;
123 else
125 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC and 7200\n", pa_cpu_string);
128 if (flag_pic && TARGET_PORTABLE_RUNTIME)
130 warning ("PIC code generation is not supported in the portable runtime model\n");
133 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
135 warning ("PIC code generation is not compatible with fast indirect calls\n");
138 if (flag_pic && profile_flag)
140 warning ("PIC code generation is not compatible with profiling\n");
143 if (TARGET_SPACE && (flag_pic || profile_flag))
145 warning ("Out of line entry/exit sequences are not compatible\n");
146 warning ("with PIC or profiling\n");
149 if (! TARGET_GAS && write_symbols != NO_DEBUG)
151 warning ("-g is only supported when using GAS on this processor,");
152 warning ("-g option disabled.");
153 write_symbols = NO_DEBUG;
158 /* Return non-zero only if OP is a register of mode MODE,
159 or CONST0_RTX. */
161 reg_or_0_operand (op, mode)
162 rtx op;
163 enum machine_mode mode;
165 return (op == CONST0_RTX (mode) || register_operand (op, mode));
168 /* Return non-zero if OP is suitable for use in a call to a named
169 function.
171 (???) For 2.5 try to eliminate either call_operand_address or
172 function_label_operand, they perform very similar functions. */
174 call_operand_address (op, mode)
175 rtx op;
176 enum machine_mode mode ATTRIBUTE_UNUSED;
178 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
181 /* Return 1 if X contains a symbolic expression. We know these
182 expressions will have one of a few well defined forms, so
183 we need only check those forms. */
185 symbolic_expression_p (x)
186 register rtx x;
189 /* Strip off any HIGH. */
190 if (GET_CODE (x) == HIGH)
191 x = XEXP (x, 0);
193 return (symbolic_operand (x, VOIDmode));
197 symbolic_operand (op, mode)
198 register rtx op;
199 enum machine_mode mode ATTRIBUTE_UNUSED;
201 switch (GET_CODE (op))
203 case SYMBOL_REF:
204 case LABEL_REF:
205 return 1;
206 case CONST:
207 op = XEXP (op, 0);
208 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
209 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
210 && GET_CODE (XEXP (op, 1)) == CONST_INT);
211 default:
212 return 0;
216 /* Return truth value of statement that OP is a symbolic memory
217 operand of mode MODE. */
220 symbolic_memory_operand (op, mode)
221 rtx op;
222 enum machine_mode mode ATTRIBUTE_UNUSED;
224 if (GET_CODE (op) == SUBREG)
225 op = SUBREG_REG (op);
226 if (GET_CODE (op) != MEM)
227 return 0;
228 op = XEXP (op, 0);
229 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
230 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
233 /* Return 1 if the operand is either a register or a memory operand that is
234 not symbolic. */
237 reg_or_nonsymb_mem_operand (op, mode)
238 register rtx op;
239 enum machine_mode mode;
241 if (register_operand (op, mode))
242 return 1;
244 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
245 return 1;
247 return 0;
250 /* Return 1 if the operand is either a register, zero, or a memory operand
251 that is not symbolic. */
254 reg_or_0_or_nonsymb_mem_operand (op, mode)
255 register rtx op;
256 enum machine_mode mode;
258 if (register_operand (op, mode))
259 return 1;
261 if (op == CONST0_RTX (mode))
262 return 1;
264 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
265 return 1;
267 return 0;
270 /* Accept any constant that can be moved in one instructions into a
271 general register. */
273 cint_ok_for_move (intval)
274 HOST_WIDE_INT intval;
276 /* OK if ldo, ldil, or zdepi, can be used. */
277 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
278 || zdepi_cint_p (intval));
281 /* Accept anything that can be moved in one instruction into a general
282 register. */
284 move_operand (op, mode)
285 rtx op;
286 enum machine_mode mode;
288 if (register_operand (op, mode))
289 return 1;
291 if (GET_CODE (op) == CONST_INT)
292 return cint_ok_for_move (INTVAL (op));
294 if (GET_CODE (op) == SUBREG)
295 op = SUBREG_REG (op);
296 if (GET_CODE (op) != MEM)
297 return 0;
299 op = XEXP (op, 0);
300 if (GET_CODE (op) == LO_SUM)
301 return (register_operand (XEXP (op, 0), Pmode)
302 && CONSTANT_P (XEXP (op, 1)));
304 /* Since move_operand is only used for source operands, we can always
305 allow scaled indexing! */
306 if (! TARGET_DISABLE_INDEXING
307 && GET_CODE (op) == PLUS
308 && ((GET_CODE (XEXP (op, 0)) == MULT
309 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
310 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
311 && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
312 && GET_CODE (XEXP (op, 1)) == REG)
313 || (GET_CODE (XEXP (op, 1)) == MULT
314 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
315 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
316 && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
317 && GET_CODE (XEXP (op, 0)) == REG)))
318 return 1;
320 return memory_address_p (mode, op);
323 /* Accept REG and any CONST_INT that can be moved in one instruction into a
324 general register. */
326 reg_or_cint_move_operand (op, mode)
327 rtx op;
328 enum machine_mode mode;
330 if (register_operand (op, mode))
331 return 1;
333 if (GET_CODE (op) == CONST_INT)
334 return cint_ok_for_move (INTVAL (op));
336 return 0;
340 pic_label_operand (op, mode)
341 rtx op;
342 enum machine_mode mode ATTRIBUTE_UNUSED;
344 if (!flag_pic)
345 return 0;
347 switch (GET_CODE (op))
349 case LABEL_REF:
350 return 1;
351 case CONST:
352 op = XEXP (op, 0);
353 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
354 && GET_CODE (XEXP (op, 1)) == CONST_INT);
355 default:
356 return 0;
361 fp_reg_operand (op, mode)
362 rtx op;
363 enum machine_mode mode ATTRIBUTE_UNUSED;
365 return reg_renumber && FP_REG_P (op);
370 /* Return truth value of whether OP can be used as an operand in a
371 three operand arithmetic insn that accepts registers of mode MODE
372 or 14-bit signed integers. */
374 arith_operand (op, mode)
375 rtx op;
376 enum machine_mode mode;
378 return (register_operand (op, mode)
379 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
382 /* Return truth value of whether OP can be used as an operand in a
383 three operand arithmetic insn that accepts registers of mode MODE
384 or 11-bit signed integers. */
386 arith11_operand (op, mode)
387 rtx op;
388 enum machine_mode mode;
390 return (register_operand (op, mode)
391 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
394 /* A constant integer suitable for use in a PRE_MODIFY memory
395 reference. */
397 pre_cint_operand (op, mode)
398 rtx op;
399 enum machine_mode mode ATTRIBUTE_UNUSED;
401 return (GET_CODE (op) == CONST_INT
402 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
405 /* A constant integer suitable for use in a POST_MODIFY memory
406 reference. */
408 post_cint_operand (op, mode)
409 rtx op;
410 enum machine_mode mode ATTRIBUTE_UNUSED;
412 return (GET_CODE (op) == CONST_INT
413 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
417 arith_double_operand (op, mode)
418 rtx op;
419 enum machine_mode mode;
421 return (register_operand (op, mode)
422 || (GET_CODE (op) == CONST_DOUBLE
423 && GET_MODE (op) == mode
424 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
425 && ((CONST_DOUBLE_HIGH (op) >= 0)
426 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
429 /* Return truth value of whether OP is a integer which fits the
430 range constraining immediate operands in three-address insns, or
431 is an integer register. */
434 ireg_or_int5_operand (op, mode)
435 rtx op;
436 enum machine_mode mode ATTRIBUTE_UNUSED;
438 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
439 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
442 /* Return truth value of whether OP is a integer which fits the
443 range constraining immediate operands in three-address insns. */
446 int5_operand (op, mode)
447 rtx op;
448 enum machine_mode mode ATTRIBUTE_UNUSED;
450 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
454 uint5_operand (op, mode)
455 rtx op;
456 enum machine_mode mode ATTRIBUTE_UNUSED;
458 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
462 int11_operand (op, mode)
463 rtx op;
464 enum machine_mode mode ATTRIBUTE_UNUSED;
466 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
470 uint32_operand (op, mode)
471 rtx op;
472 enum machine_mode mode ATTRIBUTE_UNUSED;
474 #if HOST_BITS_PER_WIDE_INT > 32
475 /* All allowed constants will fit a CONST_INT. */
476 return (GET_CODE (op) == CONST_INT
477 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
478 #else
479 return (GET_CODE (op) == CONST_INT
480 || (GET_CODE (op) == CONST_DOUBLE
481 && CONST_DOUBLE_HIGH (op) == 0));
482 #endif
486 arith5_operand (op, mode)
487 rtx op;
488 enum machine_mode mode;
490 return register_operand (op, mode) || int5_operand (op, mode);
493 /* True iff zdepi can be used to generate this CONST_INT. */
495 zdepi_cint_p (x)
496 unsigned HOST_WIDE_INT x;
498 unsigned HOST_WIDE_INT lsb_mask, t;
500 /* This might not be obvious, but it's at least fast.
501 This function is critical; we don't have the time loops would take. */
502 lsb_mask = x & -x;
503 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
504 /* Return true iff t is a power of two. */
505 return ((t & (t - 1)) == 0);
508 /* True iff depi or extru can be used to compute (reg & mask).
509 Accept bit pattern like these:
510 0....01....1
511 1....10....0
512 1..10..01..1 */
514 and_mask_p (mask)
515 unsigned HOST_WIDE_INT mask;
517 mask = ~mask;
518 mask += mask & -mask;
519 return (mask & (mask - 1)) == 0;
522 /* True iff depi or extru can be used to compute (reg & OP). */
524 and_operand (op, mode)
525 rtx op;
526 enum machine_mode mode;
528 return (register_operand (op, mode)
529 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
532 /* True iff depi can be used to compute (reg | MASK). */
534 ior_mask_p (mask)
535 unsigned HOST_WIDE_INT mask;
537 mask += mask & -mask;
538 return (mask & (mask - 1)) == 0;
541 /* True iff depi can be used to compute (reg | OP). */
543 ior_operand (op, mode)
544 rtx op;
545 enum machine_mode mode ATTRIBUTE_UNUSED;
547 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
551 lhs_lshift_operand (op, mode)
552 rtx op;
553 enum machine_mode mode;
555 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
558 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
559 Such values can be the left hand side x in (x << r), using the zvdepi
560 instruction. */
562 lhs_lshift_cint_operand (op, mode)
563 rtx op;
564 enum machine_mode mode ATTRIBUTE_UNUSED;
566 unsigned HOST_WIDE_INT x;
567 if (GET_CODE (op) != CONST_INT)
568 return 0;
569 x = INTVAL (op) >> 4;
570 return (x & (x + 1)) == 0;
574 arith32_operand (op, mode)
575 rtx op;
576 enum machine_mode mode;
578 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
582 pc_or_label_operand (op, mode)
583 rtx op;
584 enum machine_mode mode ATTRIBUTE_UNUSED;
586 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
589 /* Legitimize PIC addresses. If the address is already
590 position-independent, we return ORIG. Newly generated
591 position-independent addresses go to REG. If we need more
592 than one register, we lose. */
/* NOTE(review): the return-type line (original line 594, presumably "rtx")
   appears to have been lost in this copy of the file; the function returns
   an rtx (reg or pic_ref below) -- restore when merging.  */
595 legitimize_pic_address (orig, mode, reg)
596 rtx orig, reg;
597 enum machine_mode mode ATTRIBUTE_UNUSED;
599 rtx pic_ref = orig;
601 /* Labels need special handling. */
/* NOTE(review): pic_label_operand is defined as (op, mode) elsewhere in this
   file, but is called here with a single argument -- confirm against the
   predicate's definition.  */
602 if (pic_label_operand (orig))
604 emit_insn (gen_pic_load_label (reg, orig));
605 current_function_uses_pic_offset_table = 1;
606 return reg;
/* A bare symbol: load its address through the PIC offset table.  */
608 if (GET_CODE (orig) == SYMBOL_REF)
610 if (reg == 0)
611 abort ();
/* flag_pic == 2 is the long-form (2-insn) PIC sequence.  */
613 if (flag_pic == 2)
615 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
616 pic_ref
617 = gen_rtx_MEM (Pmode,
618 gen_rtx_LO_SUM (Pmode, reg,
619 gen_rtx_UNSPEC (SImode,
620 gen_rtvec (1, orig),
621 0)));
623 else
624 pic_ref = gen_rtx_MEM (Pmode,
625 gen_rtx_PLUS (Pmode,
626 pic_offset_table_rtx, orig));
627 current_function_uses_pic_offset_table = 1;
628 RTX_UNCHANGING_P (pic_ref) = 1;
629 emit_move_insn (reg, pic_ref);
630 return reg;
/* A CONST: legitimize the (plus symbol offset) inside it piecewise.  */
632 else if (GET_CODE (orig) == CONST)
634 rtx base;
/* Already based on the PIC offset table -- nothing to do.  */
636 if (GET_CODE (XEXP (orig, 0)) == PLUS
637 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
638 return orig;
640 if (reg == 0)
641 abort ();
643 if (GET_CODE (XEXP (orig, 0)) == PLUS)
645 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
646 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
647 base == reg ? 0 : reg);
649 else abort ();
/* Small constant offsets can be folded back into the address.  */
650 if (GET_CODE (orig) == CONST_INT)
652 if (INT_14_BITS (orig))
653 return plus_constant_for_output (base, INTVAL (orig));
654 orig = force_reg (Pmode, orig);
656 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
657 /* Likewise, should we set special REG_NOTEs here? */
659 return pic_ref;
662 /* Try machine-dependent ways of modifying an illegitimate address
663 to be legitimate. If we find one, return the new, valid address.
664 This macro is used in only one place: `memory_address' in explow.c.
666 OLDX is the address as it was before break_out_memory_refs was called.
667 In some cases it is useful to look at this to decide what needs to be done.
669 MODE and WIN are passed so that this macro can use
670 GO_IF_LEGITIMATE_ADDRESS.
672 It is always safe for this macro to do nothing. It exists to recognize
673 opportunities to optimize the output.
675 For the PA, transform:
677 memory(X + <large int>)
679 into:
681 if (<large int> & mask) >= 16
682 Y = (<large int> & ~mask) + mask + 1 Round up.
683 else
684 Y = (<large int> & ~mask) Round down.
685 Z = X + Y
686 memory (Z + (<large int> - Y));
688 This is for CSE to find several similar references, and only use one Z.
690 X can either be a SYMBOL_REF or REG, but because combine can not
691 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
692 D will not fit in 14 bits.
694 MODE_FLOAT references allow displacements which fit in 5 bits, so use
695 0x1f as the mask.
697 MODE_INT references allow displacements which fit in 14 bits, so use
698 0x3fff as the mask.
700 This relies on the fact that most mode MODE_FLOAT references will use FP
701 registers and most mode MODE_INT references will use integer registers.
702 (In the rare case of an FP register used in an integer MODE, we depend
703 on secondary reloads to clean things up.)
706 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
707 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
708 addressing modes to be used).
710 Put X and Z into registers. Then put the entire expression into
711 a register. */
/* See the large comment above for the full contract of this function
   (LEGITIMIZE_ADDRESS for the PA).  Returns a legitimate address rtx,
   or ORIG unchanged when no rewrite applies.  */
714 hppa_legitimize_address (x, oldx, mode)
715 rtx x, oldx ATTRIBUTE_UNUSED;
716 enum machine_mode mode;
718 rtx orig = x;
/* PIC addresses are handled entirely by legitimize_pic_address.  */
720 if (flag_pic)
721 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
723 /* Strip off CONST. */
724 if (GET_CODE (x) == CONST)
725 x = XEXP (x, 0);
727 /* Special case. Get the SYMBOL_REF into a register and use indexing.
728 That should always be safe. */
729 if (GET_CODE (x) == PLUS
730 && GET_CODE (XEXP (x, 0)) == REG
731 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
733 rtx reg = force_reg (SImode, XEXP (x, 1));
734 return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0)));
737 /* Note we must reject symbols which represent function addresses
738 since the assembler/linker can't handle arithmetic on plabels. */
739 if (GET_CODE (x) == PLUS
740 && GET_CODE (XEXP (x, 1)) == CONST_INT
741 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
742 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
743 || GET_CODE (XEXP (x, 0)) == REG))
745 rtx int_part, ptr_reg;
746 int newoffset;
747 int offset = INTVAL (XEXP (x, 1));
/* Displacement field is 5 bits for FP modes, 14 bits otherwise.  */
748 int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
750 /* Choose which way to round the offset. Round up if we
751 are >= halfway to the next boundary. */
752 if ((offset & mask) >= ((mask + 1) / 2))
753 newoffset = (offset & ~ mask) + mask + 1;
754 else
755 newoffset = (offset & ~ mask);
757 /* If the newoffset will not fit in 14 bits (ldo), then
758 handling this would take 4 or 5 instructions (2 to load
759 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
760 add the new offset and the SYMBOL_REF.) Combine can
761 not handle 4->2 or 5->2 combinations, so do not create
762 them. */
763 if (! VAL_14_BITS_P (newoffset)
764 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
766 rtx const_part
767 = gen_rtx_CONST (VOIDmode, gen_rtx_PLUS (Pmode,
768 XEXP (x, 0),
769 GEN_INT (newoffset)));
770 rtx tmp_reg
771 = force_reg (Pmode,
772 gen_rtx_HIGH (Pmode, const_part));
773 ptr_reg
774 = force_reg (Pmode,
775 gen_rtx_LO_SUM (Pmode, tmp_reg, const_part));
777 else
779 if (! VAL_14_BITS_P (newoffset))
780 int_part = force_reg (Pmode, GEN_INT (newoffset));
781 else
782 int_part = GEN_INT (newoffset);
784 ptr_reg = force_reg (Pmode,
785 gen_rtx_PLUS (Pmode,
786 force_reg (Pmode, XEXP (x, 0)),
787 int_part));
789 return plus_constant (ptr_reg, offset - newoffset);
792 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
794 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
795 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
796 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
797 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
798 || GET_CODE (XEXP (x, 1)) == SUBREG)
799 && GET_CODE (XEXP (x, 1)) != CONST)
801 int val = INTVAL (XEXP (XEXP (x, 0), 1));
802 rtx reg1, reg2;
804 reg1 = XEXP (x, 1);
805 if (GET_CODE (reg1) != REG)
806 reg1 = force_reg (Pmode, force_operand (reg1, 0));
808 reg2 = XEXP (XEXP (x, 0), 0);
809 if (GET_CODE (reg2) != REG)
810 reg2 = force_reg (Pmode, force_operand (reg2, 0));
812 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
813 gen_rtx_MULT (Pmode, reg2,
814 GEN_INT (val)),
815 reg1));
818 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
820 Only do so for floating point modes since this is more speculative
821 and we lose if it's an integer store. */
822 if (GET_CODE (x) == PLUS
823 && GET_CODE (XEXP (x, 0)) == PLUS
824 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
825 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
826 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
827 && (mode == SFmode || mode == DFmode))
830 /* First, try and figure out what to use as a base register. */
831 rtx reg1, reg2, base, idx, orig_base;
833 reg1 = XEXP (XEXP (x, 0), 1);
834 reg2 = XEXP (x, 1);
835 base = NULL_RTX;
836 idx = NULL_RTX;
838 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
839 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
840 know it's a base register below. */
841 if (GET_CODE (reg1) != REG)
842 reg1 = force_reg (Pmode, force_operand (reg1, 0));
844 if (GET_CODE (reg2) != REG)
845 reg2 = force_reg (Pmode, force_operand (reg2, 0));
847 /* Figure out what the base and index are. */
849 if (GET_CODE (reg1) == REG
850 && REGNO_POINTER_FLAG (REGNO (reg1)))
852 base = reg1;
853 orig_base = XEXP (XEXP (x, 0), 1);
854 idx = gen_rtx_PLUS (Pmode,
855 gen_rtx_MULT (Pmode,
856 XEXP (XEXP (XEXP (x, 0), 0), 0),
857 XEXP (XEXP (XEXP (x, 0), 0), 1)),
858 XEXP (x, 1));
860 else if (GET_CODE (reg2) == REG
861 && REGNO_POINTER_FLAG (REGNO (reg2)))
863 base = reg2;
864 orig_base = XEXP (x, 1);
865 idx = XEXP (x, 0);
/* Neither operand was flagged as a pointer -- give up on this form.  */
868 if (base == 0)
869 return orig;
871 /* If the index adds a large constant, try to scale the
872 constant so that it can be loaded with only one insn. */
873 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
874 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
875 / INTVAL (XEXP (XEXP (idx, 0), 1)))
876 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
878 /* Divide the CONST_INT by the scale factor, then add it to A. */
879 int val = INTVAL (XEXP (idx, 1));
881 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
882 reg1 = XEXP (XEXP (idx, 0), 0);
883 if (GET_CODE (reg1) != REG)
884 reg1 = force_reg (Pmode, force_operand (reg1, 0));
886 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
888 /* We can now generate a simple scaled indexed address. */
889 return force_reg (Pmode,
890 gen_rtx_PLUS (Pmode,
891 gen_rtx_MULT (Pmode, reg1,
892 XEXP (XEXP (idx, 0), 1)),
893 base));
896 /* If B + C is still a valid base register, then add them. */
897 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
898 && INTVAL (XEXP (idx, 1)) <= 4096
899 && INTVAL (XEXP (idx, 1)) >= -4096)
901 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
902 rtx reg1, reg2;
904 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
906 reg2 = XEXP (XEXP (idx, 0), 0);
907 if (GET_CODE (reg2) != CONST_INT)
908 reg2 = force_reg (Pmode, force_operand (reg2, 0));
910 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
911 gen_rtx_MULT (Pmode, reg2,
912 GEN_INT (val)),
913 reg1));
916 /* Get the index into a register, then add the base + index and
917 return a register holding the result. */
919 /* First get A into a register. */
920 reg1 = XEXP (XEXP (idx, 0), 0);
921 if (GET_CODE (reg1) != REG)
922 reg1 = force_reg (Pmode, force_operand (reg1, 0));
924 /* And get B into a register. */
925 reg2 = XEXP (idx, 1);
926 if (GET_CODE (reg2) != REG)
927 reg2 = force_reg (Pmode, force_operand (reg2, 0));
929 reg1 = force_reg (Pmode,
930 gen_rtx_PLUS (Pmode,
931 gen_rtx_MULT (Pmode, reg1,
932 XEXP (XEXP (idx, 0), 1)),
933 reg2));
935 /* Add the result to our base register and return. */
936 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
940 /* Uh-oh. We might have an address for x[n-100000]. This needs
941 special handling to avoid creating an indexed memory address
942 with x-100000 as the base.
944 If the constant part is small enough, then it's still safe because
945 there is a guard page at the beginning and end of the data segment.
947 Scaled references are common enough that we want to try and rearrange the
948 terms so that we can use indexing for these addresses too. Only
949 do the optimization for floatint point modes. */
951 if (GET_CODE (x) == PLUS
952 && symbolic_expression_p (XEXP (x, 1)))
954 /* Ugly. We modify things here so that the address offset specified
955 by the index expression is computed first, then added to x to form
956 the entire address. */
958 rtx regx1, regx2, regy1, regy2, y;
960 /* Strip off any CONST. */
961 y = XEXP (x, 1);
962 if (GET_CODE (y) == CONST)
963 y = XEXP (y, 0);
965 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
967 /* See if this looks like
968 (plus (mult (reg) (shadd_const))
969 (const (plus (symbol_ref) (const_int))))
971 Where const_int is small. In that case the const
972 expression is a valid pointer for indexing.
974 If const_int is big, but can be divided evenly by shadd_const
975 and added to (reg). This allows more scaled indexed addresses. */
976 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
977 && GET_CODE (XEXP (x, 0)) == MULT
978 && GET_CODE (XEXP (y, 1)) == CONST_INT
979 && INTVAL (XEXP (y, 1)) >= -4096
980 && INTVAL (XEXP (y, 1)) <= 4095
981 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
982 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
984 int val = INTVAL (XEXP (XEXP (x, 0), 1));
985 rtx reg1, reg2;
987 reg1 = XEXP (x, 1);
988 if (GET_CODE (reg1) != REG)
989 reg1 = force_reg (Pmode, force_operand (reg1, 0));
991 reg2 = XEXP (XEXP (x, 0), 0);
992 if (GET_CODE (reg2) != REG)
993 reg2 = force_reg (Pmode, force_operand (reg2, 0));
995 return force_reg (Pmode,
996 gen_rtx_PLUS (Pmode,
997 gen_rtx_MULT (Pmode, reg2,
998 GEN_INT (val)),
999 reg1));
1001 else if ((mode == DFmode || mode == SFmode)
1002 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1003 && GET_CODE (XEXP (x, 0)) == MULT
1004 && GET_CODE (XEXP (y, 1)) == CONST_INT
1005 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1007 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1009 regx1
1010 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1011 / INTVAL (XEXP (XEXP (x, 0), 1))));
1012 regx2 = XEXP (XEXP (x, 0), 0);
1013 if (GET_CODE (regx2) != REG)
1014 regx2 = force_reg (Pmode, force_operand (regx2, 0));
/* GET_CODE (y) is PLUS or MINUS here (checked above), so gen_rtx_fmt_ee
   rebuilds the same operation on the scaled pieces.  */
1015 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1016 regx2, regx1));
1017 return force_reg (Pmode,
1018 gen_rtx_PLUS (Pmode,
1019 gen_rtx_MULT (Pmode, regx2,
1020 XEXP (XEXP (x, 0),
1021 1)),
1022 force_reg (Pmode, XEXP (y, 0))));
1024 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1025 && INTVAL (XEXP (y, 1)) >= -4096
1026 && INTVAL (XEXP (y, 1)) <= 4095)
1028 /* This is safe because of the guard page at the
1029 beginning and end of the data space. Just
1030 return the original address. */
1031 return orig;
1033 else
1035 /* Doesn't look like one we can optimize. */
1036 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1037 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1038 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1039 regx1 = force_reg (Pmode,
1040 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1041 regx1, regy2));
1042 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1047 return orig;
1050 /* For the HPPA, REG and REG+CONST is cost 0
1051 and addresses involving symbolic constants are cost 2.
1053 PIC addresses are very expensive.
1055 It is no coincidence that this has the same structure
1056 as GO_IF_LEGITIMATE_ADDRESS. */
1058 hppa_address_cost (X)
1059 rtx X;
1061 if (GET_CODE (X) == PLUS)
1062 return 1;
1063 else if (GET_CODE (X) == LO_SUM)
1064 return 1;
1065 else if (GET_CODE (X) == HIGH)
1066 return 2;
1067 return 4;
1070 /* Emit insns to move operands[1] into operands[0].
1072 Return 1 if we have written out everything that needs to be done to
1073 do the move. Otherwise, return 0 and the caller will emit the move
1074 normally. */
1077 emit_move_sequence (operands, mode, scratch_reg)
1078 rtx *operands;
1079 enum machine_mode mode;
1080 rtx scratch_reg;
1082 register rtx operand0 = operands[0];
1083 register rtx operand1 = operands[1];
1084 register rtx tem;
1086 if (reload_in_progress && GET_CODE (operand0) == REG
1087 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1088 operand0 = reg_equiv_mem[REGNO (operand0)];
1089 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1090 && GET_CODE (SUBREG_REG (operand0)) == REG
1091 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1093 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1094 operand0 = alter_subreg (operand0);
1097 if (reload_in_progress && GET_CODE (operand1) == REG
1098 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1099 operand1 = reg_equiv_mem[REGNO (operand1)];
1100 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1101 && GET_CODE (SUBREG_REG (operand1)) == REG
1102 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1104 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1105 operand1 = alter_subreg (operand1);
1108 if (reload_in_progress && GET_CODE (operand0) == MEM
1109 && ((tem = find_replacement (&XEXP (operand0, 0)))
1110 != XEXP (operand0, 0)))
1111 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1112 if (reload_in_progress && GET_CODE (operand1) == MEM
1113 && ((tem = find_replacement (&XEXP (operand1, 0)))
1114 != XEXP (operand1, 0)))
1115 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1117 /* Handle secondary reloads for loads/stores of FP registers from
1118 REG+D addresses where D does not fit in 5 bits, including
1119 (subreg (mem (addr))) cases. */
1120 if (fp_reg_operand (operand0, mode)
1121 && ((GET_CODE (operand1) == MEM
1122 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1123 || ((GET_CODE (operand1) == SUBREG
1124 && GET_CODE (XEXP (operand1, 0)) == MEM
1125 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1126 && scratch_reg)
1128 if (GET_CODE (operand1) == SUBREG)
1129 operand1 = XEXP (operand1, 0);
1131 scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
1133 /* D might not fit in 14 bits either; for such cases load D into
1134 scratch reg. */
1135 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1137 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1138 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1139 SImode,
1140 XEXP (XEXP (operand1, 0), 0),
1141 scratch_reg));
1143 else
1144 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1145 emit_insn (gen_rtx_SET (VOIDmode, operand0, gen_rtx_MEM (mode,
1146 scratch_reg)));
1147 return 1;
1149 else if (fp_reg_operand (operand1, mode)
1150 && ((GET_CODE (operand0) == MEM
1151 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1152 || ((GET_CODE (operand0) == SUBREG)
1153 && GET_CODE (XEXP (operand0, 0)) == MEM
1154 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1155 && scratch_reg)
1157 if (GET_CODE (operand0) == SUBREG)
1158 operand0 = XEXP (operand0, 0);
1160 scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
1161 /* D might not fit in 14 bits either; for such cases load D into
1162 scratch reg. */
1163 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1165 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1166 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1167 0)),
1168 SImode,
1169 XEXP (XEXP (operand0, 0),
1171 scratch_reg));
1173 else
1174 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1175 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1176 operand1));
1177 return 1;
1179 /* Handle secondary reloads for loads of FP registers from constant
1180 expressions by forcing the constant into memory.
1182 use scratch_reg to hold the address of the memory location.
1184 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1185 NO_REGS when presented with a const_int and an register class
1186 containing only FP registers. Doing so unfortunately creates
1187 more problems than it solves. Fix this for 2.5. */
1188 else if (fp_reg_operand (operand0, mode)
1189 && CONSTANT_P (operand1)
1190 && scratch_reg)
1192 rtx xoperands[2];
1194 /* Force the constant into memory and put the address of the
1195 memory location into scratch_reg. */
1196 xoperands[0] = scratch_reg;
1197 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1198 emit_move_sequence (xoperands, Pmode, 0);
1200 /* Now load the destination register. */
1201 emit_insn (gen_rtx_SET (mode, operand0, gen_rtx_MEM (mode, scratch_reg)));
1202 return 1;
1204 /* Handle secondary reloads for SAR. These occur when trying to load
1205 the SAR from memory a FP register, or with a constant. */
1206 else if (GET_CODE (operand0) == REG
1207 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1208 && (GET_CODE (operand1) == MEM
1209 || GET_CODE (operand1) == CONST_INT
1210 || (GET_CODE (operand1) == REG
1211 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1212 && scratch_reg)
1214 /* D might not fit in 14 bits either; for such cases load D into
1215 scratch reg. */
1216 if (GET_CODE (operand1) == MEM
1217 && !memory_address_p (SImode, XEXP (operand1, 0)))
1219 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1220 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1221 0)),
1222 SImode,
1223 XEXP (XEXP (operand1, 0),
1225 scratch_reg));
1226 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1),
1227 scratch_reg));
1229 else
1230 emit_move_insn (scratch_reg, operand1);
1231 emit_move_insn (operand0, scratch_reg);
1232 return 1;
1234 /* Handle most common case: storing into a register. */
1235 else if (register_operand (operand0, mode))
1237 if (register_operand (operand1, mode)
1238 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1239 || (operand1 == CONST0_RTX (mode))
1240 || (GET_CODE (operand1) == HIGH
1241 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1242 /* Only `general_operands' can come here, so MEM is ok. */
1243 || GET_CODE (operand1) == MEM)
1245 /* Run this case quickly. */
1246 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1247 return 1;
1250 else if (GET_CODE (operand0) == MEM)
1252 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1253 && !(reload_in_progress || reload_completed))
1255 rtx temp = gen_reg_rtx (DFmode);
1257 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1258 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1259 return 1;
1261 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1263 /* Run this case quickly. */
1264 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1265 return 1;
1267 if (! (reload_in_progress || reload_completed))
1269 operands[0] = validize_mem (operand0);
1270 operands[1] = operand1 = force_reg (mode, operand1);
1274 /* Simplify the source if we need to.
1275 Note we do have to handle function labels here, even though we do
1276 not consider them legitimate constants. Loop optimizations can
1277 call the emit_move_xxx with one as a source. */
1278 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1279 || function_label_operand (operand1, mode)
1280 || (GET_CODE (operand1) == HIGH
1281 && symbolic_operand (XEXP (operand1, 0), mode)))
1283 int ishighonly = 0;
1285 if (GET_CODE (operand1) == HIGH)
1287 ishighonly = 1;
1288 operand1 = XEXP (operand1, 0);
1290 if (symbolic_operand (operand1, mode))
1292 /* Argh. The assembler and linker can't handle arithmetic
1293 involving plabels.
1295 So we force the plabel into memory, load operand0 from
1296 the memory location, then add in the constant part. */
1297 if ((GET_CODE (operand1) == CONST
1298 && GET_CODE (XEXP (operand1, 0)) == PLUS
1299 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1300 || function_label_operand (operand1, mode))
1302 rtx temp, const_part;
1304 /* Figure out what (if any) scratch register to use. */
1305 if (reload_in_progress || reload_completed)
1306 scratch_reg = scratch_reg ? scratch_reg : operand0;
1307 else if (flag_pic)
1308 scratch_reg = gen_reg_rtx (Pmode);
1310 if (GET_CODE (operand1) == CONST)
1312 /* Save away the constant part of the expression. */
1313 const_part = XEXP (XEXP (operand1, 0), 1);
1314 if (GET_CODE (const_part) != CONST_INT)
1315 abort ();
1317 /* Force the function label into memory. */
1318 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1320 else
1322 /* No constant part. */
1323 const_part = NULL_RTX;
1325 /* Force the function label into memory. */
1326 temp = force_const_mem (mode, operand1);
1330 /* Get the address of the memory location. PIC-ify it if
1331 necessary. */
1332 temp = XEXP (temp, 0);
1333 if (flag_pic)
1334 temp = legitimize_pic_address (temp, mode, scratch_reg);
1336 /* Put the address of the memory location into our destination
1337 register. */
1338 operands[1] = temp;
1339 emit_move_sequence (operands, mode, scratch_reg);
1341 /* Now load from the memory location into our destination
1342 register. */
1343 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1344 emit_move_sequence (operands, mode, scratch_reg);
1346 /* And add back in the constant part. */
1347 if (const_part != NULL_RTX)
1348 expand_inc (operand0, const_part);
1350 return 1;
1353 if (flag_pic)
1355 rtx temp;
1357 if (reload_in_progress || reload_completed)
1358 temp = scratch_reg ? scratch_reg : operand0;
1359 else
1360 temp = gen_reg_rtx (Pmode);
1362 /* (const (plus (symbol) (const_int))) must be forced to
1363 memory during/after reload if the const_int will not fit
1364 in 14 bits. */
1365 if (GET_CODE (operand1) == CONST
1366 && GET_CODE (XEXP (operand1, 0)) == PLUS
1367 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1368 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1369 && (reload_completed || reload_in_progress)
1370 && flag_pic)
1372 operands[1] = force_const_mem (mode, operand1);
1373 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1374 mode, temp);
1375 emit_move_sequence (operands, mode, temp);
1377 else
1379 operands[1] = legitimize_pic_address (operand1, mode, temp);
1380 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1383 /* On the HPPA, references to data space are supposed to use dp,
1384 register 27, but showing it in the RTL inhibits various cse
1385 and loop optimizations. */
1386 else
1388 rtx temp, set;
1390 if (reload_in_progress || reload_completed)
1391 temp = scratch_reg ? scratch_reg : operand0;
1392 else
1393 temp = gen_reg_rtx (mode);
1395 /* Loading a SYMBOL_REF into a register makes that register
1396 safe to be used as the base in an indexed address.
1398 Don't mark hard registers though. That loses. */
1399 if (GET_CODE (operand0) == REG
1400 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1401 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1402 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1403 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1404 if (ishighonly)
1405 set = gen_rtx_SET (mode, operand0, temp);
1406 else
1407 set = gen_rtx_SET (VOIDmode, operand0,
1408 gen_rtx_LO_SUM (mode, temp, operand1));
1410 emit_insn (gen_rtx_SET (VOIDmode,
1411 temp,
1412 gen_rtx_HIGH (mode, operand1)));
1413 emit_insn (set);
1416 return 1;
1418 else if (GET_CODE (operand1) != CONST_INT
1419 || ! cint_ok_for_move (INTVAL (operand1)))
1421 rtx temp;
1423 if (reload_in_progress || reload_completed)
1424 temp = operand0;
1425 else
1426 temp = gen_reg_rtx (mode);
1428 emit_insn (gen_rtx_SET (VOIDmode, temp,
1429 gen_rtx_HIGH (mode, operand1)));
1430 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1433 /* Now have insn-emit do whatever it normally does. */
1434 return 0;
1437 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1438 it will need a link/runtime reloc). */
1441 reloc_needed (exp)
1442 tree exp;
1444 int reloc = 0;
1446 switch (TREE_CODE (exp))
1448 case ADDR_EXPR:
1449 return 1;
1451 case PLUS_EXPR:
1452 case MINUS_EXPR:
1453 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1454 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1455 break;
1457 case NOP_EXPR:
1458 case CONVERT_EXPR:
1459 case NON_LVALUE_EXPR:
1460 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1461 break;
1463 case CONSTRUCTOR:
1465 register tree link;
1466 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1467 if (TREE_VALUE (link) != 0)
1468 reloc |= reloc_needed (TREE_VALUE (link));
1470 break;
1472 case ERROR_MARK:
1473 break;
1475 default:
1476 break;
1478 return reloc;
1481 /* Does operand (which is a symbolic_operand) live in text space? If
1482 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1485 read_only_operand (operand)
1486 rtx operand;
1488 if (GET_CODE (operand) == CONST)
1489 operand = XEXP (XEXP (operand, 0), 0);
1490 if (flag_pic)
1492 if (GET_CODE (operand) == SYMBOL_REF)
1493 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1495 else
1497 if (GET_CODE (operand) == SYMBOL_REF)
1498 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1500 return 1;
1504 /* Return the best assembler insn template
1505 for moving operands[1] into operands[0] as a fullword. */
1506 char *
1507 singlemove_string (operands)
1508 rtx *operands;
1510 HOST_WIDE_INT intval;
1512 if (GET_CODE (operands[0]) == MEM)
1513 return "stw %r1,%0";
1514 if (GET_CODE (operands[1]) == MEM)
1515 return "ldw %1,%0";
1516 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1518 long i;
1519 REAL_VALUE_TYPE d;
1521 if (GET_MODE (operands[1]) != SFmode)
1522 abort ();
1524 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1525 bit pattern. */
1526 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1527 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1529 operands[1] = GEN_INT (i);
1530 /* Fall through to CONST_INT case. */
1532 if (GET_CODE (operands[1]) == CONST_INT)
1534 intval = INTVAL (operands[1]);
1536 if (VAL_14_BITS_P (intval))
1537 return "ldi %1,%0";
1538 else if ((intval & 0x7ff) == 0)
1539 return "ldil L'%1,%0";
1540 else if (zdepi_cint_p (intval))
1541 return "zdepi %Z1,%0";
1542 else
1543 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1545 return "copy %1,%0";
1549 /* Compute position (in OP[1]) and width (in OP[2])
1550 useful for copying IMM to a register using the zdepi
1551 instructions. Store the immediate value to insert in OP[0]. */
1552 void
1553 compute_zdepi_operands (imm, op)
1554 unsigned HOST_WIDE_INT imm;
1555 unsigned *op;
1557 int lsb, len;
1559 /* Find the least significant set bit in IMM. */
1560 for (lsb = 0; lsb < 32; lsb++)
1562 if ((imm & 1) != 0)
1563 break;
1564 imm >>= 1;
1567 /* Choose variants based on *sign* of the 5-bit field. */
1568 if ((imm & 0x10) == 0)
1569 len = (lsb <= 28) ? 4 : 32 - lsb;
1570 else
1572 /* Find the width of the bitstring in IMM. */
1573 for (len = 5; len < 32; len++)
1575 if ((imm & (1 << len)) == 0)
1576 break;
1579 /* Sign extend IMM as a 5-bit value. */
1580 imm = (imm & 0xf) - 0x10;
1583 op[0] = imm;
1584 op[1] = 31 - lsb;
1585 op[2] = len;
1588 /* Output assembler code to perform a doubleword move insn
1589 with operands OPERANDS. */
1591 char *
1592 output_move_double (operands)
1593 rtx *operands;
1595 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1596 rtx latehalf[2];
1597 rtx addreg0 = 0, addreg1 = 0;
1599 /* First classify both operands. */
1601 if (REG_P (operands[0]))
1602 optype0 = REGOP;
1603 else if (offsettable_memref_p (operands[0]))
1604 optype0 = OFFSOP;
1605 else if (GET_CODE (operands[0]) == MEM)
1606 optype0 = MEMOP;
1607 else
1608 optype0 = RNDOP;
1610 if (REG_P (operands[1]))
1611 optype1 = REGOP;
1612 else if (CONSTANT_P (operands[1]))
1613 optype1 = CNSTOP;
1614 else if (offsettable_memref_p (operands[1]))
1615 optype1 = OFFSOP;
1616 else if (GET_CODE (operands[1]) == MEM)
1617 optype1 = MEMOP;
1618 else
1619 optype1 = RNDOP;
1621 /* Check for the cases that the operand constraints are not
1622 supposed to allow to happen. Abort if we get one,
1623 because generating code for these cases is painful. */
1625 if (optype0 != REGOP && optype1 != REGOP)
1626 abort ();
1628 /* Handle auto decrementing and incrementing loads and stores
1629 specifically, since the structure of the function doesn't work
1630 for them without major modification. Do it better when we learn
1631 this port about the general inc/dec addressing of PA.
1632 (This was written by tege. Chide him if it doesn't work.) */
1634 if (optype0 == MEMOP)
1636 /* We have to output the address syntax ourselves, since print_operand
1637 doesn't deal with the addresses we want to use. Fix this later. */
1639 rtx addr = XEXP (operands[0], 0);
1640 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1642 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1644 operands[0] = XEXP (addr, 0);
1645 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1646 abort ();
1648 if (!reg_overlap_mentioned_p (high_reg, addr))
1650 /* No overlap between high target register and address
1651 register. (We do this in a non-obvious way to
1652 save a register file writeback) */
1653 if (GET_CODE (addr) == POST_INC)
1654 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1655 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1657 else
1658 abort();
1660 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1662 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1664 operands[0] = XEXP (addr, 0);
1665 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1666 abort ();
1668 if (!reg_overlap_mentioned_p (high_reg, addr))
1670 /* No overlap between high target register and address
1671 register. (We do this in a non-obvious way to
1672 save a register file writeback) */
1673 if (GET_CODE (addr) == PRE_INC)
1674 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1675 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1677 else
1678 abort();
1681 if (optype1 == MEMOP)
1683 /* We have to output the address syntax ourselves, since print_operand
1684 doesn't deal with the addresses we want to use. Fix this later. */
1686 rtx addr = XEXP (operands[1], 0);
1687 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1689 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1691 operands[1] = XEXP (addr, 0);
1692 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1693 abort ();
1695 if (!reg_overlap_mentioned_p (high_reg, addr))
1697 /* No overlap between high target register and address
1698 register. (We do this in a non-obvious way to
1699 save a register file writeback) */
1700 if (GET_CODE (addr) == POST_INC)
1701 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1702 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1704 else
1706 /* This is an undefined situation. We should load into the
1707 address register *and* update that register. Probably
1708 we don't need to handle this at all. */
1709 if (GET_CODE (addr) == POST_INC)
1710 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1711 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1714 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1716 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1718 operands[1] = XEXP (addr, 0);
1719 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1720 abort ();
1722 if (!reg_overlap_mentioned_p (high_reg, addr))
1724 /* No overlap between high target register and address
1725 register. (We do this in a non-obvious way to
1726 save a register file writeback) */
1727 if (GET_CODE (addr) == PRE_INC)
1728 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1729 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1731 else
1733 /* This is an undefined situation. We should load into the
1734 address register *and* update that register. Probably
1735 we don't need to handle this at all. */
1736 if (GET_CODE (addr) == PRE_INC)
1737 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1738 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1741 else if (GET_CODE (addr) == PLUS
1742 && GET_CODE (XEXP (addr, 0)) == MULT)
1744 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1746 if (!reg_overlap_mentioned_p (high_reg, addr))
1748 rtx xoperands[3];
1750 xoperands[0] = high_reg;
1751 xoperands[1] = XEXP (addr, 1);
1752 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1753 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1754 output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
1755 return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
1757 else
1759 rtx xoperands[3];
1761 xoperands[0] = high_reg;
1762 xoperands[1] = XEXP (addr, 1);
1763 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1764 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1765 output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
1766 return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
1772 /* If an operand is an unoffsettable memory ref, find a register
1773 we can increment temporarily to make it refer to the second word. */
1775 if (optype0 == MEMOP)
1776 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1778 if (optype1 == MEMOP)
1779 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1781 /* Ok, we can do one word at a time.
1782 Normally we do the low-numbered word first.
1784 In either case, set up in LATEHALF the operands to use
1785 for the high-numbered word and in some cases alter the
1786 operands in OPERANDS to be suitable for the low-numbered word. */
1788 if (optype0 == REGOP)
1789 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
1790 else if (optype0 == OFFSOP)
1791 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1792 else
1793 latehalf[0] = operands[0];
1795 if (optype1 == REGOP)
1796 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
1797 else if (optype1 == OFFSOP)
1798 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1799 else if (optype1 == CNSTOP)
1800 split_double (operands[1], &operands[1], &latehalf[1]);
1801 else
1802 latehalf[1] = operands[1];
1804 /* If the first move would clobber the source of the second one,
1805 do them in the other order.
1807 This can happen in two cases:
1809 mem -> register where the first half of the destination register
1810 is the same register used in the memory's address. Reload
1811 can create such insns.
1813 mem in this case will be either register indirect or register
1814 indirect plus a valid offset.
1816 register -> register move where REGNO(dst) == REGNO(src + 1)
1817 someone (Tim/Tege?) claimed this can happen for parameter loads.
1819 Handle mem -> register case first. */
1820 if (optype0 == REGOP
1821 && (optype1 == MEMOP || optype1 == OFFSOP)
1822 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1823 operands[1], 0))
1825 /* Do the late half first. */
1826 if (addreg1)
1827 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1828 output_asm_insn (singlemove_string (latehalf), latehalf);
1830 /* Then clobber. */
1831 if (addreg1)
1832 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1833 return singlemove_string (operands);
1836 /* Now handle register -> register case. */
1837 if (optype0 == REGOP && optype1 == REGOP
1838 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1840 output_asm_insn (singlemove_string (latehalf), latehalf);
1841 return singlemove_string (operands);
1844 /* Normal case: do the two words, low-numbered first. */
1846 output_asm_insn (singlemove_string (operands), operands);
1848 /* Make any unoffsettable addresses point at high-numbered word. */
1849 if (addreg0)
1850 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1851 if (addreg1)
1852 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1854 /* Do that word. */
1855 output_asm_insn (singlemove_string (latehalf), latehalf);
1857 /* Undo the adds we just did. */
1858 if (addreg0)
1859 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1860 if (addreg1)
1861 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1863 return "";
1866 char *
1867 output_fp_move_double (operands)
1868 rtx *operands;
1870 if (FP_REG_P (operands[0]))
1872 if (FP_REG_P (operands[1])
1873 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1874 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1875 else
1876 output_asm_insn ("fldd%F1 %1,%0", operands);
1878 else if (FP_REG_P (operands[1]))
1880 output_asm_insn ("fstd%F0 %1,%0", operands);
1882 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1884 if (GET_CODE (operands[0]) == REG)
1886 rtx xoperands[2];
1887 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
1888 xoperands[0] = operands[0];
1889 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1891 /* This is a pain. You have to be prepared to deal with an
1892 arbitrary address here including pre/post increment/decrement.
1894 so avoid this in the MD. */
1895 else
1896 abort ();
1898 else abort ();
1899 return "";
1902 /* Return a REG that occurs in ADDR with coefficient 1.
1903 ADDR can be effectively incremented by incrementing REG. */
1905 static rtx
1906 find_addr_reg (addr)
1907 rtx addr;
1909 while (GET_CODE (addr) == PLUS)
1911 if (GET_CODE (XEXP (addr, 0)) == REG)
1912 addr = XEXP (addr, 0);
1913 else if (GET_CODE (XEXP (addr, 1)) == REG)
1914 addr = XEXP (addr, 1);
1915 else if (CONSTANT_P (XEXP (addr, 0)))
1916 addr = XEXP (addr, 1);
1917 else if (CONSTANT_P (XEXP (addr, 1)))
1918 addr = XEXP (addr, 0);
1919 else
1920 abort ();
1922 if (GET_CODE (addr) == REG)
1923 return addr;
1924 abort ();
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.

   Emits an unrolled-by-two copying loop via output_asm_insn, followed
   by straight-line code for the residual bytes, using the widest
   load/store the alignment allows.  Always returns "".  */
char *
output_block_move (operands, size_is_constant)
     rtx *operands;
     int size_is_constant ATTRIBUTE_UNUSED;
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than four bytes at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > 4)
    align = 4;

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two words per iteration; the addib decrements
	 the counter and branches back over the previous three insns
	 (.-12 = three 4-byte insns).  */
      output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
      output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
      output_asm_insn ("stws,ma %3,4(0,%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("stws,ma %6,4(0,%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  /* %4 is reused to hold n_bytes % 4 for the final
	     partial-word stbys,e store.  */
	  operands[4] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("ldw 0(0,%1),%6", operands);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("stws,ma %3,4(0,%0)", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two halfwords per iteration.  */
      output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
      output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
      output_asm_insn ("sths,ma %3,2(0,%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("sths,ma %6,2(0,%0)", operands);

      /* Handle the residual (up to 3 bytes).  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("ldb 0(0,%1),%6", operands);
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("sths,ma %3,2(0,%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %6,0(0,%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two bytes per iteration.  */
      output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
      output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
      output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("stbs,ma %6,1(0,%0)", operands);

      /* Handle the residual (at most one byte).  */
      if (n_bytes % 2 != 0)
	{
	  output_asm_insn ("ldb 0(0,%1),%3", operands);
	  output_asm_insn ("stb %3,0(0,%0)", operands);
	}
      return "";

    default:
      /* ALIGN was clamped to {1, 2, 4} above; anything else is a bug.  */
      abort ();
    }
}
2033 /* Count the number of insns necessary to handle this block move.
2035 Basic structure is the same as emit_block_move, except that we
2036 count insns rather than emit them. */
2039 compute_movstrsi_length (insn)
2040 rtx insn;
2042 rtx pat = PATTERN (insn);
2043 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2044 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
2045 unsigned int n_insns = 0;
2047 /* We can't move more than four bytes at a time because the PA
2048 has no longer integer move insns. (Could use fp mem ops?) */
2049 if (align > 4)
2050 align = 4;
2052 /* The basic copying loop. */
2053 n_insns = 6;
2055 /* Residuals. */
2056 if (n_bytes % (2 * align) != 0)
2058 if ((n_bytes % (2 * align)) >= align)
2059 n_insns += 2;
2061 if ((n_bytes % align) != 0)
2062 n_insns += 2;
2065 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2066 return n_insns * 4;
2070 char *
2071 output_and (operands)
2072 rtx *operands;
2074 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2076 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2077 int ls0, ls1, ms0, p, len;
2079 for (ls0 = 0; ls0 < 32; ls0++)
2080 if ((mask & (1 << ls0)) == 0)
2081 break;
2083 for (ls1 = ls0; ls1 < 32; ls1++)
2084 if ((mask & (1 << ls1)) != 0)
2085 break;
2087 for (ms0 = ls1; ms0 < 32; ms0++)
2088 if ((mask & (1 << ms0)) == 0)
2089 break;
2091 if (ms0 != 32)
2092 abort();
2094 if (ls1 == 32)
2096 len = ls0;
2098 if (len == 0)
2099 abort ();
2101 operands[2] = GEN_INT (len);
2102 return "extru %1,31,%2,%0";
2104 else
2106 /* We could use this `depi' for the case above as well, but `depi'
2107 requires one more register file access than an `extru'. */
2109 p = 31 - ls0;
2110 len = ls1 - ls0;
2112 operands[2] = GEN_INT (p);
2113 operands[3] = GEN_INT (len);
2114 return "depi 0,%2,%3,%0";
2117 else
2118 return "and %1,%2,%0";
2121 char *
2122 output_ior (operands)
2123 rtx *operands;
2125 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2126 int bs0, bs1, p, len;
2128 if (INTVAL (operands[2]) == 0)
2129 return "copy %1,%0";
2131 for (bs0 = 0; bs0 < 32; bs0++)
2132 if ((mask & (1 << bs0)) != 0)
2133 break;
2135 for (bs1 = bs0; bs1 < 32; bs1++)
2136 if ((mask & (1 << bs1)) == 0)
2137 break;
2139 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2140 abort();
2142 p = 31 - bs0;
2143 len = bs1 - bs0;
2145 operands[2] = GEN_INT (p);
2146 operands[3] = GEN_INT (len);
2147 return "depi -1,%2,%3,%0";
/* Output an ascii string.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int group = size - i;
      int io, co = 0;

      /* Process the input in groups of at most four characters.  */
      if (group > 4)
	group = 4;

      /* Escape this group into PARTIAL_OUTPUT.  */
      for (io = 0; io < group; io++)
	{
	  register unsigned int c = p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;

	      /* Non-printable: emit a \xNN escape with lower-case
		 hex digits.  */
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 + '0';
	      if (hexd > '9')
		hexd += 'a' - '9' - 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 + '0';
	      if (hexd > '9')
		hexd += 'a' - '9' - 1;
	      partial_output[co++] = hexd;
	    }
	}

      /* Break the line before the assembler's input limit is hit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
2209 /* Try to rewrite floating point comparisons & branches to avoid
2210 useless add,tr insns.
2212 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2213 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2214 first attempt to remove useless add,tr insns. It is zero
2215 for the second pass as reorg sometimes leaves bogus REG_DEAD
2216 notes lying around.
2218 When CHECK_NOTES is zero we can only eliminate add,tr insns
2219 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2220 instructions. */
2221 void
2222 remove_useless_addtr_insns (insns, check_notes)
2223 rtx insns;
2224 int check_notes;
2226 rtx insn;
2227 static int pass = 0;
2229 /* This is fairly cheap, so always run it when optimizing. */
2230 if (optimize > 0)
2232 int fcmp_count = 0;
2233 int fbranch_count = 0;
2235 /* Walk all the insns in this function looking for fcmp & fbranch
2236 instructions. Keep track of how many of each we find. */
2237 insns = get_insns ();
2238 for (insn = insns; insn; insn = next_insn (insn))
2240 rtx tmp;
2242 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2243 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2244 continue;
2246 tmp = PATTERN (insn);
2248 /* It must be a set. */
2249 if (GET_CODE (tmp) != SET)
2250 continue;
2252 /* If the destination is CCFP, then we've found an fcmp insn. */
2253 tmp = SET_DEST (tmp);
2254 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2256 fcmp_count++;
2257 continue;
2260 tmp = PATTERN (insn);
2261 /* If this is an fbranch instruction, bump the fbranch counter. */
2262 if (GET_CODE (tmp) == SET
2263 && SET_DEST (tmp) == pc_rtx
2264 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2265 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2266 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2267 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2269 fbranch_count++;
2270 continue;
2275 /* Find all floating point compare + branch insns. If possible,
2276 reverse the comparison & the branch to avoid add,tr insns. */
2277 for (insn = insns; insn; insn = next_insn (insn))
2279 rtx tmp, next;
2281 /* Ignore anything that isn't an INSN. */
2282 if (GET_CODE (insn) != INSN)
2283 continue;
2285 tmp = PATTERN (insn);
2287 /* It must be a set. */
2288 if (GET_CODE (tmp) != SET)
2289 continue;
2291 /* The destination must be CCFP, which is register zero. */
2292 tmp = SET_DEST (tmp);
2293 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2294 continue;
2296 /* INSN should be a set of CCFP.
2298 See if the result of this insn is used in a reversed FP
2299 conditional branch. If so, reverse our condition and
2300 the branch. Doing so avoids useless add,tr insns. */
2301 next = next_insn (insn);
2302 while (next)
2304 /* Jumps, calls and labels stop our search. */
2305 if (GET_CODE (next) == JUMP_INSN
2306 || GET_CODE (next) == CALL_INSN
2307 || GET_CODE (next) == CODE_LABEL)
2308 break;
2310 /* As does another fcmp insn. */
2311 if (GET_CODE (next) == INSN
2312 && GET_CODE (PATTERN (next)) == SET
2313 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2314 && REGNO (SET_DEST (PATTERN (next))) == 0)
2315 break;
2317 next = next_insn (next);
2320 /* Is NEXT_INSN a branch? */
2321 if (next
2322 && GET_CODE (next) == JUMP_INSN)
2324 rtx pattern = PATTERN (next);
2326 /* If it a reversed fp conditional branch (eg uses add,tr)
2327 and CCFP dies, then reverse our conditional and the branch
2328 to avoid the add,tr. */
2329 if (GET_CODE (pattern) == SET
2330 && SET_DEST (pattern) == pc_rtx
2331 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2332 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2333 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2334 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2335 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2336 && (fcmp_count == fbranch_count
2337 || (check_notes
2338 && find_regno_note (next, REG_DEAD, 0))))
2340 /* Reverse the branch. */
2341 tmp = XEXP (SET_SRC (pattern), 1);
2342 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2343 XEXP (SET_SRC (pattern), 2) = tmp;
2344 INSN_CODE (next) = -1;
2346 /* Reverse our condition. */
2347 tmp = PATTERN (insn);
2348 PUT_CODE (XEXP (tmp, 1),
2349 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2355 pass = !pass;
/* You may have trouble believing this, but this is the HP-PA stack
   layout.  Wow.

   Offset		Contents

   Variable arguments	(optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
   SP-56		arg word 5
   SP-52		arg word 4

   Fixed arguments	(must be allocated; may remain unused)

   SP-48		arg word 3
   SP-44		arg word 2
   SP-40		arg word 1
   SP-36		arg word 0

   Frame Marker

   SP-32		External Data Pointer (DP)
   SP-28		External sr4
   SP-24		External/stub RP (RP')
   SP-20		Current RP
   SP-16		Static Link
   SP-12		Clean up
   SP-8			Calling Stub RP (RP'')
   SP-4			Previous SP

   Top of Frame

   SP-0			Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

   Top of Frame

       SP (FP')		Previous FP
       SP + 4		Alignment filler (sigh)
       SP + 8		Space for locals reserved here.
       .
       .
       .
       SP + n		All call saved register used.
       .
       .
       .
       SP + o		All call saved fp registers used.
       .
       .
       .
       SP + p (SP')	points to next available address.

*/
2422 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2423 Handle case where DISP > 8k by using the add_high_const pattern.
2425 Note in DISP > 8k case, we will leave the high part of the address
2426 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2427 static void
2428 store_reg (reg, disp, base)
2429 int reg, disp, base;
2431 if (VAL_14_BITS_P (disp))
2433 emit_move_insn (gen_rtx_MEM (SImode,
2434 gen_rtx_PLUS (SImode,
2435 gen_rtx_REG (SImode, base),
2436 GEN_INT (disp))),
2437 gen_rtx_REG (SImode, reg));
2439 else
2441 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2442 gen_rtx_REG (SImode, base),
2443 GEN_INT (disp)));
2444 emit_move_insn (gen_rtx_MEM (SImode,
2445 gen_rtx_LO_SUM (SImode,
2446 gen_rtx_REG (SImode, 1),
2447 GEN_INT (disp))),
2448 gen_rtx_REG (SImode, reg));
2452 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2453 Handle case where DISP > 8k by using the add_high_const pattern.
2455 Note in DISP > 8k case, we will leave the high part of the address
2456 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2457 static void
2458 load_reg (reg, disp, base)
2459 int reg, disp, base;
2461 if (VAL_14_BITS_P (disp))
2463 emit_move_insn (gen_rtx_REG (SImode, reg),
2464 gen_rtx_MEM (SImode,
2465 gen_rtx_PLUS (SImode,
2466 gen_rtx_REG (SImode, base),
2467 GEN_INT (disp))));
2469 else
2471 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2472 gen_rtx_REG (SImode, base),
2473 GEN_INT (disp)));
2474 emit_move_insn (gen_rtx_REG (SImode, reg),
2475 gen_rtx_MEM (SImode,
2476 gen_rtx_LO_SUM (SImode,
2477 gen_rtx_REG (SImode, 1),
2478 GEN_INT (disp))));
2482 /* Emit RTL to set REG to the value specified by BASE+DISP.
2483 Handle case where DISP > 8k by using the add_high_const pattern.
2485 Note in DISP > 8k case, we will leave the high part of the address
2486 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2487 static void
2488 set_reg_plus_d(reg, base, disp)
2489 int reg, base, disp;
2491 if (VAL_14_BITS_P (disp))
2493 emit_move_insn (gen_rtx_REG (SImode, reg),
2494 gen_rtx_PLUS (SImode,
2495 gen_rtx_REG (SImode, base),
2496 GEN_INT (disp)));
2498 else
2500 emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
2501 gen_rtx_REG (SImode, base),
2502 GEN_INT (disp)));
2503 emit_move_insn (gen_rtx_REG (SImode, reg),
2504 gen_rtx_LO_SUM (SImode,
2505 gen_rtx_REG (SImode, 1),
2506 GEN_INT (disp)));
2510 /* Global variables set by FUNCTION_PROLOGUE. */
2511 /* Size of frame. Need to know this to emit return insns from
2512 leaf procedures. */
2513 static int actual_fsize;
2514 static int local_fsize, save_fregs;
2517 compute_frame_size (size, fregs_live)
2518 int size;
2519 int *fregs_live;
2521 extern int current_function_outgoing_args_size;
2522 int i, fsize;
2524 /* 8 is space for frame pointer + filler. If any frame is allocated
2525 we need to add this in because of STARTING_FRAME_OFFSET. */
2526 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2528 /* We must leave enough space for all the callee saved registers
2529 from 3 .. highest used callee save register since we don't
2530 know if we're going to have an inline or out of line prologue
2531 and epilogue. */
2532 for (i = 18; i >= 3; i--)
2533 if (regs_ever_live[i])
2535 fsize += 4 * (i - 2);
2536 break;
2539 /* Round the stack. */
2540 fsize = (fsize + 7) & ~7;
2542 /* We must leave enough space for all the callee saved registers
2543 from 3 .. highest used callee save register since we don't
2544 know if we're going to have an inline or out of line prologue
2545 and epilogue. */
2546 for (i = 66; i >= 48; i -= 2)
2547 if (regs_ever_live[i] || regs_ever_live[i + 1])
2549 if (fregs_live)
2550 *fregs_live = 1;
2552 fsize += 4 * (i - 46);
2553 break;
2556 fsize += current_function_outgoing_args_size;
2557 if (! leaf_function_p () || fsize)
2558 fsize += 32;
2559 return (fsize + 63) & ~63;
2562 rtx hp_profile_label_rtx;
2563 static char hp_profile_label_name[8];
2564 void
2565 output_function_prologue (file, size)
2566 FILE *file;
2567 int size ATTRIBUTE_UNUSED;
2569 /* The function's label and associated .PROC must never be
2570 separated and must be output *after* any profiling declarations
2571 to avoid changing spaces/subspaces within a procedure. */
2572 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2573 fputs ("\t.PROC\n", file);
2575 /* hppa_expand_prologue does the dirty work now. We just need
2576 to output the assembler directives which denote the start
2577 of a function. */
2578 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2579 if (regs_ever_live[2] || profile_flag)
2580 fputs (",CALLS,SAVE_RP", file);
2581 else
2582 fputs (",NO_CALLS", file);
2584 if (frame_pointer_needed)
2585 fputs (",SAVE_SP", file);
2587 /* Pass on information about the number of callee register saves
2588 performed in the prologue.
2590 The compiler is supposed to pass the highest register number
2591 saved, the assembler then has to adjust that number before
2592 entering it into the unwind descriptor (to account for any
2593 caller saved registers with lower register numbers than the
2594 first callee saved register). */
2595 if (gr_saved)
2596 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2598 if (fr_saved)
2599 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2601 fputs ("\n\t.ENTRY\n", file);
2603 /* Horrid hack. emit_function_prologue will modify this RTL in
2604 place to get the expected results. */
2605 if (profile_flag)
2606 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2607 hp_profile_labelno);
2609 /* If we're using GAS and not using the portable runtime model, then
2610 we don't need to accumulate the total number of code bytes. */
2611 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2612 total_code_bytes = 0;
2613 else if (insn_addresses)
2615 unsigned int old_total = total_code_bytes;
2617 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2618 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2620 /* Be prepared to handle overflows. */
2621 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2623 else
2624 total_code_bytes = -1;
2626 remove_useless_addtr_insns (get_insns (), 0);
2628 /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */
2629 restore_unscaled_index_insn_codes (get_insns ());
2632 void
2633 hppa_expand_prologue()
2635 extern char call_used_regs[];
2636 int size = get_frame_size ();
2637 int merge_sp_adjust_with_store = 0;
2638 int i, offset;
2639 rtx tmpreg, size_rtx;
2641 gr_saved = 0;
2642 fr_saved = 0;
2643 save_fregs = 0;
2644 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2645 actual_fsize = compute_frame_size (size, &save_fregs);
2647 /* Compute a few things we will use often. */
2648 tmpreg = gen_rtx_REG (SImode, 1);
2649 size_rtx = GEN_INT (actual_fsize);
2651 /* Handle out of line prologues and epilogues. */
2652 if (TARGET_SPACE)
2654 rtx operands[2];
2655 int saves = 0;
2656 int outline_insn_count = 0;
2657 int inline_insn_count = 0;
2659 /* Count the number of insns for the inline and out of line
2660 variants so we can choose one appropriately.
2662 No need to screw with counting actual_fsize operations -- they're
2663 done for both inline and out of line prologues. */
2664 if (regs_ever_live[2])
2665 inline_insn_count += 1;
2667 if (! cint_ok_for_move (local_fsize))
2668 outline_insn_count += 2;
2669 else
2670 outline_insn_count += 1;
2672 /* Put the register save info into %r22. */
2673 for (i = 18; i >= 3; i--)
2674 if (regs_ever_live[i] && ! call_used_regs[i])
2676 /* -1 because the stack adjustment is normally done in
2677 the same insn as a register save. */
2678 inline_insn_count += (i - 2) - 1;
2679 saves = i;
2680 break;
2683 for (i = 66; i >= 48; i -= 2)
2684 if (regs_ever_live[i] || regs_ever_live[i + 1])
2686 /* +1 needed as we load %r1 with the start of the freg
2687 save area. */
2688 inline_insn_count += (i/2 - 23) + 1;
2689 saves |= ((i/2 - 12 ) << 16);
2690 break;
2693 if (frame_pointer_needed)
2694 inline_insn_count += 3;
2696 if (! cint_ok_for_move (saves))
2697 outline_insn_count += 2;
2698 else
2699 outline_insn_count += 1;
2701 if (TARGET_PORTABLE_RUNTIME)
2702 outline_insn_count += 2;
2703 else
2704 outline_insn_count += 1;
2706 /* If there's a lot of insns in the prologue, then do it as
2707 an out-of-line sequence. */
2708 if (inline_insn_count > outline_insn_count)
2710 /* Put the local_fisze into %r19. */
2711 operands[0] = gen_rtx_REG (SImode, 19);
2712 operands[1] = GEN_INT (local_fsize);
2713 emit_move_insn (operands[0], operands[1]);
2715 /* Put the stack size into %r21. */
2716 operands[0] = gen_rtx_REG (SImode, 21);
2717 operands[1] = size_rtx;
2718 emit_move_insn (operands[0], operands[1]);
2720 operands[0] = gen_rtx_REG (SImode, 22);
2721 operands[1] = GEN_INT (saves);
2722 emit_move_insn (operands[0], operands[1]);
2724 /* Now call the out-of-line prologue. */
2725 emit_insn (gen_outline_prologue_call ());
2726 emit_insn (gen_blockage ());
2728 /* Note that we're using an out-of-line prologue. */
2729 out_of_line_prologue_epilogue = 1;
2730 return;
2734 out_of_line_prologue_epilogue = 0;
2736 /* Save RP first. The calling conventions manual states RP will
2737 always be stored into the caller's frame at sp-20. */
2738 if (regs_ever_live[2] || profile_flag)
2739 store_reg (2, -20, STACK_POINTER_REGNUM);
2741 /* Allocate the local frame and set up the frame pointer if needed. */
2742 if (actual_fsize)
2744 if (frame_pointer_needed)
2746 /* Copy the old frame pointer temporarily into %r1. Set up the
2747 new stack pointer, then store away the saved old frame pointer
2748 into the stack at sp+actual_fsize and at the same time update
2749 the stack pointer by actual_fsize bytes. Two versions, first
2750 handles small (<8k) frames. The second handles large (>8k)
2751 frames. */
2752 emit_move_insn (tmpreg, frame_pointer_rtx);
2753 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2754 if (VAL_14_BITS_P (actual_fsize))
2755 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2756 else
2758 /* It is incorrect to store the saved frame pointer at *sp,
2759 then increment sp (writes beyond the current stack boundary).
2761 So instead use stwm to store at *sp and post-increment the
2762 stack pointer as an atomic operation. Then increment sp to
2763 finish allocating the new frame. */
2764 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2765 set_reg_plus_d (STACK_POINTER_REGNUM,
2766 STACK_POINTER_REGNUM,
2767 actual_fsize - 64);
2770 /* no frame pointer needed. */
2771 else
2773 /* In some cases we can perform the first callee register save
2774 and allocating the stack frame at the same time. If so, just
2775 make a note of it and defer allocating the frame until saving
2776 the callee registers. */
2777 if (VAL_14_BITS_P (-actual_fsize)
2778 && local_fsize == 0
2779 && ! profile_flag
2780 && ! flag_pic)
2781 merge_sp_adjust_with_store = 1;
2782 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2783 else if (actual_fsize != 0)
2784 set_reg_plus_d (STACK_POINTER_REGNUM,
2785 STACK_POINTER_REGNUM,
2786 actual_fsize);
2790 /* The hppa calling conventions say that %r19, the pic offset
2791 register, is saved at sp - 32 (in this function's frame) when
2792 generating PIC code. FIXME: What is the correct thing to do
2793 for functions which make no calls and allocate no frame? Do
2794 we need to allocate a frame, or can we just omit the save? For
2795 now we'll just omit the save. */
2796 if (actual_fsize != 0 && flag_pic)
2797 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2799 /* Profiling code.
2801 Instead of taking one argument, the counter label, as most normal
2802 mcounts do, _mcount appears to behave differently on the HPPA. It
2803 takes the return address of the caller, the address of this routine,
2804 and the address of the label. Also, it isn't magic, so
2805 argument registers have to be preserved. */
2806 if (profile_flag)
2808 int pc_offset, i, arg_offset, basereg, offsetadj;
2810 pc_offset = 4 + (frame_pointer_needed
2811 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2812 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2814 /* When the function has a frame pointer, use it as the base
2815 register for saving/restore registers. Else use the stack
2816 pointer. Adjust the offset according to the frame size if
2817 this function does not have a frame pointer. */
2819 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2820 : STACK_POINTER_REGNUM;
2821 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2823 /* Horrid hack. emit_function_prologue will modify this RTL in
2824 place to get the expected results. sprintf here is just to
2825 put something in the name. */
2826 sprintf(hp_profile_label_name, "LP$%04d", -1);
2827 hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode, hp_profile_label_name);
2828 if (current_function_returns_struct)
2829 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2831 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2832 if (regs_ever_live [i])
2834 store_reg (i, arg_offset, basereg);
2835 /* Deal with arg_offset not fitting in 14 bits. */
2836 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2839 emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2));
2840 emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx));
2841 emit_move_insn (gen_rtx_REG (SImode, 24),
2842 gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx));
2843 /* %r25 is set from within the output pattern. */
2844 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2846 /* Restore argument registers. */
2847 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2848 if (regs_ever_live [i])
2849 load_reg (i, arg_offset, basereg);
2851 if (current_function_returns_struct)
2852 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2856 /* Normal register save.
2858 Do not save the frame pointer in the frame_pointer_needed case. It
2859 was done earlier. */
2860 if (frame_pointer_needed)
2862 for (i = 18, offset = local_fsize; i >= 4; i--)
2863 if (regs_ever_live[i] && ! call_used_regs[i])
2865 store_reg (i, offset, FRAME_POINTER_REGNUM);
2866 offset += 4;
2867 gr_saved++;
2869 /* Account for %r3 which is saved in a special place. */
2870 gr_saved++;
2872 /* No frame pointer needed. */
2873 else
2875 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2876 if (regs_ever_live[i] && ! call_used_regs[i])
2878 /* If merge_sp_adjust_with_store is nonzero, then we can
2879 optimize the first GR save. */
2880 if (merge_sp_adjust_with_store)
2882 merge_sp_adjust_with_store = 0;
2883 emit_insn (gen_post_stwm (stack_pointer_rtx,
2884 gen_rtx_REG (SImode, i),
2885 GEN_INT (-offset)));
2887 else
2888 store_reg (i, offset, STACK_POINTER_REGNUM);
2889 offset += 4;
2890 gr_saved++;
2893 /* If we wanted to merge the SP adjustment with a GR save, but we never
2894 did any GR saves, then just emit the adjustment here. */
2895 if (merge_sp_adjust_with_store)
2896 set_reg_plus_d (STACK_POINTER_REGNUM,
2897 STACK_POINTER_REGNUM,
2898 actual_fsize);
2901 /* Align pointer properly (doubleword boundary). */
2902 offset = (offset + 7) & ~7;
2904 /* Floating point register store. */
2905 if (save_fregs)
2907 /* First get the frame or stack pointer to the start of the FP register
2908 save area. */
2909 if (frame_pointer_needed)
2910 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2911 else
2912 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2914 /* Now actually save the FP registers. */
2915 for (i = 66; i >= 48; i -= 2)
2917 if (regs_ever_live[i] || regs_ever_live[i + 1])
2919 emit_move_insn (gen_rtx_MEM (DFmode,
2920 gen_rtx_POST_INC (DFmode, tmpreg)),
2921 gen_rtx_REG (DFmode, i));
2922 fr_saved++;
2927 /* When generating PIC code it is necessary to save/restore the
2928 PIC register around each function call. We used to do this
2929 in the call patterns themselves, but that implementation
2930 made incorrect assumptions about using global variables to hold
2931 per-function rtl code generated in the backend.
2933 So instead, we copy the PIC register into a reserved callee saved
2934 register in the prologue. Then after each call we reload the PIC
2935 register from the callee saved register. We also reload the PIC
2936 register from the callee saved register in the epilogue ensure the
2937 PIC register is valid at function exit.
2939 This may (depending on the exact characteristics of the function)
2940 even be more efficient.
2942 Avoid this if the callee saved register wasn't used (these are
2943 leaf functions). */
2944 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2945 emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2946 gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM));
2950 void
2951 output_function_epilogue (file, size)
2952 FILE *file;
2953 int size ATTRIBUTE_UNUSED;
2955 rtx insn = get_last_insn ();
2957 /* hppa_expand_epilogue does the dirty work now. We just need
2958 to output the assembler directives which denote the end
2959 of a function.
2961 To make debuggers happy, emit a nop if the epilogue was completely
2962 eliminated due to a volatile call as the last insn in the
2963 current function. That way the return address (in %r2) will
2964 always point to a valid instruction in the current function. */
2966 /* Get the last real insn. */
2967 if (GET_CODE (insn) == NOTE)
2968 insn = prev_real_insn (insn);
2970 /* If it is a sequence, then look inside. */
2971 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2972 insn = XVECEXP (PATTERN (insn), 0, 0);
2974 /* If insn is a CALL_INSN, then it must be a call to a volatile
2975 function (otherwise there would be epilogue insns). */
2976 if (insn && GET_CODE (insn) == CALL_INSN)
2977 fputs ("\tnop\n", file);
2979 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2981 /* Free up stuff we don't need anymore. */
2982 if (unscaled_index_insn_codes)
2983 free (unscaled_index_insn_codes);
2984 max_unscaled_index_insn_codes_uid = 0;
2987 void
2988 hppa_expand_epilogue ()
2990 rtx tmpreg;
2991 int offset,i;
2992 int merge_sp_adjust_with_load = 0;
2994 /* Handle out of line prologues and epilogues. */
2995 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2997 int saves = 0;
2998 rtx operands[2];
3000 /* Put the register save info into %r22. */
3001 for (i = 18; i >= 3; i--)
3002 if (regs_ever_live[i] && ! call_used_regs[i])
3004 saves = i;
3005 break;
3008 for (i = 66; i >= 48; i -= 2)
3009 if (regs_ever_live[i] || regs_ever_live[i + 1])
3011 saves |= ((i/2 - 12 ) << 16);
3012 break;
3015 emit_insn (gen_blockage ());
3017 /* Put the local_fisze into %r19. */
3018 operands[0] = gen_rtx_REG (SImode, 19);
3019 operands[1] = GEN_INT (local_fsize);
3020 emit_move_insn (operands[0], operands[1]);
3022 /* Put the stack size into %r21. */
3023 operands[0] = gen_rtx_REG (SImode, 21);
3024 operands[1] = GEN_INT (actual_fsize);
3025 emit_move_insn (operands[0], operands[1]);
3027 operands[0] = gen_rtx_REG (SImode, 22);
3028 operands[1] = GEN_INT (saves);
3029 emit_move_insn (operands[0], operands[1]);
3031 /* Now call the out-of-line epilogue. */
3032 emit_insn (gen_outline_epilogue_call ());
3033 return;
3036 /* We will use this often. */
3037 tmpreg = gen_rtx_REG (SImode, 1);
3039 /* Try to restore RP early to avoid load/use interlocks when
3040 RP gets used in the return (bv) instruction. This appears to still
3041 be necessary even when we schedule the prologue and epilogue. */
3042 if (frame_pointer_needed
3043 && (regs_ever_live [2] || profile_flag))
3044 load_reg (2, -20, FRAME_POINTER_REGNUM);
3046 /* No frame pointer, and stack is smaller than 8k. */
3047 else if (! frame_pointer_needed
3048 && VAL_14_BITS_P (actual_fsize + 20)
3049 && (regs_ever_live[2] || profile_flag))
3050 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
3052 /* General register restores. */
3053 if (frame_pointer_needed)
3055 for (i = 18, offset = local_fsize; i >= 4; i--)
3056 if (regs_ever_live[i] && ! call_used_regs[i])
3058 load_reg (i, offset, FRAME_POINTER_REGNUM);
3059 offset += 4;
3062 else
3064 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3066 if (regs_ever_live[i] && ! call_used_regs[i])
3068 /* Only for the first load.
3069 merge_sp_adjust_with_load holds the register load
3070 with which we will merge the sp adjustment. */
3071 if (VAL_14_BITS_P (actual_fsize + 20)
3072 && local_fsize == 0
3073 && ! merge_sp_adjust_with_load)
3074 merge_sp_adjust_with_load = i;
3075 else
3076 load_reg (i, offset, STACK_POINTER_REGNUM);
3077 offset += 4;
3082 /* Align pointer properly (doubleword boundary). */
3083 offset = (offset + 7) & ~7;
3085 /* FP register restores. */
3086 if (save_fregs)
3088 /* Adjust the register to index off of. */
3089 if (frame_pointer_needed)
3090 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
3091 else
3092 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3094 /* Actually do the restores now. */
3095 for (i = 66; i >= 48; i -= 2)
3097 if (regs_ever_live[i] || regs_ever_live[i + 1])
3099 emit_move_insn (gen_rtx_REG (DFmode, i),
3100 gen_rtx_MEM (DFmode,
3101 gen_rtx_POST_INC (DFmode, tmpreg)));
3106 /* Emit a blockage insn here to keep these insns from being moved to
3107 an earlier spot in the epilogue, or into the main instruction stream.
3109 This is necessary as we must not cut the stack back before all the
3110 restores are finished. */
3111 emit_insn (gen_blockage ());
3112 /* No frame pointer, but we have a stack greater than 8k. We restore
3113 %r2 very late in this case. (All other cases are restored as early
3114 as possible.) */
3115 if (! frame_pointer_needed
3116 && ! VAL_14_BITS_P (actual_fsize + 20)
3117 && (regs_ever_live[2] || profile_flag))
3119 set_reg_plus_d (STACK_POINTER_REGNUM,
3120 STACK_POINTER_REGNUM,
3121 - actual_fsize);
3123 /* This used to try and be clever by not depending on the value in
3124 %r30 and instead use the value held in %r1 (so that the 2nd insn
3125 which sets %r30 could be put in the delay slot of the return insn).
3127 That won't work since if the stack is exactly 8k set_reg_plus_d
3128 doesn't set %r1, just %r30. */
3129 load_reg (2, - 20, STACK_POINTER_REGNUM);
3132 /* Reset stack pointer (and possibly frame pointer). The stack
3133 pointer is initially set to fp + 64 to avoid a race condition. */
3134 else if (frame_pointer_needed)
3136 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3137 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3138 stack_pointer_rtx,
3139 GEN_INT (-64)));
3141 /* If we were deferring a callee register restore, do it now. */
3142 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3143 emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode, merge_sp_adjust_with_load),
3144 stack_pointer_rtx,
3145 GEN_INT (- actual_fsize)));
3146 else if (actual_fsize != 0)
3147 set_reg_plus_d (STACK_POINTER_REGNUM,
3148 STACK_POINTER_REGNUM,
3149 - actual_fsize);
3152 /* Fetch the return address for the frame COUNT steps up from
3153 the current frame, after the prologue. FRAMEADDR is the
3154 frame pointer of the COUNT frame.
3156 We want to ignore any export stub remnants here.
3158 The value returned is used in two different ways:
3160 1. To find a function's caller.
3162 2. To change the return address for a function.
3164 This function handles most instances of case 1; however, it will
3165 fail if there are two levels of stubs to execute on the return
3166 path. The only way I believe that can happen is if the return value
3167 needs a parameter relocation, which never happens for C code.
3169 This function handles most instances of case 2; however, it will
3170 fail if we did not originally have stub code on the return path
3171 but will need code on the new return path. This can happen if
3172 the caller & callee are both in the main program, but the new
3173 return location is in a shared library.
3175 To handle this correctly we need to set the return pointer at
3176 frame-20 to point to a return stub frame-24 to point to the
3177 location we wish to return to. */
3180 return_addr_rtx (count, frameaddr)
3181 int count ATTRIBUTE_UNUSED;
3182 rtx frameaddr;
3184 rtx label;
3185 rtx saved_rp;
3186 rtx ins;
3188 saved_rp = gen_reg_rtx (Pmode);
3190 /* First, we start off with the normal return address pointer from
3191 -20[frameaddr]. */
3193 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3195 /* Get pointer to the instruction stream. We have to mask out the
3196 privilege level from the two low order bits of the return address
3197 pointer here so that ins will point to the start of the first
3198 instruction that would have been executed if we returned. */
3199 ins = copy_to_reg (gen_rtx_AND (Pmode,
3200 copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)),
3201 MASK_RETURN_ADDR));
3202 label = gen_label_rtx ();
3204 /* Check the instruction stream at the normal return address for the
3205 export stub:
3207 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3208 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3209 0x00011820 | stub+16: mtsp r1,sr0
3210 0xe0400002 | stub+20: be,n 0(sr0,rp)
3212 If it is an export stub, than our return address is really in
3213 -24[frameaddr]. */
3215 emit_cmp_insn (gen_rtx_MEM (SImode, ins),
3216 GEN_INT (0x4bc23fd1),
3217 NE, NULL_RTX, SImode, 1, 0);
3218 emit_jump_insn (gen_bne (label));
3220 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3221 GEN_INT (0x004010a1),
3222 NE, NULL_RTX, SImode, 1, 0);
3223 emit_jump_insn (gen_bne (label));
3225 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3226 GEN_INT (0x00011820),
3227 NE, NULL_RTX, SImode, 1, 0);
3228 emit_jump_insn (gen_bne (label));
3230 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3231 GEN_INT (0xe0400002),
3232 NE, NULL_RTX, SImode, 1, 0);
3234 /* If there is no export stub then just use our initial guess of
3235 -20[frameaddr]. */
3237 emit_jump_insn (gen_bne (label));
3239 /* Here we know that our return address pointer points to an export
3240 stub. We don't want to return the address of the export stub,
3241 but rather the return address that leads back into user code.
3242 That return address is stored at -24[frameaddr]. */
3244 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3246 emit_label (label);
3247 return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
3250 /* This is only valid once reload has completed because it depends on
3251 knowing exactly how much (if any) frame there is and...
3253 It's only valid if there is no frame marker to de-allocate and...
3255 It's only valid if %r2 hasn't been saved into the caller's frame
3256 (we're not profiling and %r2 isn't live anywhere). */
3258 hppa_can_use_return_insn_p ()
3260 return (reload_completed
3261 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3262 && ! profile_flag
3263 && ! regs_ever_live[2]
3264 && ! frame_pointer_needed);
3267 void
3268 emit_bcond_fp (code, operand0)
3269 enum rtx_code code;
3270 rtx operand0;
3272 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3273 gen_rtx_IF_THEN_ELSE (VOIDmode,
3274 gen_rtx_fmt_ee (code,
3275 VOIDmode,
3276 gen_rtx_REG (CCFPmode, 0),
3277 const0_rtx),
3278 gen_rtx_LABEL_REF (VOIDmode, operand0),
3279 pc_rtx)));
3284 gen_cmp_fp (code, operand0, operand1)
3285 enum rtx_code code;
3286 rtx operand0, operand1;
3288 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3289 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3292 /* Adjust the cost of a scheduling dependency. Return the new cost of
3293 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3296 pa_adjust_cost (insn, link, dep_insn, cost)
3297 rtx insn;
3298 rtx link;
3299 rtx dep_insn;
3300 int cost;
3302 enum attr_type attr_type;
3304 if (! recog_memoized (insn))
3305 return 0;
3307 attr_type = get_attr_type (insn);
3309 if (REG_NOTE_KIND (link) == 0)
3311 /* Data dependency; DEP_INSN writes a register that INSN reads some
3312 cycles later. */
3314 if (attr_type == TYPE_FPSTORE)
3316 rtx pat = PATTERN (insn);
3317 rtx dep_pat = PATTERN (dep_insn);
3318 if (GET_CODE (pat) == PARALLEL)
3320 /* This happens for the fstXs,mb patterns. */
3321 pat = XVECEXP (pat, 0, 0);
3323 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3324 /* If this happens, we have to extend this to schedule
3325 optimally. Return 0 for now. */
3326 return 0;
3328 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3330 if (! recog_memoized (dep_insn))
3331 return 0;
3332 /* DEP_INSN is writing its result to the register
3333 being stored in the fpstore INSN. */
3334 switch (get_attr_type (dep_insn))
3336 case TYPE_FPLOAD:
3337 /* This cost 3 cycles, not 2 as the md says for the
3338 700 and 7100. */
3339 return cost + 1;
3341 case TYPE_FPALU:
3342 case TYPE_FPMULSGL:
3343 case TYPE_FPMULDBL:
3344 case TYPE_FPDIVSGL:
3345 case TYPE_FPDIVDBL:
3346 case TYPE_FPSQRTSGL:
3347 case TYPE_FPSQRTDBL:
3348 /* In these important cases, we save one cycle compared to
3349 when flop instruction feed each other. */
3350 return cost - 1;
3352 default:
3353 return cost;
3358 /* For other data dependencies, the default cost specified in the
3359 md is correct. */
3360 return cost;
3362 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3364 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3365 cycles later. */
3367 if (attr_type == TYPE_FPLOAD)
3369 rtx pat = PATTERN (insn);
3370 rtx dep_pat = PATTERN (dep_insn);
3371 if (GET_CODE (pat) == PARALLEL)
3373 /* This happens for the fldXs,mb patterns. */
3374 pat = XVECEXP (pat, 0, 0);
3376 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3377 /* If this happens, we have to extend this to schedule
3378 optimally. Return 0 for now. */
3379 return 0;
3381 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3383 if (! recog_memoized (dep_insn))
3384 return 0;
3385 switch (get_attr_type (dep_insn))
3387 case TYPE_FPALU:
3388 case TYPE_FPMULSGL:
3389 case TYPE_FPMULDBL:
3390 case TYPE_FPDIVSGL:
3391 case TYPE_FPDIVDBL:
3392 case TYPE_FPSQRTSGL:
3393 case TYPE_FPSQRTDBL:
3394 /* A fpload can't be issued until one cycle before a
3395 preceding arithmetic operation has finished if
3396 the target of the fpload is any of the sources
3397 (or destination) of the arithmetic operation. */
3398 return cost - 1;
3400 default:
3401 return 0;
3405 else if (attr_type == TYPE_FPALU)
3407 rtx pat = PATTERN (insn);
3408 rtx dep_pat = PATTERN (dep_insn);
3409 if (GET_CODE (pat) == PARALLEL)
3411 /* This happens for the fldXs,mb patterns. */
3412 pat = XVECEXP (pat, 0, 0);
3414 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3415 /* If this happens, we have to extend this to schedule
3416 optimally. Return 0 for now. */
3417 return 0;
3419 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3421 if (! recog_memoized (dep_insn))
3422 return 0;
3423 switch (get_attr_type (dep_insn))
3425 case TYPE_FPDIVSGL:
3426 case TYPE_FPDIVDBL:
3427 case TYPE_FPSQRTSGL:
3428 case TYPE_FPSQRTDBL:
3429 /* An ALU flop can't be issued until two cycles before a
3430 preceding divide or sqrt operation has finished if
3431 the target of the ALU flop is any of the sources
3432 (or destination) of the divide or sqrt operation. */
3433 return cost - 2;
3435 default:
3436 return 0;
3441 /* For other anti dependencies, the cost is 0. */
3442 return 0;
3444 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3446 /* Output dependency; DEP_INSN writes a register that INSN writes some
3447 cycles later. */
3448 if (attr_type == TYPE_FPLOAD)
3450 rtx pat = PATTERN (insn);
3451 rtx dep_pat = PATTERN (dep_insn);
3452 if (GET_CODE (pat) == PARALLEL)
3454 /* This happens for the fldXs,mb patterns. */
3455 pat = XVECEXP (pat, 0, 0);
3457 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3458 /* If this happens, we have to extend this to schedule
3459 optimally. Return 0 for now. */
3460 return 0;
3462 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3464 if (! recog_memoized (dep_insn))
3465 return 0;
3466 switch (get_attr_type (dep_insn))
3468 case TYPE_FPALU:
3469 case TYPE_FPMULSGL:
3470 case TYPE_FPMULDBL:
3471 case TYPE_FPDIVSGL:
3472 case TYPE_FPDIVDBL:
3473 case TYPE_FPSQRTSGL:
3474 case TYPE_FPSQRTDBL:
3475 /* A fpload can't be issued until one cycle before a
3476 preceding arithmetic operation has finished if
3477 the target of the fpload is the destination of the
3478 arithmetic operation. */
3479 return cost - 1;
3481 default:
3482 return 0;
3486 else if (attr_type == TYPE_FPALU)
3488 rtx pat = PATTERN (insn);
3489 rtx dep_pat = PATTERN (dep_insn);
3490 if (GET_CODE (pat) == PARALLEL)
3492 /* This happens for the fldXs,mb patterns. */
3493 pat = XVECEXP (pat, 0, 0);
3495 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3496 /* If this happens, we have to extend this to schedule
3497 optimally. Return 0 for now. */
3498 return 0;
3500 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3502 if (! recog_memoized (dep_insn))
3503 return 0;
3504 switch (get_attr_type (dep_insn))
3506 case TYPE_FPDIVSGL:
3507 case TYPE_FPDIVDBL:
3508 case TYPE_FPSQRTSGL:
3509 case TYPE_FPSQRTDBL:
3510 /* An ALU flop can't be issued until two cycles before a
3511 preceding divide or sqrt operation has finished if
3512 the target of the ALU flop is also the target of
3513 the divide or sqrt operation. */
3514 return cost - 2;
3516 default:
3517 return 0;
3522 /* For other output dependencies, the cost is 0. */
3523 return 0;
3525 else
3526 abort ();
3529 /* Return any length adjustment needed by INSN which already has its length
3530 computed as LENGTH. Return zero if no adjustment is necessary.
3532 For the PA: function calls, millicode calls, and backwards short
3533 conditional branches with unfilled delay slots need an adjustment by +1
3534 (to account for the NOP which will be inserted into the instruction stream).
3536 Also compute the length of an inline block move here as it is too
3537 complicated to express as a length attribute in pa.md. */
3539 pa_adjust_insn_length (insn, length)
3540 rtx insn;
3541 int length;
3543 rtx pat = PATTERN (insn);
3545 /* Call insns which are *not* indirect and have unfilled delay slots. */
3546 if (GET_CODE (insn) == CALL_INSN)
3549 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3550 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3551 return 4;
3552 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3553 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3554 == SYMBOL_REF)
3555 return 4;
3556 else
3557 return 0;
3559 /* Jumps inside switch tables which have unfilled delay slots
3560 also need adjustment. */
3561 else if (GET_CODE (insn) == JUMP_INSN
3562 && simplejump_p (insn)
3563 && GET_MODE (PATTERN (insn)) == DImode)
3564 return 4;
3565 /* Millicode insn with an unfilled delay slot. */
3566 else if (GET_CODE (insn) == INSN
3567 && GET_CODE (pat) != SEQUENCE
3568 && GET_CODE (pat) != USE
3569 && GET_CODE (pat) != CLOBBER
3570 && get_attr_type (insn) == TYPE_MILLI)
3571 return 4;
3572 /* Block move pattern. */
3573 else if (GET_CODE (insn) == INSN
3574 && GET_CODE (pat) == PARALLEL
3575 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3576 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3577 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3578 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3579 return compute_movstrsi_length (insn) - 4;
3580 /* Conditional branch with an unfilled delay slot. */
3581 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3583 /* Adjust a short backwards conditional with an unfilled delay slot. */
3584 if (GET_CODE (pat) == SET
3585 && length == 4
3586 && ! forward_branch_p (insn))
3587 return 4;
3588 else if (GET_CODE (pat) == PARALLEL
3589 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3590 && length == 4)
3591 return 4;
3592 /* Adjust dbra insn with short backwards conditional branch with
3593 unfilled delay slot -- only for case where counter is in a
3594 general register register. */
3595 else if (GET_CODE (pat) == PARALLEL
3596 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3597 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3598 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3599 && length == 4
3600 && ! forward_branch_p (insn))
3601 return 4;
3602 else
3603 return 0;
3605 return 0;
3608 /* Print operand X (an rtx) in assembler syntax to file FILE.
3609 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3610 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3612 void
3613 print_operand (file, x, code)
3614 FILE *file;
3615 rtx x;
3616 int code;
3618 switch (code)
3620 case '#':
3621 /* Output a 'nop' if there's nothing for the delay slot. */
3622 if (dbr_sequence_length () == 0)
3623 fputs ("\n\tnop", file);
3624 return;
3625 case '*':
3626 /* Output an nullification completer if there's nothing for the */
3627 /* delay slot or nullification is requested. */
3628 if (dbr_sequence_length () == 0 ||
3629 (final_sequence &&
3630 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3631 fputs (",n", file);
3632 return;
3633 case 'R':
3634 /* Print out the second register name of a register pair.
3635 I.e., R (6) => 7. */
3636 fputs (reg_names[REGNO (x)+1], file);
3637 return;
3638 case 'r':
3639 /* A register or zero. */
3640 if (x == const0_rtx
3641 || (x == CONST0_RTX (DFmode))
3642 || (x == CONST0_RTX (SFmode)))
3644 fputs ("0", file);
3645 return;
3647 else
3648 break;
3649 case 'C': /* Plain (C)ondition */
3650 case 'X':
3651 switch (GET_CODE (x))
3653 case EQ:
3654 fputs ("=", file); break;
3655 case NE:
3656 fputs ("<>", file); break;
3657 case GT:
3658 fputs (">", file); break;
3659 case GE:
3660 fputs (">=", file); break;
3661 case GEU:
3662 fputs (">>=", file); break;
3663 case GTU:
3664 fputs (">>", file); break;
3665 case LT:
3666 fputs ("<", file); break;
3667 case LE:
3668 fputs ("<=", file); break;
3669 case LEU:
3670 fputs ("<<=", file); break;
3671 case LTU:
3672 fputs ("<<", file); break;
3673 default:
3674 abort ();
3676 return;
3677 case 'N': /* Condition, (N)egated */
3678 switch (GET_CODE (x))
3680 case EQ:
3681 fputs ("<>", file); break;
3682 case NE:
3683 fputs ("=", file); break;
3684 case GT:
3685 fputs ("<=", file); break;
3686 case GE:
3687 fputs ("<", file); break;
3688 case GEU:
3689 fputs ("<<", file); break;
3690 case GTU:
3691 fputs ("<<=", file); break;
3692 case LT:
3693 fputs (">=", file); break;
3694 case LE:
3695 fputs (">", file); break;
3696 case LEU:
3697 fputs (">>", file); break;
3698 case LTU:
3699 fputs (">>=", file); break;
3700 default:
3701 abort ();
3703 return;
3704 /* For floating point comparisons. Need special conditions to deal
3705 with NaNs properly. */
3706 case 'Y':
3707 switch (GET_CODE (x))
3709 case EQ:
3710 fputs ("!=", file); break;
3711 case NE:
3712 fputs ("=", file); break;
3713 case GT:
3714 fputs ("<=", file); break;
3715 case GE:
3716 fputs ("<", file); break;
3717 case LT:
3718 fputs (">=", file); break;
3719 case LE:
3720 fputs (">", file); break;
3721 default:
3722 abort ();
3724 return;
3725 case 'S': /* Condition, operands are (S)wapped. */
3726 switch (GET_CODE (x))
3728 case EQ:
3729 fputs ("=", file); break;
3730 case NE:
3731 fputs ("<>", file); break;
3732 case GT:
3733 fputs ("<", file); break;
3734 case GE:
3735 fputs ("<=", file); break;
3736 case GEU:
3737 fputs ("<<=", file); break;
3738 case GTU:
3739 fputs ("<<", file); break;
3740 case LT:
3741 fputs (">", file); break;
3742 case LE:
3743 fputs (">=", file); break;
3744 case LEU:
3745 fputs (">>=", file); break;
3746 case LTU:
3747 fputs (">>", file); break;
3748 default:
3749 abort ();
3751 return;
3752 case 'B': /* Condition, (B)oth swapped and negate. */
3753 switch (GET_CODE (x))
3755 case EQ:
3756 fputs ("<>", file); break;
3757 case NE:
3758 fputs ("=", file); break;
3759 case GT:
3760 fputs (">=", file); break;
3761 case GE:
3762 fputs (">", file); break;
3763 case GEU:
3764 fputs (">>", file); break;
3765 case GTU:
3766 fputs (">>=", file); break;
3767 case LT:
3768 fputs ("<=", file); break;
3769 case LE:
3770 fputs ("<", file); break;
3771 case LEU:
3772 fputs ("<<", file); break;
3773 case LTU:
3774 fputs ("<<=", file); break;
3775 default:
3776 abort ();
3778 return;
3779 case 'k':
3780 if (GET_CODE (x) == CONST_INT)
3782 fprintf (file, "%d", ~INTVAL (x));
3783 return;
3785 abort();
3786 case 'L':
3787 if (GET_CODE (x) == CONST_INT)
3789 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3790 return;
3792 abort();
3793 case 'O':
3794 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3796 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3797 return;
3799 abort();
3800 case 'P':
3801 if (GET_CODE (x) == CONST_INT)
3803 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3804 return;
3806 abort();
3807 case 'I':
3808 if (GET_CODE (x) == CONST_INT)
3809 fputs ("i", file);
3810 return;
3811 case 'M':
3812 case 'F':
3813 switch (GET_CODE (XEXP (x, 0)))
3815 case PRE_DEC:
3816 case PRE_INC:
3817 fputs ("s,mb", file);
3818 break;
3819 case POST_DEC:
3820 case POST_INC:
3821 fputs ("s,ma", file);
3822 break;
3823 case PLUS:
3824 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3825 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3826 fputs ("x,s", file);
3827 else if (code == 'F')
3828 fputs ("s", file);
3829 break;
3830 default:
3831 if (code == 'F')
3832 fputs ("s", file);
3833 break;
3835 return;
3836 case 'G':
3837 output_global_address (file, x, 0);
3838 return;
3839 case 'H':
3840 output_global_address (file, x, 1);
3841 return;
3842 case 0: /* Don't do anything special */
3843 break;
3844 case 'Z':
3846 unsigned op[3];
3847 compute_zdepi_operands (INTVAL (x), op);
3848 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3849 return;
3851 default:
3852 abort ();
3854 if (GET_CODE (x) == REG)
3856 fputs (reg_names [REGNO (x)], file);
3857 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3858 fputs ("L", file);
3860 else if (GET_CODE (x) == MEM)
3862 int size = GET_MODE_SIZE (GET_MODE (x));
3863 rtx base = XEXP (XEXP (x, 0), 0);
3864 switch (GET_CODE (XEXP (x, 0)))
3866 case PRE_DEC:
3867 case POST_DEC:
3868 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3869 break;
3870 case PRE_INC:
3871 case POST_INC:
3872 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3873 break;
3874 default:
3875 if (GET_CODE (XEXP (x, 0)) == PLUS
3876 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3877 fprintf (file, "%s(0,%s)",
3878 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3879 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3880 else if (GET_CODE (XEXP (x, 0)) == PLUS
3881 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3882 fprintf (file, "%s(0,%s)",
3883 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3884 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3885 else
3886 output_address (XEXP (x, 0));
3887 break;
3890 else
3891 output_addr_const (file, x);
3894 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3896 void
3897 output_global_address (file, x, round_constant)
3898 FILE *file;
3899 rtx x;
3900 int round_constant;
3903 /* Imagine (high (const (plus ...))). */
3904 if (GET_CODE (x) == HIGH)
3905 x = XEXP (x, 0);
3907 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3908 assemble_name (file, XSTR (x, 0));
3909 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3911 assemble_name (file, XSTR (x, 0));
3912 fputs ("-$global$", file);
3914 else if (GET_CODE (x) == CONST)
3916 char *sep = "";
3917 int offset = 0; /* assembler wants -$global$ at end */
3918 rtx base = NULL_RTX;
3920 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3922 base = XEXP (XEXP (x, 0), 0);
3923 output_addr_const (file, base);
3925 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3926 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3927 else abort ();
3929 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3931 base = XEXP (XEXP (x, 0), 1);
3932 output_addr_const (file, base);
3934 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3935 offset = INTVAL (XEXP (XEXP (x, 0),1));
3936 else abort ();
3938 /* How bogus. The compiler is apparently responsible for
3939 rounding the constant if it uses an LR field selector.
3941 The linker and/or assembler seem a better place since
3942 they have to do this kind of thing already.
3944 If we fail to do this, HP's optimizing linker may eliminate
3945 an addil, but not update the ldw/stw/ldo instruction that
3946 uses the result of the addil. */
3947 if (round_constant)
3948 offset = ((offset + 0x1000) & ~0x1fff);
3950 if (GET_CODE (XEXP (x, 0)) == PLUS)
3952 if (offset < 0)
3954 offset = -offset;
3955 sep = "-";
3957 else
3958 sep = "+";
3960 else if (GET_CODE (XEXP (x, 0)) == MINUS
3961 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3962 sep = "-";
3963 else abort ();
3965 if (!read_only_operand (base) && !flag_pic)
3966 fputs ("-$global$", file);
3967 if (offset)
3968 fprintf (file,"%s%d", sep, offset);
3970 else
3971 output_addr_const (file, x);
3974 void
3975 output_deferred_plabels (file)
3976 FILE *file;
3978 int i;
3979 /* If we have deferred plabels, then we need to switch into the data
3980 section and align it to a 4 byte boundary before we output the
3981 deferred plabels. */
3982 if (n_deferred_plabels)
3984 data_section ();
3985 ASM_OUTPUT_ALIGN (file, 2);
3988 /* Now output the deferred plabels. */
3989 for (i = 0; i < n_deferred_plabels; i++)
3991 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
3992 assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode,
3993 deferred_plabels[i].name), 4, 1);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
static char imported[(int)end1000];
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
static char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

/* Emit a ".IMPORT $$name,MILLICODE" directive for millicode routine CODE
   the first time it is used; later calls for the same routine do nothing.
   The routine name is patched into the "$$...." slot of IMPORT_STRING at
   offset MILLI_START.  */
static void
import_milli (code)
     enum millicodes code;
{
  char str[sizeof (import_string)];

  if (!imported[(int)code])
    {
      imported[(int)code] = 1;
      strcpy (str, import_string);
      strncpy (str + MILLI_START, milli_names[(int)code], 4);
      output_asm_insn (str, 0);
    }
}
4021 /* The register constraints have put the operands and return value in
4022 the proper registers. */
4024 char *
4025 output_mul_insn (unsignedp, insn)
4026 int unsignedp ATTRIBUTE_UNUSED;
4027 rtx insn;
4029 import_milli (mulI);
4030 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI"));
4033 /* Emit the rtl for doing a division by a constant. */
4035 /* Do magic division millicodes exist for this value? */
4036 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4037 1, 1};
4039 /* We'll use an array to keep track of the magic millicodes and
4040 whether or not we've used them already. [n][0] is signed, [n][1] is
4041 unsigned. */
4043 static int div_milli[16][2];
4046 div_operand (op, mode)
4047 rtx op;
4048 enum machine_mode mode;
4050 return (mode == SImode
4051 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4052 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4053 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4057 emit_hpdiv_const (operands, unsignedp)
4058 rtx *operands;
4059 int unsignedp;
4061 if (GET_CODE (operands[2]) == CONST_INT
4062 && INTVAL (operands[2]) > 0
4063 && INTVAL (operands[2]) < 16
4064 && magic_milli[INTVAL (operands[2])])
4066 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4067 emit
4068 (gen_rtx
4069 (PARALLEL, VOIDmode,
4070 gen_rtvec (5, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4071 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4072 SImode,
4073 gen_rtx_REG (SImode, 26),
4074 operands[2])),
4075 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4076 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4077 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4078 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
4079 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4080 return 1;
4082 return 0;
4085 char *
4086 output_div_insn (operands, unsignedp, insn)
4087 rtx *operands;
4088 int unsignedp;
4089 rtx insn;
4091 int divisor;
4093 /* If the divisor is a constant, try to use one of the special
4094 opcodes .*/
4095 if (GET_CODE (operands[0]) == CONST_INT)
4097 static char buf[100];
4098 divisor = INTVAL (operands[0]);
4099 if (!div_milli[divisor][unsignedp])
4101 div_milli[divisor][unsignedp] = 1;
4102 if (unsignedp)
4103 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4104 else
4105 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4107 if (unsignedp)
4109 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
4110 return output_millicode_call (insn,
4111 gen_rtx_SYMBOL_REF (SImode, buf));
4113 else
4115 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
4116 return output_millicode_call (insn,
4117 gen_rtx_SYMBOL_REF (SImode, buf));
4120 /* Divisor isn't a special constant. */
4121 else
4123 if (unsignedp)
4125 import_milli (divU);
4126 return output_millicode_call (insn,
4127 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4129 else
4131 import_milli (divI);
4132 return output_millicode_call (insn,
4133 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4138 /* Output a $$rem millicode to do mod. */
4140 char *
4141 output_mod_insn (unsignedp, insn)
4142 int unsignedp;
4143 rtx insn;
4145 if (unsignedp)
4147 import_milli (remU);
4148 return output_millicode_call (insn,
4149 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4151 else
4153 import_milli (remI);
4154 return output_millicode_call (insn,
4155 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4159 void
4160 output_arg_descriptor (call_insn)
4161 rtx call_insn;
4163 char *arg_regs[4];
4164 enum machine_mode arg_mode;
4165 rtx link;
4166 int i, output_flag = 0;
4167 int regno;
4169 for (i = 0; i < 4; i++)
4170 arg_regs[i] = 0;
4172 /* Specify explicitly that no argument relocations should take place
4173 if using the portable runtime calling conventions. */
4174 if (TARGET_PORTABLE_RUNTIME)
4176 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4177 asm_out_file);
4178 return;
4181 if (GET_CODE (call_insn) != CALL_INSN)
4182 abort ();
4183 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4185 rtx use = XEXP (link, 0);
4187 if (! (GET_CODE (use) == USE
4188 && GET_CODE (XEXP (use, 0)) == REG
4189 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4190 continue;
4192 arg_mode = GET_MODE (XEXP (use, 0));
4193 regno = REGNO (XEXP (use, 0));
4194 if (regno >= 23 && regno <= 26)
4196 arg_regs[26 - regno] = "GR";
4197 if (arg_mode == DImode)
4198 arg_regs[25 - regno] = "GR";
4200 else if (regno >= 32 && regno <= 39)
4202 if (arg_mode == SFmode)
4203 arg_regs[(regno - 32) / 2] = "FR";
4204 else
4206 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4207 arg_regs[(regno - 34) / 2] = "FR";
4208 arg_regs[(regno - 34) / 2 + 1] = "FU";
4209 #else
4210 arg_regs[(regno - 34) / 2] = "FU";
4211 arg_regs[(regno - 34) / 2 + 1] = "FR";
4212 #endif
4216 fputs ("\t.CALL ", asm_out_file);
4217 for (i = 0; i < 4; i++)
4219 if (arg_regs[i])
4221 if (output_flag++)
4222 fputc (',', asm_out_file);
4223 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4226 fputc ('\n', asm_out_file);
4229 /* Return the class of any secondary reload register that is needed to
4230 move IN into a register in class CLASS using mode MODE.
4232 Profiling has showed this routine and its descendants account for
4233 a significant amount of compile time (~7%). So it has been
4234 optimized to reduce redundant computations and eliminate useless
4235 function calls.
4237 It might be worthwhile to try and make this a leaf function too. */
4239 enum reg_class
4240 secondary_reload_class (class, mode, in)
4241 enum reg_class class;
4242 enum machine_mode mode;
4243 rtx in;
4245 int regno, is_symbolic;
4247 /* Trying to load a constant into a FP register during PIC code
4248 generation will require %r1 as a scratch register. */
4249 if (flag_pic == 2
4250 && GET_MODE_CLASS (mode) == MODE_INT
4251 && FP_REG_CLASS_P (class)
4252 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4253 return R1_REGS;
4255 /* Profiling showed the PA port spends about 1.3% of its compilation
4256 time in true_regnum from calls inside secondary_reload_class. */
4258 if (GET_CODE (in) == REG)
4260 regno = REGNO (in);
4261 if (regno >= FIRST_PSEUDO_REGISTER)
4262 regno = true_regnum (in);
4264 else if (GET_CODE (in) == SUBREG)
4265 regno = true_regnum (in);
4266 else
4267 regno = -1;
4269 /* If we have something like (mem (mem (...)), we can safely assume the
4270 inner MEM will end up in a general register after reloading, so there's
4271 no need for a secondary reload. */
4272 if (GET_CODE (in) == MEM
4273 && GET_CODE (XEXP (in, 0)) == MEM)
4274 return NO_REGS;
4276 /* Handle out of range displacement for integer mode loads/stores of
4277 FP registers. */
4278 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4279 && GET_MODE_CLASS (mode) == MODE_INT
4280 && FP_REG_CLASS_P (class))
4281 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4282 return GENERAL_REGS;
4284 if (GET_CODE (in) == HIGH)
4285 in = XEXP (in, 0);
4287 /* Profiling has showed GCC spends about 2.6% of its compilation
4288 time in symbolic_operand from calls inside secondary_reload_class.
4290 We use an inline copy and only compute its return value once to avoid
4291 useless work. */
4292 switch (GET_CODE (in))
4294 rtx tmp;
4296 case SYMBOL_REF:
4297 case LABEL_REF:
4298 is_symbolic = 1;
4299 break;
4300 case CONST:
4301 tmp = XEXP (in, 0);
4302 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4303 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4304 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4305 break;
4307 default:
4308 is_symbolic = 0;
4309 break;
4312 if (!flag_pic
4313 && is_symbolic
4314 && read_only_operand (in))
4315 return NO_REGS;
4317 if (class != R1_REGS && is_symbolic)
4318 return R1_REGS;
4320 return NO_REGS;
4323 enum direction
4324 function_arg_padding (mode, type)
4325 enum machine_mode mode;
4326 tree type;
4328 int size;
4330 if (mode == BLKmode)
4332 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4333 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4334 else
4335 return upward; /* Don't know if this is right, but */
4336 /* same as old definition. */
4338 else
4339 size = GET_MODE_BITSIZE (mode);
4340 if (size < PARM_BOUNDARY)
4341 return downward;
4342 else if (size % PARM_BOUNDARY)
4343 return upward;
4344 else
4345 return none;
4349 /* Do what is necessary for `va_start'. The argument is ignored;
4350 We look at the current function to determine if stdargs or varargs
4351 is used and fill in an initial va_list. A pointer to this constructor
4352 is returned. */
4354 struct rtx_def *
4355 hppa_builtin_saveregs (arglist)
4356 tree arglist ATTRIBUTE_UNUSED;
4358 rtx offset, dest;
4359 tree fntype = TREE_TYPE (current_function_decl);
4360 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4361 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4362 != void_type_node)))
4363 ? UNITS_PER_WORD : 0);
4365 if (argadj)
4366 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4367 else
4368 offset = current_function_arg_offset_rtx;
4370 /* Store general registers on the stack. */
4371 dest = gen_rtx_MEM (BLKmode,
4372 plus_constant (current_function_internal_arg_pointer,
4373 -16));
4374 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
4376 /* move_block_from_reg will emit code to store the argument registers
4377 individually as scalar stores.
4379 However, other insns may later load from the same addresses for
4380 a structure load (passing a struct to a varargs routine).
4382 The alias code assumes that such aliasing can never happen, so we
4383 have to keep memory referencing insns from moving up beyond the
4384 last argument register store. So we emit a blockage insn here. */
4385 emit_insn (gen_blockage ());
4387 if (flag_check_memory_usage)
4388 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4389 dest, ptr_mode,
4390 GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype),
4391 GEN_INT (MEMORY_USE_RW),
4392 TYPE_MODE (integer_type_node));
4394 return copy_to_reg (expand_binop (Pmode, add_optab,
4395 current_function_internal_arg_pointer,
4396 offset, 0, 0, OPTAB_LIB_WIDEN));
4399 /* This routine handles all the normal conditional branch sequences we
4400 might need to generate. It handles compare immediate vs compare
4401 register, nullification of delay slots, varying length branches,
4402 negated branches, and all combinations of the above. It returns the
4403 output appropriate to emit the branch corresponding to all given
4404 parameters. */
4406 char *
4407 output_cbranch (operands, nullify, length, negated, insn)
4408 rtx *operands;
4409 int nullify, length, negated;
4410 rtx insn;
4412 static char buf[100];
4413 int useskip = 0;
4415 /* A conditional branch to the following instruction (eg the delay slot) is
4416 asking for a disaster. This can happen when not optimizing.
4418 In such cases it is safe to emit nothing. */
4420 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4421 return "";
4423 /* If this is a long branch with its delay slot unfilled, set `nullify'
4424 as it can nullify the delay slot and save a nop. */
4425 if (length == 8 && dbr_sequence_length () == 0)
4426 nullify = 1;
4428 /* If this is a short forward conditional branch which did not get
4429 its delay slot filled, the delay slot can still be nullified. */
4430 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4431 nullify = forward_branch_p (insn);
4433 /* A forward branch over a single nullified insn can be done with a
4434 comclr instruction. This avoids a single cycle penalty due to
4435 mis-predicted branch if we fall through (branch not taken). */
4436 if (length == 4
4437 && next_real_insn (insn) != 0
4438 && get_attr_length (next_real_insn (insn)) == 4
4439 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4440 && nullify)
4441 useskip = 1;
4443 switch (length)
4445 /* All short conditional branches except backwards with an unfilled
4446 delay slot. */
4447 case 4:
4448 if (useskip)
4449 strcpy (buf, "com%I2clr,");
4450 else
4451 strcpy (buf, "com%I2b,");
4452 if (negated)
4453 strcat (buf, "%B3");
4454 else
4455 strcat (buf, "%S3");
4456 if (useskip)
4457 strcat (buf, " %2,%1,0");
4458 else if (nullify)
4459 strcat (buf, ",n %2,%1,%0");
4460 else
4461 strcat (buf, " %2,%1,%0");
4462 break;
4464 /* All long conditionals. Note an short backward branch with an
4465 unfilled delay slot is treated just like a long backward branch
4466 with an unfilled delay slot. */
4467 case 8:
4468 /* Handle weird backwards branch with a filled delay slot
4469 with is nullified. */
4470 if (dbr_sequence_length () != 0
4471 && ! forward_branch_p (insn)
4472 && nullify)
4474 strcpy (buf, "com%I2b,");
4475 if (negated)
4476 strcat (buf, "%S3");
4477 else
4478 strcat (buf, "%B3");
4479 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4481 /* Handle short backwards branch with an unfilled delay slot.
4482 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4483 taken and untaken branches. */
4484 else if (dbr_sequence_length () == 0
4485 && ! forward_branch_p (insn)
4486 && insn_addresses
4487 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4488 - insn_addresses[INSN_UID (insn)] - 8))
4490 strcpy (buf, "com%I2b,");
4491 if (negated)
4492 strcat (buf, "%B3 %2,%1,%0%#");
4493 else
4494 strcat (buf, "%S3 %2,%1,%0%#");
4496 else
4498 strcpy (buf, "com%I2clr,");
4499 if (negated)
4500 strcat (buf, "%S3");
4501 else
4502 strcat (buf, "%B3");
4503 if (nullify)
4504 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4505 else
4506 strcat (buf, " %2,%1,0\n\tbl %0,0");
4508 break;
4510 case 20:
4511 /* Very long branch. Right now we only handle these when not
4512 optimizing. See "jump" pattern in pa.md for details. */
4513 if (optimize)
4514 abort ();
4516 /* Create a reversed conditional branch which branches around
4517 the following insns. */
4518 if (negated)
4519 strcpy (buf, "com%I2b,%S3,n %2,%1,.+20");
4520 else
4521 strcpy (buf, "com%I2b,%B3,n %2,%1,.+20");
4522 output_asm_insn (buf, operands);
4524 /* Output an insn to save %r1. */
4525 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
4527 /* Now output a very long branch to the original target. */
4528 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
4530 /* Now restore the value of %r1 in the delay slot. We're not
4531 optimizing so we know nothing else can be in the delay slot. */
4532 return "ldw -16(%%r30),%%r1";
4534 case 28:
4535 /* Very long branch when generating PIC code. Right now we only
4536 handle these when not optimizing. See "jump" pattern in pa.md
4537 for details. */
4538 if (optimize)
4539 abort ();
4541 /* Create a reversed conditional branch which branches around
4542 the following insns. */
4543 if (negated)
4544 strcpy (buf, "com%I2b,%S3,n %2,%1,.+28");
4545 else
4546 strcpy (buf, "com%I2b,%B3,n %2,%1,.+28");
4547 output_asm_insn (buf, operands);
4549 /* Output an insn to save %r1. */
4550 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
4552 /* Now output a very long PIC branch to the original target. */
4554 rtx xoperands[5];
4556 xoperands[0] = operands[0];
4557 xoperands[1] = operands[1];
4558 xoperands[2] = operands[2];
4559 xoperands[3] = operands[3];
4560 xoperands[4] = gen_label_rtx ();
4562 output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands);
4563 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4564 CODE_LABEL_NUMBER (xoperands[4]));
4565 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands);
4568 /* Now restore the value of %r1 in the delay slot. We're not
4569 optimizing so we know nothing else can be in the delay slot. */
4570 return "ldw -16(%%r30),%%r1";
4572 default:
4573 abort();
4575 return buf;
4578 /* This routine handles all the branch-on-bit conditional branch sequences we
4579 might need to generate. It handles nullification of delay slots,
4580 varying length branches, negated branches and all combinations of the
4581 above. it returns the appropriate output template to emit the branch. */
4583 char *
4584 output_bb (operands, nullify, length, negated, insn, which)
4585 rtx *operands ATTRIBUTE_UNUSED;
4586 int nullify, length, negated;
4587 rtx insn;
4588 int which;
4590 static char buf[100];
4591 int useskip = 0;
4593 /* A conditional branch to the following instruction (eg the delay slot) is
4594 asking for a disaster. I do not think this can happen as this pattern
4595 is only used when optimizing; jump optimization should eliminate the
4596 jump. But be prepared just in case. */
4598 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4599 return "";
4601 /* If this is a long branch with its delay slot unfilled, set `nullify'
4602 as it can nullify the delay slot and save a nop. */
4603 if (length == 8 && dbr_sequence_length () == 0)
4604 nullify = 1;
4606 /* If this is a short forward conditional branch which did not get
4607 its delay slot filled, the delay slot can still be nullified. */
4608 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4609 nullify = forward_branch_p (insn);
4611 /* A forward branch over a single nullified insn can be done with a
4612 extrs instruction. This avoids a single cycle penalty due to
4613 mis-predicted branch if we fall through (branch not taken). */
4615 if (length == 4
4616 && next_real_insn (insn) != 0
4617 && get_attr_length (next_real_insn (insn)) == 4
4618 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4619 && nullify)
4620 useskip = 1;
4622 switch (length)
4625 /* All short conditional branches except backwards with an unfilled
4626 delay slot. */
4627 case 4:
4628 if (useskip)
4629 strcpy (buf, "extrs,");
4630 else
4631 strcpy (buf, "bb,");
4632 if ((which == 0 && negated)
4633 || (which == 1 && ! negated))
4634 strcat (buf, ">=");
4635 else
4636 strcat (buf, "<");
4637 if (useskip)
4638 strcat (buf, " %0,%1,1,0");
4639 else if (nullify && negated)
4640 strcat (buf, ",n %0,%1,%3");
4641 else if (nullify && ! negated)
4642 strcat (buf, ",n %0,%1,%2");
4643 else if (! nullify && negated)
4644 strcat (buf, "%0,%1,%3");
4645 else if (! nullify && ! negated)
4646 strcat (buf, " %0,%1,%2");
4647 break;
4649 /* All long conditionals. Note an short backward branch with an
4650 unfilled delay slot is treated just like a long backward branch
4651 with an unfilled delay slot. */
4652 case 8:
4653 /* Handle weird backwards branch with a filled delay slot
4654 with is nullified. */
4655 if (dbr_sequence_length () != 0
4656 && ! forward_branch_p (insn)
4657 && nullify)
4659 strcpy (buf, "bb,");
4660 if ((which == 0 && negated)
4661 || (which == 1 && ! negated))
4662 strcat (buf, "<");
4663 else
4664 strcat (buf, ">=");
4665 if (negated)
4666 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4667 else
4668 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4670 /* Handle short backwards branch with an unfilled delay slot.
4671 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4672 taken and untaken branches. */
4673 else if (dbr_sequence_length () == 0
4674 && ! forward_branch_p (insn)
4675 && insn_addresses
4676 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4677 - insn_addresses[INSN_UID (insn)] - 8))
4679 strcpy (buf, "bb,");
4680 if ((which == 0 && negated)
4681 || (which == 1 && ! negated))
4682 strcat (buf, ">=");
4683 else
4684 strcat (buf, "<");
4685 if (negated)
4686 strcat (buf, " %0,%1,%3%#");
4687 else
4688 strcat (buf, " %0,%1,%2%#");
4690 else
4692 strcpy (buf, "extrs,");
4693 if ((which == 0 && negated)
4694 || (which == 1 && ! negated))
4695 strcat (buf, "<");
4696 else
4697 strcat (buf, ">=");
4698 if (nullify && negated)
4699 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4700 else if (nullify && ! negated)
4701 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4702 else if (negated)
4703 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4704 else
4705 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4707 break;
4709 default:
4710 abort();
4712 return buf;
4715 /* This routine handles all the branch-on-variable-bit conditional branch
4716 sequences we might need to generate. It handles nullification of delay
4717 slots, varying length branches, negated branches and all combinations
4718 of the above. it returns the appropriate output template to emit the
4719 branch. */
4721 char *
4722 output_bvb (operands, nullify, length, negated, insn, which)
4723 rtx *operands ATTRIBUTE_UNUSED;
4724 int nullify, length, negated;
4725 rtx insn;
4726 int which;
4728 static char buf[100];
4729 int useskip = 0;
4731 /* A conditional branch to the following instruction (eg the delay slot) is
4732 asking for a disaster. I do not think this can happen as this pattern
4733 is only used when optimizing; jump optimization should eliminate the
4734 jump. But be prepared just in case. */
4736 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4737 return "";
4739 /* If this is a long branch with its delay slot unfilled, set `nullify'
4740 as it can nullify the delay slot and save a nop. */
4741 if (length == 8 && dbr_sequence_length () == 0)
4742 nullify = 1;
4744 /* If this is a short forward conditional branch which did not get
4745 its delay slot filled, the delay slot can still be nullified. */
4746 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4747 nullify = forward_branch_p (insn);
4749 /* A forward branch over a single nullified insn can be done with a
4750 extrs instruction. This avoids a single cycle penalty due to
4751 mis-predicted branch if we fall through (branch not taken). */
4753 if (length == 4
4754 && next_real_insn (insn) != 0
4755 && get_attr_length (next_real_insn (insn)) == 4
4756 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4757 && nullify)
4758 useskip = 1;
4760 switch (length)
4763 /* All short conditional branches except backwards with an unfilled
4764 delay slot. */
4765 case 4:
4766 if (useskip)
4767 strcpy (buf, "vextrs,");
4768 else
4769 strcpy (buf, "bvb,");
4770 if ((which == 0 && negated)
4771 || (which == 1 && ! negated))
4772 strcat (buf, ">=");
4773 else
4774 strcat (buf, "<");
4775 if (useskip)
4776 strcat (buf, " %0,1,0");
4777 else if (nullify && negated)
4778 strcat (buf, ",n %0,%3");
4779 else if (nullify && ! negated)
4780 strcat (buf, ",n %0,%2");
4781 else if (! nullify && negated)
4782 strcat (buf, "%0,%3");
4783 else if (! nullify && ! negated)
4784 strcat (buf, " %0,%2");
4785 break;
4787 /* All long conditionals. Note an short backward branch with an
4788 unfilled delay slot is treated just like a long backward branch
4789 with an unfilled delay slot. */
4790 case 8:
4791 /* Handle weird backwards branch with a filled delay slot
4792 with is nullified. */
4793 if (dbr_sequence_length () != 0
4794 && ! forward_branch_p (insn)
4795 && nullify)
4797 strcpy (buf, "bvb,");
4798 if ((which == 0 && negated)
4799 || (which == 1 && ! negated))
4800 strcat (buf, "<");
4801 else
4802 strcat (buf, ">=");
4803 if (negated)
4804 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4805 else
4806 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4808 /* Handle short backwards branch with an unfilled delay slot.
4809 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4810 taken and untaken branches. */
4811 else if (dbr_sequence_length () == 0
4812 && ! forward_branch_p (insn)
4813 && insn_addresses
4814 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4815 - insn_addresses[INSN_UID (insn)] - 8))
4817 strcpy (buf, "bvb,");
4818 if ((which == 0 && negated)
4819 || (which == 1 && ! negated))
4820 strcat (buf, ">=");
4821 else
4822 strcat (buf, "<");
4823 if (negated)
4824 strcat (buf, " %0,%3%#");
4825 else
4826 strcat (buf, " %0,%2%#");
4828 else
4830 strcpy (buf, "vextrs,");
4831 if ((which == 0 && negated)
4832 || (which == 1 && ! negated))
4833 strcat (buf, "<");
4834 else
4835 strcat (buf, ">=");
4836 if (nullify && negated)
4837 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4838 else if (nullify && ! negated)
4839 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4840 else if (negated)
4841 strcat (buf, " %0,1,0\n\tbl %3,0");
4842 else
4843 strcat (buf, " %0,1,0\n\tbl %2,0");
4845 break;
4847 default:
4848 abort();
4850 return buf;
/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   OPERANDS are referenced symbolically from the templates (%0 = loop
   counter, %1 = increment, %2 = condition, %3 = branch target,
   %4 = scratch register for the reload alternatives).
   INSN is the branch insn itself.
   WHICH_ALTERNATIVE says where the loop counter lives: 0 = general
   register, 1 = FP register (bounced through memory), otherwise
   memory.  */
char *
output_dbra (operands, insn, which_alternative)
     rtx *operands;
     rtx insn;
     int which_alternative;
{
  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* No branch needed; just perform the decrement/increment.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  /* Counter is in an FP register; bounce it through memory into a
	     GR, adjust it, then reload the FP register.  */
	  output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
	  output_asm_insn ("ldw -16(0,%%r30),%4",operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else
	{
	  /* Counter is in memory.  */
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "addib,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "addib,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				     - insn_addresses[INSN_UID (insn)] - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
	  else
	    return "addi,%N2 %1,%0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
      if (get_attr_length (insn) == 24)
	return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (get_attr_length (insn) == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else
	return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
    }
}
/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.

   OPERANDS are referenced symbolically from the templates (%1 = source,
   %0 = destination, %2 = condition, %3 = branch target).
   INSN is the branch insn itself.
   WHICH_ALTERNATIVE says where the destination lives: 0 = general
   register, 1 = FP register (bounced through memory), 2 = memory,
   otherwise the SAR register.
   REVERSE_COMPARISON is nonzero if the sense of the comparison in
   operands[2] must be reversed first; note this mutates operands[2]
   in place.  */
char *
output_movb (operands, insn, which_alternative, reverse_comparison)
     rtx *operands;
     rtx insn;
     int which_alternative;
     int reverse_comparison;
{
  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    {
      /* No branch needed; just perform the move.  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  /* Destination is an FP register; bounce through memory.  */
	  output_asm_insn ("stw %1,-16(0,%%r30)",operands);
	  return "fldws -16(0,%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int length = get_attr_length (insn);

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      /* Handle short versions first.  */
      if (length == 4 && nullify)
	return "movb,%C2,n %1,%0,%3";
      else if (length == 4 && ! nullify)
	return "movb,%C2 %1,%0,%3";
      else if (length == 8)
	{
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && insn_addresses
		   && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				     - insn_addresses[INSN_UID (insn)] - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
	}
      else
	abort();
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("stw %1,-16(0,%%r30)",operands);
      if (get_attr_length (insn) == 12)
	return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tstw %1,%0";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (get_attr_length (insn) == 8)
	return "comb,%S2 0,%1,%3\n\tmtsar %r1";
      else
	return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
    }
}
/* INSN is a millicode call.  It may have an unconditional jump in its delay
   slot.

   CALL_DEST is the routine we are calling.

   Returns the output template for the final branch (possibly empty when
   everything has already been emitted via output_asm_insn).  */

char *
output_millicode_call (insn, call_dest)
     rtx insn;
     rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r31%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 4)
    {
      int delay_insn_deleted = 0;
      rtx xoperands[2];

      /* We need to emit an inline long-call branch.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call.  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* If we're allowed to use be/ble instructions, then this is the
	 best sequence to use for a long millicode call.  */
      if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
	  || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
	{
	  xoperands[0] = call_dest;
	  output_asm_insn ("ldil L%%%0,%%r31", xoperands);
	  output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
	  output_asm_insn ("nop", xoperands);
	}
      /* Pure portable runtime doesn't allow be/ble; we also don't have
	 PIC support in the assembler/linker, so this sequence is needed.  */
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  xoperands[0] = call_dest;
	  /* Get the address of our target into %r29.  */
	  output_asm_insn ("ldil L%%%0,%%r29", xoperands);
	  output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Jump to our target address in %r29.  */
	  output_asm_insn ("bv,n 0(%%r29)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	  /* NOTE(review): this branch returns before the delay-slot jump
	     handling below; presumably no jump can be in the delay slot in
	     the portable-runtime case -- confirm.  */
	  return "";
	}
      /* PIC long millicode call sequence.  */
      else
	{
	  xoperands[0] = call_dest;
	  xoperands[1] = gen_label_rtx ();
	  /* Get our address + 8 into %r1.  */
	  output_asm_insn ("bl .+8,%%r1", xoperands);

	  /* Add %r1 to the offset of our target from the next insn.  */
	  output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);

	  /* Get the return address into %r31.  */
	  output_asm_insn ("blr 0,%%r31", xoperands);

	  /* Branch to our target which is in %r1.  */
	  output_asm_insn ("bv,n 0(%%r1)", xoperands);

	  /* Empty delay slot.  Note this insn gets fetched twice and
	     executed once.  To be safe we use a nop.  */
	  output_asm_insn ("nop", xoperands);
	}

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	       - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
5226 extern struct obstack permanent_obstack;
5227 extern struct obstack *saveable_obstack;
5228 extern struct obstack *rtl_obstack;
5229 extern struct obstack *current_obstack;
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Returns the output template for the final branch (possibly empty when
   everything has already been emitted via output_asm_insn).  */

char *
output_call (insn, call_dest)
     rtx insn;
     rtx call_dest;
{
  int distance;
  rtx xoperands[4];
  rtx seq_insn;

  /* Handle common case -- empty delay slot or no jump in the delay slot,
     and we're sure that the branch will reach the beginning of the $CODE$
     subspace.  */
  if ((dbr_sequence_length () == 0
       && get_attr_length (insn) == 8)
      || (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	  && get_attr_length (insn) == 4))
    {
      xoperands[0] = call_dest;
      output_asm_insn ("bl %0,%%r2%#", xoperands);
      return "";
    }

  /* This call may not reach the beginning of the $CODE$ subspace.  */
  if (get_attr_length (insn) > 8)
    {
      int delay_insn_deleted = 0;
      rtx xoperands[2];
      rtx link;

      /* We need to emit an inline long-call branch.  Furthermore,
	 because we're changing a named function call into an indirect
	 function call well after the parameters have been set up, we
	 need to make sure any FP args appear in both the integer
	 and FP registers.  Also, we need move any delay slot insn
	 out of the delay slot.  And finally, we can't rely on the linker
	 being able to fix the call to $$dyncall!  -- Yuk!.  */
      if (dbr_sequence_length () != 0
	  && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
	{
	  /* A non-jump insn in the delay slot.  By definition we can
	     emit this insn before the call (and in fact before argument
	     relocating).  */
	  final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	  delay_insn_deleted = 1;
	}

      /* Now copy any FP arguments into integer registers.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
	{
	  int arg_mode, regno;
	  rtx use = XEXP (link, 0);
	  if (! (GET_CODE (use) == USE
		 && GET_CODE (XEXP (use, 0)) == REG
		 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	    continue;

	  arg_mode = GET_MODE (XEXP (use, 0));
	  regno = REGNO (XEXP (use, 0));
	  /* Is it a floating point register?  */
	  if (regno >= 32 && regno <= 39)
	    {
	      /* Copy from the FP register into an integer register
		 (via memory).  */
	      if (arg_mode == SFmode)
		{
		  xoperands[0] = XEXP (use, 0);
		  xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
		  output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	      else
		{
		  xoperands[0] = XEXP (use, 0);
		  xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
		  output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
		  output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
		  output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
		}
	    }
	}

      /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
	 we don't have any direct calls in that case.  */
      {
	int i;
	char *name = XSTR (call_dest, 0);

	/* See if we have already put this function on the list
	   of deferred plabels.  This list is generally small,
	   so a linear search is not too ugly.  If it proves too
	   slow replace it with something faster.  */
	for (i = 0; i < n_deferred_plabels; i++)
	  if (strcmp (name, deferred_plabels[i].name) == 0)
	    break;

	/* If the deferred plabel list is empty, or this entry was
	   not found on the list, create a new entry on the list.  */
	if (deferred_plabels == NULL || i == n_deferred_plabels)
	  {
	    struct obstack *ambient_obstack = current_obstack;
	    struct obstack *ambient_rtl_obstack = rtl_obstack;
	    char *real_name;

	    /* Any RTL we create here needs to live until the end of
	       the compilation unit and therefore must live on the
	       permanent obstack.  */
	    current_obstack = &permanent_obstack;
	    rtl_obstack = &permanent_obstack;

	    if (deferred_plabels == 0)
	      deferred_plabels = (struct deferred_plabel *)
		xmalloc (1 * sizeof (struct deferred_plabel));
	    else
	      deferred_plabels = (struct deferred_plabel *)
		xrealloc (deferred_plabels,
			  ((n_deferred_plabels + 1)
			   * sizeof (struct deferred_plabel)));

	    i = n_deferred_plabels++;
	    deferred_plabels[i].internal_label = gen_label_rtx ();
	    deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
						      strlen (name) + 1);
	    strcpy (deferred_plabels[i].name, name);

	    /* Switch back to normal obstack allocation.  */
	    current_obstack = ambient_obstack;
	    rtl_obstack = ambient_rtl_obstack;

	    /* Gross.  We have just implicitly taken the address of this
	       function, mark it as such.  */
	    STRIP_NAME_ENCODING (real_name, name);
	    TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
	  }

	/* We have to load the address of the function using a procedure
	   label (plabel).  Inline plabels can lose for PIC and other
	   cases, so avoid them by creating a 32bit plabel in the data
	   segment.  */
	if (flag_pic)
	  {
	    xoperands[0] = deferred_plabels[i].internal_label;
	    xoperands[1] = gen_label_rtx ();

	    output_asm_insn ("addil LT%%%0,%%r19", xoperands);
	    output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
	    output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);

	    /* Get our address + 8 into %r1.  */
	    output_asm_insn ("bl .+8,%%r1", xoperands);

	    /* Add %r1 to the offset of dyncall from the next insn.  */
	    output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
	    ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				       CODE_LABEL_NUMBER (xoperands[1]));
	    output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);

	    /* Get the return address into %r31.  */
	    output_asm_insn ("blr 0,%%r31", xoperands);

	    /* Branch to our target which is in %r1.  */
	    output_asm_insn ("bv 0(%%r1)", xoperands);

	    /* Copy the return address into %r2 also.  */
	    output_asm_insn ("copy %%r31,%%r2", xoperands);
	  }
	else
	  {
	    xoperands[0] = deferred_plabels[i].internal_label;

	    /* Get the address of our target into %r22.  */
	    output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
	    output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);

	    /* Get the high part of the address of $dyncall into %r2, then
	       add in the low part in the branch instruction.  */
	    output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
	    output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);

	    /* Copy the return pointer into both %r31 and %r2.  */
	    output_asm_insn ("copy %%r31,%%r2", xoperands);
	  }
      }

      /* If we had a jump in the call's delay slot, output it now.  */
      if (dbr_sequence_length () != 0
	  && !delay_insn_deleted)
	{
	  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
	  output_asm_insn ("b,n %0", xoperands);

	  /* Now delete the delay insn.  */
	  PUT_CODE (NEXT_INSN (insn), NOTE);
	  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	}
      return "";
    }

  /* This call has an unconditional jump in its delay slot and the
     call is known to reach its target or the beginning of the current
     subspace.  */

  /* Use the containing sequence insn's address.  */
  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));

  distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
	       - insn_addresses[INSN_UID (seq_insn)] - 8;

  /* If the branch was too far away, emit a normal call followed
     by a nop, followed by the unconditional branch.

     If the branch is close, then adjust %r2 from within the
     call's delay slot.  */

  xoperands[0] = call_dest;
  xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
  if (! VAL_14_BITS_P (distance))
    output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
  else
    {
      xoperands[3] = gen_label_rtx ();
      output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (xoperands[3]));
    }

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
  return "";
}
/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.

   For reasons too disgusting to describe storage for the new name
   is allocated either on the saveable_obstack (released at function
   exit) or on the permanent_obstack for things that can never change
   (libcall names for example).  */

/* Mark SYM (a SYMBOL_REF) as a function label by rebuilding its name
   with a leading '@' -- the marker FUNCTION_NAME_P tests for.
   PERMANENT nonzero allocates the new string on the permanent obstack,
   zero on the saveable obstack.  */
void
hppa_encode_label (sym, permanent)
     rtx sym;
     int permanent;
  char *str = XSTR (sym, 0);
  int len = strlen (str);
  char *newstr;

  /* len + 2 leaves room for the '@' and the terminating NUL.
     permanent_obstack is an obstack object while saveable_obstack is
     already a pointer, hence the asymmetric '&'.  */
  newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
			  len + 2);

  /* NOTE(review): when the name starts with '*', the '*' is copied but
     NEWSTR is advanced past it before being stored, so the recorded
     name begins with '@' and the '*' byte sits unread just before it
     -- confirm consumers expect the '*' prefix to be dropped.  */
  if (str[0] == '*')
    *newstr++ = *str++;
  strcpy (newstr + 1, str);
  *newstr = '@';
  XSTR (sym,0) = newstr;
5505 function_label_operand (op, mode)
5506 rtx op;
5507 enum machine_mode mode ATTRIBUTE_UNUSED;
5509 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5512 /* Returns 1 if OP is a function label involved in a simple addition
5513 with a constant. Used to keep certain patterns from matching
5514 during instruction combination. */
5516 is_function_label_plus_const (op)
5517 rtx op;
5519 /* Strip off any CONST. */
5520 if (GET_CODE (op) == CONST)
5521 op = XEXP (op, 0);
5523 return (GET_CODE (op) == PLUS
5524 && function_label_operand (XEXP (op, 0), Pmode)
5525 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5528 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5529 use in fmpyadd instructions. */
5531 fmpyaddoperands (operands)
5532 rtx *operands;
5534 enum machine_mode mode = GET_MODE (operands[0]);
5536 /* Must be a floating point mode. */
5537 if (mode != SFmode && mode != DFmode)
5538 return 0;
5540 /* All modes must be the same. */
5541 if (! (mode == GET_MODE (operands[1])
5542 && mode == GET_MODE (operands[2])
5543 && mode == GET_MODE (operands[3])
5544 && mode == GET_MODE (operands[4])
5545 && mode == GET_MODE (operands[5])))
5546 return 0;
5548 /* All operands must be registers. */
5549 if (! (GET_CODE (operands[1]) == REG
5550 && GET_CODE (operands[2]) == REG
5551 && GET_CODE (operands[3]) == REG
5552 && GET_CODE (operands[4]) == REG
5553 && GET_CODE (operands[5]) == REG))
5554 return 0;
5556 /* Only 2 real operands to the addition. One of the input operands must
5557 be the same as the output operand. */
5558 if (! rtx_equal_p (operands[3], operands[4])
5559 && ! rtx_equal_p (operands[3], operands[5]))
5560 return 0;
5562 /* Inout operand of add can not conflict with any operands from multiply. */
5563 if (rtx_equal_p (operands[3], operands[0])
5564 || rtx_equal_p (operands[3], operands[1])
5565 || rtx_equal_p (operands[3], operands[2]))
5566 return 0;
5568 /* multiply can not feed into addition operands. */
5569 if (rtx_equal_p (operands[4], operands[0])
5570 || rtx_equal_p (operands[5], operands[0]))
5571 return 0;
5573 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5574 if (mode == SFmode
5575 && (REGNO (operands[0]) < 57
5576 || REGNO (operands[1]) < 57
5577 || REGNO (operands[2]) < 57
5578 || REGNO (operands[3]) < 57
5579 || REGNO (operands[4]) < 57
5580 || REGNO (operands[5]) < 57))
5581 return 0;
5583 /* Passed. Operands are suitable for fmpyadd. */
5584 return 1;
5587 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5588 use in fmpysub instructions. */
5590 fmpysuboperands (operands)
5591 rtx *operands;
5593 enum machine_mode mode = GET_MODE (operands[0]);
5595 /* Must be a floating point mode. */
5596 if (mode != SFmode && mode != DFmode)
5597 return 0;
5599 /* All modes must be the same. */
5600 if (! (mode == GET_MODE (operands[1])
5601 && mode == GET_MODE (operands[2])
5602 && mode == GET_MODE (operands[3])
5603 && mode == GET_MODE (operands[4])
5604 && mode == GET_MODE (operands[5])))
5605 return 0;
5607 /* All operands must be registers. */
5608 if (! (GET_CODE (operands[1]) == REG
5609 && GET_CODE (operands[2]) == REG
5610 && GET_CODE (operands[3]) == REG
5611 && GET_CODE (operands[4]) == REG
5612 && GET_CODE (operands[5]) == REG))
5613 return 0;
5615 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5616 operation, so operands[4] must be the same as operand[3]. */
5617 if (! rtx_equal_p (operands[3], operands[4]))
5618 return 0;
5620 /* multiply can not feed into subtraction. */
5621 if (rtx_equal_p (operands[5], operands[0]))
5622 return 0;
5624 /* Inout operand of sub can not conflict with any operands from multiply. */
5625 if (rtx_equal_p (operands[3], operands[0])
5626 || rtx_equal_p (operands[3], operands[1])
5627 || rtx_equal_p (operands[3], operands[2]))
5628 return 0;
5630 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5631 if (mode == SFmode
5632 && (REGNO (operands[0]) < 57
5633 || REGNO (operands[1]) < 57
5634 || REGNO (operands[2]) < 57
5635 || REGNO (operands[3]) < 57
5636 || REGNO (operands[4]) < 57
5637 || REGNO (operands[5]) < 57))
5638 return 0;
5640 /* Passed. Operands are suitable for fmpysub. */
5641 return 1;
5645 plus_xor_ior_operator (op, mode)
5646 rtx op;
5647 enum machine_mode mode ATTRIBUTE_UNUSED;
5649 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5650 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   scale factors for shadd instructions.  */
static int
shadd_constant_p (val)
     int val;
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
5665 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5666 the valid constant for shadd instructions. */
5668 shadd_operand (op, mode)
5669 rtx op;
5670 enum machine_mode mode ATTRIBUTE_UNUSED;
5672 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5675 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5678 basereg_operand (op, mode)
5679 rtx op;
5680 enum machine_mode mode;
5682 /* cse will create some unscaled indexed addresses, however; it
5683 generally isn't a win on the PA, so avoid creating unscaled
5684 indexed addresses until after cse is finished. */
5685 if (!cse_not_expected)
5686 return 0;
5688 /* Once reload has started everything is considered valid. Reload should
5689 only create indexed addresses using the stack/frame pointer, and any
5690 others were checked for validity when created by the combine pass.
5692 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5693 we don't have to worry about the braindamaged implicit space register
5694 selection using the basereg only (rather than effective address)
5695 screwing us over. */
5696 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5697 return (GET_CODE (op) == REG);
5699 /* Stack is always OK for indexing. */
5700 if (op == stack_pointer_rtx)
5701 return 1;
5703 /* While it's always safe to index off the frame pointer, it's not
5704 always profitable, particularly when the frame pointer is being
5705 eliminated. */
5706 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5707 return 1;
5709 /* The only other valid OPs are pseudo registers with
5710 REGNO_POINTER_FLAG set. */
5711 if (GET_CODE (op) != REG
5712 || REGNO (op) < FIRST_PSEUDO_REGISTER
5713 || ! register_operand (op, mode))
5714 return 0;
5716 return REGNO_POINTER_FLAG (REGNO (op));
5719 /* Return 1 if this operand is anything other than a hard register. */
5722 non_hard_reg_operand (op, mode)
5723 rtx op;
5724 enum machine_mode mode ATTRIBUTE_UNUSED;
5726 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5729 /* Return 1 if INSN branches forward. Should be using insn_addresses
5730 to avoid walking through all the insns... */
5731 static int
5732 forward_branch_p (insn)
5733 rtx insn;
5735 rtx label = JUMP_LABEL (insn);
5737 while (insn)
5739 if (insn == label)
5740 break;
5741 else
5742 insn = NEXT_INSN (insn);
5745 return (insn == label);
5748 /* Return 1 if OP is an equality comparison, else return 0. */
5750 eq_neq_comparison_operator (op, mode)
5751 rtx op;
5752 enum machine_mode mode ATTRIBUTE_UNUSED;
5754 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5757 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5759 movb_comparison_operator (op, mode)
5760 rtx op;
5761 enum machine_mode mode ATTRIBUTE_UNUSED;
5763 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5764 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5767 /* Return 1 if INSN is in the delay slot of a call instruction. */
5769 jump_in_call_delay (insn)
5770 rtx insn;
5773 if (GET_CODE (insn) != JUMP_INSN)
5774 return 0;
5776 if (PREV_INSN (insn)
5777 && PREV_INSN (PREV_INSN (insn))
5778 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5780 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5782 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5783 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5786 else
5787 return 0;
5790 /* Output an unconditional move and branch insn. */
5792 char *
5793 output_parallel_movb (operands, length)
5794 rtx *operands;
5795 int length;
5797 /* These are the cases in which we win. */
5798 if (length == 4)
5799 return "mov%I1b,tr %1,%0,%2";
5801 /* None of these cases wins, but they don't lose either. */
5802 if (dbr_sequence_length () == 0)
5804 /* Nothing in the delay slot, fake it by putting the combined
5805 insn (the copy or add) in the delay slot of a bl. */
5806 if (GET_CODE (operands[1]) == CONST_INT)
5807 return "bl %2,0\n\tldi %1,%0";
5808 else
5809 return "bl %2,0\n\tcopy %1,%0";
5811 else
5813 /* Something in the delay slot, but we've got a long branch. */
5814 if (GET_CODE (operands[1]) == CONST_INT)
5815 return "ldi %1,%0\n\tbl %2,0";
5816 else
5817 return "copy %1,%0\n\tbl %2,0";
5821 /* Output an unconditional add and branch insn. */
5823 char *
5824 output_parallel_addb (operands, length)
5825 rtx *operands;
5826 int length;
5828 /* To make life easy we want operand0 to be the shared input/output
5829 operand and operand1 to be the readonly operand. */
5830 if (operands[0] == operands[1])
5831 operands[1] = operands[2];
5833 /* These are the cases in which we win. */
5834 if (length == 4)
5835 return "add%I1b,tr %1,%0,%3";
5837 /* None of these cases win, but they don't lose either. */
5838 if (dbr_sequence_length () == 0)
5840 /* Nothing in the delay slot, fake it by putting the combined
5841 insn (the copy or add) in the delay slot of a bl. */
5842 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5844 else
5846 /* Something in the delay slot, but we've got a long branch. */
5847 return "add%I1 %1,%0,%0\n\tbl %3,0";
5851 /* Return nonzero if INSN (a jump insn) immediately follows a call to
5852 a named function. This is used to discourage creating parallel movb/addb
5853 insns since a jump which immediately follows a call can execute in the
5854 delay slot of the call.
5856 It is also used to avoid filling the delay slot of a jump which
5857 immediately follows a call since the jump can usually be eliminated
5858 completely by modifying RP in the delay slot of the call. */
5861 following_call (insn)
5862 rtx insn;
5864 /* Find the previous real insn, skipping NOTEs. */
5865 insn = PREV_INSN (insn);
5866 while (insn && GET_CODE (insn) == NOTE)
5867 insn = PREV_INSN (insn);
5869 /* Check for CALL_INSNs and millicode calls. */
5870 if (insn
5871 && ((GET_CODE (insn) == CALL_INSN
5872 && get_attr_type (insn) != TYPE_DYNCALL)
5873 || (GET_CODE (insn) == INSN
5874 && GET_CODE (PATTERN (insn)) != SEQUENCE
5875 && GET_CODE (PATTERN (insn)) != USE
5876 && GET_CODE (PATTERN (insn)) != CLOBBER
5877 && get_attr_type (insn) == TYPE_MILLI)))
5878 return 1;
5880 return 0;
5883 /* Restore any INSN_CODEs for insns with unscaled indexed addresses since
5884 the INSN_CODE might be clobberd by rerecognition triggered by reorg. */
5886 static void
5887 restore_unscaled_index_insn_codes (insns)
5888 rtx insns;
5890 rtx insn;
5892 for (insn = insns; insn; insn = NEXT_INSN (insn))
5894 if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid
5895 && unscaled_index_insn_codes[INSN_UID (insn)] != -1)
5896 INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)];
5900 /* Severe braindamage:
5902 On the PA, address computations within MEM expressions are not
5903 commutative because of the implicit space register selection
5904 from the base register (instead of the entire effective address).
5906 Because of this mis-feature we have to know which register in a reg+reg
5907 address is the base and which is the index.
5909 Before reload, the base can be identified by REGNO_POINTER_FLAG. We use
5910 this to force base + index addresses to match a different insn than
5911 index + base addresses.
5913 We assume that no pass during or after reload creates new unscaled indexed
5914 addresses, so any unscaled indexed address we find after reload must have
5915 at one time been recognized a base + index or index + base and we accept
5916 any register as a base register.
5918 This scheme assumes that no pass during/after reload will rerecognize an
5919 insn with an unscaled indexed address. This failed due to a reorg call
5920 to rerecognize certain insns.
5922 So, we record if an insn uses an unscaled indexed address and which
5923 register is the base (via recording of the INSN_CODE for such insns).
5925 Just before we output code for the function, we make sure all the insns
5926 using unscaled indexed addresses have the same INSN_CODE as they did
5927 immediately before delay slot scheduling.
5929 This is extremely gross. Long term, I'd like to be able to look at
5930 REG_POINTER_FLAG to handle these kinds of problems. */
5932 static void
5933 record_unscaled_index_insn_codes (insns)
5934 rtx insns;
5936 rtx insn;
5938 max_unscaled_index_insn_codes_uid = get_max_uid ();
5939 unscaled_index_insn_codes
5940 = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int));
5941 memset (unscaled_index_insn_codes, -1,
5942 max_unscaled_index_insn_codes_uid * sizeof (int));
5944 for (insn = insns; insn; insn = NEXT_INSN (insn))
5946 rtx set = single_set (insn);
5947 rtx mem = NULL_RTX;
5949 /* Ignore anything that isn't a normal SET. */
5950 if (set == NULL_RTX)
5951 continue;
5953 /* No insns can have more than one MEM. */
5954 if (GET_CODE (SET_SRC (set)) == MEM)
5955 mem = SET_SRC (set);
5957 if (GET_CODE (SET_DEST (set)) == MEM)
5958 mem = SET_DEST (set);
5960 /* If neither operand is a mem, then there's nothing to do. */
5961 if (mem == NULL_RTX)
5962 continue;
5964 if (GET_CODE (XEXP (mem, 0)) != PLUS)
5965 continue;
5967 /* If both are REGs (or SUBREGs), then record the insn code for
5968 this insn. */
5969 if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1)))
5970 unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn);
5974 /* We use this hook to perform a PA specific optimization which is difficult
5975 to do in earlier passes.
5977 We want the delay slots of branches within jump tables to be filled.
5978 None of the compiler passes at the moment even has the notion that a
5979 PA jump table doesn't contain addresses, but instead contains actual
5980 instructions!
5982 Because we actually jump into the table, the addresses of each entry
5983 must stay constant in relation to the beginning of the table (which
5984 itself must stay constant relative to the instruction to jump into
5985 it). I don't believe we can guarantee earlier passes of the compiler
5986 will adhere to those rules.
5988 So, late in the compilation process we find all the jump tables, and
5989 expand them into real code -- eg each entry in the jump table vector
5990 will get an appropriate label followed by a jump to the final target.
5992 Reorg and the final jump pass can then optimize these branches and
5993 fill their delay slots. We end up with smaller, more efficient code.
5995 The jump instructions within the table are special; we must be able
5996 to identify them during assembly output (if the jumps don't get filled
5997 we need to emit a nop rather than nullifying the delay slot)). We
5998 identify jumps in switch tables by marking the SET with DImode.
6000 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6001 insns. This serves two purposes, first it prevents jump.c from
6002 noticing that the last N entries in the table jump to the instruction
6003 immediately after the table and deleting the jumps. Second, those
6004 insns mark where we should emit .begin_brtab and .end_brtab directives
6005 when using GAS (allows for better link time optimizations). */
/* Machine dependent reorg pass over INSNS (the current function's insn
   chain).  See the block comment above for why PA jump tables are
   expanded into real branch instructions here.  */
void
pa_reorg (insns)
     rtx insns;
  rtx insn;

  /* Keep track of which insns have unscaled indexed addresses, and which
     register is the base address in such insns.  */
  record_unscaled_index_insn_codes (insns);

  remove_useless_addtr_insns (insns, 1);

  pa_combine_instructions (get_insns ());

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	  rtx pattern, tmp, location;
	  unsigned int length, i;

	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Emit marker for the beginning of the branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);

	  pattern = PATTERN (insn);
	  /* LOCATION is a cursor after which each generated label,
	     jump and barrier is emitted in turn.  */
	  location = PREV_INSN (insn);
	  /* An ADDR_DIFF_VEC keeps its entries in operand 1, an
	     ADDR_VEC in operand 0.  */
	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

	  for (i = 0; i < length; i++)
	      /* Emit a label before each jump to keep jump.c from
		 removing this code.  */
	      tmp = gen_label_rtx ();
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);

	      if (GET_CODE (pattern) == ADDR_VEC)
		  /* Emit the jump itself.  */
		  tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
		  tmp = emit_jump_insn_after (tmp, location);
		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
		  LABEL_NUSES (JUMP_LABEL (tmp))++;
		  location = NEXT_INSN (location);
	      else
		  /* Emit the jump itself.  */
		  tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 1, i), 0));
		  tmp = emit_jump_insn_after (tmp, location);
		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
		  LABEL_NUSES (JUMP_LABEL (tmp))++;
		  location = NEXT_INSN (location);

	      /* Emit a BARRIER after the jump.  */
	      emit_barrier_after (location);
	      location = NEXT_INSN (location);

	  /* Emit marker for the end of the branch table.  */
	  emit_insn_before (gen_end_brtab (), location);
	  location = NEXT_INSN (location);
	  emit_barrier_after (location);

	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
	  delete_insn (insn);
  else
      /* Still need an end_brtab insn.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
6106 /* The PA has a number of odd instructions which can perform multiple
6107 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6108 it may be profitable to combine two instructions into one instruction
6109 with two outputs. It's not profitable PA2.0 machines because the
6110 two outputs would take two slots in the reorder buffers.
6112 This routine finds instructions which can be combined and combines
6113 them. We only support some of the potential combinations, and we
6114 only try common ways to find suitable instructions.
6116 * addb can add two registers or a register and a small integer
6117 and jump to a nearby (+-8k) location. Normally the jump to the
6118 nearby location is conditional on the result of the add, but by
6119 using the "true" condition we can make the jump unconditional.
6120 Thus addb can perform two independent operations in one insn.
6122 * movb is similar to addb in that it can perform a reg->reg
6123 or small immediate->reg copy and jump to a nearby (+-8k location).
6125 * fmpyadd and fmpysub can perform a FP multiply and either an
6126 FP add or FP sub if the operands of the multiply and add/sub are
6127 independent (there are other minor restrictions). Note both
6128 the fmpy and fadd/fsub can in theory move to better spots according
6129 to data dependencies, but for now we require the fmpy stay at a
6130 fixed location.
6132 * Many of the memory operations can perform pre & post updates
6133 of index registers. GCC's pre/post increment/decrement addressing
6134 is far too simple to take advantage of all the possibilities. This
6135 pass may not be suitable since those insns may not be independent.
6137 * comclr can compare two ints or an int and a register, nullify
6138 the following instruction and zero some other register. This
6139 is more difficult to use as it's harder to find an insn which
6140 will generate a comclr than finding something like an unconditional
6141 branch. (conditional moves & long branches create comclr insns).
6143 * Most arithmetic operations can conditionally skip the next
6144 instruction. They can be viewed as "perform this operation
6145 and conditionally jump to this nearby location" (where nearby
6146 is an insns away). These are difficult to use due to the
6147 branch length restrictions. */
/* Look for pairs of independent insns that one of the PA's combined
   instructions (addb, movb, fmpyadd, fmpysub) can express, and rewrite
   each pair as a single two-element PARALLEL.  See the block comment
   above for the catalogue of opportunities.  */
static void
pa_combine_instructions (insns)
     rtx insns ATTRIBUTE_UNUSED;
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  /* NEW is a scratch insn holding a two-element PARALLEL; it is
     reused by pa_can_combine_p to test each candidate pairing.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
	  && GET_CODE (anchor) != JUMP_INSN
	  && GET_CODE (anchor) != CALL_INSN)
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	  rtx floater;

	  /* First scan backwards from the anchor for a floater.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		  floater = NULL_RTX;
		  break;

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		  /* An add floater has two sources; a move has one.  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		  else
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		      floater = NULL_RTX;
		      break;

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN(floater)),0),
					    XEXP(SET_SRC(PATTERN(floater)),1)))
			break;

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2,
							     PATTERN (anchor),
							     PATTERN (floater))),
				anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);

	      continue;
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	      rtx temp;
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp = emit_jump_insn_before (gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (2, PATTERN (anchor),
							   PATTERN (floater))),
					    anchor);
	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
/* Helper for pa_combine_instructions.  NEW is a scratch insn carrying
   a two-element PARALLEL; splice in the patterns of ANCHOR and FLOATER
   and return 1 when the combination is recognized, its constraints are
   satisfied, and DEST/SRC1/SRC2 are undisturbed between the two insns.
   REVERSED nonzero means FLOATER follows ANCHOR in the insn chain.  */
pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
     rtx new, anchor, floater;
     int reversed;
     rtx dest, src1, src2;
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  /* Force recog_memoized to run recognition afresh on the reused
     scratch insn.  */
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || !constrain_operands (insn_code_number, 1))
    return 0;

  /* Orient the range [START, END] to match the chain order of the
     two insns.  */
  if (reversed)
      start = anchor;
      end = floater;
  else
      start = floater;
      end = anchor;

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */
  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;