* pa.md (alternate dbra pattern): Remove incorrect pattern.
gcc/config/pa/pa.c
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include <stdio.h>
#include "config.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "reload.h"
#include "c-tree.h"
#include "expr.h"
#include "obstack.h"
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
char *pa_cpu_string;

/* Set by the FUNCTION_PROFILER macro.  */
int hp_profile_labelno;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Whether or not the current function uses an out-of-line prologue
   and epilogue.  */
static int out_of_line_prologue_epilogue;

static rtx find_addr_reg ();

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;
void
override_options ()
{
  /* Default to 7100 scheduling.  If the 7100LC scheduling ever
     gets reasonably tuned, it should be the default since that
     is what most PAs sold now are.  */
  if (pa_cpu_string == NULL
      || ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else
    {
      warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (flag_pic && profile_flag)
    {
      warning ("PIC code generation is not compatible with profiling\n");
    }

  if (TARGET_SPACE && (flag_pic || profile_flag))
    {
      warning ("Out of line entry/exit sequences are not compatible\n");
      warning ("with PIC or profiling\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled.");
      write_symbols = NO_DEBUG;
    }
}
/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */

reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return non-zero if OP is suitable for use in a call to a named
   function.

   (???) For 2.5 try to eliminate either call_operand_address or
   function_label_operand, they perform very similar functions.  */

call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */

symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
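
/* Return 1 if OP is a SYMBOL_REF, a LABEL_REF, or a CONST containing
   one of them plus a CONST_INT.  */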
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (op, 0)) == LABEL_REF)
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
          || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */

cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
          || zdepi_cint_p (intval));
}

/* Accept anything that can be moved in one instruction into a general
   register.  */

move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);
  if (GET_CODE (op) == LO_SUM)
    return (register_operand (XEXP (op, 0), Pmode)
            && CONSTANT_P (XEXP (op, 1)));

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
           && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
           && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
           && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
           && GET_CODE (XEXP (op, 1)) == REG)
          || (GET_CODE (XEXP (op, 1)) == MULT
              && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
              && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
              && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
              && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */

reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}
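
/* Accept a LABEL_REF, or a CONST containing a LABEL_REF plus a
   CONST_INT; these only qualify when generating PIC code.  */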
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
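
/* Return non-zero if OP is a floating point register.  Only meaningful
   once reg_renumber is valid, i.e. during or after register
   allocation.  */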
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return reg_renumber && FP_REG_P (op);
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */

arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */

arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */

pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */

post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}
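
/* Accept a register of mode MODE, or a CONST_DOUBLE whose low word
   fits in 14 bits and whose high word is just the sign extension of
   the low word.  */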
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_DOUBLE
              && GET_MODE (op) == mode
              && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
              && (CONST_DOUBLE_HIGH (op) >= 0
                  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
          || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}
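
/* Return 1 if OP is a CONST_INT that fits in 5 bits as an unsigned
   value.  */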
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}
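
/* Return 1 if OP is a CONST_INT that fits in 11 bits as a signed
   value.  */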
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}
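
/* Return 1 if OP is a constant integer whose value fits in 32 bits as
   an unsigned quantity.  */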
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
#else
  return (GET_CODE (op) == CONST_INT
          || (GET_CODE (op) == CONST_DOUBLE
              && CONST_DOUBLE_HIGH (op) == 0));
#endif
}
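
/* Accept a register of mode MODE, or a CONST_INT that fits in 5 bits
   as a signed value.  */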
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}
/* True iff zdepi can be used to generate this CONST_INT.  */
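/* (zdepi deposits a sign-extended 5-bit immediate at an arbitrary bit
   position, so for example 0x11, 0x1f000 and 0xff500 qualify, while
   0x101 does not.)  */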
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */

and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi or extru can be used to compute (reg & OP).  */

and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */

ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | OP).  */

ior_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}
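
/* Accept a register, or a constant acceptable to
   lhs_lshift_cint_operand below.  */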
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */

lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
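
/* Accept a register of mode MODE, or any CONST_INT.  */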
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}
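
/* Accept the PC or a LABEL_REF, i.e. the things that can appear as
   the target of a conditional branch.  */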
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      emit_insn (gen_pic_load_label (reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
        abort ();

      if (flag_pic == 2)
        {
          emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
          pic_ref = gen_rtx (MEM, Pmode,
                             gen_rtx (LO_SUM, Pmode, reg,
                                      gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
        }
      else
        pic_ref = gen_rtx (MEM, Pmode,
                           gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
        {
          base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
          orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                         base == reg ? 0 : reg);
        }
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant_for_output (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx (PLUS, Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }
  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));
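
   For example, a MODE_INT reference to (X + 0x4321) uses the mask
   0x3fff; the offset rounds down to Y = 0x4000, so we generate
   Z = X + 0x4000 and then memory (Z + 0x321).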

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (SImode, XEXP (x, 1));
      return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = gen_rtx (CONST, VOIDmode,
                                    gen_rtx (PLUS, Pmode,
                                             XEXP (x, 0),
                                             GEN_INT (newoffset)));
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx (HIGH, Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx (LO_SUM, Pmode,
                                  tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx (PLUS, Pmode,
                                        force_reg (Pmode, XEXP (x, 0)),
                                        int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                        gen_rtx (MULT, Pmode,
                                                 reg2, GEN_INT (val)),
                                        reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
         know it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */
      if (GET_CODE (reg1) == REG
          && REGNO_POINTER_FLAG (REGNO (reg1)))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx (PLUS, Pmode,
                         gen_rtx (MULT, Pmode,
                                  XEXP (XEXP (XEXP (x, 0), 0), 0),
                                  XEXP (XEXP (XEXP (x, 0), 0), 1)),
                         XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REGNO_POINTER_FLAG (REGNO (reg2)))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                            gen_rtx (MULT, Pmode, reg1,
                                                     XEXP (XEXP (idx, 0), 1)),
                                            base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                            gen_rtx (MULT, Pmode,
                                                     reg2, GEN_INT (val)),
                                            reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                        gen_rtx (MULT, Pmode, reg1,
                                                 XEXP (XEXP (idx, 0), 1)),
                                        reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                                gen_rtx (MULT, Pmode,
                                                         reg2, GEN_INT (val)),
                                                reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
                                                 regx2, regx1));
              return force_reg (Pmode,
                                gen_rtx (PLUS, Pmode,
                                         gen_rtx (MULT, Pmode, regx2,
                                                  XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
              return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];

  if (reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
      operand0 = alter_subreg (operand0);
    }

  if (reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
      operand1 = alter_subreg (operand1);
    }

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && ! memory_address_p (DFmode, XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (SImode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
                                                SImode,
                                                XEXP (XEXP (operand1, 0), 0),
                                                scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
                                                            scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && ! memory_address_p (DFmode, XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
           && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (SImode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
                                                SImode,
                                                XEXP (XEXP (operand0, 0), 0),
                                                scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
                          operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
           && CONSTANT_P (operand1)
           && scratch_reg)
    {
      rtx xoperands[2];

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx (SET, mode, operand0,
                          gen_rtx (MEM, mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, a FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM
               || GET_CODE (operand1) == CONST_INT
               || (GET_CODE (operand1) == REG
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
           && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (SImode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
                                                SImode,
                                                XEXP (XEXP (operand1, 0), 0),
                                                scratch_reg));
          emit_move_insn (scratch_reg, gen_rtx (MEM, GET_MODE (operand1),
                                                scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, operand1);
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
          emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          rtx const_part = NULL;

          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.  We'll have to split up operand1 here
             if it's a function label involved in an arithmetic
             expression.  Luckily, this only happens with addition
             of constants to plabels, which simplifies the test.

             We add the constant back in just before returning to
             our caller.  */
          if (GET_CODE (operand1) == CONST
              && GET_CODE (XEXP (operand1, 0)) == PLUS
              && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
            {
              /* Save away the constant part of the expression.  */
              const_part = XEXP (XEXP (operand1, 0), 1);
              if (GET_CODE (const_part) != CONST_INT)
                abort ();

              /* Set operand1 to just the SYMBOL_REF.  */
              operand1 = XEXP (XEXP (operand1, 0), 0);
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                temp = scratch_reg ? scratch_reg : operand0;
              else
                temp = gen_reg_rtx (Pmode);

              /* If operand1 is a function label, then we've got to
                 force it to memory, then load op0 from memory.  */
              if (function_label_operand (operand1, mode))
                {
                  operands[1] = force_const_mem (mode, operand1);
                  emit_move_sequence (operands, mode, temp);
                }
              /* Likewise for (const (plus (symbol) (const_int))) when
                 generating pic code during or after reload and const_int
                 will not fit in 14 bits.  */
              else if (GET_CODE (operand1) == CONST
                       && GET_CODE (XEXP (operand1, 0)) == PLUS
                       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                       && (reload_completed || reload_in_progress)
                       && flag_pic)
                {
                  operands[1] = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
                                                        mode, temp);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                temp = scratch_reg ? scratch_reg : operand0;
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.  */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                REGNO_POINTER_FLAG (REGNO (temp)) = 1;
              if (ishighonly)
                set = gen_rtx (SET, mode, operand0, temp);
              else
                set = gen_rtx (SET, VOIDmode,
                               operand0,
                               gen_rtx (LO_SUM, mode, temp, operand1));

              emit_insn (gen_rtx (SET, VOIDmode,
                                  temp,
                                  gen_rtx (HIGH, mode, operand1)));
              emit_insn (set);
            }

          /* Add back in the constant part if needed.  */
          if (const_part != NULL)
            expand_inc (operand0, const_part);
          return 1;
        }
      else if (GET_CODE (operand1) != CONST_INT
               || ! cint_ok_for_move (INTVAL (operand1)))
        {
          rtx temp;

          if (reload_in_progress || reload_completed)
            temp = operand0;
          else
            temp = gen_reg_rtx (mode);

          emit_insn (gen_rtx (SET, VOIDmode, temp,
                              gen_rtx (HIGH, mode, operand1)));
          operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
        }
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
        register tree link;
        for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
          if (TREE_VALUE (link) != 0)
            reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;
    }
  return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space? If
   so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true.  */

read_only_operand (operand)
     rtx operand;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
        abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
         bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
        return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
        return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
        return "zdepi %Z1,%0";
      else
        return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
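/* (For example, IMM = 0x1f000 yields OP[0] = -1, OP[1] = 19 and
   OP[2] = 5.)  */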
void
compute_zdepi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
        {
          if ((imm & (1 << len)) == 0)
            break;
        }

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

char *
output_move_double (operands)
     rtx *operands;
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  Abort if we get one,
     because generating code for these cases is painful.  */

  if (optype0 != REGOP && optype1 != REGOP)
    abort ();

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == POST_INC)
                return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
              return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
            }
          else
            abort();
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == PRE_INC)
                return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
              return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
            }
          else
            abort();
        }
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == POST_INC)
                return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
              return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == POST_INC)
                return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
              return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
            }
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == PRE_INC)
                return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
              return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == PRE_INC)
                return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
              return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
            }
        }
      else if (GET_CODE (addr) == PLUS
               && GET_CODE (XEXP (addr, 0)) == MULT)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* Note: four elements, since xoperands[3] is assigned
                 below.  */
              rtx xoperands[4];

              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
              return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
            }
          else
            {
              rtx xoperands[4];

              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
              return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
            }
        }
    }
  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adj_offsettable_operand (operands[0], 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adj_offsettable_operand (operands[1], 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

        mem -> register where the first half of the destination register
        is the same register used in the memory's address.  Reload
        can create such insns.

        mem in this case will be either register indirect or register
        indirect plus a valid offset.

        register -> register move where REGNO(dst) == REGNO(src + 1)
        someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
                            operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
        output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
        output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
char *
output_fp_move_double (operands)
     rtx *operands;
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
          || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
        output_asm_insn ("fcpy,dbl %r1,%0", operands);
      else
        output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
    {
      if (GET_CODE (operands[0]) == REG)
        {
          rtx xoperands[2];
          xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
          xoperands[0] = operands[0];
          output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
        }
      /* This is a pain.  You have to be prepared to deal with an
         arbitrary address here including pre/post increment/decrement.

         So avoid this in the MD.  */
      else
        abort ();
    }
  else abort ();
  return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (addr)
     rtx addr;
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        abort ();
    }
  if (GET_CODE (addr) == REG)
    return addr;
  abort ();
}
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT.
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */
char *
output_block_move (operands, size_is_constant)
     rtx *operands;
     int size_is_constant;
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than four bytes at a time because the PA
     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
  if (align > 4)
    align = 4;

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
      output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
      output_asm_insn ("stws,ma %3,4(0,%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("stws,ma %6,4(0,%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to copy!  */
      if (n_bytes % 8 != 0)
        {
          operands[4] = GEN_INT (n_bytes % 4);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("ldw 0(0,%1),%6", operands);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("stws,ma %3,4(0,%0)", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
        }
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
      output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
      output_asm_insn ("sths,ma %3,2(0,%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("sths,ma %6,2(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
        {
          if (n_bytes % 4 >= 2)
            output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("ldb 0(0,%1),%6", operands);
          if (n_bytes % 4 >= 2)
            output_asm_insn ("sths,ma %3,2(0,%0)", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("stb %6,0(0,%0)", operands);
        }
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
      output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
      output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("stbs,ma %6,1(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
        {
          output_asm_insn ("ldb 0(0,%1),%3", operands);
          output_asm_insn ("stb %3,0(0,%0)", operands);
        }
      return "";

    default:
      abort ();
    }
}
/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as output_block_move, except that we
   count insns rather than emit them.  */

int
compute_movstrsi_length (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
  unsigned int n_insns = 0;

  /* We can't move more than four bytes at a time because the PA
     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
  if (align > 4)
    align = 4;

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
        n_insns += 2;

      if ((n_bytes % align) != 0)
        n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
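
/* Return the assembler insn template for ANDing a register with the
   constant in operands[2].  Uses a single extru or depi when the mask
   allows it, and a plain `and' otherwise.  */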
1994 char *
1995 output_and (operands)
1996 rtx *operands;
1998 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2000 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2001 int ls0, ls1, ms0, p, len;
2003 for (ls0 = 0; ls0 < 32; ls0++)
2004 if ((mask & (1 << ls0)) == 0)
2005 break;
2007 for (ls1 = ls0; ls1 < 32; ls1++)
2008 if ((mask & (1 << ls1)) != 0)
2009 break;
2011 for (ms0 = ls1; ms0 < 32; ms0++)
2012 if ((mask & (1 << ms0)) == 0)
2013 break;
2015 if (ms0 != 32)
2016 abort();
2018 if (ls1 == 32)
2020 len = ls0;
2022 if (len == 0)
2023 abort ();
2025 operands[2] = GEN_INT (len);
2026 return "extru %1,31,%2,%0";
2028 else
2030 /* We could use this `depi' for the case above as well, but `depi'
2031 requires one more register file access than an `extru'. */
2033 p = 31 - ls0;
2034 len = ls1 - ls0;
2036 operands[2] = GEN_INT (p);
2037 operands[3] = GEN_INT (len);
2038 return "depi 0,%2,%3,%0";
2041 else
2042 return "and %1,%2,%0";
2045 char *
2046 output_ior (operands)
2047 rtx *operands;
2049 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2050 int bs0, bs1, p, len;
2052 if (INTVAL (operands[2]) == 0)
2053 return "copy %1,%0";
2055 for (bs0 = 0; bs0 < 32; bs0++)
2056 if ((mask & (1 << bs0)) != 0)
2057 break;
2059 for (bs1 = bs0; bs1 < 32; bs1++)
2060 if ((mask & (1 << bs1)) == 0)
2061 break;
2063 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2064 abort();
2066 p = 31 - bs0;
2067 len = bs1 - bs0;
2069 operands[2] = GEN_INT (p);
2070 operands[3] = GEN_INT (len);
2071 return "depi -1,%2,%3,%0";
2074 /* Output an ascii string. */
2075 void
2076 output_ascii (file, p, size)
2077 FILE *file;
2078 unsigned char *p;
2079 int size;
2081 int i;
2082 int chars_output;
2083 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2085 /* The HP assembler can only take strings of 256 characters at one
2086 time. This is a limitation on input line length, *not* the
2087 length of the string. Sigh. Even worse, it seems that the
2088 restriction is in number of input characters (see \xnn &
2089 \whatever). So we have to do this very carefully. */
2091 fputs ("\t.STRING \"", file);
2093 chars_output = 0;
2094 for (i = 0; i < size; i += 4)
2096 int co = 0;
2097 int io = 0;
2098 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2100 register unsigned int c = p[i + io];
2102 if (c == '\"' || c == '\\')
2103 partial_output[co++] = '\\';
2104 if (c >= ' ' && c < 0177)
2105 partial_output[co++] = c;
2106 else
2108 unsigned int hexd;
2109 partial_output[co++] = '\\';
2110 partial_output[co++] = 'x';
2111 hexd = c / 16 - 0 + '0';
2112 if (hexd > '9')
2113 hexd -= '9' - 'a' + 1;
2114 partial_output[co++] = hexd;
2115 hexd = c % 16 - 0 + '0';
2116 if (hexd > '9')
2117 hexd -= '9' - 'a' + 1;
2118 partial_output[co++] = hexd;
2121 if (chars_output + co > 243)
2123 fputs ("\"\n\t.STRING \"", file);
2124 chars_output = 0;
2126 fwrite (partial_output, 1, co, file);
2127 chars_output += co;
2128 co = 0;
2130 fputs ("\"\n", file);
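/* Sketch of the effect (hypothetical input): a string whose escaped
   form exceeds the 243-character cutoff is split across directives,

	.STRING "first chunk"
	.STRING "remainder"

   so no assembler input line approaches the 256-character limit.  */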
2133 /* Try to rewrite floating point comparisons & branches to avoid
2134 useless add,tr insns.
2136 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2137 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2138 first attempt to remove useless add,tr insns. It is zero
2139 for the second pass as reorg sometimes leaves bogus REG_DEAD
2140 notes lying around.
2142 When CHECK_NOTES is zero we can only eliminate add,tr insns
2143 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2144 instructions. */
2145 void
2146 remove_useless_addtr_insns (insns, check_notes)
2147 rtx insns;
2148 int check_notes;
2150 rtx insn;
2151 int all;
2152 static int pass = 0;
2154 /* This is fairly cheap, so always run it when optimizing. */
2155 if (optimize > 0)
2157 int fcmp_count = 0;
2158 int fbranch_count = 0;
2160 /* Walk all the insns in this function looking for fcmp & fbranch
2161 instructions. Keep track of how many of each we find. */
2162 insns = get_insns ();
2163 for (insn = insns; insn; insn = next_insn (insn))
2165 rtx tmp;
2167 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2168 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2169 continue;
2171 tmp = PATTERN (insn);
2173 /* It must be a set. */
2174 if (GET_CODE (tmp) != SET)
2175 continue;
2177 /* If the destination is CCFP, then we've found an fcmp insn. */
2178 tmp = SET_DEST (tmp);
2179 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2181 fcmp_count++;
2182 continue;
2185 tmp = PATTERN (insn);
2186 /* If this is an fbranch instruction, bump the fbranch counter. */
2187 if (GET_CODE (tmp) == SET
2188 && SET_DEST (tmp) == pc_rtx
2189 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2190 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2191 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2192 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2194 fbranch_count++;
2195 continue;
2200 /* Find all floating point compare + branch insns. If possible,
2201 reverse the comparison & the branch to avoid add,tr insns. */
2202 for (insn = insns; insn; insn = next_insn (insn))
2204 rtx tmp, next;
2206 /* Ignore anything that isn't an INSN. */
2207 if (GET_CODE (insn) != INSN)
2208 continue;
2210 tmp = PATTERN (insn);
2212 /* It must be a set. */
2213 if (GET_CODE (tmp) != SET)
2214 continue;
2216 /* The destination must be CCFP, which is register zero. */
2217 tmp = SET_DEST (tmp);
2218 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2219 continue;
2221 /* INSN should be a set of CCFP.
2223 See if the result of this insn is used in a reversed FP
2224 conditional branch. If so, reverse our condition and
2225 the branch. Doing so avoids useless add,tr insns. */
2226 next = next_insn (insn);
2227 while (next)
2229 /* Jumps, calls and labels stop our search. */
2230 if (GET_CODE (next) == JUMP_INSN
2231 || GET_CODE (next) == CALL_INSN
2232 || GET_CODE (next) == CODE_LABEL)
2233 break;
2235 /* As does another fcmp insn. */
2236 if (GET_CODE (next) == INSN
2237 && GET_CODE (PATTERN (next)) == SET
2238 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2239 && REGNO (SET_DEST (PATTERN (next))) == 0)
2240 break;
2242 next = next_insn (next);
2245 /* Is NEXT_INSN a branch? */
2246 if (next
2247 && GET_CODE (next) == JUMP_INSN)
2249 rtx pattern = PATTERN (next);
2251 /* If it is a reversed fp conditional branch (e.g. one using add,tr)
2252 and CCFP dies, then reverse our conditional and the branch
2253 to avoid the add,tr. */
2254 if (GET_CODE (pattern) == SET
2255 && SET_DEST (pattern) == pc_rtx
2256 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2257 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2258 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2259 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2260 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2261 && (fcmp_count == fbranch_count
2262 || (check_notes
2263 && find_regno_note (next, REG_DEAD, 0))))
2265 /* Reverse the branch. */
2266 tmp = XEXP (SET_SRC (pattern), 1);
2267 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2268 XEXP (SET_SRC (pattern), 2) = tmp;
2269 INSN_CODE (next) = -1;
2271 /* Reverse our condition. */
2272 tmp = PATTERN (insn);
2273 PUT_CODE (XEXP (tmp, 1),
2274 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2280 pass = !pass;
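/* Schematically (a sketch, not literal compiler output): one sense of
   an FP branch maps onto a plain ftest + branch pair, while the
   reversed sense costs an extra always-nullifying "add,tr" insn.
   Reversing the fcmp condition and swapping the branch arms, as done
   above, turns the expensive form into the cheap one.  */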
2284 /* You may have trouble believing this, but this is the HP-PA stack
2285 layout. Wow.
2287 Offset Contents
2289 Variable arguments (optional; any number may be allocated)
2291 SP-(4*(N+9)) arg word N
2293 SP-56 arg word 5
2294 SP-52 arg word 4
2296 Fixed arguments (must be allocated; may remain unused)
2298 SP-48 arg word 3
2299 SP-44 arg word 2
2300 SP-40 arg word 1
2301 SP-36 arg word 0
2303 Frame Marker
2305 SP-32 External Data Pointer (DP)
2306 SP-28 External sr4
2307 SP-24 External/stub RP (RP')
2308 SP-20 Current RP
2309 SP-16 Static Link
2310 SP-12 Clean up
2311 SP-8 Calling Stub RP (RP'')
2312 SP-4 Previous SP
2314 Top of Frame
2316 SP-0 Stack Pointer (points to next available address)
2320 /* This function saves registers as follows. Registers marked with ' are
2321 this function's registers (as opposed to the previous function's).
2322 If a frame_pointer isn't needed, r4 is saved as a general register;
2323 the space for the frame pointer is still allocated, though, to keep
2324 things simple.
2327 Top of Frame
2329 SP (FP') Previous FP
2330 SP + 4 Alignment filler (sigh)
2331 SP + 8 Space for locals reserved here.
2335 SP + n All call saved registers used.
2339 SP + o All call saved fp registers used.
2343 SP + p (SP') points to next available address.
2347 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2348 Handle case where DISP > 8k by using the add_high_const pattern.
2350 Note in DISP > 8k case, we will leave the high part of the address
2351 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2352 static void
2353 store_reg (reg, disp, base)
2354 int reg, disp, base;
2356 if (VAL_14_BITS_P (disp))
2358 emit_move_insn (gen_rtx (MEM, SImode,
2359 gen_rtx (PLUS, SImode,
2360 gen_rtx (REG, SImode, base),
2361 GEN_INT (disp))),
2362 gen_rtx (REG, SImode, reg));
2364 else
2366 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2367 gen_rtx (REG, SImode, base),
2368 GEN_INT (disp)));
2369 emit_move_insn (gen_rtx (MEM, SImode,
2370 gen_rtx (LO_SUM, SImode,
2371 gen_rtx (REG, SImode, 1),
2372 GEN_INT (disp))),
2373 gen_rtx (REG, SImode, reg));
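/* Sketch (hypothetical arguments): store_reg (3, 16384, 30) has a
   displacement that fails VAL_14_BITS_P, so it emits add_high_const to
   leave %r30 plus the high part of 16384 in %r1, then stores %r3
   through the LO_SUM of %r1 and the displacement.  */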
2377 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2378 Handle case where DISP > 8k by using the add_high_const pattern.
2380 Note in DISP > 8k case, we will leave the high part of the address
2381 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2382 static void
2383 load_reg (reg, disp, base)
2384 int reg, disp, base;
2386 if (VAL_14_BITS_P (disp))
2388 emit_move_insn (gen_rtx (REG, SImode, reg),
2389 gen_rtx (MEM, SImode,
2390 gen_rtx (PLUS, SImode,
2391 gen_rtx (REG, SImode, base),
2392 GEN_INT (disp))));
2394 else
2396 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2397 gen_rtx (REG, SImode, base),
2398 GEN_INT (disp)));
2399 emit_move_insn (gen_rtx (REG, SImode, reg),
2400 gen_rtx (MEM, SImode,
2401 gen_rtx (LO_SUM, SImode,
2402 gen_rtx (REG, SImode, 1),
2403 GEN_INT (disp))));
2407 /* Emit RTL to set REG to the value specified by BASE+DISP.
2408 Handle case where DISP > 8k by using the add_high_const pattern.
2410 Note in DISP > 8k case, we will leave the high part of the address
2411 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2412 static void
2413 set_reg_plus_d(reg, base, disp)
2414 int reg, base, disp;
2416 if (VAL_14_BITS_P (disp))
2418 emit_move_insn (gen_rtx (REG, SImode, reg),
2419 gen_rtx (PLUS, SImode,
2420 gen_rtx (REG, SImode, base),
2421 GEN_INT (disp)));
2423 else
2425 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2426 gen_rtx (REG, SImode, base),
2427 GEN_INT (disp)));
2428 emit_move_insn (gen_rtx (REG, SImode, reg),
2429 gen_rtx (LO_SUM, SImode,
2430 gen_rtx (REG, SImode, 1),
2431 GEN_INT (disp)));
2435 /* Global variables set by FUNCTION_PROLOGUE. */
2436 /* Size of frame. Need to know this to emit return insns from
2437 leaf procedures. */
2438 static int actual_fsize;
2439 static int local_fsize, save_fregs;
2442 compute_frame_size (size, fregs_live)
2443 int size;
2444 int *fregs_live;
2446 extern int current_function_outgoing_args_size;
2447 int i, fsize;
2449 /* 8 is space for frame pointer + filler. If any frame is allocated
2450 we need to add this in because of STARTING_FRAME_OFFSET. */
2451 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2453 /* We must leave enough space for all the callee saved registers
2454 from 3 .. highest used callee save register since we don't
2455 know if we're going to have an inline or out of line prologue
2456 and epilogue. */
2457 for (i = 18; i >= 3; i--)
2458 if (regs_ever_live[i])
2460 fsize += 4 * (i - 2);
2461 break;
2464 /* Round the stack. */
2465 fsize = (fsize + 7) & ~7;
2467 /* We must leave enough space for all the callee saved registers
2468 from 3 .. highest used callee save register since we don't
2469 know if we're going to have an inline or out of line prologue
2470 and epilogue. */
2471 for (i = 66; i >= 48; i -= 2)
2472 if (regs_ever_live[i] || regs_ever_live[i + 1])
2474 if (fregs_live)
2475 *fregs_live = 1;
2477 fsize += 4 * (i - 46);
2478 break;
2481 fsize += current_function_outgoing_args_size;
2482 if (! leaf_function_p () || fsize)
2483 fsize += 32;
2484 return (fsize + 63) & ~63;
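/* Worked example (hypothetical function): size == 40, frame pointer
   needed, %r3-%r5 live, no live fregs, 16 bytes of outgoing args, not
   a leaf.  fsize = 40 + 8 = 48; the GR saves add 4 * (5 - 2) = 12,
   giving 60, rounded up to 64; the outgoing args bring it to 80 and
   the 32-byte frame marker to 112; the final 64-byte rounding
   returns 128.  */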
2487 rtx hp_profile_label_rtx;
2488 static char hp_profile_label_name[8];
2489 void
2490 output_function_prologue (file, size)
2491 FILE *file;
2492 int size;
2494 /* The function's label and associated .PROC must never be
2495 separated and must be output *after* any profiling declarations
2496 to avoid changing spaces/subspaces within a procedure. */
2497 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2498 fputs ("\t.PROC\n", file);
2500 /* hppa_expand_prologue does the dirty work now. We just need
2501 to output the assembler directives which denote the start
2502 of a function. */
2503 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2504 if (regs_ever_live[2] || profile_flag)
2505 fputs (",CALLS,SAVE_RP", file);
2506 else
2507 fputs (",NO_CALLS", file);
2509 if (frame_pointer_needed)
2510 fputs (",SAVE_SP", file);
2512 /* Pass on information about the number of callee register saves
2513 performed in the prologue.
2515 The compiler is supposed to pass the highest register number
2516 saved, the assembler then has to adjust that number before
2517 entering it into the unwind descriptor (to account for any
2518 caller saved registers with lower register numbers than the
2519 first callee saved register). */
2520 if (gr_saved)
2521 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2523 if (fr_saved)
2524 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2526 fputs ("\n\t.ENTRY\n", file);
2528 /* Horrid hack. emit_function_prologue will modify this RTL in
2529 place to get the expected results. */
2530 if (profile_flag)
2531 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2532 hp_profile_labelno);
2534 /* If we're using GAS and not using the portable runtime model, then
2535 we don't need to accumulate the total number of code bytes. */
2536 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2537 total_code_bytes = 0;
2538 else if (insn_addresses)
2540 unsigned int old_total = total_code_bytes;
2542 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2543 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2545 /* Be prepared to handle overflows. */
2546 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2548 else
2549 total_code_bytes = -1;
2551 remove_useless_addtr_insns (get_insns (), 0);
2554 void
2555 hppa_expand_prologue()
2557 extern char call_used_regs[];
2558 int size = get_frame_size ();
2559 int merge_sp_adjust_with_store = 0;
2560 int i, offset;
2561 rtx tmpreg, size_rtx;
2563 gr_saved = 0;
2564 fr_saved = 0;
2565 save_fregs = 0;
2566 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2567 actual_fsize = compute_frame_size (size, &save_fregs);
2569 /* Compute a few things we will use often. */
2570 tmpreg = gen_rtx (REG, SImode, 1);
2571 size_rtx = GEN_INT (actual_fsize);
2573 /* Handle out of line prologues and epilogues. */
2574 if (TARGET_SPACE)
2576 rtx operands[2];
2577 int saves = 0;
2578 int outline_insn_count = 0;
2579 int inline_insn_count = 0;
2581 /* Count the number of insns for the inline and out of line
2582 variants so we can choose one appropriately.
2584 No need to screw with counting actual_fsize operations -- they're
2585 done for both inline and out of line prologues. */
2586 if (regs_ever_live[2])
2587 inline_insn_count += 1;
2589 if (! cint_ok_for_move (local_fsize))
2590 outline_insn_count += 2;
2591 else
2592 outline_insn_count += 1;
2594 /* Put the register save info into %r22. */
2595 for (i = 18; i >= 3; i--)
2596 if (regs_ever_live[i] && ! call_used_regs[i])
2598 /* -1 because the stack adjustment is normally done in
2599 the same insn as a register save. */
2600 inline_insn_count += (i - 2) - 1;
2601 saves = i;
2602 break;
2605 for (i = 66; i >= 48; i -= 2)
2606 if (regs_ever_live[i] || regs_ever_live[i + 1])
2608 /* +1 needed as we load %r1 with the start of the freg
2609 save area. */
2610 inline_insn_count += (i/2 - 23) + 1;
2611 saves |= ((i/2 - 12 ) << 16);
2612 break;
2615 if (frame_pointer_needed)
2616 inline_insn_count += 3;
2618 if (! cint_ok_for_move (saves))
2619 outline_insn_count += 2;
2620 else
2621 outline_insn_count += 1;
2623 if (TARGET_PORTABLE_RUNTIME)
2624 outline_insn_count += 2;
2625 else
2626 outline_insn_count += 1;
2628 /* If there's a lot of insns in the prologue, then do it as
2629 an out-of-line sequence. */
2630 if (inline_insn_count > outline_insn_count)
2632 /* Put the local_fsize into %r19. */
2633 operands[0] = gen_rtx (REG, SImode, 19);
2634 operands[1] = GEN_INT (local_fsize);
2635 emit_move_insn (operands[0], operands[1]);
2637 /* Put the stack size into %r21. */
2638 operands[0] = gen_rtx (REG, SImode, 21);
2639 operands[1] = size_rtx;
2640 emit_move_insn (operands[0], operands[1]);
2642 operands[0] = gen_rtx (REG, SImode, 22);
2643 operands[1] = GEN_INT (saves);
2644 emit_move_insn (operands[0], operands[1]);
2646 /* Now call the out-of-line prologue. */
2647 emit_insn (gen_outline_prologue_call ());
2648 emit_insn (gen_blockage ());
2650 /* Note that we're using an out-of-line prologue. */
2651 out_of_line_prologue_epilogue = 1;
2652 return;
2656 out_of_line_prologue_epilogue = 0;
2658 /* Save RP first. The calling conventions manual states RP will
2659 always be stored into the caller's frame at sp-20. */
2660 if (regs_ever_live[2] || profile_flag)
2661 store_reg (2, -20, STACK_POINTER_REGNUM);
2663 /* Allocate the local frame and set up the frame pointer if needed. */
2664 if (actual_fsize)
2665 if (frame_pointer_needed)
2667 /* Copy the old frame pointer temporarily into %r1. Set up the
2668 new stack pointer, then store away the saved old frame pointer
2669 into the stack at sp+actual_fsize and at the same time update
2670 the stack pointer by actual_fsize bytes. Two versions, first
2671 handles small (<8k) frames. The second handles large (>8k)
2672 frames. */
2673 emit_move_insn (tmpreg, frame_pointer_rtx);
2674 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2675 if (VAL_14_BITS_P (actual_fsize))
2676 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2677 else
2679 /* It is incorrect to store the saved frame pointer at *sp,
2680 then increment sp (writes beyond the current stack boundary).
2682 So instead use stwm to store at *sp and post-increment the
2683 stack pointer as an atomic operation. Then increment sp to
2684 finish allocating the new frame. */
2685 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2686 set_reg_plus_d (STACK_POINTER_REGNUM,
2687 STACK_POINTER_REGNUM,
2688 actual_fsize - 64);
2691 /* no frame pointer needed. */
2692 else
2694 /* In some cases we can perform the first callee register save
2695 and allocating the stack frame at the same time. If so, just
2696 make a note of it and defer allocating the frame until saving
2697 the callee registers. */
2698 if (VAL_14_BITS_P (-actual_fsize)
2699 && local_fsize == 0
2700 && ! profile_flag
2701 && ! flag_pic)
2702 merge_sp_adjust_with_store = 1;
2703 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2704 else if (actual_fsize != 0)
2705 set_reg_plus_d (STACK_POINTER_REGNUM,
2706 STACK_POINTER_REGNUM,
2707 actual_fsize);
2709 /* The hppa calling conventions say that %r19, the pic offset
2710 register, is saved at sp - 32 (in this function's frame) when
2711 generating PIC code. FIXME: What is the correct thing to do
2712 for functions which make no calls and allocate no frame? Do
2713 we need to allocate a frame, or can we just omit the save? For
2714 now we'll just omit the save. */
2715 if (actual_fsize != 0 && flag_pic)
2716 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2718 /* Profiling code.
2720 Instead of taking one argument, the counter label, as most normal
2721 mcounts do, _mcount appears to behave differently on the HPPA. It
2722 takes the return address of the caller, the address of this routine,
2723 and the address of the label. Also, it isn't magic, so
2724 argument registers have to be preserved. */
2725 if (profile_flag)
2727 int pc_offset, i, arg_offset, basereg, offsetadj;
2729 pc_offset = 4 + (frame_pointer_needed
2730 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2731 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2733 /* When the function has a frame pointer, use it as the base
2734 register for saving/restore registers. Else use the stack
2735 pointer. Adjust the offset according to the frame size if
2736 this function does not have a frame pointer. */
2738 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2739 : STACK_POINTER_REGNUM;
2740 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2742 /* Horrid hack. emit_function_prologue will modify this RTL in
2743 place to get the expected results. sprintf here is just to
2744 put something in the name. */
2745 sprintf(hp_profile_label_name, "LP$%04d", -1);
2746 hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2747 hp_profile_label_name);
2748 if (current_function_returns_struct)
2749 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2751 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2752 if (regs_ever_live [i])
2754 store_reg (i, arg_offset, basereg);
2755 /* Deal with arg_offset not fitting in 14 bits. */
2756 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2759 emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2760 emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2761 emit_move_insn (gen_rtx (REG, SImode, 24),
2762 gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2763 /* %r25 is set from within the output pattern. */
2764 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2766 /* Restore argument registers. */
2767 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2768 if (regs_ever_live [i])
2769 load_reg (i, arg_offset, basereg);
2771 if (current_function_returns_struct)
2772 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2776 /* Normal register save.
2778 Do not save the frame pointer in the frame_pointer_needed case. It
2779 was done earlier. */
2780 if (frame_pointer_needed)
2782 for (i = 18, offset = local_fsize; i >= 4; i--)
2783 if (regs_ever_live[i] && ! call_used_regs[i])
2785 store_reg (i, offset, FRAME_POINTER_REGNUM);
2786 offset += 4;
2787 gr_saved++;
2789 /* Account for %r3 which is saved in a special place. */
2790 gr_saved++;
2792 /* No frame pointer needed. */
2793 else
2795 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2796 if (regs_ever_live[i] && ! call_used_regs[i])
2798 /* If merge_sp_adjust_with_store is nonzero, then we can
2799 optimize the first GR save. */
2800 if (merge_sp_adjust_with_store)
2802 merge_sp_adjust_with_store = 0;
2803 emit_insn (gen_post_stwm (stack_pointer_rtx,
2804 gen_rtx (REG, SImode, i),
2805 GEN_INT (-offset)));
2807 else
2808 store_reg (i, offset, STACK_POINTER_REGNUM);
2809 offset += 4;
2810 gr_saved++;
2813 /* If we wanted to merge the SP adjustment with a GR save, but we never
2814 did any GR saves, then just emit the adjustment here. */
2815 if (merge_sp_adjust_with_store)
2816 set_reg_plus_d (STACK_POINTER_REGNUM,
2817 STACK_POINTER_REGNUM,
2818 actual_fsize);
2821 /* Align pointer properly (doubleword boundary). */
2822 offset = (offset + 7) & ~7;
2824 /* Floating point register store. */
2825 if (save_fregs)
2827 /* First get the frame or stack pointer to the start of the FP register
2828 save area. */
2829 if (frame_pointer_needed)
2830 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2831 else
2832 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2834 /* Now actually save the FP registers. */
2835 for (i = 66; i >= 48; i -= 2)
2837 if (regs_ever_live[i] || regs_ever_live[i + 1])
2839 emit_move_insn (gen_rtx (MEM, DFmode,
2840 gen_rtx (POST_INC, DFmode, tmpreg)),
2841 gen_rtx (REG, DFmode, i));
2842 fr_saved++;
2847 /* When generating PIC code it is necessary to save/restore the
2848 PIC register around each function call. We used to do this
2849 in the call patterns themselves, but that implementation
2850 made incorrect assumptions about using global variables to hold
2851 per-function rtl code generated in the backend.
2853 So instead, we copy the PIC register into a reserved callee saved
2854 register in the prologue. Then after each call we reload the PIC
2855 register from the callee saved register. We also reload the PIC
2856 register from the callee saved register in the epilogue to ensure the
2857 PIC register is valid at function exit.
2859 This may (depending on the exact characteristics of the function)
2860 even be more efficient.
2862 Avoid this if the callee saved register wasn't used (these are
2863 leaf functions). */
2864 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2865 emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2866 gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
2870 void
2871 output_function_epilogue (file, size)
2872 FILE *file;
2873 int size;
2875 rtx insn = get_last_insn ();
2876 int i;
2878 /* hppa_expand_epilogue does the dirty work now. We just need
2879 to output the assembler directives which denote the end
2880 of a function.
2882 To make debuggers happy, emit a nop if the epilogue was completely
2883 eliminated due to a volatile call as the last insn in the
2884 current function. That way the return address (in %r2) will
2885 always point to a valid instruction in the current function. */
2887 /* Get the last real insn. */
2888 if (GET_CODE (insn) == NOTE)
2889 insn = prev_real_insn (insn);
2891 /* If it is a sequence, then look inside. */
2892 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2893 insn = XVECEXP (PATTERN (insn), 0, 0);
2895 /* If insn is a CALL_INSN, then it must be a call to a volatile
2896 function (otherwise there would be epilogue insns). */
2897 if (insn && GET_CODE (insn) == CALL_INSN)
2898 fputs ("\tnop\n", file);
2900 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2903 void
2904 hppa_expand_epilogue ()
2906 rtx tmpreg;
2907 int offset,i;
2908 int merge_sp_adjust_with_load = 0;
2910 /* Handle out of line prologues and epilogues. */
2911 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2913 int saves = 0;
2914 rtx operands[2];
2916 /* Put the register save info into %r22. */
2917 for (i = 18; i >= 3; i--)
2918 if (regs_ever_live[i] && ! call_used_regs[i])
2920 saves = i;
2921 break;
2924 for (i = 66; i >= 48; i -= 2)
2925 if (regs_ever_live[i] || regs_ever_live[i + 1])
2927 saves |= ((i/2 - 12 ) << 16);
2928 break;
2931 emit_insn (gen_blockage ());
2933 /* Put the local_fsize into %r19. */
2934 operands[0] = gen_rtx (REG, SImode, 19);
2935 operands[1] = GEN_INT (local_fsize);
2936 emit_move_insn (operands[0], operands[1]);
2938 /* Put the stack size into %r21. */
2939 operands[0] = gen_rtx (REG, SImode, 21);
2940 operands[1] = GEN_INT (actual_fsize);
2941 emit_move_insn (operands[0], operands[1]);
2943 operands[0] = gen_rtx (REG, SImode, 22);
2944 operands[1] = GEN_INT (saves);
2945 emit_move_insn (operands[0], operands[1]);
2947 /* Now call the out-of-line epilogue. */
2948 emit_insn (gen_outline_epilogue_call ());
2949 return;
2952 /* We will use this often. */
2953 tmpreg = gen_rtx (REG, SImode, 1);
2955 /* Try to restore RP early to avoid load/use interlocks when
2956 RP gets used in the return (bv) instruction. This appears to still
2957 be necessary even when we schedule the prologue and epilogue. */
2958 if (frame_pointer_needed
2959 && (regs_ever_live [2] || profile_flag))
2960 load_reg (2, -20, FRAME_POINTER_REGNUM);
2962 /* No frame pointer, and stack is smaller than 8k. */
2963 else if (! frame_pointer_needed
2964 && VAL_14_BITS_P (actual_fsize + 20)
2965 && (regs_ever_live[2] || profile_flag))
2966 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2968 /* General register restores. */
2969 if (frame_pointer_needed)
2971 for (i = 18, offset = local_fsize; i >= 4; i--)
2972 if (regs_ever_live[i] && ! call_used_regs[i])
2974 load_reg (i, offset, FRAME_POINTER_REGNUM);
2975 offset += 4;
2978 else
2980 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2982 if (regs_ever_live[i] && ! call_used_regs[i])
2984 /* Only for the first load.
2985 merge_sp_adjust_with_load holds the register load
2986 with which we will merge the sp adjustment. */
2987 if (VAL_14_BITS_P (actual_fsize + 20)
2988 && local_fsize == 0
2989 && ! merge_sp_adjust_with_load)
2990 merge_sp_adjust_with_load = i;
2991 else
2992 load_reg (i, offset, STACK_POINTER_REGNUM);
2993 offset += 4;
2998 /* Align pointer properly (doubleword boundary). */
2999 offset = (offset + 7) & ~7;
3001 /* FP register restores. */
3002 if (save_fregs)
3004 /* Adjust the register to index off of. */
3005 if (frame_pointer_needed)
3006 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
3007 else
3008 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3010 /* Actually do the restores now. */
3011 for (i = 66; i >= 48; i -= 2)
3013 if (regs_ever_live[i] || regs_ever_live[i + 1])
3015 emit_move_insn (gen_rtx (REG, DFmode, i),
3016 gen_rtx (MEM, DFmode,
3017 gen_rtx (POST_INC, DFmode, tmpreg)));
3022 /* Emit a blockage insn here to keep these insns from being moved to
3023 an earlier spot in the epilogue, or into the main instruction stream.
3025 This is necessary as we must not cut the stack back before all the
3026 restores are finished. */
3027 emit_insn (gen_blockage ());
3028 /* No frame pointer, but we have a stack greater than 8k. We restore
3029 %r2 very late in this case. (All other cases are restored as early
3030 as possible.) */
3031 if (! frame_pointer_needed
3032 && ! VAL_14_BITS_P (actual_fsize + 20)
3033 && (regs_ever_live[2] || profile_flag))
3035 set_reg_plus_d (STACK_POINTER_REGNUM,
3036 STACK_POINTER_REGNUM,
3037 - actual_fsize);
3039 /* This used to try and be clever by not depending on the value in
3040 %r30 and instead use the value held in %r1 (so that the 2nd insn
3041 which sets %r30 could be put in the delay slot of the return insn).
3043 That won't work since if the stack is exactly 8k set_reg_plus_d
3044 doesn't set %r1, just %r30. */
3045 load_reg (2, - 20, STACK_POINTER_REGNUM);
3048 /* Reset stack pointer (and possibly frame pointer). The stack
3049 pointer is initially set to fp + 64 to avoid a race condition. */
3050 else if (frame_pointer_needed)
3052 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3053 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3054 stack_pointer_rtx,
3055 GEN_INT (-64)));
3057 /* If we were deferring a callee register restore, do it now. */
3058 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3059 emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
3060 merge_sp_adjust_with_load),
3061 stack_pointer_rtx,
3062 GEN_INT (- actual_fsize)));
3063 else if (actual_fsize != 0)
3064 set_reg_plus_d (STACK_POINTER_REGNUM,
3065 STACK_POINTER_REGNUM,
3066 - actual_fsize);
3069 /* Fetch the return address for the frame COUNT steps up from
3070 the current frame, after the prologue. FRAMEADDR is the
3071 frame pointer of the COUNT frame.
3073 We want to ignore any export stub remnants here.
3075 The value returned is used in two different ways:
3077 1. To find a function's caller.
3079 2. To change the return address for a function.
3081 This function handles most instances of case 1; however, it will
3082 fail if there are two levels of stubs to execute on the return
3083 path. The only way I believe that can happen is if the return value
3084 needs a parameter relocation, which never happens for C code.
3086 This function handles most instances of case 2; however, it will
3087 fail if we did not originally have stub code on the return path
3088 but will need code on the new return path. This can happen if
3089 the caller & callee are both in the main program, but the new
3090 return location is in a shared library.
3092 To handle this correctly we need to set the return pointer at
3093 frame-20 to point to a return stub, and frame-24 to point to the
3094 location we wish to return to. */
3097 return_addr_rtx (count, frameaddr)
3098 int count;
3099 rtx frameaddr;
3101 rtx label;
3102 rtx saved_rp;
3103 rtx ins;
3105 saved_rp = gen_reg_rtx (Pmode);
3107 /* First, we start off with the normal return address pointer from
3108 -20[frameaddr]. */
3110 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3112 /* Get pointer to the instruction stream. We have to mask out the
3113 privilege level from the two low order bits of the return address
3114 pointer here so that ins will point to the start of the first
3115 instruction that would have been executed if we returned. */
3116 ins = copy_to_reg (gen_rtx (AND, Pmode,
3117 copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
3118 MASK_RETURN_ADDR));
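/* E.g. (hypothetical value): a saved return pointer of 0x40001203
   carries the privilege level in its two low-order bits; masking them
   off leaves 0x40001200, the address of the first instruction the
   stub check below will examine.  */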
3119 label = gen_label_rtx ();
3121 /* Check the instruction stream at the normal return address for the
3122 export stub:
3124 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3125 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3126 0x00011820 | stub+16: mtsp r1,sr0
3127 0xe0400002 | stub+20: be,n 0(sr0,rp)
3129 If it is an export stub, then our return address is really in
3130 -24[frameaddr]. */
3132 emit_cmp_insn (gen_rtx (MEM, SImode, ins),
3133 GEN_INT (0x4bc23fd1),
3134 NE, NULL_RTX, SImode, 1, 0);
3135 emit_jump_insn (gen_bne (label));
3137 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
3138 GEN_INT (0x004010a1),
3139 NE, NULL_RTX, SImode, 1, 0);
3140 emit_jump_insn (gen_bne (label));
3142 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
3143 GEN_INT (0x00011820),
3144 NE, NULL_RTX, SImode, 1, 0);
3145 emit_jump_insn (gen_bne (label));
3147 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
3148 GEN_INT (0xe0400002),
3149 NE, NULL_RTX, SImode, 1, 0);
3151 /* If there is no export stub then just use our initial guess of
3152 -20[frameaddr]. */
3154 emit_jump_insn (gen_bne (label));
3156 /* Here we know that our return address pointer points to an export
3157 stub. We don't want to return the address of the export stub,
3158 but rather the return address that leads back into user code.
3159 That return address is stored at -24[frameaddr]. */
3161 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3163 emit_label (label);
3164 return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp));
3167 /* This is only valid once reload has completed because it depends on
3168 knowing exactly how much (if any) frame there is and...
3170 It's only valid if there is no frame marker to de-allocate and...
3172 It's only valid if %r2 hasn't been saved into the caller's frame
3173 (we're not profiling and %r2 isn't live anywhere). */
3175 hppa_can_use_return_insn_p ()
3177 return (reload_completed
3178 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3179 && ! profile_flag
3180 && ! regs_ever_live[2]
3181 && ! frame_pointer_needed);
3184 void
3185 emit_bcond_fp (code, operand0)
3186 enum rtx_code code;
3187 rtx operand0;
3189 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3190 gen_rtx (IF_THEN_ELSE, VOIDmode,
3191 gen_rtx (code, VOIDmode,
3192 gen_rtx (REG, CCFPmode, 0),
3193 const0_rtx),
3194 gen_rtx (LABEL_REF, VOIDmode, operand0),
3195 pc_rtx)));
3200 gen_cmp_fp (code, operand0, operand1)
3201 enum rtx_code code;
3202 rtx operand0, operand1;
3204 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3205 gen_rtx (code, CCFPmode, operand0, operand1));
3208 /* Adjust the cost of a scheduling dependency. Return the new cost of
3209 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3212 pa_adjust_cost (insn, link, dep_insn, cost)
3213 rtx insn;
3214 rtx link;
3215 rtx dep_insn;
3216 int cost;
3218 if (! recog_memoized (insn))
3219 return 0;
3221 if (REG_NOTE_KIND (link) == 0)
3223 /* Data dependency; DEP_INSN writes a register that INSN reads some
3224 cycles later. */
3226 if (get_attr_type (insn) == TYPE_FPSTORE)
3228 rtx pat = PATTERN (insn);
3229 rtx dep_pat = PATTERN (dep_insn);
3230 if (GET_CODE (pat) == PARALLEL)
3232 /* This happens for the fstXs,mb patterns. */
3233 pat = XVECEXP (pat, 0, 0);
3235 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3236 /* If this happens, we have to extend this to schedule
3237 optimally. Return 0 for now. */
3238 return 0;
3240 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3242 if (! recog_memoized (dep_insn))
3243 return 0;
3244 /* DEP_INSN is writing its result to the register
3245 being stored in the fpstore INSN. */
3246 switch (get_attr_type (dep_insn))
3248 case TYPE_FPLOAD:
3249 /* This costs 3 cycles, not 2 as the md says for the
3250 700 and 7100. Note scaling of cost for 7100. */
3251 return cost + ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3253 case TYPE_FPALU:
3254 case TYPE_FPMULSGL:
3255 case TYPE_FPMULDBL:
3256 case TYPE_FPDIVSGL:
3257 case TYPE_FPDIVDBL:
3258 case TYPE_FPSQRTSGL:
3259 case TYPE_FPSQRTDBL:
3260 /* In these important cases, we save one cycle compared to
3261 when flop instructions feed each other. */
3262 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3264 default:
3265 return cost;
3270 /* For other data dependencies, the default cost specified in the
3271 md is correct. */
3272 return cost;
3274 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3276 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3277 cycles later. */
3279 if (get_attr_type (insn) == TYPE_FPLOAD)
3281 rtx pat = PATTERN (insn);
3282 rtx dep_pat = PATTERN (dep_insn);
3283 if (GET_CODE (pat) == PARALLEL)
3285 /* This happens for the fldXs,mb patterns. */
3286 pat = XVECEXP (pat, 0, 0);
3288 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3289 /* If this happens, we have to extend this to schedule
3290 optimally. Return 0 for now. */
3291 return 0;
3293 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3295 if (! recog_memoized (dep_insn))
3296 return 0;
3297 switch (get_attr_type (dep_insn))
3299 case TYPE_FPALU:
3300 case TYPE_FPMULSGL:
3301 case TYPE_FPMULDBL:
3302 case TYPE_FPDIVSGL:
3303 case TYPE_FPDIVDBL:
3304 case TYPE_FPSQRTSGL:
3305 case TYPE_FPSQRTDBL:
3306 /* A fpload can't be issued until one cycle before a
3307 preceding arithmetic operation has finished if
3308 the target of the fpload is any of the sources
3309 (or destination) of the arithmetic operation. */
3310 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3312 default:
3313 return 0;
3317 else if (get_attr_type (insn) == TYPE_FPALU)
3319 rtx pat = PATTERN (insn);
3320 rtx dep_pat = PATTERN (dep_insn);
3321 if (GET_CODE (pat) == PARALLEL)
3323 /* This happens for the fldXs,mb patterns. */
3324 pat = XVECEXP (pat, 0, 0);
3326 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3327 /* If this happens, we have to extend this to schedule
3328 optimally. Return 0 for now. */
3329 return 0;
3331 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3333 if (! recog_memoized (dep_insn))
3334 return 0;
3335 switch (get_attr_type (dep_insn))
3337 case TYPE_FPDIVSGL:
3338 case TYPE_FPDIVDBL:
3339 case TYPE_FPSQRTSGL:
3340 case TYPE_FPSQRTDBL:
3341 /* An ALU flop can't be issued until two cycles before a
3342 preceding divide or sqrt operation has finished if
3343 the target of the ALU flop is any of the sources
3344 (or destination) of the divide or sqrt operation. */
3345 return cost - ((pa_cpu == PROCESSOR_700) ? 2 : 4);
3347 default:
3348 return 0;
3353 /* For other anti dependencies, the cost is 0. */
3354 return 0;
3356 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3358 /* Output dependency; DEP_INSN writes a register that INSN writes some
3359 cycles later. */
3360 if (get_attr_type (insn) == TYPE_FPLOAD)
3362 rtx pat = PATTERN (insn);
3363 rtx dep_pat = PATTERN (dep_insn);
3364 if (GET_CODE (pat) == PARALLEL)
3366 /* This happens for the fldXs,mb patterns. */
3367 pat = XVECEXP (pat, 0, 0);
3369 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3370 /* If this happens, we have to extend this to schedule
3371 optimally. Return 0 for now. */
3372 return 0;
3374 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3376 if (! recog_memoized (dep_insn))
3377 return 0;
3378 switch (get_attr_type (dep_insn))
3380 case TYPE_FPALU:
3381 case TYPE_FPMULSGL:
3382 case TYPE_FPMULDBL:
3383 case TYPE_FPDIVSGL:
3384 case TYPE_FPDIVDBL:
3385 case TYPE_FPSQRTSGL:
3386 case TYPE_FPSQRTDBL:
3387 /* A fpload can't be issued until one cycle before a
3388 preceding arithmetic operation has finished if
3389 the target of the fpload is the destination of the
3390 arithmetic operation. */
3391 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3393 default:
3394 return 0;
3398 else if (get_attr_type (insn) == TYPE_FPALU)
3400 rtx pat = PATTERN (insn);
3401 rtx dep_pat = PATTERN (dep_insn);
3402 if (GET_CODE (pat) == PARALLEL)
3404 /* This happens for the fldXs,mb patterns. */
3405 pat = XVECEXP (pat, 0, 0);
3407 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3408 /* If this happens, we have to extend this to schedule
3409 optimally. Return 0 for now. */
3410 return 0;
3412 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3414 if (! recog_memoized (dep_insn))
3415 return 0;
3416 switch (get_attr_type (dep_insn))
3418 case TYPE_FPDIVSGL:
3419 case TYPE_FPDIVDBL:
3420 case TYPE_FPSQRTSGL:
3421 case TYPE_FPSQRTDBL:
3422 /* An ALU flop can't be issued until two cycles before a
3423 preceding divide or sqrt operation has finished if
3424 the target of the ALU flop is also the target of
3425 the divide or sqrt operation. */
3426 return cost - ((pa_cpu == PROCESSOR_700) ? 2 : 4);
3428 default:
3429 return 0;
3434 /* For other output dependencies, the cost is 0. */
3435 return 0;
3437 else
3438 abort ();
3441 /* Return any length adjustment needed by INSN which already has its length
3442 computed as LENGTH. Return zero if no adjustment is necessary.
3444 For the PA: function calls, millicode calls, and backwards short
3445 conditional branches with unfilled delay slots need an adjustment of 4
3446 bytes (to account for the NOP which will be inserted into the instruction stream).
3448 Also compute the length of an inline block move here as it is too
3449 complicated to express as a length attribute in pa.md. */
3451 pa_adjust_insn_length (insn, length)
3452 rtx insn;
3453 int length;
3455 rtx pat = PATTERN (insn);
3457 /* Call insns which are *not* indirect and have unfilled delay slots. */
3458 if (GET_CODE (insn) == CALL_INSN)
3461 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3462 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3463 return 4;
3464 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3465 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3466 == SYMBOL_REF)
3467 return 4;
3468 else
3469 return 0;
3471 /* Jumps inside switch tables which have unfilled delay slots
3472 also need adjustment. */
3473 else if (GET_CODE (insn) == JUMP_INSN
3474 && simplejump_p (insn)
3475 && GET_MODE (PATTERN (insn)) == DImode)
3476 return 4;
3477 /* Millicode insn with an unfilled delay slot. */
3478 else if (GET_CODE (insn) == INSN
3479 && GET_CODE (pat) != SEQUENCE
3480 && GET_CODE (pat) != USE
3481 && GET_CODE (pat) != CLOBBER
3482 && get_attr_type (insn) == TYPE_MILLI)
3483 return 4;
3484 /* Block move pattern. */
3485 else if (GET_CODE (insn) == INSN
3486 && GET_CODE (pat) == PARALLEL
3487 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3488 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3489 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3490 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3491 return compute_movstrsi_length (insn) - 4;
3492 /* Conditional branch with an unfilled delay slot. */
3493 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3495 /* Adjust a short backwards conditional with an unfilled delay slot. */
3496 if (GET_CODE (pat) == SET
3497 && length == 4
3498 && ! forward_branch_p (insn))
3499 return 4;
3500 else if (GET_CODE (pat) == PARALLEL
3501 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3502 && length == 4)
3503 return 4;
3504 /* Adjust dbra insn with short backwards conditional branch with
3505 unfilled delay slot -- only for case where counter is in a
3506 general register. */
3507 else if (GET_CODE (pat) == PARALLEL
3508 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3509 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3510 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3511 && length == 4
3512 && ! forward_branch_p (insn))
3513 return 4;
3514 else
3515 return 0;
3517 return 0;
3520 /* Print operand X (an rtx) in assembler syntax to file FILE.
3521 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3522 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3524 void
3525 print_operand (file, x, code)
3526 FILE *file;
3527 rtx x;
3528 int code;
3530 switch (code)
3532 case '#':
3533 /* Output a 'nop' if there's nothing for the delay slot. */
3534 if (dbr_sequence_length () == 0)
3535 fputs ("\n\tnop", file);
3536 return;
3537 case '*':
3538 /* Output a nullification completer if there's nothing for the
3539 delay slot or nullification is requested. */
3540 if (dbr_sequence_length () == 0 ||
3541 (final_sequence &&
3542 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3543 fputs (",n", file);
3544 return;
3545 case 'R':
3546 /* Print out the second register name of a register pair.
3547 I.e., R (6) => 7. */
3548 fputs (reg_names[REGNO (x)+1], file);
3549 return;
3550 case 'r':
3551 /* A register or zero. */
3552 if (x == const0_rtx
3553 || (x == CONST0_RTX (DFmode))
3554 || (x == CONST0_RTX (SFmode)))
3556 fputs ("0", file);
3557 return;
3559 else
3560 break;
3561 case 'C': /* Plain (C)ondition */
3562 case 'X':
3563 switch (GET_CODE (x))
3565 case EQ:
3566 fputs ("=", file); break;
3567 case NE:
3568 fputs ("<>", file); break;
3569 case GT:
3570 fputs (">", file); break;
3571 case GE:
3572 fputs (">=", file); break;
3573 case GEU:
3574 fputs (">>=", file); break;
3575 case GTU:
3576 fputs (">>", file); break;
3577 case LT:
3578 fputs ("<", file); break;
3579 case LE:
3580 fputs ("<=", file); break;
3581 case LEU:
3582 fputs ("<<=", file); break;
3583 case LTU:
3584 fputs ("<<", file); break;
3585 default:
3586 abort ();
3588 return;
3589 case 'N': /* Condition, (N)egated */
3590 switch (GET_CODE (x))
3592 case EQ:
3593 fputs ("<>", file); break;
3594 case NE:
3595 fputs ("=", file); break;
3596 case GT:
3597 fputs ("<=", file); break;
3598 case GE:
3599 fputs ("<", file); break;
3600 case GEU:
3601 fputs ("<<", file); break;
3602 case GTU:
3603 fputs ("<<=", file); break;
3604 case LT:
3605 fputs (">=", file); break;
3606 case LE:
3607 fputs (">", file); break;
3608 case LEU:
3609 fputs (">>", file); break;
3610 case LTU:
3611 fputs (">>=", file); break;
3612 default:
3613 abort ();
3615 return;
3616 /* For floating point comparisons. Need special conditions to deal
3617 with NaNs properly. */
3618 case 'Y':
3619 switch (GET_CODE (x))
3621 case EQ:
3622 fputs ("!=", file); break;
3623 case NE:
3624 fputs ("=", file); break;
3625 case GT:
3626 fputs ("<=", file); break;
3627 case GE:
3628 fputs ("<", file); break;
3629 case LT:
3630 fputs (">=", file); break;
3631 case LE:
3632 fputs (">", file); break;
3633 default:
3634 abort ();
3636 return;
3637 case 'S': /* Condition, operands are (S)wapped. */
3638 switch (GET_CODE (x))
3640 case EQ:
3641 fputs ("=", file); break;
3642 case NE:
3643 fputs ("<>", file); break;
3644 case GT:
3645 fputs ("<", file); break;
3646 case GE:
3647 fputs ("<=", file); break;
3648 case GEU:
3649 fputs ("<<=", file); break;
3650 case GTU:
3651 fputs ("<<", file); break;
3652 case LT:
3653 fputs (">", file); break;
3654 case LE:
3655 fputs (">=", file); break;
3656 case LEU:
3657 fputs (">>=", file); break;
3658 case LTU:
3659 fputs (">>", file); break;
3660 default:
3661 abort ();
3663 return;
3664 case 'B': /* Condition, (B)oth swapped and negate. */
3665 switch (GET_CODE (x))
3667 case EQ:
3668 fputs ("<>", file); break;
3669 case NE:
3670 fputs ("=", file); break;
3671 case GT:
3672 fputs (">=", file); break;
3673 case GE:
3674 fputs (">", file); break;
3675 case GEU:
3676 fputs (">>", file); break;
3677 case GTU:
3678 fputs (">>=", file); break;
3679 case LT:
3680 fputs ("<=", file); break;
3681 case LE:
3682 fputs ("<", file); break;
3683 case LEU:
3684 fputs ("<<", file); break;
3685 case LTU:
3686 fputs ("<<=", file); break;
3687 default:
3688 abort ();
3690 return;
3691 case 'k':
3692 if (GET_CODE (x) == CONST_INT)
3694 fprintf (file, "%d", ~INTVAL (x));
3695 return;
3697 abort();
3698 case 'L':
3699 if (GET_CODE (x) == CONST_INT)
3701 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3702 return;
3704 abort();
3705 case 'O':
3706 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3708 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3709 return;
3711 abort();
3712 case 'P':
3713 if (GET_CODE (x) == CONST_INT)
3715 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3716 return;
3718 abort();
3719 case 'I':
3720 if (GET_CODE (x) == CONST_INT)
3721 fputs ("i", file);
3722 return;
3723 case 'M':
3724 case 'F':
3725 switch (GET_CODE (XEXP (x, 0)))
3727 case PRE_DEC:
3728 case PRE_INC:
3729 fputs ("s,mb", file);
3730 break;
3731 case POST_DEC:
3732 case POST_INC:
3733 fputs ("s,ma", file);
3734 break;
3735 case PLUS:
3736 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3737 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3738 fputs ("x,s", file);
3739 else if (code == 'F')
3740 fputs ("s", file);
3741 break;
3742 default:
3743 if (code == 'F')
3744 fputs ("s", file);
3745 break;
3747 return;
3748 case 'G':
3749 output_global_address (file, x, 0);
3750 return;
3751 case 'H':
3752 output_global_address (file, x, 1);
3753 return;
3754 case 0: /* Don't do anything special */
3755 break;
3756 case 'Z':
3758 unsigned op[3];
3759 compute_zdepi_operands (INTVAL (x), op);
3760 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3761 return;
3763 default:
3764 abort ();
3766 if (GET_CODE (x) == REG)
3768 fputs (reg_names [REGNO (x)], file);
3769 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3770 fputs ("L", file);
3772 else if (GET_CODE (x) == MEM)
3774 int size = GET_MODE_SIZE (GET_MODE (x));
3775 rtx base = XEXP (XEXP (x, 0), 0);
3776 switch (GET_CODE (XEXP (x, 0)))
3778 case PRE_DEC:
3779 case POST_DEC:
3780 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3781 break;
3782 case PRE_INC:
3783 case POST_INC:
3784 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3785 break;
3786 default:
3787 if (GET_CODE (XEXP (x, 0)) == PLUS
3788 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3789 fprintf (file, "%s(0,%s)",
3790 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3791 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3792 else if (GET_CODE (XEXP (x, 0)) == PLUS
3793 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3794 fprintf (file, "%s(0,%s)",
3795 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3796 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3797 else
3798 output_address (XEXP (x, 0));
3799 break;
3802 else
3803 output_addr_const (file, x);
3806 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3808 void
3809 output_global_address (file, x, round_constant)
3810 FILE *file;
3811 rtx x;
3812 int round_constant;
3815 /* Imagine (high (const (plus ...))). */
3816 if (GET_CODE (x) == HIGH)
3817 x = XEXP (x, 0);
3819 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3820 assemble_name (file, XSTR (x, 0));
3821 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3823 assemble_name (file, XSTR (x, 0));
3824 fputs ("-$global$", file);
3826 else if (GET_CODE (x) == CONST)
3828 char *sep = "";
3829 int offset = 0; /* assembler wants -$global$ at end */
3830 rtx base;
3832 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3834 base = XEXP (XEXP (x, 0), 0);
3835 output_addr_const (file, base);
3837 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3838 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3839 else abort ();
3841 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3843 base = XEXP (XEXP (x, 0), 1);
3844 output_addr_const (file, base);
3846 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3847 offset = INTVAL (XEXP (XEXP (x, 0),1));
3848 else abort ();
3850 /* How bogus. The compiler is apparently responsible for
3851 rounding the constant if it uses an LR field selector.
3853 The linker and/or assembler seem a better place since
3854 they have to do this kind of thing already.
3856 If we fail to do this, HP's optimizing linker may eliminate
3857 an addil, but not update the ldw/stw/ldo instruction that
3858 uses the result of the addil. */
3859 if (round_constant)
3860 offset = ((offset + 0x1000) & ~0x1fff);
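/* E.g. (hypothetical offset): 0x2345 becomes (0x2345 + 0x1000)
   & ~0x1fff == 0x2000, i.e. the offset is rounded to the nearest 8k
   boundary, matching what an LR field selector does.  */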
3862 if (GET_CODE (XEXP (x, 0)) == PLUS)
3864 if (offset < 0)
3866 offset = -offset;
3867 sep = "-";
3869 else
3870 sep = "+";
3872 else if (GET_CODE (XEXP (x, 0)) == MINUS
3873 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3874 sep = "-";
3875 else abort ();
3877 if (!read_only_operand (base) && !flag_pic)
3878 fputs ("-$global$", file);
3879 if (offset)
3880 fprintf (file,"%s%d", sep, offset);
3882 else
3883 output_addr_const (file, x);
3886 void
3887 output_deferred_plabels (file)
3888 FILE *file;
3890 int i;
3891 /* If we have deferred plabels, then we need to switch into the data
3892 section and align it to a 4 byte boundary before we output the
3893 deferred plabels. */
3894 if (n_deferred_plabels)
3896 data_section ();
3897 ASM_OUTPUT_ALIGN (file, 2);
3900 /* Now output the deferred plabels. */
3901 for (i = 0; i < n_deferred_plabels; i++)
3903 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
3904 assemble_integer (gen_rtx (SYMBOL_REF, VOIDmode,
3905 deferred_plabels[i].name), 4, 1);
3909 /* HP's millicode routines mean something special to the assembler.
3910 Keep track of which ones we have used. */
3912 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
3913 static char imported[(int)end1000];
3914 static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
3915 static char import_string[] = ".IMPORT $$....,MILLICODE";
3916 #define MILLI_START 10
3918 static void
3919 import_milli (code)
3920 enum millicodes code;
3922 char str[sizeof (import_string)];
3924 if (!imported[(int)code])
3926 imported[(int)code] = 1;
3927 strcpy (str, import_string);
3928 strncpy (str + MILLI_START, milli_names[(int)code], 4);
3929 output_asm_insn (str, 0);
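/* MILLI_START is 10 because ".IMPORT $$" fills the first ten
   characters of import_string; e.g. import_milli (mulI) patches the
   template into ".IMPORT $$mulI,MILLICODE" and emits it only once.  */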
3933 /* The register constraints have put the operands and return value in
3934 the proper registers. */
3936 char *
3937 output_mul_insn (unsignedp, insn)
3938 int unsignedp;
3939 rtx insn;
3941 import_milli (mulI);
3942 return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
3945 /* Emit the rtl for doing a division by a constant. */
3947 /* Do magic division millicodes exist for this value? */
3948 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
3949 1, 1};
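/* Reading the table: entries 3, 5, 6, 7, 9, 10, 12, 14 and 15 are
   nonzero, so (for example) a division by 10 can call the magic
   $$divI_10 millicode instead of the general $$divI routine.  */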
3951 /* We'll use an array to keep track of the magic millicodes and
3952 whether or not we've used them already. [n][0] is signed, [n][1] is
3953 unsigned. */
3955 static int div_milli[16][2];
3958 div_operand (op, mode)
3959 rtx op;
3960 enum machine_mode mode;
3962 return (mode == SImode
3963 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3964 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3965 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3969 emit_hpdiv_const (operands, unsignedp)
3970 rtx *operands;
3971 int unsignedp;
3973 if (GET_CODE (operands[2]) == CONST_INT
3974 && INTVAL (operands[2]) > 0
3975 && INTVAL (operands[2]) < 16
3976 && magic_milli[INTVAL (operands[2])])
3978 emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
3979 emit
3980 (gen_rtx
3981 (PARALLEL, VOIDmode,
3982 gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3983 gen_rtx (unsignedp ? UDIV : DIV, SImode,
3984 gen_rtx (REG, SImode, 26),
3985 operands[2])),
3986 gen_rtx (CLOBBER, VOIDmode, operands[3]),
3987 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3988 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3989 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3990 emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3991 return 1;
3993 return 0;
3996 char *
3997 output_div_insn (operands, unsignedp, insn)
3998 rtx *operands;
3999 int unsignedp;
4000 rtx insn;
4002 int divisor;
4004 /* If the divisor is a constant, try to use one of the special
4005 opcodes. */
4006 if (GET_CODE (operands[0]) == CONST_INT)
4008 static char buf[100];
4009 divisor = INTVAL (operands[0]);
4010 if (!div_milli[divisor][unsignedp])
4012 div_milli[divisor][unsignedp] = 1;
4013 if (unsignedp)
4014 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4015 else
4016 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4018 if (unsignedp)
4020 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
4021 return output_millicode_call (insn,
4022 gen_rtx (SYMBOL_REF, SImode, buf));
4024 else
4026 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
4027 return output_millicode_call (insn,
4028 gen_rtx (SYMBOL_REF, SImode, buf));
4031 /* Divisor isn't a special constant. */
4032 else
4034 if (unsignedp)
4036 import_milli (divU);
4037 return output_millicode_call (insn,
4038 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
4040 else
4042 import_milli (divI);
4043 return output_millicode_call (insn,
4044 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
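/* For example, "x / 7" with x unsigned would emit, once per file,

       .IMPORT $$divU_7,MILLICODE

   followed by a millicode call to $$divU_7 at each use site (a
   sketch; the exact branch sequence comes from
   output_millicode_call).  */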
4049 /* Output a $$rem millicode to do mod. */
4051 char *
4052 output_mod_insn (unsignedp, insn)
4053 int unsignedp;
4054 rtx insn;
4056 if (unsignedp)
4058 import_milli (remU);
4059 return output_millicode_call (insn,
4060 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
4062 else
4064 import_milli (remI);
4065 return output_millicode_call (insn,
4066 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
4070 void
4071 output_arg_descriptor (call_insn)
4072 rtx call_insn;
4074 char *arg_regs[4];
4075 enum machine_mode arg_mode;
4076 rtx link;
4077 int i, output_flag = 0;
4078 int regno;
4080 for (i = 0; i < 4; i++)
4081 arg_regs[i] = 0;
4083 /* Specify explicitly that no argument relocations should take place
4084 if using the portable runtime calling conventions. */
4085 if (TARGET_PORTABLE_RUNTIME)
4087 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4088 asm_out_file);
4089 return;
4092 if (GET_CODE (call_insn) != CALL_INSN)
4093 abort ();
4094 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4096 rtx use = XEXP (link, 0);
4098 if (! (GET_CODE (use) == USE
4099 && GET_CODE (XEXP (use, 0)) == REG
4100 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4101 continue;
4103 arg_mode = GET_MODE (XEXP (use, 0));
4104 regno = REGNO (XEXP (use, 0));
4105 if (regno >= 23 && regno <= 26)
4107 arg_regs[26 - regno] = "GR";
4108 if (arg_mode == DImode)
4109 arg_regs[25 - regno] = "GR";
4111 else if (regno >= 32 && regno <= 39)
4113 if (arg_mode == SFmode)
4114 arg_regs[(regno - 32) / 2] = "FR";
4115 else
4117 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4118 arg_regs[(regno - 34) / 2] = "FR";
4119 arg_regs[(regno - 34) / 2 + 1] = "FU";
4120 #else
4121 arg_regs[(regno - 34) / 2] = "FU";
4122 arg_regs[(regno - 34) / 2 + 1] = "FR";
4123 #endif
4127 fputs ("\t.CALL ", asm_out_file);
4128 for (i = 0; i < 4; i++)
4130 if (arg_regs[i])
4132 if (output_flag++)
4133 fputc (',', asm_out_file);
4134 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4137 fputc ('\n', asm_out_file);
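/* An illustrative descriptor for a call passing an int and a double
   (operand placement hypothetical):

       .CALL ARGW0=GR,ARGW1=FR,ARGW2=FU

   i.e. argument word 0 lives in a general register and words 1-2
   hold a floating point value.  */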
4140 /* Return the class of any secondary reload register that is needed to
4141 move IN into a register in class CLASS using mode MODE.
4143 Profiling has showed this routine and its descendants account for
4144 a significant amount of compile time (~7%). So it has been
4145 optimized to reduce redundant computations and eliminate useless
4146 function calls.
4148 It might be worthwhile to try and make this a leaf function too. */
4150 enum reg_class
4151 secondary_reload_class (class, mode, in)
4152 enum reg_class class;
4153 enum machine_mode mode;
4154 rtx in;
4156 int regno, is_symbolic;
4158 /* Trying to load a constant into a FP register during PIC code
4159 generation will require %r1 as a scratch register. */
4160 if (flag_pic == 2
4161 && GET_MODE_CLASS (mode) == MODE_INT
4162 && FP_REG_CLASS_P (class)
4163 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4164 return R1_REGS;
4166 /* Profiling showed the PA port spends about 1.3% of its compilation
4167 time in true_regnum from calls inside secondary_reload_class. */
4169 if (GET_CODE (in) == REG)
4171 regno = REGNO (in);
4172 if (regno >= FIRST_PSEUDO_REGISTER)
4173 regno = true_regnum (in);
4175 else if (GET_CODE (in) == SUBREG)
4176 regno = true_regnum (in);
4177 else
4178 regno = -1;
4180 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4181 && GET_MODE_CLASS (mode) == MODE_INT
4182 && FP_REG_CLASS_P (class))
4183 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4184 return GENERAL_REGS;
4186 if (GET_CODE (in) == HIGH)
4187 in = XEXP (in, 0);
4189 /* Profiling has showed GCC spends about 2.6% of its compilation
4190 time in symbolic_operand from calls inside secondary_reload_class.
4192 We use an inline copy and only compute its return value once to avoid
4193 useless work. */
4194 switch (GET_CODE (in))
4196 rtx tmp;
4198 case SYMBOL_REF:
4199 case LABEL_REF:
4200 is_symbolic = 1;
4201 break;
4202 case CONST:
4203 tmp = XEXP (in, 0);
4204 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4205 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4206 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4207 break;
4208 default:
4209 is_symbolic = 0;
4210 break;
4213 if (!flag_pic
4214 && is_symbolic
4215 && read_only_operand (in))
4216 return NO_REGS;
4218 if (class != R1_REGS && is_symbolic)
4219 return R1_REGS;
4221 return NO_REGS;
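/* Example: reloading the address of a global symbol into a general
   register needs %r1 as a scratch for the high/low (ldil/ldo style)
   address pair, hence the R1_REGS returns above; a read-only symbol
   in non-PIC code needs no secondary reload at all.  */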
4224 enum direction
4225 function_arg_padding (mode, type)
4226 enum machine_mode mode;
4227 tree type;
4229 int size;
4231 if (mode == BLKmode)
4233 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4234 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4235 else
4236 return upward; /* Don't know if this is right, but it's
4237 the same as the old definition. */
4239 else
4240 size = GET_MODE_BITSIZE (mode);
4241 if (size < PARM_BOUNDARY)
4242 return downward;
4243 else if (size % PARM_BOUNDARY)
4244 return upward;
4245 else
4246 return none;
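/* Worked example, assuming the usual 32 bit PARM_BOUNDARY: a single
   char (8 bits) pads downward, a 6 byte BLKmode struct (48 bits,
   not a multiple of 32) pads upward, and a full 32 or 64 bit value
   needs no padding at all.  */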
4250 /* Do what is necessary for `va_start'. The argument is ignored;
4251 We look at the current function to determine if stdargs or varargs
4252 is used and fill in an initial va_list. A pointer to this constructor
4253 is returned. */
4255 struct rtx_def *
4256 hppa_builtin_saveregs (arglist)
4257 tree arglist;
4259 rtx offset;
4260 tree fntype = TREE_TYPE (current_function_decl);
4261 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4262 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4263 != void_type_node)))
4264 ? UNITS_PER_WORD : 0);
4266 if (argadj)
4267 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4268 else
4269 offset = current_function_arg_offset_rtx;
4271 /* Store general registers on the stack. */
4272 move_block_from_reg (23,
4273 gen_rtx (MEM, BLKmode,
4274 plus_constant
4275 (current_function_internal_arg_pointer, -16)),
4276 4, 4 * UNITS_PER_WORD);
4277 return copy_to_reg (expand_binop (Pmode, add_optab,
4278 current_function_internal_arg_pointer,
4279 offset, 0, 0, OPTAB_LIB_WIDEN));
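/* Layout sketch: the four register-passed argument words in
   %r23..%r26 are dumped into the 16 bytes just below the incoming
   argument pointer (their standard home slots), so the varargs code
   can then walk register and stack arguments uniformly in memory.  */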
4282 /* This routine handles all the normal conditional branch sequences we
4283 might need to generate. It handles compare immediate vs compare
4284 register, nullification of delay slots, varying length branches,
4285 negated branches, and all combinations of the above. It returns the
4286 output appropriate to emit the branch corresponding to all given
4287 parameters. */
4289 char *
4290 output_cbranch (operands, nullify, length, negated, insn)
4291 rtx *operands;
4292 int nullify, length, negated;
4293 rtx insn;
4295 static char buf[100];
4296 int useskip = 0;
4298 /* A conditional branch to the following instruction (e.g. the delay slot) is
4299 asking for a disaster. This can happen when not optimizing.
4301 In such cases it is safe to emit nothing. */
4303 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4304 return "";
4306 /* If this is a long branch with its delay slot unfilled, set `nullify'
4307 as it can nullify the delay slot and save a nop. */
4308 if (length == 8 && dbr_sequence_length () == 0)
4309 nullify = 1;
4311 /* If this is a short forward conditional branch which did not get
4312 its delay slot filled, the delay slot can still be nullified. */
4313 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4314 nullify = forward_branch_p (insn);
4316 /* A forward branch over a single nullified insn can be done with a
4317 comclr instruction. This avoids a single cycle penalty due to
4318 mis-predicted branch if we fall through (branch not taken). */
4319 if (length == 4
4320 && next_real_insn (insn) != 0
4321 && get_attr_length (next_real_insn (insn)) == 4
4322 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4323 && nullify)
4324 useskip = 1;
4326 switch (length)
4328 /* All short conditional branches except backwards with an unfilled
4329 delay slot. */
4330 case 4:
4331 if (useskip)
4332 strcpy (buf, "com%I2clr,");
4333 else
4334 strcpy (buf, "com%I2b,");
4335 if (negated)
4336 strcat (buf, "%B3");
4337 else
4338 strcat (buf, "%S3");
4339 if (useskip)
4340 strcat (buf, " %2,%1,0");
4341 else if (nullify)
4342 strcat (buf, ",n %2,%1,%0");
4343 else
4344 strcat (buf, " %2,%1,%0");
4345 break;
4347 /* All long conditionals. Note a short backward branch with an
4348 unfilled delay slot is treated just like a long backward branch
4349 with an unfilled delay slot. */
4350 case 8:
4351 /* Handle weird backwards branch with a filled delay slot
4352 which is nullified. */
4353 if (dbr_sequence_length () != 0
4354 && ! forward_branch_p (insn)
4355 && nullify)
4357 strcpy (buf, "com%I2b,");
4358 if (negated)
4359 strcat (buf, "%S3");
4360 else
4361 strcat (buf, "%B3");
4362 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4364 /* Handle short backwards branch with an unfilled delay slot.
4365 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4366 taken and untaken branches. */
4367 else if (dbr_sequence_length () == 0
4368 && ! forward_branch_p (insn)
4369 && insn_addresses
4370 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4371 - insn_addresses[INSN_UID (insn)] - 8))
4373 strcpy (buf, "com%I2b,");
4374 if (negated)
4375 strcat (buf, "%B3 %2,%1,%0%#");
4376 else
4377 strcat (buf, "%S3 %2,%1,%0%#");
4379 else
4381 strcpy (buf, "com%I2clr,");
4382 if (negated)
4383 strcat (buf, "%S3");
4384 else
4385 strcat (buf, "%B3");
4386 if (nullify)
4387 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4388 else
4389 strcat (buf, " %2,%1,0\n\tbl %0,0");
4391 break;
4393 default:
4394 abort();
4396 return buf;
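/* For instance, with length == 4, nullify set and no negation, the
   template built above is

       "com%I2b,%S3,n %2,%1,%0"

   which final substitutes into something like

       comb,=,n %r4,%r3,L$0012

   (registers, condition and label purely illustrative).  */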
4399 /* This routine handles all the branch-on-bit conditional branch sequences we
4400 might need to generate. It handles nullification of delay slots,
4401 varying length branches, negated branches and all combinations of the
4402 above. It returns the appropriate output template to emit the branch. */
4404 char *
4405 output_bb (operands, nullify, length, negated, insn, which)
4406 rtx *operands;
4407 int nullify, length, negated;
4408 rtx insn;
4409 int which;
4411 static char buf[100];
4412 int useskip = 0;
4414 /* A conditional branch to the following instruction (e.g. the delay slot) is
4415 asking for a disaster. I do not think this can happen as this pattern
4416 is only used when optimizing; jump optimization should eliminate the
4417 jump. But be prepared just in case. */
4419 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4420 return "";
4422 /* If this is a long branch with its delay slot unfilled, set `nullify'
4423 as it can nullify the delay slot and save a nop. */
4424 if (length == 8 && dbr_sequence_length () == 0)
4425 nullify = 1;
4427 /* If this is a short forward conditional branch which did not get
4428 its delay slot filled, the delay slot can still be nullified. */
4429 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4430 nullify = forward_branch_p (insn);
4432 /* A forward branch over a single nullified insn can be done with an
4433 extrs instruction. This avoids a single cycle penalty due to
4434 mis-predicted branch if we fall through (branch not taken). */
4436 if (length == 4
4437 && next_real_insn (insn) != 0
4438 && get_attr_length (next_real_insn (insn)) == 4
4439 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4440 && nullify)
4441 useskip = 1;
4443 switch (length)
4446 /* All short conditional branches except backwards with an unfilled
4447 delay slot. */
4448 case 4:
4449 if (useskip)
4450 strcpy (buf, "extrs,");
4451 else
4452 strcpy (buf, "bb,");
4453 if ((which == 0 && negated)
4454 || (which == 1 && ! negated))
4455 strcat (buf, ">=");
4456 else
4457 strcat (buf, "<");
4458 if (useskip)
4459 strcat (buf, " %0,%1,1,0");
4460 else if (nullify && negated)
4461 strcat (buf, ",n %0,%1,%3");
4462 else if (nullify && ! negated)
4463 strcat (buf, ",n %0,%1,%2");
4464 else if (! nullify && negated)
4465 strcat (buf, "%0,%1,%3");
4466 else if (! nullify && ! negated)
4467 strcat (buf, " %0,%1,%2");
4468 break;
4470 /* All long conditionals. Note a short backward branch with an
4471 unfilled delay slot is treated just like a long backward branch
4472 with an unfilled delay slot. */
4473 case 8:
4474 /* Handle weird backwards branch with a filled delay slot
4475 which is nullified. */
4476 if (dbr_sequence_length () != 0
4477 && ! forward_branch_p (insn)
4478 && nullify)
4480 strcpy (buf, "bb,");
4481 if ((which == 0 && negated)
4482 || (which == 1 && ! negated))
4483 strcat (buf, "<");
4484 else
4485 strcat (buf, ">=");
4486 if (negated)
4487 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4488 else
4489 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4491 /* Handle short backwards branch with an unfilled delay slot.
4492 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4493 taken and untaken branches. */
4494 else if (dbr_sequence_length () == 0
4495 && ! forward_branch_p (insn)
4496 && insn_addresses
4497 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4498 - insn_addresses[INSN_UID (insn)] - 8))
4500 strcpy (buf, "bb,");
4501 if ((which == 0 && negated)
4502 || (which == 1 && ! negated))
4503 strcat (buf, ">=");
4504 else
4505 strcat (buf, "<");
4506 if (negated)
4507 strcat (buf, " %0,%1,%3%#");
4508 else
4509 strcat (buf, " %0,%1,%2%#");
4511 else
4513 strcpy (buf, "extrs,");
4514 if ((which == 0 && negated)
4515 || (which == 1 && ! negated))
4516 strcat (buf, "<");
4517 else
4518 strcat (buf, ">=");
4519 if (nullify && negated)
4520 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4521 else if (nullify && ! negated)
4522 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4523 else if (negated)
4524 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4525 else
4526 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4528 break;
4530 default:
4531 abort();
4533 return buf;
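/* A short-form sample (which == 0, no negation, no nullification):

       bb,< %r4,5,L$0021

   i.e. test one bit of %r4 and branch; the completer encodes the
   sense of the bit test (operands illustrative).  */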
4536 /* This routine handles all the branch-on-variable-bit conditional branch
4537 sequences we might need to generate. It handles nullification of delay
4538 slots, varying length branches, negated branches and all combinations
4539 of the above. It returns the appropriate output template to emit the
4540 branch. */
4542 char *
4543 output_bvb (operands, nullify, length, negated, insn, which)
4544 rtx *operands;
4545 int nullify, length, negated;
4546 rtx insn;
4547 int which;
4549 static char buf[100];
4550 int useskip = 0;
4552 /* A conditional branch to the following instruction (e.g. the delay slot) is
4553 asking for a disaster. I do not think this can happen as this pattern
4554 is only used when optimizing; jump optimization should eliminate the
4555 jump. But be prepared just in case. */
4557 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4558 return "";
4560 /* If this is a long branch with its delay slot unfilled, set `nullify'
4561 as it can nullify the delay slot and save a nop. */
4562 if (length == 8 && dbr_sequence_length () == 0)
4563 nullify = 1;
4565 /* If this is a short forward conditional branch which did not get
4566 its delay slot filled, the delay slot can still be nullified. */
4567 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4568 nullify = forward_branch_p (insn);
4570 /* A forward branch over a single nullified insn can be done with an
4571 extrs instruction. This avoids a single cycle penalty due to
4572 mis-predicted branch if we fall through (branch not taken). */
4574 if (length == 4
4575 && next_real_insn (insn) != 0
4576 && get_attr_length (next_real_insn (insn)) == 4
4577 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4578 && nullify)
4579 useskip = 1;
4581 switch (length)
4584 /* All short conditional branches except backwards with an unfilled
4585 delay slot. */
4586 case 4:
4587 if (useskip)
4588 strcpy (buf, "vextrs,");
4589 else
4590 strcpy (buf, "bvb,");
4591 if ((which == 0 && negated)
4592 || (which == 1 && ! negated))
4593 strcat (buf, ">=");
4594 else
4595 strcat (buf, "<");
4596 if (useskip)
4597 strcat (buf, " %0,1,0");
4598 else if (nullify && negated)
4599 strcat (buf, ",n %0,%3");
4600 else if (nullify && ! negated)
4601 strcat (buf, ",n %0,%2");
4602 else if (! nullify && negated)
4603 strcat (buf, "%0,%3");
4604 else if (! nullify && ! negated)
4605 strcat (buf, " %0,%2");
4606 break;
4608 /* All long conditionals. Note a short backward branch with an
4609 unfilled delay slot is treated just like a long backward branch
4610 with an unfilled delay slot. */
4611 case 8:
4612 /* Handle weird backwards branch with a filled delay slot
4613 which is nullified. */
4614 if (dbr_sequence_length () != 0
4615 && ! forward_branch_p (insn)
4616 && nullify)
4618 strcpy (buf, "bvb,");
4619 if ((which == 0 && negated)
4620 || (which == 1 && ! negated))
4621 strcat (buf, "<");
4622 else
4623 strcat (buf, ">=");
4624 if (negated)
4625 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4626 else
4627 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4629 /* Handle short backwards branch with an unfilled delay slot.
4630 Using a bvb;nop rather than vextrs;bl saves 1 cycle for both
4631 taken and untaken branches. */
4632 else if (dbr_sequence_length () == 0
4633 && ! forward_branch_p (insn)
4634 && insn_addresses
4635 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4636 - insn_addresses[INSN_UID (insn)] - 8))
4638 strcpy (buf, "bvb,");
4639 if ((which == 0 && negated)
4640 || (which == 1 && ! negated))
4641 strcat (buf, ">=");
4642 else
4643 strcat (buf, "<");
4644 if (negated)
4645 strcat (buf, " %0,%3%#");
4646 else
4647 strcat (buf, " %0,%2%#");
4649 else
4651 strcpy (buf, "vextrs,");
4652 if ((which == 0 && negated)
4653 || (which == 1 && ! negated))
4654 strcat (buf, "<");
4655 else
4656 strcat (buf, ">=");
4657 if (nullify && negated)
4658 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4659 else if (nullify && ! negated)
4660 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4661 else if (negated)
4662 strcat (buf, " %0,1,0\n\tbl %3,0");
4663 else
4664 strcat (buf, " %0,1,0\n\tbl %2,0");
4666 break;
4668 default:
4669 abort();
4671 return buf;
4674 /* Return the output template for emitting a dbra type insn.
4676 Note it may perform some output operations on its own before
4677 returning the final output string. */
4678 char *
4679 output_dbra (operands, insn, which_alternative)
4680 rtx *operands;
4681 rtx insn;
4682 int which_alternative;
4685 /* A conditional branch to the following instruction (e.g. the delay slot) is
4686 asking for a disaster. Be prepared! */
4688 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4690 if (which_alternative == 0)
4691 return "ldo %1(%0),%0";
4692 else if (which_alternative == 1)
4694 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4695 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4696 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4697 return "fldws -16(0,%%r30),%0";
4699 else
4701 output_asm_insn ("ldw %0,%4", operands);
4702 return "ldo %1(%4),%4\n\tstw %4,%0";
4706 if (which_alternative == 0)
4708 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4709 int length = get_attr_length (insn);
4711 /* If this is a long branch with its delay slot unfilled, set `nullify'
4712 as it can nullify the delay slot and save a nop. */
4713 if (length == 8 && dbr_sequence_length () == 0)
4714 nullify = 1;
4716 /* If this is a short forward conditional branch which did not get
4717 its delay slot filled, the delay slot can still be nullified. */
4718 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4719 nullify = forward_branch_p (insn);
4721 /* Handle short versions first. */
4722 if (length == 4 && nullify)
4723 return "addib,%C2,n %1,%0,%3";
4724 else if (length == 4 && ! nullify)
4725 return "addib,%C2 %1,%0,%3";
4726 else if (length == 8)
4728 /* Handle weird backwards branch with a filled delay slot
4729 which is nullified. */
4730 if (dbr_sequence_length () != 0
4731 && ! forward_branch_p (insn)
4732 && nullify)
4733 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4734 /* Handle short backwards branch with an unfilled delay slot.
4735 Using a addb;nop rather than addi;bl saves 1 cycle for both
4736 taken and untaken branches. */
4737 else if (dbr_sequence_length () == 0
4738 && ! forward_branch_p (insn)
4739 && insn_addresses
4740 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4741 - insn_addresses[INSN_UID (insn)] - 8))
4742 return "addib,%C2 %1,%0,%3%#";
4744 /* Handle normal cases. */
4745 if (nullify)
4746 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4747 else
4748 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4750 else
4751 abort();
4753 /* Deal with gross reload from FP register case. */
4754 else if (which_alternative == 1)
4756 /* Move loop counter from FP register to MEM then into a GR,
4757 increment the GR, store the GR into MEM, and finally reload
4758 the FP register from MEM from within the branch's delay slot. */
4759 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4760 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4761 if (get_attr_length (insn) == 24)
4762 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4763 else
4764 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4766 /* Deal with gross reload from memory case. */
4767 else
4769 /* Reload loop counter from memory, the store back to memory
4770 happens in the branch's delay slot. */
4771 output_asm_insn ("ldw %0,%4", operands);
4772 if (get_attr_length (insn) == 12)
4773 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4774 else
4775 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
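/* E.g. the short form for a counted loop whose counter lives in a
   general register comes out as

       addib,> -1,%r3,L$loop

   -- add the increment, test the result and branch, all in one
   instruction (operands illustrative).  */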
4779 /* Return the output template for emitting a movb type insn.
4781 Note it may perform some output operations on its own before
4782 returning the final output string. */
4783 char *
4784 output_movb (operands, insn, which_alternative, reverse_comparison)
4785 rtx *operands;
4786 rtx insn;
4787 int which_alternative;
4788 int reverse_comparison;
4791 /* A conditional branch to the following instruction (e.g. the delay slot) is
4792 asking for a disaster. Be prepared! */
4794 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4796 if (which_alternative == 0)
4797 return "copy %1,%0";
4798 else if (which_alternative == 1)
4800 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4801 return "fldws -16(0,%%r30),%0";
4803 else if (which_alternative == 2)
4804 return "stw %1,%0";
4805 else
4806 return "mtsar %r1";
4809 /* Support the second variant. */
4810 if (reverse_comparison)
4811 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4813 if (which_alternative == 0)
4815 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4816 int length = get_attr_length (insn);
4818 /* If this is a long branch with its delay slot unfilled, set `nullify'
4819 as it can nullify the delay slot and save a nop. */
4820 if (length == 8 && dbr_sequence_length () == 0)
4821 nullify = 1;
4823 /* If this is a short forward conditional branch which did not get
4824 its delay slot filled, the delay slot can still be nullified. */
4825 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4826 nullify = forward_branch_p (insn);
4828 /* Handle short versions first. */
4829 if (length == 4 && nullify)
4830 return "movb,%C2,n %1,%0,%3";
4831 else if (length == 4 && ! nullify)
4832 return "movb,%C2 %1,%0,%3";
4833 else if (length == 8)
4835 /* Handle weird backwards branch with a filled delay slot
4836 which is nullified. */
4837 if (dbr_sequence_length () != 0
4838 && ! forward_branch_p (insn)
4839 && nullify)
4840 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4842 /* Handle short backwards branch with an unfilled delay slot.
4843 Using a movb;nop rather than or;bl saves 1 cycle for both
4844 taken and untaken branches. */
4845 else if (dbr_sequence_length () == 0
4846 && ! forward_branch_p (insn)
4847 && insn_addresses
4848 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4849 - insn_addresses[INSN_UID (insn)] - 8))
4850 return "movb,%C2 %1,%0,%3%#";
4851 /* Handle normal cases. */
4852 if (nullify)
4853 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4854 else
4855 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4857 else
4858 abort();
4860 /* Deal with gross reload from FP register case. */
4861 else if (which_alternative == 1)
4863 /* Copy the source register into MEM, then reload the FP
4864 destination register from MEM from within the branch's
4865 delay slot. */
4866 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4867 if (get_attr_length (insn) == 12)
4868 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
4869 else
4870 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4872 /* Deal with gross reload from memory case. */
4873 else if (which_alternative == 2)
4875 /* Reload loop counter from memory, the store back to memory
4876 happens in the branch's delay slot. */
4877 if (get_attr_length (insn) == 8)
4878 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
4879 else
4880 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
4882 /* Handle SAR as a destination. */
4883 else
4885 if (get_attr_length (insn) == 8)
4886 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
4887 else
4888 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
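/* Short-form sample (alternative 0, no nullification):

       movb,= %r26,%r3,L$0042

   copies %r26 into %r3 and branches when the copied value is zero
   (operands illustrative).  */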
4893 /* INSN is a millicode call. It may have an unconditional jump in its delay
4894 slot.
4896 CALL_DEST is the routine we are calling. */
4898 char *
4899 output_millicode_call (insn, call_dest)
4900 rtx insn;
4901 rtx call_dest;
4903 int distance;
4904 rtx xoperands[4];
4905 rtx seq_insn;
4907 /* Handle common case -- empty delay slot or no jump in the delay slot,
4908 and we're sure that the branch will reach the beginning of the $CODE$
4909 subspace. */
4910 if ((dbr_sequence_length () == 0
4911 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
4912 || (dbr_sequence_length () != 0
4913 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4914 && get_attr_length (insn) == 4))
4916 xoperands[0] = call_dest;
4917 output_asm_insn ("bl %0,%%r31%#", xoperands);
4918 return "";
4921 /* This call may not reach the beginning of the $CODE$ subspace. */
4922 if (get_attr_length (insn) > 4)
4924 int delay_insn_deleted = 0;
4925 rtx xoperands[2];
4926 rtx link;
4928 /* We need to emit an inline long-call branch. */
4929 if (dbr_sequence_length () != 0
4930 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
4932 /* A non-jump insn in the delay slot. By definition we can
4933 emit this insn before the call. */
4934 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
4936 /* Now delete the delay insn. */
4937 PUT_CODE (NEXT_INSN (insn), NOTE);
4938 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4939 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4940 delay_insn_deleted = 1;
4943 /* If we're allowed to use be/ble instructions, then this is the
4944 best sequence to use for a long millicode call. */
4945 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
4946 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
4948 xoperands[0] = call_dest;
4949 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
4950 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
4951 output_asm_insn ("nop", xoperands);
4953 /* Pure portable runtime doesn't allow be/ble; we also don't have
4954 PIC support in the assembler/linker, so this sequence is needed. */
4955 else if (TARGET_PORTABLE_RUNTIME)
4957 xoperands[0] = call_dest;
4958 /* Get the address of our target into %r29. */
4959 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
4960 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
4962 /* Get our return address into %r31. */
4963 output_asm_insn ("blr 0,%%r31", xoperands);
4965 /* Jump to our target address in %r29. */
4966 output_asm_insn ("bv,n 0(%%r29)", xoperands);
4968 /* Empty delay slot. Note this insn gets fetched twice and
4969 executed once. To be safe we use a nop. */
4970 output_asm_insn ("nop", xoperands);
4971 return "";
4973 /* PIC long millicode call sequence. */
4974 else
4976 xoperands[0] = call_dest;
4977 xoperands[1] = gen_label_rtx ();
4978 /* Get our address + 8 into %r1. */
4979 output_asm_insn ("bl .+8,%%r1", xoperands);
4981 /* Add %r1 to the offset of our target from the next insn. */
4982 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
4983 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4984 CODE_LABEL_NUMBER (xoperands[1]));
4985 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
4987 /* Get the return address into %r31. */
4988 output_asm_insn ("blr 0,%%r31", xoperands);
4990 /* Branch to our target which is in %r1. */
4991 output_asm_insn ("bv,n 0(%%r1)", xoperands);
4993 /* Empty delay slot. Note this insn gets fetched twice and
4994 executed once. To be safe we use a nop. */
4995 output_asm_insn ("nop", xoperands);
4998 /* If we had a jump in the call's delay slot, output it now. */
4999 if (dbr_sequence_length () != 0
5000 && !delay_insn_deleted)
5002 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5003 output_asm_insn ("b,n %0", xoperands);
5005 /* Now delete the delay insn. */
5006 PUT_CODE (NEXT_INSN (insn), NOTE);
5007 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5008 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5010 return "";
5013 /* This call has an unconditional jump in its delay slot and the
5014 call is known to reach its target or the beginning of the current
5015 subspace. */
5017 /* Use the containing sequence insn's address. */
5018 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5020 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5021 - insn_addresses[INSN_UID (seq_insn)] - 8;
5023 /* If the branch was too far away, emit a normal call followed
5024 by a nop, followed by the unconditional branch.
5026 If the branch is close, then adjust %r2 from within the
5027 call's delay slot. */
5029 xoperands[0] = call_dest;
5030 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5031 if (! VAL_14_BITS_P (distance))
5032 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
5033 else
5035 xoperands[3] = gen_label_rtx ();
5036 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
5037 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5038 CODE_LABEL_NUMBER (xoperands[3]));
5041 /* Delete the jump. */
5042 PUT_CODE (NEXT_INSN (insn), NOTE);
5043 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5044 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5045 return "";
5048 extern struct obstack permanent_obstack;
5049 extern struct obstack *saveable_obstack;
5050 extern struct obstack *rtl_obstack;
5051 extern struct obstack *current_obstack;
5053 /* INSN is a function call. It may have an unconditional jump
5054 in its delay slot.
5056 CALL_DEST is the routine we are calling. */
5058 char *
5059 output_call (insn, call_dest)
5060 rtx insn;
5061 rtx call_dest;
5063 int distance;
5064 rtx xoperands[4];
5065 rtx seq_insn;
5067 /* Handle common case -- empty delay slot or no jump in the delay slot,
5068 and we're sure that the branch will reach the beginning of the $CODE$
5069 subspace. */
5070 if ((dbr_sequence_length () == 0
5071 && get_attr_length (insn) == 8)
5072 || (dbr_sequence_length () != 0
5073 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5074 && get_attr_length (insn) == 4))
5076 xoperands[0] = call_dest;
5077 output_asm_insn ("bl %0,%%r2%#", xoperands);
5078 return "";
5081 /* This call may not reach the beginning of the $CODE$ subspace. */
5082 if (get_attr_length (insn) > 8)
5084 int delay_insn_deleted = 0;
5085 rtx xoperands[2];
5086 rtx link;
5088 /* We need to emit an inline long-call branch. Furthermore,
5089 because we're changing a named function call into an indirect
5090 function call well after the parameters have been set up, we
5091 need to make sure any FP args appear in both the integer
5092 and FP registers. Also, we need move any delay slot insn
5093 out of the delay slot. And finally, we can't rely on the linker
5094 being able to fix the call to $$dyncall -- yuk! */
5095 if (dbr_sequence_length () != 0
5096 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5098 /* A non-jump insn in the delay slot. By definition we can
5099 emit this insn before the call (and in fact before argument
5100 relocating). */
5101 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5103 /* Now delete the delay insn. */
5104 PUT_CODE (NEXT_INSN (insn), NOTE);
5105 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5106 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5107 delay_insn_deleted = 1;
5110 /* Now copy any FP arguments into integer registers. */
5111 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5113 int arg_mode, regno;
5114 rtx use = XEXP (link, 0);
5115 if (! (GET_CODE (use) == USE
5116 && GET_CODE (XEXP (use, 0)) == REG
5117 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5118 continue;
5120 arg_mode = GET_MODE (XEXP (use, 0));
5121 regno = REGNO (XEXP (use, 0));
5122 /* Is it a floating point register? */
5123 if (regno >= 32 && regno <= 39)
5125 /* Copy from the FP register into an integer register
5126 (via memory). */
5127 if (arg_mode == SFmode)
5129 xoperands[0] = XEXP (use, 0);
5130 xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
5131 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
5132 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5134 else
5136 xoperands[0] = XEXP (use, 0);
5137 xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
5138 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
5139 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5140 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5145 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5146 we don't have any direct calls in that case. */
5148 int i;
5149 char *name = XSTR (call_dest, 0);
5151 /* See if we have already put this function on the list
5152 of deferred plabels. This list is generally small,
5153 so a linear search is not too ugly. If it proves too
5154 slow, replace it with something faster. */
5155 for (i = 0; i < n_deferred_plabels; i++)
5156 if (strcmp (name, deferred_plabels[i].name) == 0)
5157 break;
5159 /* If the deferred plabel list is empty, or this entry was
5160 not found on the list, create a new entry on the list. */
5161 if (deferred_plabels == NULL || i == n_deferred_plabels)
5163 struct obstack *ambient_obstack = current_obstack;
5164 struct obstack *ambient_rtl_obstack = rtl_obstack;
5165 char *real_name;
5167 /* Any RTL we create here needs to live until the end of
5168 the compilation unit and therefore must live on the
5169 permanent obstack. */
5170 current_obstack = &permanent_obstack;
5171 rtl_obstack = &permanent_obstack;
5173 if (deferred_plabels == 0)
5174 deferred_plabels = (struct deferred_plabel *)
5175 xmalloc (1 * sizeof (struct deferred_plabel));
5176 else
5177 deferred_plabels = (struct deferred_plabel *)
5178 xrealloc (deferred_plabels,
5179 ((n_deferred_plabels + 1)
5180 * sizeof (struct deferred_plabel)));
5182 i = n_deferred_plabels++;
5183 deferred_plabels[i].internal_label = gen_label_rtx ();
5184 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
5185 strlen (name) + 1);
5186 strcpy (deferred_plabels[i].name, name);
5188 /* Switch back to normal obstack allocation. */
5189 current_obstack = ambient_obstack;
5190 rtl_obstack = ambient_rtl_obstack;
5192 /* Gross. We have just implicitly taken the address of this
5193 function; mark it as such. */
5194 STRIP_NAME_ENCODING (real_name, name);
5195 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5198 /* We have to load the address of the function using a procedure
5199 label (plabel). Inline plabels can lose for PIC and other
5200 cases, so avoid them by creating a 32bit plabel in the data
5201 segment. */
5202 if (flag_pic)
5204 xoperands[0] = deferred_plabels[i].internal_label;
5205 xoperands[1] = gen_label_rtx ();
5207 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5208 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5209 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5211 /* Get our address + 8 into %r1. */
5212 output_asm_insn ("bl .+8,%%r1", xoperands);
5214 /* Add %r1 to the offset of dyncall from the next insn. */
5215 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5216 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5217 CODE_LABEL_NUMBER (xoperands[1]));
5218 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5220 /* Get the return address into %r31. */
5221 output_asm_insn ("blr 0,%%r31", xoperands);
5223 /* Branch to our target which is in %r1. */
5224 output_asm_insn ("bv 0(%%r1)", xoperands);
5226 /* Copy the return address into %r2 also. */
5227 output_asm_insn ("copy %%r31,%%r2", xoperands);
5229 else
5231 xoperands[0] = deferred_plabels[i].internal_label;
5233 /* Get the address of our target into %r22. */
5234 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
5235 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
5237 /* Get the high part of the address of $dyncall into %r2, then
5238 add in the low part in the branch instruction. */
5239 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5240 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5242 /* Copy the return pointer into both %r31 and %r2. */
5243 output_asm_insn ("copy %%r31,%%r2", xoperands);
5247 /* If we had a jump in the call's delay slot, output it now. */
5248 if (dbr_sequence_length () != 0
5249 && !delay_insn_deleted)
5251 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5252 output_asm_insn ("b,n %0", xoperands);
5254 /* Now delete the delay insn. */
5255 PUT_CODE (NEXT_INSN (insn), NOTE);
5256 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5257 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5259 return "";
5262 /* This call has an unconditional jump in its delay slot and the
5263 call is known to reach its target or the beginning of the current
5264 subspace. */
5266 /* Use the containing sequence insn's address. */
5267 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5269 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5270 - insn_addresses[INSN_UID (seq_insn)] - 8;
5272 /* If the branch was too far away, emit a normal call followed
5273 by a nop, followed by the unconditional branch.
5275 If the branch is close, then adjust %r2 from within the
5276 call's delay slot. */
5278 xoperands[0] = call_dest;
5279 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5280 if (! VAL_14_BITS_P (distance))
5281 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5282 else
5284 xoperands[3] = gen_label_rtx ();
5285 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5286 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5287 CODE_LABEL_NUMBER (xoperands[3]));
5290 /* Delete the jump. */
5291 PUT_CODE (NEXT_INSN (insn), NOTE);
5292 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5293 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5294 return "";
5297 /* In HPUX 8.0's shared library scheme, special relocations are needed
5298 for function labels if they might be passed to a function
5299 in a shared library (because shared libraries don't live in code
5300 space), and special magic is needed to construct their address.
5302 For reasons too disgusting to describe, storage for the new name
5303 is allocated either on the saveable_obstack (released at function
5304 exit) or on the permanent_obstack for things that can never change
5305 (libcall names for example). */
5307 void
5308 hppa_encode_label (sym, permanent)
5309 rtx sym;
5310 int permanent;
5312 char *str = XSTR (sym, 0);
5313 int len = strlen (str);
5314 char *newstr;
5316 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5317 len + 2);
5319 if (str[0] == '*')
5320 *newstr++ = *str++;
5321 strcpy (newstr + 1, str);
5322 *newstr = '@';
5323 XSTR (sym,0) = newstr;
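/* Example transformations (the '@' is what FUNCTION_NAME_P later
   keys off of):

       "foo"   ->  "@foo"
       "*bar"  ->  "*@bar"  */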
5327 function_label_operand (op, mode)
5328 rtx op;
5329 enum machine_mode mode;
5331 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5334 /* Returns 1 if OP is a function label involved in a simple addition
5335 with a constant. Used to keep certain patterns from matching
5336 during instruction combination. */
5338 is_function_label_plus_const (op)
5339 rtx op;
5341 /* Strip off any CONST. */
5342 if (GET_CODE (op) == CONST)
5343 op = XEXP (op, 0);
5345 return (GET_CODE (op) == PLUS
5346 && function_label_operand (XEXP (op, 0), Pmode)
5347 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5350 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5351 use in fmpyadd instructions. */
5353 fmpyaddoperands (operands)
5354 rtx *operands;
5356 enum machine_mode mode = GET_MODE (operands[0]);
5358 /* Must be a floating point mode. */
5359 if (mode != SFmode && mode != DFmode)
5360 return 0;
5362 /* All modes must be the same. */
5363 if (! (mode == GET_MODE (operands[1])
5364 && mode == GET_MODE (operands[2])
5365 && mode == GET_MODE (operands[3])
5366 && mode == GET_MODE (operands[4])
5367 && mode == GET_MODE (operands[5])))
5368 return 0;
5370 /* All operands must be registers. */
5371 if (! (GET_CODE (operands[1]) == REG
5372 && GET_CODE (operands[2]) == REG
5373 && GET_CODE (operands[3]) == REG
5374 && GET_CODE (operands[4]) == REG
5375 && GET_CODE (operands[5]) == REG))
5376 return 0;
5378 /* Only 2 real operands to the addition. One of the input operands must
5379 be the same as the output operand. */
5380 if (! rtx_equal_p (operands[3], operands[4])
5381 && ! rtx_equal_p (operands[3], operands[5]))
5382 return 0;
5384 /* The inout operand of the add cannot conflict with any operands of the multiply. */
5385 if (rtx_equal_p (operands[3], operands[0])
5386 || rtx_equal_p (operands[3], operands[1])
5387 || rtx_equal_p (operands[3], operands[2]))
5388 return 0;
5390 /* The multiply cannot feed into the addition operands. */
5391 if (rtx_equal_p (operands[4], operands[0])
5392 || rtx_equal_p (operands[5], operands[0]))
5393 return 0;
5395 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5396 if (mode == SFmode
5397 && (REGNO (operands[0]) < 57
5398 || REGNO (operands[1]) < 57
5399 || REGNO (operands[2]) < 57
5400 || REGNO (operands[3]) < 57
5401 || REGNO (operands[4]) < 57
5402 || REGNO (operands[5]) < 57))
5403 return 0;
5405 /* Passed. Operands are suitable for fmpyadd. */
5406 return 1;
5409 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5410 use in fmpysub instructions. */
5412 fmpysuboperands (operands)
5413 rtx *operands;
5415 enum machine_mode mode = GET_MODE (operands[0]);
5417 /* Must be a floating point mode. */
5418 if (mode != SFmode && mode != DFmode)
5419 return 0;
5421 /* All modes must be the same. */
5422 if (! (mode == GET_MODE (operands[1])
5423 && mode == GET_MODE (operands[2])
5424 && mode == GET_MODE (operands[3])
5425 && mode == GET_MODE (operands[4])
5426 && mode == GET_MODE (operands[5])))
5427 return 0;
5429 /* All operands must be registers. */
5430 if (! (GET_CODE (operands[1]) == REG
5431 && GET_CODE (operands[2]) == REG
5432 && GET_CODE (operands[3]) == REG
5433 && GET_CODE (operands[4]) == REG
5434 && GET_CODE (operands[5]) == REG))
5435 return 0;
5437 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5438 operation, so operands[4] must be the same as operands[3]. */
5439 if (! rtx_equal_p (operands[3], operands[4]))
5440 return 0;
5442 /* The multiply cannot feed into the subtraction. */
5443 if (rtx_equal_p (operands[5], operands[0]))
5444 return 0;
5446 /* The inout operand of the sub cannot conflict with any operands of the multiply. */
5447 if (rtx_equal_p (operands[3], operands[0])
5448 || rtx_equal_p (operands[3], operands[1])
5449 || rtx_equal_p (operands[3], operands[2]))
5450 return 0;
5452 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5453 if (mode == SFmode
5454 && (REGNO (operands[0]) < 57
5455 || REGNO (operands[1]) < 57
5456 || REGNO (operands[2]) < 57
5457 || REGNO (operands[3]) < 57
5458 || REGNO (operands[4]) < 57
5459 || REGNO (operands[5]) < 57))
5460 return 0;
5462 /* Passed. Operands are suitable for fmpysub. */
5463 return 1;
5467 plus_xor_ior_operator (op, mode)
5468 rtx op;
5469 enum machine_mode mode;
5471 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5472 || GET_CODE (op) == IOR);
5475 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
5476 constants for shadd instructions. */
5478 shadd_constant_p (val)
5479 int val;
5481 if (val == 2 || val == 4 || val == 8)
5482 return 1;
5483 else
5484 return 0;
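/* The values 2, 4 and 8 correspond to shift counts of 1, 2 and 3,
   so e.g. "x * 4 + y" can become a single

       sh2add %r4,%r5,%r6

   computing (%r4 << 2) + %r5 into %r6 (registers illustrative).  */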
5487 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5488 the valid constant for shadd instructions. */
5490 shadd_operand (op, mode)
5491 rtx op;
5492 enum machine_mode mode;
5494 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5497 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5500 basereg_operand (op, mode)
5501 rtx op;
5502 enum machine_mode mode;
5504 /* cse will create some unscaled indexed addresses; however, it
5505 generally isn't a win on the PA, so avoid creating unscaled
5506 indexed addresses until after cse is finished. */
5507 if (!cse_not_expected)
5508 return 0;
5510 /* Once reload has started everything is considered valid. Reload should
5511 only create indexed addresses using the stack/frame pointer, and any
5512 others were checked for validity when created by the combine pass.
5514 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5515 we don't have to worry about the braindamaged implicit space register
5516 selection using the basereg only (rather than effective address)
5517 screwing us over. */
5518 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5519 return (GET_CODE (op) == REG);
5521 /* Stack is always OK for indexing. */
5522 if (op == stack_pointer_rtx)
5523 return 1;
5525 /* While it's always safe to index off the frame pointer, it's not
5526 always profitable, particularly when the frame pointer is being
5527 eliminated. */
5528 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5529 return 1;
5531 /* The only other valid OPs are pseudo registers with
5532 REGNO_POINTER_FLAG set. */
5533 if (GET_CODE (op) != REG
5534 || REGNO (op) < FIRST_PSEUDO_REGISTER
5535 || ! register_operand (op, mode))
5536 return 0;
5538 return REGNO_POINTER_FLAG (REGNO (op));
5541 /* Return 1 if this operand is anything other than a hard register. */
5544 non_hard_reg_operand (op, mode)
5545 rtx op;
5546 enum machine_mode mode;
5548 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5551 /* Return 1 if INSN branches forward. Should be using insn_addresses
5552 to avoid walking through all the insns... */
5554 forward_branch_p (insn)
5555 rtx insn;
5557 rtx label = JUMP_LABEL (insn);
5559 while (insn)
5561 if (insn == label)
5562 break;
5563 else
5564 insn = NEXT_INSN (insn);
5567 return (insn == label);
5570 /* Return 1 if OP is an equality comparison, else return 0. */
5572 eq_neq_comparison_operator (op, mode)
5573 rtx op;
5574 enum machine_mode mode;
5576 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5579 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5581 movb_comparison_operator (op, mode)
5582 rtx op;
5583 enum machine_mode mode;
5585 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5586 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5589 /* Return 1 if INSN is in the delay slot of a call instruction. */
5591 jump_in_call_delay (insn)
5592 rtx insn;
5595 if (GET_CODE (insn) != JUMP_INSN)
5596 return 0;
5598 if (PREV_INSN (insn)
5599 && PREV_INSN (PREV_INSN (insn))
5600 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5602 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5604 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5605 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5608 else
5609 return 0;
5612 /* Output an unconditional move and branch insn. */
5614 char *
5615 output_parallel_movb (operands, length)
5616 rtx *operands;
5617 int length;
5619 /* These are the cases in which we win. */
5620 if (length == 4)
5621 return "mov%I1b,tr %1,%0,%2";
5623 /* None of these cases wins, but they don't lose either. */
5624 if (dbr_sequence_length () == 0)
5626 /* Nothing in the delay slot, fake it by putting the combined
5627 insn (the copy or add) in the delay slot of a bl. */
5628 if (GET_CODE (operands[1]) == CONST_INT)
5629 return "bl %2,0\n\tldi %1,%0";
5630 else
5631 return "bl %2,0\n\tcopy %1,%0";
5633 else
5635 /* Something in the delay slot, but we've got a long branch. */
5636 if (GET_CODE (operands[1]) == CONST_INT)
5637 return "ldi %1,%0\n\tbl %2,0";
5638 else
5639 return "copy %1,%0\n\tbl %2,0";
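/* E.g. the winning length == 4 case fuses "copy %r5,%r3" with an
   unconditional branch into

       movb,tr %r5,%r3,L$0099

   where the always-true ",tr" completer makes the branch
   unconditional (operands illustrative).  */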
5643 /* Output an unconditional add and branch insn. */
5645 char *
5646 output_parallel_addb (operands, length)
5647 rtx *operands;
5648 int length;
5650 /* To make life easy we want operand0 to be the shared input/output
5651 operand and operand1 to be the readonly operand. */
5652 if (operands[0] == operands[1])
5653 operands[1] = operands[2];
5655 /* These are the cases in which we win. */
5656 if (length == 4)
5657 return "add%I1b,tr %1,%0,%3";
5659 /* None of these cases wins, but they don't lose either. */
5660 if (dbr_sequence_length () == 0)
5662 /* Nothing in the delay slot, fake it by putting the combined
5663 insn (the copy or add) in the delay slot of a bl. */
5664 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5666 else
5668 /* Something in the delay slot, but we've got a long branch. */
5669 return "add%I1 %1,%0,%0\n\tbl %3,0";
5673 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5674 is used to discourage creating parallel movb/addb insns since a jump
5675 which immediately follows a call can execute in the delay slot of the
5676 call. */
5678 following_call (insn)
5679 rtx insn;
5681 /* Find the previous real insn, skipping NOTEs. */
5682 insn = PREV_INSN (insn);
5683 while (insn && GET_CODE (insn) == NOTE)
5684 insn = PREV_INSN (insn);
5686 /* Check for CALL_INSNs and millicode calls. */
5687 if (insn
5688 && (GET_CODE (insn) == CALL_INSN
5689 || (GET_CODE (insn) == INSN
5690 && GET_CODE (PATTERN (insn)) != SEQUENCE
5691 && GET_CODE (PATTERN (insn)) != USE
5692 && GET_CODE (PATTERN (insn)) != CLOBBER
5693 && get_attr_type (insn) == TYPE_MILLI)))
5694 return 1;
5696 return 0;
5699 /* We use this hook to perform a PA specific optimization which is difficult
5700 to do in earlier passes.
5702 We want the delay slots of branches within jump tables to be filled.
5703 None of the compiler passes at the moment even has the notion that a
5704 PA jump table doesn't contain addresses, but instead contains actual
5705 instructions!
5707 Because we actually jump into the table, the addresses of each entry
5708 must stay constant in relation to the beginning of the table (which
5709 itself must stay constant relative to the instruction to jump into
5710 it). I don't believe we can guarantee earlier passes of the compiler
5711 will adhere to those rules.
5713 So, late in the compilation process we find all the jump tables, and
5714 expand them into real code -- e.g. each entry in the jump table vector
5715 will get an appropriate label followed by a jump to the final target.
5717 Reorg and the final jump pass can then optimize these branches and
5718 fill their delay slots. We end up with smaller, more efficient code.
5720 The jump instructions within the table are special; we must be able
5721 to identify them during assembly output (if the jumps don't get filled
5722 we need to emit a nop rather than nullifying the delay slot). We
5723 identify jumps in switch tables by marking the SET with DImode. */
5725 pa_reorg (insns)
5726 rtx insns;
5728 rtx insn;
5730 remove_useless_addtr_insns (insns, 1);
5732 pa_combine_instructions (get_insns ());
5734 /* This is fairly cheap, so always run it if optimizing. */
5735 if (optimize > 0)
5737 /* Find and explode all ADDR_VEC insns. */
5738 insns = get_insns ();
5739 for (insn = insns; insn; insn = NEXT_INSN (insn))
5741 rtx pattern, tmp, location;
5742 unsigned int length, i;
5744 /* Find an ADDR_VEC insn to explode. */
5745 if (GET_CODE (insn) != JUMP_INSN
5746 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5747 continue;
5749 /* If needed, emit marker for the beginning of the branch table. */
5750 if (TARGET_GAS)
5751 emit_insn_before (gen_begin_brtab (), insn);
5753 pattern = PATTERN (insn);
5754 location = PREV_INSN (insn);
5755 length = XVECLEN (pattern, 0);
5757 for (i = 0; i < length; i++)
5759 /* Emit the jump itself. */
5760 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
5761 tmp = emit_jump_insn_after (tmp, location);
5762 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
5763 LABEL_NUSES (JUMP_LABEL (tmp))++;
5765 /* Emit a BARRIER after the jump. */
5766 location = NEXT_INSN (location);
5767 emit_barrier_after (location);
5769 /* Put a CODE_LABEL before each so jump.c does not optimize
5770 the jumps away. */
5771 location = NEXT_INSN (location);
5772 tmp = gen_label_rtx ();
5773 LABEL_NUSES (tmp) = 1;
5774 emit_label_after (tmp, location);
5775 location = NEXT_INSN (location);
5778 /* If needed, emit marker for the end of the branch table. */
5779 if (TARGET_GAS)
5780 emit_insn_before (gen_end_brtab (), location);
5781 /* Delete the ADDR_VEC. */
5782 delete_insn (insn);
5785 else if (TARGET_GAS)
5787 /* Still need an end_brtab insn. */
5788 insns = get_insns ();
5789 for (insn = insns; insn; insn = NEXT_INSN (insn))
5791 /* Find an ADDR_VEC insn. */
5792 if (GET_CODE (insn) != JUMP_INSN
5793 || GET_CODE (PATTERN (insn)) != ADDR_VEC)
5794 continue;
5796 /* Now generate markers for the beginning and end of the
5797 branch table. */
5798 emit_insn_before (gen_begin_brtab (), insn);
5799 emit_insn_after (gen_end_brtab (), insn);
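/* Sketch of the explosion performed above: a table that held

       .word L$5
       .word L$9

   becomes actual instructions in code space,

   L$100  b,n L$5
   L$101  b,n L$9

   so reorg and the final jump pass can fill the delay slots
   (labels illustrative).  */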
5804 /* The PA has a number of odd instructions which can perform multiple
5805 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
5806 it may be profitable to combine two instructions into one instruction
5807 with two outputs. It's not profitable on PA2.0 machines because the
5808 two outputs would take two slots in the reorder buffers.
5810 This routine finds instructions which can be combined and combines
5811 them. We only support some of the potential combinations, and we
5812 only try common ways to find suitable instructions.
5814 * addb can add two registers or a register and a small integer
5815 and jump to a nearby (+-8k) location. Normally the jump to the
5816 nearby location is conditional on the result of the add, but by
5817 using the "true" condition we can make the jump unconditional.
5818 Thus addb can perform two independent operations in one insn.
5820 * movb is similar to addb in that it can perform a reg->reg
5821 or small immediate->reg copy and jump to a nearby (+-8k location).
5823 * fmpyadd and fmpysub can perform a FP multiply and either an
5824 FP add or FP sub if the operands of the multiply and add/sub are
5825 independent (there are other minor restrictions). Note both
5826 the fmpy and fadd/fsub can in theory move to better spots according
5827 to data dependencies, but for now we require the fmpy stay at a
5828 fixed location.
5830 * Many of the memory operations can perform pre & post updates
5831 of index registers. GCC's pre/post increment/decrement addressing
5832 is far too simple to take advantage of all the possibilities. This
5833 pass may not be suitable since those insns may not be independent.
5835 * comclr can compare two ints or an int and a register, nullify
5836 the following instruction and zero some other register. This
5837 is more difficult to use as it's harder to find an insn which
5838 will generate a comclr than finding something like an unconditional
5839 branch. (conditional moves & long branches create comclr insns).
5841 * Most arithmetic operations can conditionally skip the next
5842 instruction. They can be viewed as "perform this operation
5843 and conditionally jump to this nearby location" (where nearby
5844 is an insns away). These are difficult to use due to the
5845 branch length restrictions. */
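
/* An illustrative sketch of the transformation (pseudo-RTL with made-up
   register numbers).  Given a suitable floater/anchor pair such as

       (set (reg:DF 6) (mult:DF (reg:DF 4) (reg:DF 5)))    ;; floater, fmpy
       (set (reg:DF 9) (plus:DF (reg:DF 7) (reg:DF 8)))    ;; anchor, fadd

   the pass below replaces the anchor with

       (parallel [(set (reg:DF 9) (plus:DF (reg:DF 7) (reg:DF 8)))
                  (set (reg:DF 6) (mult:DF (reg:DF 4) (reg:DF 5)))])

   and deletes the floater; if the PARALLEL matches a define_insn and
   satisfies its constraints, it is output as a single fmpyadd/fmpysub
   instruction.  */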
void
pa_combine_instructions (insns)
     rtx insns;
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);
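
  /* NEW is a scratch insn that is allocated once and reused for every
     candidate pairing: pa_can_combine_p stores each anchor/floater pair
     into its PARALLEL and asks the recognizer whether the combination
     matches, without emitting anything into the insn stream.  */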
  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
           && GET_CODE (anchor) != JUMP_INSN
           && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;
      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if ANCHOR is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }
              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }
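
          /* In the plain copy (movb) case above, the single source
             operand is passed to pa_can_combine_p as both SRC1 and
             SRC2, so that both of its input checks are applied to the
             one real input.  */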
          /* If we didn't find anything on the backwards scan, try
             forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then
                         we're done with this pass.  */
                      if (pa_can_combine_p (new, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                }
            }
          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
                                         gen_rtvec (2, PATTERN (anchor),
                                                    PATTERN (floater))),
                                anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;

              /* Emit the new jump instruction and delete the old
                 anchor.  */
              temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
                                              gen_rtvec (2, PATTERN (anchor),
                                                         PATTERN (floater))),
                                            anchor);
              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}
int
pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
     rtx new, anchor, floater;
     int reversed;
     rtx dest, src1, src2;
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
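  /* recog_memoized assigns a nonnegative insn code only if some
     define_insn pattern matches the scratch PARALLEL;
     constrain_operands then verifies the operands against that
     pattern's constraints (the second argument of 1 asks for the
     strict test).  */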
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || !constrain_operands (insn_code_number, 1))
    return 0;
  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }
  /* There are up to three operands to consider: one
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */
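
  /* For example (with hypothetical registers): if the floating insn
     computes r5 = r3 + r4 and some insn between the two reads r5,
     moving the computation would change the value that insn sees;
     and if r3 or r4 were set in between, the moved computation would
     read the wrong inputs.  */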
  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}