* pa.c (pa_reorg): Explode ADDR_DIFF_VEC insns too.
[official-gcc.git] / gcc / config / pa / pa.c
blobb1b9d9dc1bcd7b25bb865c8b7759b6896b05bc02
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdio.h>
23 #include "config.h"
24 #include "rtl.h"
25 #include "regs.h"
26 #include "hard-reg-set.h"
27 #include "real.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
31 #include "output.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "reload.h"
36 #include "c-tree.h"
37 #include "expr.h"
38 #include "obstack.h"
40 /* Save the operands last given to a compare for use when we
41 generate a scc or bcc insn. */
43 rtx hppa_compare_op0, hppa_compare_op1;
44 enum cmp_type hppa_branch_type;
46 /* Which cpu we are scheduling for. */
47 enum processor_type pa_cpu;
49 /* String to hold which cpu we are scheduling for. */
50 char *pa_cpu_string;
52 /* Set by the FUNCTION_PROFILER macro. */
53 int hp_profile_labelno;
55 /* Counts for the number of callee-saved general and floating point
56 registers which were saved by the current function's prologue. */
57 static int gr_saved, fr_saved;
59 /* Whether or not the current function uses an out-of-line prologue
60 and epilogue. */
61 static int out_of_line_prologue_epilogue;
63 static rtx find_addr_reg ();
65 /* Keep track of the number of bytes we have output in the CODE subspaces
66 during this compilation so we'll know when to emit inline long-calls. */
68 unsigned int total_code_bytes;
70 /* Variables to handle plabels that we discover are necessary at assembly
71 output time. They are output after the current function. */
73 struct deferred_plabel
75 rtx internal_label;
76 char *name;
77 } *deferred_plabels = 0;
78 int n_deferred_plabels = 0;
80 void
81 override_options ()
83 /* Default to 7100 scheduling. If the 7100LC scheduling ever
84 gets reasonably tuned, it should be the default since that
85 what most PAs sold now are. */
86 if (pa_cpu_string == NULL
87 || ! strcmp (pa_cpu_string, "7100"))
89 pa_cpu_string = "7100";
90 pa_cpu = PROCESSOR_7100;
92 else if (! strcmp (pa_cpu_string, "700"))
94 pa_cpu_string = "700";
95 pa_cpu = PROCESSOR_700;
97 else if (! strcmp (pa_cpu_string, "7100LC"))
99 pa_cpu_string = "7100LC";
100 pa_cpu = PROCESSOR_7100LC;
102 else
104 warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
107 if (flag_pic && TARGET_PORTABLE_RUNTIME)
109 warning ("PIC code generation is not supported in the portable runtime model\n");
112 if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
114 warning ("PIC code generation is not compatable with fast indirect calls\n");
117 if (flag_pic && profile_flag)
119 warning ("PIC code generation is not compatable with profiling\n");
122 if (TARGET_SPACE && (flag_pic || profile_flag))
124 warning ("Out of line entry/exit sequences are not compatable\n");
125 warning ("with PIC or profiling\n");
128 if (! TARGET_GAS && write_symbols != NO_DEBUG)
130 warning ("-g is only supported when using GAS on this processor,");
131 warning ("-g option disabled.");
132 write_symbols = NO_DEBUG;
137 /* Return non-zero only if OP is a register of mode MODE,
138 or CONST0_RTX. */
140 reg_or_0_operand (op, mode)
141 rtx op;
142 enum machine_mode mode;
144 return (op == CONST0_RTX (mode) || register_operand (op, mode));
147 /* Return non-zero if OP is suitable for use in a call to a named
148 function.
150 (???) For 2.5 try to eliminate either call_operand_address or
151 function_label_operand, they perform very similar functions. */
153 call_operand_address (op, mode)
154 rtx op;
155 enum machine_mode mode;
157 return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
160 /* Return 1 if X contains a symbolic expression. We know these
161 expressions will have one of a few well defined forms, so
162 we need only check those forms. */
164 symbolic_expression_p (x)
165 register rtx x;
168 /* Strip off any HIGH. */
169 if (GET_CODE (x) == HIGH)
170 x = XEXP (x, 0);
172 return (symbolic_operand (x, VOIDmode));
176 symbolic_operand (op, mode)
177 register rtx op;
178 enum machine_mode mode;
180 switch (GET_CODE (op))
182 case SYMBOL_REF:
183 case LABEL_REF:
184 return 1;
185 case CONST:
186 op = XEXP (op, 0);
187 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
188 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
189 && GET_CODE (XEXP (op, 1)) == CONST_INT);
190 default:
191 return 0;
195 /* Return truth value of statement that OP is a symbolic memory
196 operand of mode MODE. */
199 symbolic_memory_operand (op, mode)
200 rtx op;
201 enum machine_mode mode;
203 if (GET_CODE (op) == SUBREG)
204 op = SUBREG_REG (op);
205 if (GET_CODE (op) != MEM)
206 return 0;
207 op = XEXP (op, 0);
208 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
209 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
212 /* Return 1 if the operand is either a register or a memory operand that is
213 not symbolic. */
216 reg_or_nonsymb_mem_operand (op, mode)
217 register rtx op;
218 enum machine_mode mode;
220 if (register_operand (op, mode))
221 return 1;
223 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
224 return 1;
226 return 0;
229 /* Return 1 if the operand is either a register, zero, or a memory operand
230 that is not symbolic. */
233 reg_or_0_or_nonsymb_mem_operand (op, mode)
234 register rtx op;
235 enum machine_mode mode;
237 if (register_operand (op, mode))
238 return 1;
240 if (op == CONST0_RTX (mode))
241 return 1;
243 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
244 return 1;
246 return 0;
249 /* Accept any constant that can be moved in one instructions into a
250 general register. */
252 cint_ok_for_move (intval)
253 HOST_WIDE_INT intval;
255 /* OK if ldo, ldil, or zdepi, can be used. */
256 return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
257 || zdepi_cint_p (intval));
260 /* Accept anything that can be moved in one instruction into a general
261 register. */
263 move_operand (op, mode)
264 rtx op;
265 enum machine_mode mode;
267 if (register_operand (op, mode))
268 return 1;
270 if (GET_CODE (op) == CONST_INT)
271 return cint_ok_for_move (INTVAL (op));
273 if (GET_CODE (op) == SUBREG)
274 op = SUBREG_REG (op);
275 if (GET_CODE (op) != MEM)
276 return 0;
278 op = XEXP (op, 0);
279 if (GET_CODE (op) == LO_SUM)
280 return (register_operand (XEXP (op, 0), Pmode)
281 && CONSTANT_P (XEXP (op, 1)));
283 /* Since move_operand is only used for source operands, we can always
284 allow scaled indexing! */
285 if (GET_CODE (op) == PLUS
286 && ((GET_CODE (XEXP (op, 0)) == MULT
287 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
288 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
289 && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
290 && GET_CODE (XEXP (op, 1)) == REG)
291 || (GET_CODE (XEXP (op, 1)) == MULT
292 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
293 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
294 && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
295 && GET_CODE (XEXP (op, 0)) == REG)))
296 return 1;
298 return memory_address_p (mode, op);
301 /* Accept REG and any CONST_INT that can be moved in one instruction into a
302 general register. */
304 reg_or_cint_move_operand (op, mode)
305 rtx op;
306 enum machine_mode mode;
308 if (register_operand (op, mode))
309 return 1;
311 if (GET_CODE (op) == CONST_INT)
312 return cint_ok_for_move (INTVAL (op));
314 return 0;
318 pic_label_operand (op, mode)
319 rtx op;
320 enum machine_mode mode;
322 if (!flag_pic)
323 return 0;
325 switch (GET_CODE (op))
327 case LABEL_REF:
328 return 1;
329 case CONST:
330 op = XEXP (op, 0);
331 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
332 && GET_CODE (XEXP (op, 1)) == CONST_INT);
333 default:
334 return 0;
339 fp_reg_operand (op, mode)
340 rtx op;
341 enum machine_mode mode;
343 return reg_renumber && FP_REG_P (op);
348 /* Return truth value of whether OP can be used as an operand in a
349 three operand arithmetic insn that accepts registers of mode MODE
350 or 14-bit signed integers. */
352 arith_operand (op, mode)
353 rtx op;
354 enum machine_mode mode;
356 return (register_operand (op, mode)
357 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
360 /* Return truth value of whether OP can be used as an operand in a
361 three operand arithmetic insn that accepts registers of mode MODE
362 or 11-bit signed integers. */
364 arith11_operand (op, mode)
365 rtx op;
366 enum machine_mode mode;
368 return (register_operand (op, mode)
369 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
372 /* A constant integer suitable for use in a PRE_MODIFY memory
373 reference. */
375 pre_cint_operand (op, mode)
376 rtx op;
377 enum machine_mode mode;
379 return (GET_CODE (op) == CONST_INT
380 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
383 /* A constant integer suitable for use in a POST_MODIFY memory
384 reference. */
386 post_cint_operand (op, mode)
387 rtx op;
388 enum machine_mode mode;
390 return (GET_CODE (op) == CONST_INT
391 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
395 arith_double_operand (op, mode)
396 rtx op;
397 enum machine_mode mode;
399 return (register_operand (op, mode)
400 || (GET_CODE (op) == CONST_DOUBLE
401 && GET_MODE (op) == mode
402 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
403 && (CONST_DOUBLE_HIGH (op) >= 0
404 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
407 /* Return truth value of whether OP is a integer which fits the
408 range constraining immediate operands in three-address insns, or
409 is an integer register. */
412 ireg_or_int5_operand (op, mode)
413 rtx op;
414 enum machine_mode mode;
416 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
417 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
420 /* Return truth value of whether OP is a integer which fits the
421 range constraining immediate operands in three-address insns. */
424 int5_operand (op, mode)
425 rtx op;
426 enum machine_mode mode;
428 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
432 uint5_operand (op, mode)
433 rtx op;
434 enum machine_mode mode;
436 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
440 int11_operand (op, mode)
441 rtx op;
442 enum machine_mode mode;
444 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
448 uint32_operand (op, mode)
449 rtx op;
450 enum machine_mode mode;
452 #if HOST_BITS_PER_WIDE_INT > 32
453 /* All allowed constants will fit a CONST_INT. */
454 return (GET_CODE (op) == CONST_INT
455 && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
456 #else
457 return (GET_CODE (op) == CONST_INT
458 || (GET_CODE (op) == CONST_DOUBLE
459 && CONST_DOUBLE_HIGH (op) == 0));
460 #endif
464 arith5_operand (op, mode)
465 rtx op;
466 enum machine_mode mode;
468 return register_operand (op, mode) || int5_operand (op, mode);
471 /* True iff zdepi can be used to generate this CONST_INT. */
473 zdepi_cint_p (x)
474 unsigned HOST_WIDE_INT x;
476 unsigned HOST_WIDE_INT lsb_mask, t;
478 /* This might not be obvious, but it's at least fast.
479 This function is critical; we don't have the time loops would take. */
480 lsb_mask = x & -x;
481 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
482 /* Return true iff t is a power of two. */
483 return ((t & (t - 1)) == 0);
486 /* True iff depi or extru can be used to compute (reg & mask).
487 Accept bit pattern like these:
488 0....01....1
489 1....10....0
490 1..10..01..1 */
492 and_mask_p (mask)
493 unsigned HOST_WIDE_INT mask;
495 mask = ~mask;
496 mask += mask & -mask;
497 return (mask & (mask - 1)) == 0;
500 /* True iff depi or extru can be used to compute (reg & OP). */
502 and_operand (op, mode)
503 rtx op;
504 enum machine_mode mode;
506 return (register_operand (op, mode)
507 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
510 /* True iff depi can be used to compute (reg | MASK). */
512 ior_mask_p (mask)
513 unsigned HOST_WIDE_INT mask;
515 mask += mask & -mask;
516 return (mask & (mask - 1)) == 0;
519 /* True iff depi can be used to compute (reg | OP). */
521 ior_operand (op, mode)
522 rtx op;
523 enum machine_mode mode;
525 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
529 lhs_lshift_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
536 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
537 Such values can be the left hand side x in (x << r), using the zvdepi
538 instruction. */
540 lhs_lshift_cint_operand (op, mode)
541 rtx op;
542 enum machine_mode mode;
544 unsigned HOST_WIDE_INT x;
545 if (GET_CODE (op) != CONST_INT)
546 return 0;
547 x = INTVAL (op) >> 4;
548 return (x & (x + 1)) == 0;
552 arith32_operand (op, mode)
553 rtx op;
554 enum machine_mode mode;
556 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
560 pc_or_label_operand (op, mode)
561 rtx op;
562 enum machine_mode mode;
564 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
567 /* Legitimize PIC addresses. If the address is already
568 position-independent, we return ORIG. Newly generated
569 position-independent addresses go to REG. If we need more
570 than one register, we lose. */
573 legitimize_pic_address (orig, mode, reg)
574 rtx orig, reg;
575 enum machine_mode mode;
577 rtx pic_ref = orig;
579 /* Labels need special handling. */
580 if (pic_label_operand (orig))
582 emit_insn (gen_pic_load_label (reg, orig));
583 current_function_uses_pic_offset_table = 1;
584 return reg;
586 if (GET_CODE (orig) == SYMBOL_REF)
588 if (reg == 0)
589 abort ();
591 if (flag_pic == 2)
593 emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
594 pic_ref = gen_rtx (MEM, Pmode,
595 gen_rtx (LO_SUM, Pmode, reg,
596 gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
598 else
599 pic_ref = gen_rtx (MEM, Pmode,
600 gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
601 current_function_uses_pic_offset_table = 1;
602 RTX_UNCHANGING_P (pic_ref) = 1;
603 emit_move_insn (reg, pic_ref);
604 return reg;
606 else if (GET_CODE (orig) == CONST)
608 rtx base;
610 if (GET_CODE (XEXP (orig, 0)) == PLUS
611 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
612 return orig;
614 if (reg == 0)
615 abort ();
617 if (GET_CODE (XEXP (orig, 0)) == PLUS)
619 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
620 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
621 base == reg ? 0 : reg);
623 else abort ();
624 if (GET_CODE (orig) == CONST_INT)
626 if (INT_14_BITS (orig))
627 return plus_constant_for_output (base, INTVAL (orig));
628 orig = force_reg (Pmode, orig);
630 pic_ref = gen_rtx (PLUS, Pmode, base, orig);
631 /* Likewise, should we set special REG_NOTEs here? */
633 return pic_ref;
636 /* Try machine-dependent ways of modifying an illegitimate address
637 to be legitimate. If we find one, return the new, valid address.
638 This macro is used in only one place: `memory_address' in explow.c.
640 OLDX is the address as it was before break_out_memory_refs was called.
641 In some cases it is useful to look at this to decide what needs to be done.
643 MODE and WIN are passed so that this macro can use
644 GO_IF_LEGITIMATE_ADDRESS.
646 It is always safe for this macro to do nothing. It exists to recognize
647 opportunities to optimize the output.
649 For the PA, transform:
651 memory(X + <large int>)
653 into:
655 if (<large int> & mask) >= 16
656 Y = (<large int> & ~mask) + mask + 1 Round up.
657 else
658 Y = (<large int> & ~mask) Round down.
659 Z = X + Y
660 memory (Z + (<large int> - Y));
662 This is for CSE to find several similar references, and only use one Z.
664 X can either be a SYMBOL_REF or REG, but because combine can not
665 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
666 D will not fit in 14 bits.
668 MODE_FLOAT references allow displacements which fit in 5 bits, so use
669 0x1f as the mask.
671 MODE_INT references allow displacements which fit in 14 bits, so use
672 0x3fff as the mask.
674 This relies on the fact that most mode MODE_FLOAT references will use FP
675 registers and most mode MODE_INT references will use integer registers.
676 (In the rare case of an FP register used in an integer MODE, we depend
677 on secondary reloads to clean things up.)
680 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
681 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
682 addressing modes to be used).
684 Put X and Z into registers. Then put the entire expression into
685 a register. */
688 hppa_legitimize_address (x, oldx, mode)
689 rtx x, oldx;
690 enum machine_mode mode;
692 rtx orig = x;
694 if (flag_pic)
695 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
697 /* Strip off CONST. */
698 if (GET_CODE (x) == CONST)
699 x = XEXP (x, 0);
701 /* Special case. Get the SYMBOL_REF into a register and use indexing.
702 That should always be safe. */
703 if (GET_CODE (x) == PLUS
704 && GET_CODE (XEXP (x, 0)) == REG
705 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
707 rtx reg = force_reg (SImode, XEXP (x, 1));
708 return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
711 /* Note we must reject symbols which represent function addresses
712 since the assembler/linker can't handle arithmetic on plabels. */
713 if (GET_CODE (x) == PLUS
714 && GET_CODE (XEXP (x, 1)) == CONST_INT
715 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
716 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
717 || GET_CODE (XEXP (x, 0)) == REG))
719 rtx int_part, ptr_reg;
720 int newoffset;
721 int offset = INTVAL (XEXP (x, 1));
722 int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
724 /* Choose which way to round the offset. Round up if we
725 are >= halfway to the next boundary. */
726 if ((offset & mask) >= ((mask + 1) / 2))
727 newoffset = (offset & ~ mask) + mask + 1;
728 else
729 newoffset = (offset & ~ mask);
731 /* If the newoffset will not fit in 14 bits (ldo), then
732 handling this would take 4 or 5 instructions (2 to load
733 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
734 add the new offset and the SYMBOL_REF.) Combine can
735 not handle 4->2 or 5->2 combinations, so do not create
736 them. */
737 if (! VAL_14_BITS_P (newoffset)
738 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
740 rtx const_part = gen_rtx (CONST, VOIDmode,
741 gen_rtx (PLUS, Pmode,
742 XEXP (x, 0),
743 GEN_INT (newoffset)));
744 rtx tmp_reg
745 = force_reg (Pmode,
746 gen_rtx (HIGH, Pmode, const_part));
747 ptr_reg
748 = force_reg (Pmode,
749 gen_rtx (LO_SUM, Pmode,
750 tmp_reg, const_part));
752 else
754 if (! VAL_14_BITS_P (newoffset))
755 int_part = force_reg (Pmode, GEN_INT (newoffset));
756 else
757 int_part = GEN_INT (newoffset);
759 ptr_reg = force_reg (Pmode,
760 gen_rtx (PLUS, Pmode,
761 force_reg (Pmode, XEXP (x, 0)),
762 int_part));
764 return plus_constant (ptr_reg, offset - newoffset);
767 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
769 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
771 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
772 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
773 || GET_CODE (XEXP (x, 1)) == SUBREG)
774 && GET_CODE (XEXP (x, 1)) != CONST)
776 int val = INTVAL (XEXP (XEXP (x, 0), 1));
777 rtx reg1, reg2;
779 reg1 = XEXP (x, 1);
780 if (GET_CODE (reg1) != REG)
781 reg1 = force_reg (Pmode, force_operand (reg1, 0));
783 reg2 = XEXP (XEXP (x, 0), 0);
784 if (GET_CODE (reg2) != REG)
785 reg2 = force_reg (Pmode, force_operand (reg2, 0));
787 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
788 gen_rtx (MULT, Pmode,
789 reg2, GEN_INT (val)),
790 reg1));
793 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
795 Only do so for floating point modes since this is more speculative
796 and we lose if it's an integer store. */
797 if (GET_CODE (x) == PLUS
798 && GET_CODE (XEXP (x, 0)) == PLUS
799 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
800 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
801 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
802 && (mode == SFmode || mode == DFmode))
805 /* First, try and figure out what to use as a base register. */
806 rtx reg1, reg2, base, idx, orig_base;
808 reg1 = XEXP (XEXP (x, 0), 1);
809 reg2 = XEXP (x, 1);
810 base = NULL_RTX;
811 idx = NULL_RTX;
813 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
814 then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
815 know it's a base register below. */
816 if (GET_CODE (reg1) != REG)
817 reg1 = force_reg (Pmode, force_operand (reg1, 0));
819 if (GET_CODE (reg2) != REG)
820 reg2 = force_reg (Pmode, force_operand (reg2, 0));
822 /* Figure out what the base and index are. */
824 if (GET_CODE (reg1) == REG
825 && REGNO_POINTER_FLAG (REGNO (reg1)))
827 base = reg1;
828 orig_base = XEXP (XEXP (x, 0), 1);
829 idx = gen_rtx (PLUS, Pmode,
830 gen_rtx (MULT, Pmode,
831 XEXP (XEXP (XEXP (x, 0), 0), 0),
832 XEXP (XEXP (XEXP (x, 0), 0), 1)),
833 XEXP (x, 1));
835 else if (GET_CODE (reg2) == REG
836 && REGNO_POINTER_FLAG (REGNO (reg2)))
838 base = reg2;
839 orig_base = XEXP (x, 1);
840 idx = XEXP (x, 0);
843 if (base == 0)
844 return orig;
846 /* If the index adds a large constant, try to scale the
847 constant so that it can be loaded with only one insn. */
848 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
849 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
850 / INTVAL (XEXP (XEXP (idx, 0), 1)))
851 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
853 /* Divide the CONST_INT by the scale factor, then add it to A. */
854 int val = INTVAL (XEXP (idx, 1));
856 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
857 reg1 = XEXP (XEXP (idx, 0), 0);
858 if (GET_CODE (reg1) != REG)
859 reg1 = force_reg (Pmode, force_operand (reg1, 0));
861 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));
863 /* We can now generate a simple scaled indexed address. */
864 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
865 gen_rtx (MULT, Pmode, reg1,
866 XEXP (XEXP (idx, 0), 1)),
867 base));
870 /* If B + C is still a valid base register, then add them. */
871 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
872 && INTVAL (XEXP (idx, 1)) <= 4096
873 && INTVAL (XEXP (idx, 1)) >= -4096)
875 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
876 rtx reg1, reg2;
878 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));
880 reg2 = XEXP (XEXP (idx, 0), 0);
881 if (GET_CODE (reg2) != CONST_INT)
882 reg2 = force_reg (Pmode, force_operand (reg2, 0));
884 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
885 gen_rtx (MULT, Pmode,
886 reg2, GEN_INT (val)),
887 reg1));
890 /* Get the index into a register, then add the base + index and
891 return a register holding the result. */
893 /* First get A into a register. */
894 reg1 = XEXP (XEXP (idx, 0), 0);
895 if (GET_CODE (reg1) != REG)
896 reg1 = force_reg (Pmode, force_operand (reg1, 0));
898 /* And get B into a register. */
899 reg2 = XEXP (idx, 1);
900 if (GET_CODE (reg2) != REG)
901 reg2 = force_reg (Pmode, force_operand (reg2, 0));
903 reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
904 gen_rtx (MULT, Pmode, reg1,
905 XEXP (XEXP (idx, 0), 1)),
906 reg2));
908 /* Add the result to our base register and return. */
909 return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
913 /* Uh-oh. We might have an address for x[n-100000]. This needs
914 special handling to avoid creating an indexed memory address
915 with x-100000 as the base.
917 If the constant part is small enough, then it's still safe because
918 there is a guard page at the beginning and end of the data segment.
920 Scaled references are common enough that we want to try and rearrange the
921 terms so that we can use indexing for these addresses too. Only
922 do the optimization for floatint point modes. */
924 if (GET_CODE (x) == PLUS
925 && symbolic_expression_p (XEXP (x, 1)))
927 /* Ugly. We modify things here so that the address offset specified
928 by the index expression is computed first, then added to x to form
929 the entire address. */
931 rtx regx1, regx2, regy1, regy2, y;
933 /* Strip off any CONST. */
934 y = XEXP (x, 1);
935 if (GET_CODE (y) == CONST)
936 y = XEXP (y, 0);
938 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
940 /* See if this looks like
941 (plus (mult (reg) (shadd_const))
942 (const (plus (symbol_ref) (const_int))))
944 Where const_int is small. In that case the const
945 expression is a valid pointer for indexing.
947 If const_int is big, but can be divided evenly by shadd_const
948 and added to (reg). This allows more scaled indexed addresses. */
949 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
950 && GET_CODE (XEXP (x, 0)) == MULT
951 && GET_CODE (XEXP (y, 1)) == CONST_INT
952 && INTVAL (XEXP (y, 1)) >= -4096
953 && INTVAL (XEXP (y, 1)) <= 4095
954 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
955 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
957 int val = INTVAL (XEXP (XEXP (x, 0), 1));
958 rtx reg1, reg2;
960 reg1 = XEXP (x, 1);
961 if (GET_CODE (reg1) != REG)
962 reg1 = force_reg (Pmode, force_operand (reg1, 0));
964 reg2 = XEXP (XEXP (x, 0), 0);
965 if (GET_CODE (reg2) != REG)
966 reg2 = force_reg (Pmode, force_operand (reg2, 0));
968 return force_reg (Pmode, gen_rtx (PLUS, Pmode,
969 gen_rtx (MULT, Pmode,
970 reg2, GEN_INT (val)),
971 reg1));
973 else if ((mode == DFmode || mode == SFmode)
974 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
975 && GET_CODE (XEXP (x, 0)) == MULT
976 && GET_CODE (XEXP (y, 1)) == CONST_INT
977 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
978 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
979 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
981 regx1
982 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
983 / INTVAL (XEXP (XEXP (x, 0), 1))));
984 regx2 = XEXP (XEXP (x, 0), 0);
985 if (GET_CODE (regx2) != REG)
986 regx2 = force_reg (Pmode, force_operand (regx2, 0));
987 regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
988 regx2, regx1));
989 return force_reg (Pmode,
990 gen_rtx (PLUS, Pmode,
991 gen_rtx (MULT, Pmode, regx2,
992 XEXP (XEXP (x, 0), 1)),
993 force_reg (Pmode, XEXP (y, 0))));
995 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
996 && INTVAL (XEXP (y, 1)) >= -4096
997 && INTVAL (XEXP (y, 1)) <= 4095)
999 /* This is safe because of the guard page at the
1000 beginning and end of the data space. Just
1001 return the original address. */
1002 return orig;
1004 else
1006 /* Doesn't look like one we can optimize. */
1007 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1008 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1009 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1010 regx1 = force_reg (Pmode,
1011 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
1012 return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
1017 return orig;
1020 /* For the HPPA, REG and REG+CONST is cost 0
1021 and addresses involving symbolic constants are cost 2.
1023 PIC addresses are very expensive.
1025 It is no coincidence that this has the same structure
1026 as GO_IF_LEGITIMATE_ADDRESS. */
1028 hppa_address_cost (X)
1029 rtx X;
1031 if (GET_CODE (X) == PLUS)
1032 return 1;
1033 else if (GET_CODE (X) == LO_SUM)
1034 return 1;
1035 else if (GET_CODE (X) == HIGH)
1036 return 2;
1037 return 4;
1040 /* Emit insns to move operands[1] into operands[0].
1042 Return 1 if we have written out everything that needs to be done to
1043 do the move. Otherwise, return 0 and the caller will emit the move
1044 normally. */
1047 emit_move_sequence (operands, mode, scratch_reg)
1048 rtx *operands;
1049 enum machine_mode mode;
1050 rtx scratch_reg;
1052 register rtx operand0 = operands[0];
1053 register rtx operand1 = operands[1];
1055 if (reload_in_progress && GET_CODE (operand0) == REG
1056 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1057 operand0 = reg_equiv_mem[REGNO (operand0)];
1058 else if (reload_in_progress && GET_CODE (operand0) == SUBREG
1059 && GET_CODE (SUBREG_REG (operand0)) == REG
1060 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1062 SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
1063 operand0 = alter_subreg (operand0);
1066 if (reload_in_progress && GET_CODE (operand1) == REG
1067 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1068 operand1 = reg_equiv_mem[REGNO (operand1)];
1069 else if (reload_in_progress && GET_CODE (operand1) == SUBREG
1070 && GET_CODE (SUBREG_REG (operand1)) == REG
1071 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1073 SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
1074 operand1 = alter_subreg (operand1);
1077 /* Handle secondary reloads for loads/stores of FP registers from
1078 REG+D addresses where D does not fit in 5 bits, including
1079 (subreg (mem (addr))) cases. */
1080 if (fp_reg_operand (operand0, mode)
1081 && ((GET_CODE (operand1) == MEM
1082 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1083 || ((GET_CODE (operand1) == SUBREG
1084 && GET_CODE (XEXP (operand1, 0)) == MEM
1085 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1086 && scratch_reg)
1088 if (GET_CODE (operand1) == SUBREG)
1089 operand1 = XEXP (operand1, 0);
1091 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1093 /* D might not fit in 14 bits either; for such cases load D into
1094 scratch reg. */
1095 if (!memory_address_p (SImode, XEXP (operand1, 0)))
1097 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1098 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1099 SImode,
1100 XEXP (XEXP (operand1, 0), 0),
1101 scratch_reg));
1103 else
1104 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1105 emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
1106 scratch_reg)));
1107 return 1;
1109 else if (fp_reg_operand (operand1, mode)
1110 && ((GET_CODE (operand0) == MEM
1111 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1112 || ((GET_CODE (operand0) == SUBREG)
1113 && GET_CODE (XEXP (operand0, 0)) == MEM
1114 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1115 && scratch_reg)
1117 if (GET_CODE (operand0) == SUBREG)
1118 operand0 = XEXP (operand0, 0);
1120 scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
1121 /* D might not fit in 14 bits either; for such cases load D into
1122 scratch reg. */
1123 if (!memory_address_p (SImode, XEXP (operand0, 0)))
1125 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1126 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
1127 SImode,
1128 XEXP (XEXP (operand0, 0), 0),
1129 scratch_reg));
1131 else
1132 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1133 emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
1134 operand1));
1135 return 1;
1137 /* Handle secondary reloads for loads of FP registers from constant
1138 expressions by forcing the constant into memory.
1140 use scratch_reg to hold the address of the memory location.
1142 ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
1143 NO_REGS when presented with a const_int and an register class
1144 containing only FP registers. Doing so unfortunately creates
1145 more problems than it solves. Fix this for 2.5. */
1146 else if (fp_reg_operand (operand0, mode)
1147 && CONSTANT_P (operand1)
1148 && scratch_reg)
1150 rtx xoperands[2];
1152 /* Force the constant into memory and put the address of the
1153 memory location into scratch_reg. */
1154 xoperands[0] = scratch_reg;
1155 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1156 emit_move_sequence (xoperands, Pmode, 0);
1158 /* Now load the destination register. */
1159 emit_insn (gen_rtx (SET, mode, operand0,
1160 gen_rtx (MEM, mode, scratch_reg)));
1161 return 1;
1163 /* Handle secondary reloads for SAR. These occur when trying to load
1164 the SAR from memory, an FP register, or with a constant. */
1165 else if (GET_CODE (operand0) == REG
1166 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1167 && (GET_CODE (operand1) == MEM
1168 || GET_CODE (operand1) == CONST_INT
1169 || (GET_CODE (operand1) == REG
1170 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1171 && scratch_reg)
1173 /* D might not fit in 14 bits either; for such cases load D into
1174 scratch reg. */
1175 if (GET_CODE (operand1) == MEM
1176 && !memory_address_p (SImode, XEXP (operand1, 0)))
1178 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1179 emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
1180 SImode,
1181 XEXP (XEXP (operand1, 0), 0),
1182 scratch_reg));
1183 emit_move_insn (scratch_reg, gen_rtx (MEM, GET_MODE (operand1),
1184 scratch_reg));
1186 else
1187 emit_move_insn (scratch_reg, operand1);
1188 emit_move_insn (operand0, scratch_reg);
1189 return 1;
1191 /* Handle most common case: storing into a register. */
1192 else if (register_operand (operand0, mode))
1194 if (register_operand (operand1, mode)
1195 || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
1196 || (operand1 == CONST0_RTX (mode))
1197 || (GET_CODE (operand1) == HIGH
1198 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1199 /* Only `general_operands' can come here, so MEM is ok. */
1200 || GET_CODE (operand1) == MEM)
1202 /* Run this case quickly. */
1203 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1204 return 1;
1207 else if (GET_CODE (operand0) == MEM)
1209 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1210 && !(reload_in_progress || reload_completed))
1212 rtx temp = gen_reg_rtx (DFmode);
1214 emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
1215 emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
1216 return 1;
1218 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1220 /* Run this case quickly. */
1221 emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1222 return 1;
1224 if (! (reload_in_progress || reload_completed))
1226 operands[0] = validize_mem (operand0);
1227 operands[1] = operand1 = force_reg (mode, operand1);
1231 /* Simplify the source if we need to. */
1232 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1233 || (GET_CODE (operand1) == HIGH
1234 && symbolic_operand (XEXP (operand1, 0), mode)))
1236 int ishighonly = 0;
1238 if (GET_CODE (operand1) == HIGH)
1240 ishighonly = 1;
1241 operand1 = XEXP (operand1, 0);
1243 if (symbolic_operand (operand1, mode))
1245 /* Argh. The assembler and linker can't handle arithmetic
1246 involving plabels.
1248 So we force the plabel into memory, load operand0 from
1249 the memory location, then add in the constant part. */
1250 if (GET_CODE (operand1) == CONST
1251 && GET_CODE (XEXP (operand1, 0)) == PLUS
1252 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1254 rtx temp, const_part;
1256 /* Figure out what (if any) scratch register to use. */
1257 if (reload_in_progress || reload_completed)
1258 scratch_reg = scratch_reg ? scratch_reg : operand0;
1259 else if (flag_pic)
1260 scratch_reg = gen_reg_rtx (Pmode);
1262 /* Save away the constant part of the expression. */
1263 const_part = XEXP (XEXP (operand1, 0), 1);
1264 if (GET_CODE (const_part) != CONST_INT)
1265 abort ();
1267 /* Force the function label into memory. */
1268 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1270 /* Get the address of the memory location. PIC-ify it if
1271 necessary. */
1272 temp = XEXP (temp, 0);
1273 if (flag_pic)
1274 temp = legitimize_pic_address (temp, mode, scratch_reg);
1276 /* Put the address of the memory location into our destination
1277 register. */
1278 operands[1] = temp;
1279 emit_move_sequence (operands, mode, scratch_reg);
1281 /* Now load from the memory location into our destination
1282 register. */
1283 operands[1] = gen_rtx (MEM, Pmode, operands[0]);
1284 emit_move_sequence (operands, mode, scratch_reg);
1286 /* And add back in the constant part. */
1287 expand_inc (operand0, const_part);
1289 return 1;
1292 if (flag_pic)
1294 rtx temp;
1296 if (reload_in_progress || reload_completed)
1297 temp = scratch_reg ? scratch_reg : operand0;
1298 else
1299 temp = gen_reg_rtx (Pmode);
1301 /* (const (plus (symbol) (const_int))) must be forced to
1302 memory during/after reload if the const_int will not fit
1303 in 14 bits. */
1304 if (GET_CODE (operand1) == CONST
1305 && GET_CODE (XEXP (operand1, 0)) == PLUS
1306 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1307 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1308 && (reload_completed || reload_in_progress)
1309 && flag_pic)
1311 operands[1] = force_const_mem (mode, operand1);
1312 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1313 mode, temp);
1314 emit_move_sequence (operands, mode, temp);
1316 else
1318 operands[1] = legitimize_pic_address (operand1, mode, temp);
1319 emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
1322 /* On the HPPA, references to data space are supposed to use dp,
1323 register 27, but showing it in the RTL inhibits various cse
1324 and loop optimizations. */
1325 else
1327 rtx temp, set;
1329 if (reload_in_progress || reload_completed)
1330 temp = scratch_reg ? scratch_reg : operand0;
1331 else
1332 temp = gen_reg_rtx (mode);
1334 /* Loading a SYMBOL_REF into a register makes that register
1335 safe to be used as the base in an indexed address.
1337 Don't mark hard registers though. That loses. */
1338 if (GET_CODE (operand0) == REG
1339 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1340 REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
1341 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1342 REGNO_POINTER_FLAG (REGNO (temp)) = 1;
1343 if (ishighonly)
1344 set = gen_rtx (SET, mode, operand0, temp);
1345 else
1346 set = gen_rtx (SET, VOIDmode,
1347 operand0,
1348 gen_rtx (LO_SUM, mode, temp, operand1));
1350 emit_insn (gen_rtx (SET, VOIDmode,
1351 temp,
1352 gen_rtx (HIGH, mode, operand1)));
1353 emit_insn (set);
1356 return 1;
1358 else if (GET_CODE (operand1) != CONST_INT
1359 || ! cint_ok_for_move (INTVAL (operand1)))
1361 rtx temp;
1363 if (reload_in_progress || reload_completed)
1364 temp = operand0;
1365 else
1366 temp = gen_reg_rtx (mode);
1368 emit_insn (gen_rtx (SET, VOIDmode, temp,
1369 gen_rtx (HIGH, mode, operand1)));
1370 operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1373 /* Now have insn-emit do whatever it normally does. */
1374 return 0;
1377 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1378 it will need a link/runtime reloc). */
1381 reloc_needed (exp)
1382 tree exp;
1384 int reloc = 0;
1386 switch (TREE_CODE (exp))
1388 case ADDR_EXPR:
1389 return 1;
1391 case PLUS_EXPR:
1392 case MINUS_EXPR:
1393 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1394 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1395 break;
1397 case NOP_EXPR:
1398 case CONVERT_EXPR:
1399 case NON_LVALUE_EXPR:
1400 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1401 break;
1403 case CONSTRUCTOR:
1405 register tree link;
1406 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1407 if (TREE_VALUE (link) != 0)
1408 reloc |= reloc_needed (TREE_VALUE (link));
1410 break;
1412 case ERROR_MARK:
1413 break;
1415 return reloc;
1418 /* Does operand (which is a symbolic_operand) live in text space? If
1419 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1422 read_only_operand (operand)
1423 rtx operand;
1425 if (GET_CODE (operand) == CONST)
1426 operand = XEXP (XEXP (operand, 0), 0);
1427 if (flag_pic)
1429 if (GET_CODE (operand) == SYMBOL_REF)
1430 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1432 else
1434 if (GET_CODE (operand) == SYMBOL_REF)
1435 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1437 return 1;
1441 /* Return the best assembler insn template
1442 for moving operands[1] into operands[0] as a fullword. */
1443 char *
1444 singlemove_string (operands)
1445 rtx *operands;
1447 HOST_WIDE_INT intval;
1449 if (GET_CODE (operands[0]) == MEM)
1450 return "stw %r1,%0";
1451 if (GET_CODE (operands[1]) == MEM)
1452 return "ldw %1,%0";
1453 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1455 long i;
1456 REAL_VALUE_TYPE d;
1458 if (GET_MODE (operands[1]) != SFmode)
1459 abort ();
1461 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1462 bit pattern. */
1463 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1464 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1466 operands[1] = GEN_INT (i);
1467 /* Fall through to CONST_INT case. */
1469 if (GET_CODE (operands[1]) == CONST_INT)
1471 intval = INTVAL (operands[1]);
1473 if (VAL_14_BITS_P (intval))
1474 return "ldi %1,%0";
1475 else if ((intval & 0x7ff) == 0)
1476 return "ldil L'%1,%0";
1477 else if (zdepi_cint_p (intval))
1478 return "zdepi %Z1,%0";
1479 else
1480 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1482 return "copy %1,%0";
1486 /* Compute position (in OP[1]) and width (in OP[2])
1487 useful for copying IMM to a register using the zdepi
1488 instructions. Store the immediate value to insert in OP[0]. */
1489 void
1490 compute_zdepi_operands (imm, op)
1491 unsigned HOST_WIDE_INT imm;
1492 unsigned *op;
1494 int lsb, len;
1496 /* Find the least significant set bit in IMM. */
1497 for (lsb = 0; lsb < 32; lsb++)
1499 if ((imm & 1) != 0)
1500 break;
1501 imm >>= 1;
1504 /* Choose variants based on *sign* of the 5-bit field. */
1505 if ((imm & 0x10) == 0)
1506 len = (lsb <= 28) ? 4 : 32 - lsb;
1507 else
1509 /* Find the width of the bitstring in IMM. */
1510 for (len = 5; len < 32; len++)
1512 if ((imm & (1 << len)) == 0)
1513 break;
1516 /* Sign extend IMM as a 5-bit value. */
1517 imm = (imm & 0xf) - 0x10;
1520 op[0] = imm;
1521 op[1] = 31 - lsb;
1522 op[2] = len;
1525 /* Output assembler code to perform a doubleword move insn
1526 with operands OPERANDS. */
1528 char *
1529 output_move_double (operands)
1530 rtx *operands;
1532 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1533 rtx latehalf[2];
1534 rtx addreg0 = 0, addreg1 = 0;
1536 /* First classify both operands. */
1538 if (REG_P (operands[0]))
1539 optype0 = REGOP;
1540 else if (offsettable_memref_p (operands[0]))
1541 optype0 = OFFSOP;
1542 else if (GET_CODE (operands[0]) == MEM)
1543 optype0 = MEMOP;
1544 else
1545 optype0 = RNDOP;
1547 if (REG_P (operands[1]))
1548 optype1 = REGOP;
1549 else if (CONSTANT_P (operands[1]))
1550 optype1 = CNSTOP;
1551 else if (offsettable_memref_p (operands[1]))
1552 optype1 = OFFSOP;
1553 else if (GET_CODE (operands[1]) == MEM)
1554 optype1 = MEMOP;
1555 else
1556 optype1 = RNDOP;
1558 /* Check for the cases that the operand constraints are not
1559 supposed to allow to happen. Abort if we get one,
1560 because generating code for these cases is painful. */
1562 if (optype0 != REGOP && optype1 != REGOP)
1563 abort ();
1565 /* Handle auto decrementing and incrementing loads and stores
1566 specifically, since the structure of the function doesn't work
1567 for them without major modification. Do it better when we learn
1568 this port about the general inc/dec addressing of PA.
1569 (This was written by tege. Chide him if it doesn't work.) */
1571 if (optype0 == MEMOP)
1573 /* We have to output the address syntax ourselves, since print_operand
1574 doesn't deal with the addresses we want to use. Fix this later. */
1576 rtx addr = XEXP (operands[0], 0);
1577 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1579 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1581 operands[0] = XEXP (addr, 0);
1582 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1583 abort ();
1585 if (!reg_overlap_mentioned_p (high_reg, addr))
1587 /* No overlap between high target register and address
1588 register. (We do this in a non-obvious way to
1589 save a register file writeback) */
1590 if (GET_CODE (addr) == POST_INC)
1591 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1592 return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1594 else
1595 abort();
1597 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1599 rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1601 operands[0] = XEXP (addr, 0);
1602 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1603 abort ();
1605 if (!reg_overlap_mentioned_p (high_reg, addr))
1607 /* No overlap between high target register and address
1608 register. (We do this in a non-obvious way to
1609 save a register file writeback) */
1610 if (GET_CODE (addr) == PRE_INC)
1611 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1612 return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1614 else
1615 abort();
1618 if (optype1 == MEMOP)
1620 /* We have to output the address syntax ourselves, since print_operand
1621 doesn't deal with the addresses we want to use. Fix this later. */
1623 rtx addr = XEXP (operands[1], 0);
1624 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1626 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1628 operands[1] = XEXP (addr, 0);
1629 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1630 abort ();
1632 if (!reg_overlap_mentioned_p (high_reg, addr))
1634 /* No overlap between high target register and address
1635 register. (We do this in a non-obvious way to
1636 save a register file writeback) */
1637 if (GET_CODE (addr) == POST_INC)
1638 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1639 return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1641 else
1643 /* This is an undefined situation. We should load into the
1644 address register *and* update that register. Probably
1645 we don't need to handle this at all. */
1646 if (GET_CODE (addr) == POST_INC)
1647 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1648 return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1651 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1653 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1655 operands[1] = XEXP (addr, 0);
1656 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1657 abort ();
1659 if (!reg_overlap_mentioned_p (high_reg, addr))
1661 /* No overlap between high target register and address
1662 register. (We do this in a non-obvious way to
1663 save a register file writeback) */
1664 if (GET_CODE (addr) == PRE_INC)
1665 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1666 return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1668 else
1670 /* This is an undefined situation. We should load into the
1671 address register *and* update that register. Probably
1672 we don't need to handle this at all. */
1673 if (GET_CODE (addr) == PRE_INC)
1674 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1675 return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1678 else if (GET_CODE (addr) == PLUS
1679 && GET_CODE (XEXP (addr, 0)) == MULT)
1681 rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1683 if (!reg_overlap_mentioned_p (high_reg, addr))
1685 rtx xoperands[3];
1687 xoperands[0] = high_reg;
1688 xoperands[1] = XEXP (addr, 1);
1689 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1690 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1691 output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
1692 return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
1694 else
1696 rtx xoperands[3];
1698 xoperands[0] = high_reg;
1699 xoperands[1] = XEXP (addr, 1);
1700 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1701 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1702 output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
1703 return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
1709 /* If an operand is an unoffsettable memory ref, find a register
1710 we can increment temporarily to make it refer to the second word. */
1712 if (optype0 == MEMOP)
1713 addreg0 = find_addr_reg (XEXP (operands[0], 0));
1715 if (optype1 == MEMOP)
1716 addreg1 = find_addr_reg (XEXP (operands[1], 0));
1718 /* Ok, we can do one word at a time.
1719 Normally we do the low-numbered word first.
1721 In either case, set up in LATEHALF the operands to use
1722 for the high-numbered word and in some cases alter the
1723 operands in OPERANDS to be suitable for the low-numbered word. */
1725 if (optype0 == REGOP)
1726 latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1727 else if (optype0 == OFFSOP)
1728 latehalf[0] = adj_offsettable_operand (operands[0], 4);
1729 else
1730 latehalf[0] = operands[0];
1732 if (optype1 == REGOP)
1733 latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
1734 else if (optype1 == OFFSOP)
1735 latehalf[1] = adj_offsettable_operand (operands[1], 4);
1736 else if (optype1 == CNSTOP)
1737 split_double (operands[1], &operands[1], &latehalf[1]);
1738 else
1739 latehalf[1] = operands[1];
1741 /* If the first move would clobber the source of the second one,
1742 do them in the other order.
1744 This can happen in two cases:
1746 mem -> register where the first half of the destination register
1747 is the same register used in the memory's address. Reload
1748 can create such insns.
1750 mem in this case will be either register indirect or register
1751 indirect plus a valid offset.
1753 register -> register move where REGNO(dst) == REGNO(src + 1)
1754 someone (Tim/Tege?) claimed this can happen for parameter loads.
1756 Handle mem -> register case first. */
1757 if (optype0 == REGOP
1758 && (optype1 == MEMOP || optype1 == OFFSOP)
1759 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
1760 operands[1], 0))
1762 /* Do the late half first. */
1763 if (addreg1)
1764 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1765 output_asm_insn (singlemove_string (latehalf), latehalf);
1767 /* Then clobber. */
1768 if (addreg1)
1769 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1770 return singlemove_string (operands);
1773 /* Now handle register -> register case. */
1774 if (optype0 == REGOP && optype1 == REGOP
1775 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1777 output_asm_insn (singlemove_string (latehalf), latehalf);
1778 return singlemove_string (operands);
1781 /* Normal case: do the two words, low-numbered first. */
1783 output_asm_insn (singlemove_string (operands), operands);
1785 /* Make any unoffsettable addresses point at high-numbered word. */
1786 if (addreg0)
1787 output_asm_insn ("ldo 4(%0),%0", &addreg0);
1788 if (addreg1)
1789 output_asm_insn ("ldo 4(%0),%0", &addreg1);
1791 /* Do that word. */
1792 output_asm_insn (singlemove_string (latehalf), latehalf);
1794 /* Undo the adds we just did. */
1795 if (addreg0)
1796 output_asm_insn ("ldo -4(%0),%0", &addreg0);
1797 if (addreg1)
1798 output_asm_insn ("ldo -4(%0),%0", &addreg1);
1800 return "";
1803 char *
1804 output_fp_move_double (operands)
1805 rtx *operands;
1807 if (FP_REG_P (operands[0]))
1809 if (FP_REG_P (operands[1])
1810 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1811 output_asm_insn ("fcpy,dbl %r1,%0", operands);
1812 else
1813 output_asm_insn ("fldd%F1 %1,%0", operands);
1815 else if (FP_REG_P (operands[1]))
1817 output_asm_insn ("fstd%F0 %1,%0", operands);
1819 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1821 if (GET_CODE (operands[0]) == REG)
1823 rtx xoperands[2];
1824 xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1825 xoperands[0] = operands[0];
1826 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1828 /* This is a pain. You have to be prepared to deal with an
1829 arbitrary address here including pre/post increment/decrement.
1831 so avoid this in the MD. */
1832 else
1833 abort ();
1835 else abort ();
1836 return "";
1839 /* Return a REG that occurs in ADDR with coefficient 1.
1840 ADDR can be effectively incremented by incrementing REG. */
1842 static rtx
1843 find_addr_reg (addr)
1844 rtx addr;
1846 while (GET_CODE (addr) == PLUS)
1848 if (GET_CODE (XEXP (addr, 0)) == REG)
1849 addr = XEXP (addr, 0);
1850 else if (GET_CODE (XEXP (addr, 1)) == REG)
1851 addr = XEXP (addr, 1);
1852 else if (CONSTANT_P (XEXP (addr, 0)))
1853 addr = XEXP (addr, 1);
1854 else if (CONSTANT_P (XEXP (addr, 1)))
1855 addr = XEXP (addr, 0);
1856 else
1857 abort ();
1859 if (GET_CODE (addr) == REG)
1860 return addr;
1861 abort ();
1864 /* Emit code to perform a block move.
1866 OPERANDS[0] is the destination pointer as a REG, clobbered.
1867 OPERANDS[1] is the source pointer as a REG, clobbered.
1868 OPERANDS[2] is a register for temporary storage.
1869 OPERANDS[4] is the size as a CONST_INT
1870 OPERANDS[3] is a register for temporary storage.
1871 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
1872 OPERNADS[6] is another temporary register. */
1874 char *
1875 output_block_move (operands, size_is_constant)
1876 rtx *operands;
1877 int size_is_constant;
1879 int align = INTVAL (operands[5]);
1880 unsigned long n_bytes = INTVAL (operands[4]);
1882 /* We can't move more than four bytes at a time because the PA
1883 has no longer integer move insns. (Could use fp mem ops?) */
1884 if (align > 4)
1885 align = 4;
1887 /* Note that we know each loop below will execute at least twice
1888 (else we would have open-coded the copy). */
1889 switch (align)
1891 case 4:
1892 /* Pre-adjust the loop counter. */
1893 operands[4] = GEN_INT (n_bytes - 8);
1894 output_asm_insn ("ldi %4,%2", operands);
1896 /* Copying loop. */
1897 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1898 output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
1899 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1900 output_asm_insn ("addib,>= -8,%2,.-12", operands);
1901 output_asm_insn ("stws,ma %6,4(0,%0)", operands);
1903 /* Handle the residual. There could be up to 7 bytes of
1904 residual to copy! */
1905 if (n_bytes % 8 != 0)
1907 operands[4] = GEN_INT (n_bytes % 4);
1908 if (n_bytes % 8 >= 4)
1909 output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1910 if (n_bytes % 4 != 0)
1911 output_asm_insn ("ldw 0(0,%1),%6", operands);
1912 if (n_bytes % 8 >= 4)
1913 output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1914 if (n_bytes % 4 != 0)
1915 output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
1917 return "";
1919 case 2:
1920 /* Pre-adjust the loop counter. */
1921 operands[4] = GEN_INT (n_bytes - 4);
1922 output_asm_insn ("ldi %4,%2", operands);
1924 /* Copying loop. */
1925 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1926 output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
1927 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1928 output_asm_insn ("addib,>= -4,%2,.-12", operands);
1929 output_asm_insn ("sths,ma %6,2(0,%0)", operands);
1931 /* Handle the residual. */
1932 if (n_bytes % 4 != 0)
1934 if (n_bytes % 4 >= 2)
1935 output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1936 if (n_bytes % 2 != 0)
1937 output_asm_insn ("ldb 0(0,%1),%6", operands);
1938 if (n_bytes % 4 >= 2)
1939 output_asm_insn ("sths,ma %3,2(0,%0)", operands);
1940 if (n_bytes % 2 != 0)
1941 output_asm_insn ("stb %6,0(0,%0)", operands);
1943 return "";
1945 case 1:
1946 /* Pre-adjust the loop counter. */
1947 operands[4] = GEN_INT (n_bytes - 2);
1948 output_asm_insn ("ldi %4,%2", operands);
1950 /* Copying loop. */
1951 output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1952 output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
1953 output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
1954 output_asm_insn ("addib,>= -2,%2,.-12", operands);
1955 output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
1957 /* Handle the residual. */
1958 if (n_bytes % 2 != 0)
1960 output_asm_insn ("ldb 0(0,%1),%3", operands);
1961 output_asm_insn ("stb %3,0(0,%0)", operands);
1963 return "";
1965 default:
1966 abort ();
1970 /* Count the number of insns necessary to handle this block move.
1972 Basic structure is the same as emit_block_move, except that we
1973 count insns rather than emit them. */
1976 compute_movstrsi_length (insn)
1977 rtx insn;
1979 rtx pat = PATTERN (insn);
1980 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1981 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1982 unsigned int n_insns = 0;
1984 /* We can't move more than four bytes at a time because the PA
1985 has no longer integer move insns. (Could use fp mem ops?) */
1986 if (align > 4)
1987 align = 4;
1989 /* The basic copying loop. */
1990 n_insns = 6;
1992 /* Residuals. */
1993 if (n_bytes % (2 * align) != 0)
1995 if ((n_bytes % (2 * align)) >= align)
1996 n_insns += 2;
1998 if ((n_bytes % align) != 0)
1999 n_insns += 2;
2002 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2003 return n_insns * 4;
2007 char *
2008 output_and (operands)
2009 rtx *operands;
2011 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2013 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2014 int ls0, ls1, ms0, p, len;
2016 for (ls0 = 0; ls0 < 32; ls0++)
2017 if ((mask & (1 << ls0)) == 0)
2018 break;
2020 for (ls1 = ls0; ls1 < 32; ls1++)
2021 if ((mask & (1 << ls1)) != 0)
2022 break;
2024 for (ms0 = ls1; ms0 < 32; ms0++)
2025 if ((mask & (1 << ms0)) == 0)
2026 break;
2028 if (ms0 != 32)
2029 abort();
2031 if (ls1 == 32)
2033 len = ls0;
2035 if (len == 0)
2036 abort ();
2038 operands[2] = GEN_INT (len);
2039 return "extru %1,31,%2,%0";
2041 else
2043 /* We could use this `depi' for the case above as well, but `depi'
2044 requires one more register file access than an `extru'. */
2046 p = 31 - ls0;
2047 len = ls1 - ls0;
2049 operands[2] = GEN_INT (p);
2050 operands[3] = GEN_INT (len);
2051 return "depi 0,%2,%3,%0";
2054 else
2055 return "and %1,%2,%0";
2058 char *
2059 output_ior (operands)
2060 rtx *operands;
2062 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2063 int bs0, bs1, p, len;
2065 if (INTVAL (operands[2]) == 0)
2066 return "copy %1,%0";
2068 for (bs0 = 0; bs0 < 32; bs0++)
2069 if ((mask & (1 << bs0)) != 0)
2070 break;
2072 for (bs1 = bs0; bs1 < 32; bs1++)
2073 if ((mask & (1 << bs1)) == 0)
2074 break;
2076 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2077 abort();
2079 p = 31 - bs0;
2080 len = bs1 - bs0;
2082 operands[2] = GEN_INT (p);
2083 operands[3] = GEN_INT (len);
2084 return "depi -1,%2,%3,%0";
/* Output an ascii string: write the SIZE bytes at P to FILE as one or
   more assembler .STRING directives, escaping quote, backslash and
   non-printable bytes.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;

      /* Escape up to four input bytes into PARTIAL_OUTPUT.  */
      for (io = 0, co = 0; io < 4 && i + io < size; io++)
	{
	  register unsigned int c = p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Emit a \xNN escape, building the hex digits by hand.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Start a fresh .STRING before the input line gets too long.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
2146 /* Try to rewrite floating point comparisons & branches to avoid
2147 useless add,tr insns.
2149 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2150 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2151 first attempt to remove useless add,tr insns. It is zero
2152 for the second pass as reorg sometimes leaves bogus REG_DEAD
2153 notes lying around.
2155 When CHECK_NOTES is zero we can only eliminate add,tr insns
2156 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2157 instructions. */
2158 void
2159 remove_useless_addtr_insns (insns, check_notes)
2160 rtx insns;
2161 int check_notes;
2163 rtx insn;
2164 int all;
2165 static int pass = 0;
2167 /* This is fairly cheap, so always run it when optimizing. */
2168 if (optimize > 0)
2170 int fcmp_count = 0;
2171 int fbranch_count = 0;
2173 /* Walk all the insns in this function looking for fcmp & fbranch
2174 instructions. Keep track of how many of each we find. */
2175 insns = get_insns ();
2176 for (insn = insns; insn; insn = next_insn (insn))
2178 rtx tmp;
2180 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2181 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2182 continue;
2184 tmp = PATTERN (insn);
2186 /* It must be a set. */
2187 if (GET_CODE (tmp) != SET)
2188 continue;
2190 /* If the destination is CCFP, then we've found an fcmp insn. */
2191 tmp = SET_DEST (tmp);
2192 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2194 fcmp_count++;
2195 continue;
2198 tmp = PATTERN (insn);
2199 /* If this is an fbranch instruction, bump the fbranch counter. */
2200 if (GET_CODE (tmp) == SET
2201 && SET_DEST (tmp) == pc_rtx
2202 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2203 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2204 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2205 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2207 fbranch_count++;
2208 continue;
2213 /* Find all floating point compare + branch insns. If possible,
2214 reverse the comparison & the branch to avoid add,tr insns. */
2215 for (insn = insns; insn; insn = next_insn (insn))
2217 rtx tmp, next;
2219 /* Ignore anything that isn't an INSN. */
2220 if (GET_CODE (insn) != INSN)
2221 continue;
2223 tmp = PATTERN (insn);
2225 /* It must be a set. */
2226 if (GET_CODE (tmp) != SET)
2227 continue;
2229 /* The destination must be CCFP, which is register zero. */
2230 tmp = SET_DEST (tmp);
2231 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2232 continue;
2234 /* INSN should be a set of CCFP.
2236 See if the result of this insn is used in a reversed FP
2237 conditional branch. If so, reverse our condition and
2238 the branch. Doing so avoids useless add,tr insns. */
2239 next = next_insn (insn);
2240 while (next)
2242 /* Jumps, calls and labels stop our search. */
2243 if (GET_CODE (next) == JUMP_INSN
2244 || GET_CODE (next) == CALL_INSN
2245 || GET_CODE (next) == CODE_LABEL)
2246 break;
2248 /* As does another fcmp insn. */
2249 if (GET_CODE (next) == INSN
2250 && GET_CODE (PATTERN (next)) == SET
2251 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2252 && REGNO (SET_DEST (PATTERN (next))) == 0)
2253 break;
2255 next = next_insn (next);
2258 /* Is NEXT_INSN a branch? */
2259 if (next
2260 && GET_CODE (next) == JUMP_INSN)
2262 rtx pattern = PATTERN (next);
2264 /* If it a reversed fp conditional branch (eg uses add,tr)
2265 and CCFP dies, then reverse our conditional and the branch
2266 to avoid the add,tr. */
2267 if (GET_CODE (pattern) == SET
2268 && SET_DEST (pattern) == pc_rtx
2269 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2270 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2271 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2272 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2273 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2274 && (fcmp_count == fbranch_count
2275 || (check_notes
2276 && find_regno_note (next, REG_DEAD, 0))))
2278 /* Reverse the branch. */
2279 tmp = XEXP (SET_SRC (pattern), 1);
2280 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2281 XEXP (SET_SRC (pattern), 2) = tmp;
2282 INSN_CODE (next) = -1;
2284 /* Reverse our condition. */
2285 tmp = PATTERN (insn);
2286 PUT_CODE (XEXP (tmp, 1),
2287 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2293 pass = !pass;
2297 /* You may have trouble believing this, but this is the HP-PA stack
2298 layout. Wow.
2300 Offset Contents
2302 Variable arguments (optional; any number may be allocated)
2304 SP-(4*(N+9)) arg word N
2306 SP-56 arg word 5
2307 SP-52 arg word 4
2309 Fixed arguments (must be allocated; may remain unused)
2311 SP-48 arg word 3
2312 SP-44 arg word 2
2313 SP-40 arg word 1
2314 SP-36 arg word 0
2316 Frame Marker
2318 SP-32 External Data Pointer (DP)
2319 SP-28 External sr4
2320 SP-24 External/stub RP (RP')
2321 SP-20 Current RP
2322 SP-16 Static Link
2323 SP-12 Clean up
2324 SP-8 Calling Stub RP (RP'')
2325 SP-4 Previous SP
2327 Top of Frame
2329 SP-0 Stack Pointer (points to next available address)
2333 /* This function saves registers as follows. Registers marked with ' are
2334 this function's registers (as opposed to the previous function's).
2335 If a frame_pointer isn't needed, r4 is saved as a general register;
2336 the space for the frame pointer is still allocated, though, to keep
2337 things simple.
2340 Top of Frame
2342 SP (FP') Previous FP
2343 SP + 4 Alignment filler (sigh)
2344 SP + 8 Space for locals reserved here.
2348 SP + n All call saved register used.
2352 SP + o All call saved fp registers used.
2356 SP + p (SP') points to next available address.
2360 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2361 Handle case where DISP > 8k by using the add_high_const pattern.
2363 Note in DISP > 8k case, we will leave the high part of the address
2364 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2365 static void
2366 store_reg (reg, disp, base)
2367 int reg, disp, base;
2369 if (VAL_14_BITS_P (disp))
2371 emit_move_insn (gen_rtx (MEM, SImode,
2372 gen_rtx (PLUS, SImode,
2373 gen_rtx (REG, SImode, base),
2374 GEN_INT (disp))),
2375 gen_rtx (REG, SImode, reg));
2377 else
2379 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2380 gen_rtx (REG, SImode, base),
2381 GEN_INT (disp)));
2382 emit_move_insn (gen_rtx (MEM, SImode,
2383 gen_rtx (LO_SUM, SImode,
2384 gen_rtx (REG, SImode, 1),
2385 GEN_INT (disp))),
2386 gen_rtx (REG, SImode, reg));
2390 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2391 Handle case where DISP > 8k by using the add_high_const pattern.
2393 Note in DISP > 8k case, we will leave the high part of the address
2394 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2395 static void
2396 load_reg (reg, disp, base)
2397 int reg, disp, base;
2399 if (VAL_14_BITS_P (disp))
2401 emit_move_insn (gen_rtx (REG, SImode, reg),
2402 gen_rtx (MEM, SImode,
2403 gen_rtx (PLUS, SImode,
2404 gen_rtx (REG, SImode, base),
2405 GEN_INT (disp))));
2407 else
2409 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2410 gen_rtx (REG, SImode, base),
2411 GEN_INT (disp)));
2412 emit_move_insn (gen_rtx (REG, SImode, reg),
2413 gen_rtx (MEM, SImode,
2414 gen_rtx (LO_SUM, SImode,
2415 gen_rtx (REG, SImode, 1),
2416 GEN_INT (disp))));
2420 /* Emit RTL to set REG to the value specified by BASE+DISP.
2421 Handle case where DISP > 8k by using the add_high_const pattern.
2423 Note in DISP > 8k case, we will leave the high part of the address
2424 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2425 static void
2426 set_reg_plus_d(reg, base, disp)
2427 int reg, base, disp;
2429 if (VAL_14_BITS_P (disp))
2431 emit_move_insn (gen_rtx (REG, SImode, reg),
2432 gen_rtx (PLUS, SImode,
2433 gen_rtx (REG, SImode, base),
2434 GEN_INT (disp)));
2436 else
2438 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2439 gen_rtx (REG, SImode, base),
2440 GEN_INT (disp)));
2441 emit_move_insn (gen_rtx (REG, SImode, reg),
2442 gen_rtx (LO_SUM, SImode,
2443 gen_rtx (REG, SImode, 1),
2444 GEN_INT (disp)));
2448 /* Global variables set by FUNCTION_PROLOGUE. */
2449 /* Size of frame. Need to know this to emit return insns from
2450 leaf procedures. */
2451 static int actual_fsize;
2452 static int local_fsize, save_fregs;
2455 compute_frame_size (size, fregs_live)
2456 int size;
2457 int *fregs_live;
2459 extern int current_function_outgoing_args_size;
2460 int i, fsize;
2462 /* 8 is space for frame pointer + filler. If any frame is allocated
2463 we need to add this in because of STARTING_FRAME_OFFSET. */
2464 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2466 /* We must leave enough space for all the callee saved registers
2467 from 3 .. highest used callee save register since we don't
2468 know if we're going to have an inline or out of line prologue
2469 and epilogue. */
2470 for (i = 18; i >= 3; i--)
2471 if (regs_ever_live[i])
2473 fsize += 4 * (i - 2);
2474 break;
2477 /* Round the stack. */
2478 fsize = (fsize + 7) & ~7;
2480 /* We must leave enough space for all the callee saved registers
2481 from 3 .. highest used callee save register since we don't
2482 know if we're going to have an inline or out of line prologue
2483 and epilogue. */
2484 for (i = 66; i >= 48; i -= 2)
2485 if (regs_ever_live[i] || regs_ever_live[i + 1])
2487 if (fregs_live)
2488 *fregs_live = 1;
2490 fsize += 4 * (i - 46);
2491 break;
2494 fsize += current_function_outgoing_args_size;
2495 if (! leaf_function_p () || fsize)
2496 fsize += 32;
2497 return (fsize + 63) & ~63;
2500 rtx hp_profile_label_rtx;
2501 static char hp_profile_label_name[8];
2502 void
2503 output_function_prologue (file, size)
2504 FILE *file;
2505 int size;
2507 /* The function's label and associated .PROC must never be
2508 separated and must be output *after* any profiling declarations
2509 to avoid changing spaces/subspaces within a procedure. */
2510 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2511 fputs ("\t.PROC\n", file);
2513 /* hppa_expand_prologue does the dirty work now. We just need
2514 to output the assembler directives which denote the start
2515 of a function. */
2516 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2517 if (regs_ever_live[2] || profile_flag)
2518 fputs (",CALLS,SAVE_RP", file);
2519 else
2520 fputs (",NO_CALLS", file);
2522 if (frame_pointer_needed)
2523 fputs (",SAVE_SP", file);
2525 /* Pass on information about the number of callee register saves
2526 performed in the prologue.
2528 The compiler is supposed to pass the highest register number
2529 saved, the assembler then has to adjust that number before
2530 entering it into the unwind descriptor (to account for any
2531 caller saved registers with lower register numbers than the
2532 first callee saved register). */
2533 if (gr_saved)
2534 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2536 if (fr_saved)
2537 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2539 fputs ("\n\t.ENTRY\n", file);
2541 /* Horrid hack. emit_function_prologue will modify this RTL in
2542 place to get the expected results. */
2543 if (profile_flag)
2544 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2545 hp_profile_labelno);
2547 /* If we're using GAS and not using the portable runtime model, then
2548 we don't need to accumulate the total number of code bytes. */
2549 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2550 total_code_bytes = 0;
2551 else if (insn_addresses)
2553 unsigned int old_total = total_code_bytes;
2555 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2556 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2558 /* Be prepared to handle overflows. */
2559 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2561 else
2562 total_code_bytes = -1;
2564 remove_useless_addtr_insns (get_insns (), 0);
/* Expand this function's prologue as RTL: save RP, allocate the
   stack frame (possibly via an out-of-line sequence for TARGET_SPACE),
   save callee-saved general and FP registers, and emit profiling and
   PIC register setup.  Sets the file-static gr_saved/fr_saved counts
   that output_function_prologue later reports in .CALLINFO.  */
2567 void
2568 hppa_expand_prologue()
2570 extern char call_used_regs[];
2571 int size = get_frame_size ();
2572 int merge_sp_adjust_with_store = 0;
2573 int i, offset;
2574 rtx tmpreg, size_rtx;
2576 gr_saved = 0;
2577 fr_saved = 0;
2578 save_fregs = 0;
2579 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2580 actual_fsize = compute_frame_size (size, &save_fregs);
2582 /* Compute a few things we will use often. */
2583 tmpreg = gen_rtx (REG, SImode, 1);
2584 size_rtx = GEN_INT (actual_fsize);
2586 /* Handle out of line prologues and epilogues. */
2587 if (TARGET_SPACE)
2589 rtx operands[2];
2590 int saves = 0;
2591 int outline_insn_count = 0;
2592 int inline_insn_count = 0;
2594 /* Count the number of insns for the inline and out of line
2595 variants so we can choose one appropriately.
2597 No need to screw with counting actual_fsize operations -- they're
2598 done for both inline and out of line prologues. */
2599 if (regs_ever_live[2])
2600 inline_insn_count += 1;
2602 if (! cint_ok_for_move (local_fsize))
2603 outline_insn_count += 2;
2604 else
2605 outline_insn_count += 1;
2607 /* Put the register save info into %r22. */
2608 for (i = 18; i >= 3; i--)
2609 if (regs_ever_live[i] && ! call_used_regs[i])
2611 /* -1 because the stack adjustment is normally done in
2612 the same insn as a register save. */
2613 inline_insn_count += (i - 2) - 1;
2614 saves = i;
2615 break;
2618 for (i = 66; i >= 48; i -= 2)
2619 if (regs_ever_live[i] || regs_ever_live[i + 1])
2621 /* +1 needed as we load %r1 with the start of the freg
2622 save area. */
2623 inline_insn_count += (i/2 - 23) + 1;
2624 saves |= ((i/2 - 12 ) << 16);
2625 break;
2628 if (frame_pointer_needed)
2629 inline_insn_count += 3;
2631 if (! cint_ok_for_move (saves))
2632 outline_insn_count += 2;
2633 else
2634 outline_insn_count += 1;
2636 if (TARGET_PORTABLE_RUNTIME)
2637 outline_insn_count += 2;
2638 else
2639 outline_insn_count += 1;
2641 /* If there's a lot of insns in the prologue, then do it as
2642 an out-of-line sequence. */
2643 if (inline_insn_count > outline_insn_count)
2645 /* Put the local_fsize into %r19. */
2646 operands[0] = gen_rtx (REG, SImode, 19);
2647 operands[1] = GEN_INT (local_fsize);
2648 emit_move_insn (operands[0], operands[1]);
2650 /* Put the stack size into %r21. */
2651 operands[0] = gen_rtx (REG, SImode, 21);
2652 operands[1] = size_rtx;
2653 emit_move_insn (operands[0], operands[1]);
2655 operands[0] = gen_rtx (REG, SImode, 22);
2656 operands[1] = GEN_INT (saves);
2657 emit_move_insn (operands[0], operands[1]);
2659 /* Now call the out-of-line prologue. */
2660 emit_insn (gen_outline_prologue_call ());
2661 emit_insn (gen_blockage ());
2663 /* Note that we're using an out-of-line prologue. */
2664 out_of_line_prologue_epilogue = 1;
2665 return;
2669 out_of_line_prologue_epilogue = 0;
2671 /* Save RP first. The calling conventions manual states RP will
2672 always be stored into the caller's frame at sp-20. */
2673 if (regs_ever_live[2] || profile_flag)
2674 store_reg (2, -20, STACK_POINTER_REGNUM);
2676 /* Allocate the local frame and set up the frame pointer if needed. */
2677 if (actual_fsize)
2678 if (frame_pointer_needed)
2680 /* Copy the old frame pointer temporarily into %r1. Set up the
2681 new stack pointer, then store away the saved old frame pointer
2682 into the stack at sp+actual_fsize and at the same time update
2683 the stack pointer by actual_fsize bytes. Two versions, first
2684 handles small (<8k) frames. The second handles large (>8k)
2685 frames. */
2686 emit_move_insn (tmpreg, frame_pointer_rtx);
2687 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2688 if (VAL_14_BITS_P (actual_fsize))
2689 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2690 else
2692 /* It is incorrect to store the saved frame pointer at *sp,
2693 then increment sp (writes beyond the current stack boundary).
2695 So instead use stwm to store at *sp and post-increment the
2696 stack pointer as an atomic operation. Then increment sp to
2697 finish allocating the new frame. */
2698 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2699 set_reg_plus_d (STACK_POINTER_REGNUM,
2700 STACK_POINTER_REGNUM,
2701 actual_fsize - 64);
2704 /* no frame pointer needed. */
2705 else
2707 /* In some cases we can perform the first callee register save
2708 and allocating the stack frame at the same time. If so, just
2709 make a note of it and defer allocating the frame until saving
2710 the callee registers. */
2711 if (VAL_14_BITS_P (-actual_fsize)
2712 && local_fsize == 0
2713 && ! profile_flag
2714 && ! flag_pic)
2715 merge_sp_adjust_with_store = 1;
2716 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2717 else if (actual_fsize != 0)
2718 set_reg_plus_d (STACK_POINTER_REGNUM,
2719 STACK_POINTER_REGNUM,
2720 actual_fsize);
2722 /* The hppa calling conventions say that %r19, the pic offset
2723 register, is saved at sp - 32 (in this function's frame) when
2724 generating PIC code. FIXME: What is the correct thing to do
2725 for functions which make no calls and allocate no frame? Do
2726 we need to allocate a frame, or can we just omit the save? For
2727 now we'll just omit the save. */
2728 if (actual_fsize != 0 && flag_pic)
2729 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2731 /* Profiling code.
2733 Instead of taking one argument, the counter label, as most normal
2734 mcounts do, _mcount appears to behave differently on the HPPA. It
2735 takes the return address of the caller, the address of this routine,
2736 and the address of the label. Also, it isn't magic, so
2737 argument registers have to be preserved. */
2738 if (profile_flag)
2740 int pc_offset, i, arg_offset, basereg, offsetadj;
2742 pc_offset = 4 + (frame_pointer_needed
2743 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2744 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2746 /* When the function has a frame pointer, use it as the base
2747 register for saving/restore registers. Else use the stack
2748 pointer. Adjust the offset according to the frame size if
2749 this function does not have a frame pointer. */
2751 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2752 : STACK_POINTER_REGNUM;
2753 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2755 /* Horrid hack. emit_function_prologue will modify this RTL in
2756 place to get the expected results. sprintf here is just to
2757 put something in the name. */
2758 sprintf(hp_profile_label_name, "LP$%04d", -1);
2759 hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2760 hp_profile_label_name);
2761 if (current_function_returns_struct)
2762 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2764 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2765 if (regs_ever_live [i])
2767 store_reg (i, arg_offset, basereg);
2768 /* Deal with arg_offset not fitting in 14 bits. */
2769 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2772 emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2773 emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2774 emit_move_insn (gen_rtx (REG, SImode, 24),
2775 gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2776 /* %r25 is set from within the output pattern. */
2777 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2779 /* Restore argument registers. */
2780 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2781 if (regs_ever_live [i])
2782 load_reg (i, arg_offset, basereg);
2784 if (current_function_returns_struct)
2785 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2789 /* Normal register save.
2791 Do not save the frame pointer in the frame_pointer_needed case. It
2792 was done earlier. */
2793 if (frame_pointer_needed)
2795 for (i = 18, offset = local_fsize; i >= 4; i--)
2796 if (regs_ever_live[i] && ! call_used_regs[i])
2798 store_reg (i, offset, FRAME_POINTER_REGNUM);
2799 offset += 4;
2800 gr_saved++;
2802 /* Account for %r3 which is saved in a special place. */
2803 gr_saved++;
2805 /* No frame pointer needed. */
2806 else
2808 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2809 if (regs_ever_live[i] && ! call_used_regs[i])
2811 /* If merge_sp_adjust_with_store is nonzero, then we can
2812 optimize the first GR save. */
2813 if (merge_sp_adjust_with_store)
2815 merge_sp_adjust_with_store = 0;
2816 emit_insn (gen_post_stwm (stack_pointer_rtx,
2817 gen_rtx (REG, SImode, i),
2818 GEN_INT (-offset)));
2820 else
2821 store_reg (i, offset, STACK_POINTER_REGNUM);
2822 offset += 4;
2823 gr_saved++;
2826 /* If we wanted to merge the SP adjustment with a GR save, but we never
2827 did any GR saves, then just emit the adjustment here. */
2828 if (merge_sp_adjust_with_store)
2829 set_reg_plus_d (STACK_POINTER_REGNUM,
2830 STACK_POINTER_REGNUM,
2831 actual_fsize);
2834 /* Align pointer properly (doubleword boundary). */
2835 offset = (offset + 7) & ~7;
2837 /* Floating point register store. */
2838 if (save_fregs)
2840 /* First get the frame or stack pointer to the start of the FP register
2841 save area. */
2842 if (frame_pointer_needed)
2843 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2844 else
2845 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2847 /* Now actually save the FP registers. */
2848 for (i = 66; i >= 48; i -= 2)
2850 if (regs_ever_live[i] || regs_ever_live[i + 1])
2852 emit_move_insn (gen_rtx (MEM, DFmode,
2853 gen_rtx (POST_INC, DFmode, tmpreg)),
2854 gen_rtx (REG, DFmode, i));
2855 fr_saved++;
2860 /* When generating PIC code it is necessary to save/restore the
2861 PIC register around each function call. We used to do this
2862 in the call patterns themselves, but that implementation
2863 made incorrect assumptions about using global variables to hold
2864 per-function rtl code generated in the backend.
2866 So instead, we copy the PIC register into a reserved callee saved
2867 register in the prologue. Then after each call we reload the PIC
2868 register from the callee saved register. We also reload the PIC
2869 register from the callee saved register in the epilogue to ensure the
2870 PIC register is valid at function exit.
2872 This may (depending on the exact characteristics of the function)
2873 even be more efficient.
2875 Avoid this if the callee saved register wasn't used (these are
2876 leaf functions). */
2877 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2878 emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2879 gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM))
2883 void
2884 output_function_epilogue (file, size)
2885 FILE *file;
2886 int size;
2888 rtx insn = get_last_insn ();
2889 int i;
2891 /* hppa_expand_epilogue does the dirty work now. We just need
2892 to output the assembler directives which denote the end
2893 of a function.
2895 To make debuggers happy, emit a nop if the epilogue was completely
2896 eliminated due to a volatile call as the last insn in the
2897 current function. That way the return address (in %r2) will
2898 always point to a valid instruction in the current function. */
2900 /* Get the last real insn. */
2901 if (GET_CODE (insn) == NOTE)
2902 insn = prev_real_insn (insn);
2904 /* If it is a sequence, then look inside. */
2905 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2906 insn = XVECEXP (PATTERN (insn), 0, 0);
2908 /* If insn is a CALL_INSN, then it must be a call to a volatile
2909 function (otherwise there would be epilogue insns). */
2910 if (insn && GET_CODE (insn) == CALL_INSN)
2911 fputs ("\tnop\n", file);
2913 fputs ("\t.EXIT\n\t.PROCEND\n", file);
/* Expand this function's epilogue as RTL: restore RP and the
   callee-saved general and FP registers, then deallocate the stack
   frame (or invoke the out-of-line epilogue sequence for
   TARGET_SPACE).  Mirrors hppa_expand_prologue.  */
2916 void
2917 hppa_expand_epilogue ()
2919 rtx tmpreg;
2920 int offset,i;
2921 int merge_sp_adjust_with_load = 0;
2923 /* Handle out of line prologues and epilogues. */
2924 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2926 int saves = 0;
2927 rtx operands[2];
2929 /* Put the register save info into %r22. */
2930 for (i = 18; i >= 3; i--)
2931 if (regs_ever_live[i] && ! call_used_regs[i])
2933 saves = i;
2934 break;
2937 for (i = 66; i >= 48; i -= 2)
2938 if (regs_ever_live[i] || regs_ever_live[i + 1])
2940 saves |= ((i/2 - 12 ) << 16);
2941 break;
2944 emit_insn (gen_blockage ());
2946 /* Put the local_fsize into %r19. */
2947 operands[0] = gen_rtx (REG, SImode, 19);
2948 operands[1] = GEN_INT (local_fsize);
2949 emit_move_insn (operands[0], operands[1]);
2951 /* Put the stack size into %r21. */
2952 operands[0] = gen_rtx (REG, SImode, 21);
2953 operands[1] = GEN_INT (actual_fsize);
2954 emit_move_insn (operands[0], operands[1]);
2956 operands[0] = gen_rtx (REG, SImode, 22);
2957 operands[1] = GEN_INT (saves);
2958 emit_move_insn (operands[0], operands[1]);
2960 /* Now call the out-of-line epilogue. */
2961 emit_insn (gen_outline_epilogue_call ());
2962 return;
2965 /* We will use this often. */
2966 tmpreg = gen_rtx (REG, SImode, 1);
2968 /* Try to restore RP early to avoid load/use interlocks when
2969 RP gets used in the return (bv) instruction. This appears to still
2970 be necessary even when we schedule the prologue and epilogue. */
2971 if (frame_pointer_needed
2972 && (regs_ever_live [2] || profile_flag))
2973 load_reg (2, -20, FRAME_POINTER_REGNUM);
2975 /* No frame pointer, and stack is smaller than 8k. */
2976 else if (! frame_pointer_needed
2977 && VAL_14_BITS_P (actual_fsize + 20)
2978 && (regs_ever_live[2] || profile_flag))
2979 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2981 /* General register restores. */
2982 if (frame_pointer_needed)
2984 for (i = 18, offset = local_fsize; i >= 4; i--)
2985 if (regs_ever_live[i] && ! call_used_regs[i])
2987 load_reg (i, offset, FRAME_POINTER_REGNUM);
2988 offset += 4;
2991 else
2993 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2995 if (regs_ever_live[i] && ! call_used_regs[i])
2997 /* Only for the first load.
2998 merge_sp_adjust_with_load holds the register load
2999 with which we will merge the sp adjustment. */
3000 if (VAL_14_BITS_P (actual_fsize + 20)
3001 && local_fsize == 0
3002 && ! merge_sp_adjust_with_load)
3003 merge_sp_adjust_with_load = i;
3004 else
3005 load_reg (i, offset, STACK_POINTER_REGNUM);
3006 offset += 4;
3011 /* Align pointer properly (doubleword boundary). */
3012 offset = (offset + 7) & ~7;
3014 /* FP register restores. */
3015 if (save_fregs)
3017 /* Adjust the register to index off of. */
3018 if (frame_pointer_needed)
3019 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
3020 else
3021 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3023 /* Actually do the restores now. */
3024 for (i = 66; i >= 48; i -= 2)
3026 if (regs_ever_live[i] || regs_ever_live[i + 1])
3028 emit_move_insn (gen_rtx (REG, DFmode, i),
3029 gen_rtx (MEM, DFmode,
3030 gen_rtx (POST_INC, DFmode, tmpreg)));
3035 /* Emit a blockage insn here to keep these insns from being moved to
3036 an earlier spot in the epilogue, or into the main instruction stream.
3038 This is necessary as we must not cut the stack back before all the
3039 restores are finished. */
3040 emit_insn (gen_blockage ());
3041 /* No frame pointer, but we have a stack greater than 8k. We restore
3042 %r2 very late in this case. (All other cases are restored as early
3043 as possible.) */
3044 if (! frame_pointer_needed
3045 && ! VAL_14_BITS_P (actual_fsize + 20)
3046 && (regs_ever_live[2] || profile_flag))
3048 set_reg_plus_d (STACK_POINTER_REGNUM,
3049 STACK_POINTER_REGNUM,
3050 - actual_fsize);
3052 /* This used to try and be clever by not depending on the value in
3053 %r30 and instead use the value held in %r1 (so that the 2nd insn
3054 which sets %r30 could be put in the delay slot of the return insn).
3056 That won't work since if the stack is exactly 8k set_reg_plus_d
3057 doesn't set %r1, just %r30. */
3058 load_reg (2, - 20, STACK_POINTER_REGNUM);
3061 /* Reset stack pointer (and possibly frame pointer). The stack
3062 pointer is initially set to fp + 64 to avoid a race condition. */
3063 else if (frame_pointer_needed)
3065 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3066 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3067 stack_pointer_rtx,
3068 GEN_INT (-64)));
3070 /* If we were deferring a callee register restore, do it now. */
3071 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3072 emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
3073 merge_sp_adjust_with_load),
3074 stack_pointer_rtx,
3075 GEN_INT (- actual_fsize)));
3076 else if (actual_fsize != 0)
3077 set_reg_plus_d (STACK_POINTER_REGNUM,
3078 STACK_POINTER_REGNUM,
3079 - actual_fsize)
3082 /* Fetch the return address for the frame COUNT steps up from
3083 the current frame, after the prologue. FRAMEADDR is the
3084 frame pointer of the COUNT frame.
3086 We want to ignore any export stub remnants here.
3088 The value returned is used in two different ways:
3090 1. To find a function's caller.
3092 2. To change the return address for a function.
3094 This function handles most instances of case 1; however, it will
3095 fail if there are two levels of stubs to execute on the return
3096 path. The only way I believe that can happen is if the return value
3097 needs a parameter relocation, which never happens for C code.
3099 This function handles most instances of case 2; however, it will
3100 fail if we did not originally have stub code on the return path
3101 but will need code on the new return path. This can happen if
3102 the caller & callee are both in the main program, but the new
3103 return location is in a shared library.
3105 To handle this correctly we need to set the return pointer at
3106 frame-20 to point to a return stub, and frame-24 to point to the
3107 location we wish to return to. */
/* Return an rtx for the return address of the frame COUNT steps up
   from the current frame (see the block comment above); FRAMEADDR is
   that frame's frame pointer.  Skips over an export stub if one is
   detected at the normal return point.  */
3110 return_addr_rtx (count, frameaddr)
3111 int count;
3112 rtx frameaddr;
3114 rtx label;
3115 rtx saved_rp;
3116 rtx ins;
3118 saved_rp = gen_reg_rtx (Pmode);
3120 /* First, we start off with the normal return address pointer from
3121 -20[frameaddr]. */
3123 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3125 /* Get pointer to the instruction stream. We have to mask out the
3126 privilege level from the two low order bits of the return address
3127 pointer here so that ins will point to the start of the first
3128 instruction that would have been executed if we returned. */
3129 ins = copy_to_reg (gen_rtx (AND, Pmode,
3130 copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
3131 MASK_RETURN_ADDR));
3132 label = gen_label_rtx ();
3134 /* Check the instruction stream at the normal return address for the
3135 export stub:
3137 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3138 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3139 0x00011820 | stub+16: mtsp r1,sr0
3140 0xe0400002 | stub+20: be,n 0(sr0,rp)
3142 If it is an export stub, then our return address is really in
3143 -24[frameaddr]. */
3145 emit_cmp_insn (gen_rtx (MEM, SImode, ins),
3146 GEN_INT (0x4bc23fd1),
3147 NE, NULL_RTX, SImode, 1, 0);
3148 emit_jump_insn (gen_bne (label));
3150 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
3151 GEN_INT (0x004010a1),
3152 NE, NULL_RTX, SImode, 1, 0);
3153 emit_jump_insn (gen_bne (label));
3155 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
3156 GEN_INT (0x00011820),
3157 NE, NULL_RTX, SImode, 1, 0);
3158 emit_jump_insn (gen_bne (label));
3160 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
3161 GEN_INT (0xe0400002),
3162 NE, NULL_RTX, SImode, 1, 0);
3164 /* If there is no export stub then just use our initial guess of
3165 -20[frameaddr]. */
3167 emit_jump_insn (gen_bne (label));
3169 /* Here we know that our return address pointer points to an export
3170 stub. We don't want to return the address of the export stub,
3171 but rather the return address that leads back into user code.
3172 That return address is stored at -24[frameaddr]. */
3174 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3176 emit_label (label);
3177 return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp))
3180 /* This is only valid once reload has completed because it depends on
3181 knowing exactly how much (if any) frame there is and...
3183 It's only valid if there is no frame marker to de-allocate and...
3185 It's only valid if %r2 hasn't been saved into the caller's frame
3186 (we're not profiling and %r2 isn't live anywhere). */
3188 hppa_can_use_return_insn_p ()
3190 return (reload_completed
3191 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3192 && ! profile_flag
3193 && ! regs_ever_live[2]
3194 && ! frame_pointer_needed);
3197 void
3198 emit_bcond_fp (code, operand0)
3199 enum rtx_code code;
3200 rtx operand0;
3202 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3203 gen_rtx (IF_THEN_ELSE, VOIDmode,
3204 gen_rtx (code, VOIDmode,
3205 gen_rtx (REG, CCFPmode, 0),
3206 const0_rtx),
3207 gen_rtx (LABEL_REF, VOIDmode, operand0),
3208 pc_rtx)));
3213 gen_cmp_fp (code, operand0, operand1)
3214 enum rtx_code code;
3215 rtx operand0, operand1;
3217 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3218 gen_rtx (code, CCFPmode, operand0, operand1));
3221 /* Adjust the cost of a scheduling dependency. Return the new cost of
3222 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3225 pa_adjust_cost (insn, link, dep_insn, cost)
3226 rtx insn;
3227 rtx link;
3228 rtx dep_insn;
3229 int cost;
3231 if (! recog_memoized (insn))
3232 return 0;
3234 if (REG_NOTE_KIND (link) == 0)
3236 /* Data dependency; DEP_INSN writes a register that INSN reads some
3237 cycles later. */
3239 if (get_attr_type (insn) == TYPE_FPSTORE)
3241 rtx pat = PATTERN (insn);
3242 rtx dep_pat = PATTERN (dep_insn);
3243 if (GET_CODE (pat) == PARALLEL)
3245 /* This happens for the fstXs,mb patterns. */
3246 pat = XVECEXP (pat, 0, 0);
3248 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3249 /* If this happens, we have to extend this to schedule
3250 optimally. Return 0 for now. */
3251 return 0;
3253 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3255 if (! recog_memoized (dep_insn))
3256 return 0;
3257 /* DEP_INSN is writing its result to the register
3258 being stored in the fpstore INSN. */
3259 switch (get_attr_type (dep_insn))
3261 case TYPE_FPLOAD:
3262 /* This cost 3 cycles, not 2 as the md says for the
3263 700 and 7100. Note scaling of cost for 7100. */
3264 return cost + (pa_cpu == PROCESSOR_700) ? 1 : 2;
3266 case TYPE_FPALU:
3267 case TYPE_FPMULSGL:
3268 case TYPE_FPMULDBL:
3269 case TYPE_FPDIVSGL:
3270 case TYPE_FPDIVDBL:
3271 case TYPE_FPSQRTSGL:
3272 case TYPE_FPSQRTDBL:
3273 /* In these important cases, we save one cycle compared to
3274 when flop instruction feed each other. */
3275 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3277 default:
3278 return cost;
3283 /* For other data dependencies, the default cost specified in the
3284 md is correct. */
3285 return cost;
3287 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3289 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3290 cycles later. */
3292 if (get_attr_type (insn) == TYPE_FPLOAD)
3294 rtx pat = PATTERN (insn);
3295 rtx dep_pat = PATTERN (dep_insn);
3296 if (GET_CODE (pat) == PARALLEL)
3298 /* This happens for the fldXs,mb patterns. */
3299 pat = XVECEXP (pat, 0, 0);
3301 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3302 /* If this happens, we have to extend this to schedule
3303 optimally. Return 0 for now. */
3304 return 0;
3306 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3308 if (! recog_memoized (dep_insn))
3309 return 0;
3310 switch (get_attr_type (dep_insn))
3312 case TYPE_FPALU:
3313 case TYPE_FPMULSGL:
3314 case TYPE_FPMULDBL:
3315 case TYPE_FPDIVSGL:
3316 case TYPE_FPDIVDBL:
3317 case TYPE_FPSQRTSGL:
3318 case TYPE_FPSQRTDBL:
3319 /* A fpload can't be issued until one cycle before a
3320 preceding arithmetic operation has finished if
3321 the target of the fpload is any of the sources
3322 (or destination) of the arithmetic operation. */
3323 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3325 default:
3326 return 0;
3330 else if (get_attr_type (insn) == TYPE_FPALU)
3332 rtx pat = PATTERN (insn);
3333 rtx dep_pat = PATTERN (dep_insn);
3334 if (GET_CODE (pat) == PARALLEL)
3336 /* This happens for the fldXs,mb patterns. */
3337 pat = XVECEXP (pat, 0, 0);
3339 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3340 /* If this happens, we have to extend this to schedule
3341 optimally. Return 0 for now. */
3342 return 0;
3344 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3346 if (! recog_memoized (dep_insn))
3347 return 0;
3348 switch (get_attr_type (dep_insn))
3350 case TYPE_FPDIVSGL:
3351 case TYPE_FPDIVDBL:
3352 case TYPE_FPSQRTSGL:
3353 case TYPE_FPSQRTDBL:
3354 /* An ALU flop can't be issued until two cycles before a
3355 preceding divide or sqrt operation has finished if
3356 the target of the ALU flop is any of the sources
3357 (or destination) of the divide or sqrt operation. */
3358 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3360 default:
3361 return 0;
3366 /* For other anti dependencies, the cost is 0. */
3367 return 0;
3369 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3371 /* Output dependency; DEP_INSN writes a register that INSN writes some
3372 cycles later. */
3373 if (get_attr_type (insn) == TYPE_FPLOAD)
3375 rtx pat = PATTERN (insn);
3376 rtx dep_pat = PATTERN (dep_insn);
3377 if (GET_CODE (pat) == PARALLEL)
3379 /* This happens for the fldXs,mb patterns. */
3380 pat = XVECEXP (pat, 0, 0);
3382 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3383 /* If this happens, we have to extend this to schedule
3384 optimally. Return 0 for now. */
3385 return 0;
3387 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3389 if (! recog_memoized (dep_insn))
3390 return 0;
3391 switch (get_attr_type (dep_insn))
3393 case TYPE_FPALU:
3394 case TYPE_FPMULSGL:
3395 case TYPE_FPMULDBL:
3396 case TYPE_FPDIVSGL:
3397 case TYPE_FPDIVDBL:
3398 case TYPE_FPSQRTSGL:
3399 case TYPE_FPSQRTDBL:
3400 /* A fpload can't be issued until one cycle before a
3401 preceding arithmetic operation has finished if
3402 the target of the fpload is the destination of the
3403 arithmetic operation. */
3404 return cost - (pa_cpu == PROCESSOR_700) ? 1 : 2;
3406 default:
3407 return 0;
3411 else if (get_attr_type (insn) == TYPE_FPALU)
3413 rtx pat = PATTERN (insn);
3414 rtx dep_pat = PATTERN (dep_insn);
3415 if (GET_CODE (pat) == PARALLEL)
3417 /* This happens for the fldXs,mb patterns. */
3418 pat = XVECEXP (pat, 0, 0);
3420 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3421 /* If this happens, we have to extend this to schedule
3422 optimally. Return 0 for now. */
3423 return 0;
3425 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3427 if (! recog_memoized (dep_insn))
3428 return 0;
3429 switch (get_attr_type (dep_insn))
3431 case TYPE_FPDIVSGL:
3432 case TYPE_FPDIVDBL:
3433 case TYPE_FPSQRTSGL:
3434 case TYPE_FPSQRTDBL:
3435 /* An ALU flop can't be issued until two cycles before a
3436 preceding divide or sqrt operation has finished if
3437 the target of the ALU flop is also the target of
3438 of the divide or sqrt operation. */
3439 return cost - (pa_cpu == PROCESSOR_700) ? 2 : 4;
3441 default:
3442 return 0;
3447 /* For other output dependencies, the cost is 0. */
3448 return 0;
3450 else
3451 abort ();
/* Return any length adjustment needed by INSN which already has its length
   computed as LENGTH.   Return zero if no adjustment is necessary.

   For the PA: function calls, millicode calls, and backwards short
   conditional branches with unfilled delay slots need an adjustment by +1
   (to account for the NOP which will be inserted into the instruction stream).

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
pa_adjust_insn_length (insn, length)
    rtx insn;
    int length;
{
  rtx pat = PATTERN (insn);

  /* Call insns which are *not* indirect and have unfilled delay slots.  */
  if (GET_CODE (insn) == CALL_INSN)
    {
      /* Plain call: (call (mem (symbol_ref)) ...) as element 0 of the
	 PARALLEL.  Direct (SYMBOL_REF) calls need the extra NOP word.  */
      if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
	  && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
	return 4;
      /* Call with value: (set (reg) (call (mem (symbol_ref)) ...)).  */
      else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
	       && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
		  == SYMBOL_REF)
	return 4;
      else
	/* Indirect calls (target in a register) need no adjustment.  */
	return 0;
    }
  /* Jumps inside switch tables which have unfilled delay slots
     also need adjustment.  NOTE(review): DImode on the jump pattern
     appears to be the marker for table jumps here -- confirm against
     the pa.md casesi expansion.  */
  else if (GET_CODE (insn) == JUMP_INSN
	   && simplejump_p (insn)
	   && GET_MODE (PATTERN (insn)) == DImode)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) != SEQUENCE
	   && GET_CODE (pat) != USE
	   && GET_CODE (pat) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern: (parallel [(set (mem:BLK ...) (mem:BLK ...)) ...]).
     Its full length is computed elsewhere; subtract the 4 bytes the
     length attribute already accounted for.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movstrsi_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && ! forward_branch_p (insn))
	return 4;
      /* Parallel branches (e.g. compare-and-branch variants) of minimum
	 length also need the NOP.  */
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	return 4;
      else
	return 0;
    }
  return 0;
}
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output an nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'r':
      /* A register or zero.  Integer or FP zero prints as "0";
	 anything else falls through to the generic printing below.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("0", file);
	  return;
	}
      else
	break;
    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  abort ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  abort ();
	}
      return;
    /* For floating point comparisons.  Need special conditions to deal
       with NaNs properly.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	default:
	  abort ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  abort ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  abort ();
	}
      return;
    case 'k':
      /* One's complement of a constant.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", ~INTVAL (x));
	  return;
	}
      abort();
    case 'L':
      /* 32 minus a shift count, reduced mod 32.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", 32 - (INTVAL (x) & 31));
	  return;
	}
      abort();
    case 'O':
      /* log2 of an exact power of two.  */
      if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
	{
	  fprintf (file, "%d", exact_log2 (INTVAL (x)));
	  return;
	}
      abort();
    case 'P':
      /* 31 minus a bit position, reduced mod 32.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "%d", 31 - (INTVAL (x) & 31));
	  return;
	}
      abort();
    case 'I':
      /* Print 'i' if the operand is an immediate, else nothing.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Addressing-mode completer for a memory operand; 'F' also
	 emits a plain "s" (short mode) completer for simple forms.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  fputs ("s,mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  fputs ("s,ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fputs ("x,s", file);
	  else if (code == 'F')
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F')
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      output_global_address (file, x, 0);
      return;
    case 'H':
      /* Like 'G' but with the LR-field constant rounding applied.  */
      output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      /* The three operands for a zdepi instruction.  */
      {
	unsigned op[3];
	compute_zdepi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    default:
      abort ();
    }
  /* Generic printing for registers, memory references and constants.  */
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      /* Left half of an even FP register for single-precision values.  */
      if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = XEXP (XEXP (x, 0), 0);
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
	  break;
	default:
	  /* Indexed addresses (reg + reg*scale) print as index(0,base);
	     anything else is handled by the generic address printer.  */
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(0,%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(0,%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else
	    output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.

   ROUND_CONSTANT nonzero means round the constant offset as required
   for an LR field selector (see comment in the CONST case below).  */

void
output_global_address (file, x, round_constant)
     FILE *file;
     rtx x;
     int round_constant;
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
    assemble_name (file, XSTR (x, 0));
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      assemble_name (file, XSTR (x, 0));
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base;

      /* The CONST wraps a PLUS or MINUS whose operands are a
	 SYMBOL_REF and/or a CONST_INT; pick out the symbol (BASE)
	 and the integer offset from either position.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0), 0));
      else abort ();

      if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
	{
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	}
      else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	offset = INTVAL (XEXP (XEXP (x, 0),1));
      else abort ();

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      if (GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	}
      else if (GET_CODE (XEXP (x, 0)) == MINUS
	       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
	sep = "-";
      else abort ();

      /* NOTE(review): BASE is only set when one operand was a
	 SYMBOL_REF; the aborts above appear to guarantee that --
	 confirm no (const (plus (const_int) (const_int))) can reach
	 here.  */
      if (!read_only_operand (base) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file,"%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
/* Emit the data-section words for any procedure labels (plabels)
   whose output was deferred during code generation.  */

void
output_deferred_plabels (file)
     FILE *file;
{
  int i;
  /* If we have deferred plabels, then we need to switch into the data
     section and align it to a 4 byte boundary before we output the
     deferred plabels.  */
  if (n_deferred_plabels)
    {
      data_section ();
      ASM_OUTPUT_ALIGN (file, 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      /* Each plabel is an internal label followed by a word holding
	 the address of the function it refers to.  */
      ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (gen_rtx (SYMBOL_REF, VOIDmode,
				 deferred_plabels[i].name), 4, 1);
    }
}
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
/* One flag per millicode routine: nonzero once its .IMPORT has been
   emitted.  */
static char imported[(int)end1000];
/* Routine names, each exactly 4 characters, indexed by enum millicodes.  */
static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
/* Template for the import directive; the "...." placeholder (at offset
   MILLI_START) is overwritten with the routine name.  */
static char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

/* Emit the .IMPORT directive for millicode routine CODE, but only the
   first time it is requested.  */

static void
import_milli (code)
     enum millicodes code;
{
  char str[sizeof (import_string)];

  if (!imported[(int)code])
    {
      imported[(int)code] = 1;
      strcpy (str, import_string);
      /* Splice the 4-character routine name over the "...." placeholder.  */
      strncpy (str + MILLI_START, milli_names[(int)code], 4);
      output_asm_insn (str, 0);
    }
}
/* The register constraints have put the operands and return value in
   the proper registers.  */

/* Output a millicode call to $$mulI.  UNSIGNEDP is unused here: the
   same routine is called for signed and unsigned multiply.  */

char *
output_mul_insn (unsignedp, insn)
     int unsignedp;
     rtx insn;
{
  import_milli (mulI);
  return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
}
/* Emit the rtl for doing a division by a constant. */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor value (0..15); nonzero means a $$divI_n/$$divU_n routine
   exists for that divisor.  */
static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
			   1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned. */

static int div_milli[16][2];
3971 div_operand (op, mode)
3972 rtx op;
3973 enum machine_mode mode;
3975 return (mode == SImode
3976 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3977 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3978 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
/* Emit RTL for a division by the constant in OPERANDS[2] using a magic
   millicode routine, if one exists for that value.  UNSIGNEDP selects
   UDIV vs DIV.  Returns 1 if RTL was emitted, 0 if the constant has no
   magic routine (caller must fall back to another method).

   Register convention visible here: the dividend is moved into %r26
   and the quotient is read back from %r29; %r25, %r26, %r31 and
   OPERANDS[3] are clobbered by the call.  */

emit_hpdiv_const (operands, unsignedp)
     rtx *operands;
     int unsignedp;
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      /* Put the dividend where the millicode routine expects it.  */
      emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
      emit
	(gen_rtx
	 (PARALLEL, VOIDmode,
	  gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
				 gen_rtx (unsignedp ? UDIV : DIV, SImode,
					  gen_rtx (REG, SImode, 26),
					  operands[2])),
		     gen_rtx (CLOBBER, VOIDmode, operands[3]),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
		     gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
      /* Copy the quotient out of the millicode result register.  */
      emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
      return 1;
    }
  return 0;
}
4009 char *
4010 output_div_insn (operands, unsignedp, insn)
4011 rtx *operands;
4012 int unsignedp;
4013 rtx insn;
4015 int divisor;
4017 /* If the divisor is a constant, try to use one of the special
4018 opcodes .*/
4019 if (GET_CODE (operands[0]) == CONST_INT)
4021 static char buf[100];
4022 divisor = INTVAL (operands[0]);
4023 if (!div_milli[divisor][unsignedp])
4025 div_milli[divisor][unsignedp] = 1;
4026 if (unsignedp)
4027 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4028 else
4029 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4031 if (unsignedp)
4033 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
4034 return output_millicode_call (insn,
4035 gen_rtx (SYMBOL_REF, SImode, buf));
4037 else
4039 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
4040 return output_millicode_call (insn,
4041 gen_rtx (SYMBOL_REF, SImode, buf));
4044 /* Divisor isn't a special constant. */
4045 else
4047 if (unsignedp)
4049 import_milli (divU);
4050 return output_millicode_call (insn,
4051 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
4053 else
4055 import_milli (divI);
4056 return output_millicode_call (insn,
4057 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
4062 /* Output a $$rem millicode to do mod. */
4064 char *
4065 output_mod_insn (unsignedp, insn)
4066 int unsignedp;
4067 rtx insn;
4069 if (unsignedp)
4071 import_milli (remU);
4072 return output_millicode_call (insn,
4073 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
4075 else
4077 import_milli (remI);
4078 return output_millicode_call (insn,
4079 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
/* Emit the .CALL argument-relocation descriptor for CALL_INSN,
   describing which argument words are passed in general (GR) vs
   floating-point (FR/FU) registers, based on the USEs recorded in
   CALL_INSN_FUNCTION_USAGE.  */

void
output_arg_descriptor (call_insn)
     rtx call_insn;
{
  char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* Start with no argument words assigned.  */
  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  if (GET_CODE (call_insn) != CALL_INSN)
    abort ();
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      /* Only (use (reg ...)) entries for argument registers matter.  */
      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      /* General registers %r26..%r23 map to argument words 0..3;
	 a DImode argument occupies the following word as well.  */
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      /* FP argument registers: SFmode uses one descriptor slot, DFmode
	 uses an FR/FU pair whose order depends on the host assembler.  */
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  /* Emit the descriptor, listing only the argument words in use.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
/* Return the class of any secondary reload register that is needed to
   move IN into a register in class CLASS using mode MODE.

   Profiling has showed this routine and its descendants account for
   a significant amount of compile time (~7%).  So it has been
   optimized to reduce redundant computations and eliminate useless
   function calls.

   It might be worthwhile to try and make this a leaf function too.  */

enum reg_class
secondary_reload_class (class, mode, in)
     enum reg_class class;
     enum machine_mode mode;
     rtx in;
{
  int regno, is_symbolic;

  /* Trying to load a constant into a FP register during PIC code
     generation will require %r1 as a scratch register.  */
  if (flag_pic == 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (class)
      && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
    return R1_REGS;

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside secondary_reload_class.
     Only call it when the cheap REGNO lookup isn't conclusive.  */

  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = true_regnum (in);
    }
  else if (GET_CODE (in) == SUBREG)
    regno = true_regnum (in);
  else
    regno = -1;

  /* Integer loads into FP registers from memory/pseudos, and any
     SHIFT_REGS reload from a non-general register, need a GR scratch.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    return GENERAL_REGS;

  if (GET_CODE (in) == HIGH)
    in = XEXP (in, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside secondary_reload_class.

     We use an inline copy and only compute its return value once to avoid
     useless work.  */
  switch (GET_CODE (in))
    {
      rtx tmp;

    case SYMBOL_REF:
    case LABEL_REF:
      is_symbolic = 1;
      break;
    case CONST:
      /* (const (plus (symbol_ref/label_ref) (const_int))).  */
      tmp = XEXP (in, 0);
      is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
      break;
    default:
      is_symbolic = 0;
      break;
    }

  /* Read-only symbolic operands need no scratch when not PIC.  */
  if (!flag_pic
      && is_symbolic
      && read_only_operand (in))
    return NO_REGS;

  /* Other symbolic loads need %r1 as a scratch.  */
  if (class != R1_REGS && is_symbolic)
    return R1_REGS;

  return NO_REGS;
}
4237 enum direction
4238 function_arg_padding (mode, type)
4239 enum machine_mode mode;
4240 tree type;
4242 int size;
4244 if (mode == BLKmode)
4246 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4247 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4248 else
4249 return upward; /* Don't know if this is right, but */
4250 /* same as old definition. */
4252 else
4253 size = GET_MODE_BITSIZE (mode);
4254 if (size < PARM_BOUNDARY)
4255 return downward;
4256 else if (size % PARM_BOUNDARY)
4257 return upward;
4258 else
4259 return none;
/* Do what is necessary for `va_start'.  The argument is ignored;
   We look at the current function to determine if stdargs or varargs
   is used and fill in an initial va_list.  A pointer to this constructor
   is returned.  */

struct rtx_def *
hppa_builtin_saveregs (arglist)
     tree arglist;
{
  rtx offset;
  tree fntype = TREE_TYPE (current_function_decl);
  /* For old-style varargs (no prototype ending in named arguments),
     back up one word so the va_list starts at the right place.  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  /* Store general registers on the stack.  Dumps the four GR argument
     registers (starting at hard reg 23) into the 16 bytes below the
     incoming argument pointer.  */
  move_block_from_reg (23,
		       gen_rtx (MEM, BLKmode,
				plus_constant
				(current_function_internal_arg_pointer, -16)),
		       4, 4 * UNITS_PER_WORD);
  /* Return arg_pointer + offset as the initial va_list value.  */
  return copy_to_reg (expand_binop (Pmode, add_optab,
				    current_function_internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   OPERANDS are the insn operands (%0 target label, %1/%2 the compared
   values, %3 the condition); NULLIFY requests a nullified delay slot;
   LENGTH is the branch length in bytes (4, 8, 20 or 28); NEGATED
   inverts the condition; INSN is the branch insn itself.  */

char *
output_cbranch (operands, nullify, length, negated, insn)
  rtx *operands;
  int nullify, length, negated;
  rtx insn;
{
  static char buf[100];
  int useskip = 0;

  /* A conditional branch to the following instruction (eg the delay slot) is
     asking for a disaster.  This can happen when not optimizing.

     In such cases it is safe to emit nothing.  */

  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
    return "";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "com%I2clr,");
      else
	strcpy (buf, "com%I2b,");
      if (negated)
	strcat (buf, "%B3");
      else
	strcat (buf, "%S3");
      if (useskip)
	strcat (buf, " %2,%1,0");
      else if (nullify)
	strcat (buf, ",n %2,%1,%0");
      else
	strcat (buf, " %2,%1,%0");
      break;

      /* All long conditionals.  Note an short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 with is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  /* Reverse the condition and branch around an unconditional
	     branch to the real target.  */
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && insn_addresses
	       && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
				  - insn_addresses[INSN_UID (insn)] - 8))
	{
	  strcpy (buf, "com%I2b,");
	  if (negated)
	    strcat (buf, "%B3 %2,%1,%0%#");
	  else
	    strcat (buf, "%S3 %2,%1,%0%#");
	}
      /* General case: comclr skip over an unconditional branch.  */
      else
	{
	  strcpy (buf, "com%I2clr,");
	  if (negated)
	    strcat (buf, "%S3");
	  else
	    strcat (buf, "%B3");
	  if (nullify)
	    strcat (buf, " %2,%1,0\n\tbl,n %0,0");
	  else
	    strcat (buf, " %2,%1,0\n\tbl %0,0");
	}
      break;

    case 20:
      /* Very long branch.  Right now we only handle these when not
	 optimizing.  See "jump" pattern in pa.md for details.  */
      if (optimize)
	abort ();

      /* Create a reversed conditional branch which branches around
	 the following insns.  */
      if (negated)
	strcpy (buf, "com%I2b,%S3,n %2,%1,.+20");
      else
	strcpy (buf, "com%I2b,%B3,n %2,%1,.+20");
      output_asm_insn (buf, operands);

      /* Output an insn to save %r1.  */
      output_asm_insn ("stw %%r1,-16(%%r30)", operands);

      /* Now output a very long branch to the original target.  */
      output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);

      /* Now restore the value of %r1 in the delay slot.  We're not
	 optimizing so we know nothing else can be in the delay slot.  */
      return "ldw -16(%%r30),%%r1";

    case 28:
      /* Very long branch when generating PIC code.  Right now we only
	 handle these when not optimizing.  See "jump" pattern in pa.md
	 for details.  */
      if (optimize)
	abort ();

      /* Create a reversed conditional branch which branches around
	 the following insns.  */
      if (negated)
	strcpy (buf, "com%I2b,%S3,n %2,%1,.+28");
      else
	strcpy (buf, "com%I2b,%B3,n %2,%1,.+28");
      output_asm_insn (buf, operands);

      /* Output an insn to save %r1.  */
      output_asm_insn ("stw %%r1,-16(%%r30)", operands);

      /* Now output a very long PIC branch to the original target.  */
      {
	rtx xoperands[5];

	xoperands[0] = operands[0];
	xoperands[1] = operands[1];
	xoperands[2] = operands[2];
	xoperands[3] = operands[3];
	xoperands[4] = gen_label_rtx ();

	/* PC-relative sequence: get the PC via bl .+8, then add the
	   target's offset from the label emitted between the insns.  */
	output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands);
	ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				   CODE_LABEL_NUMBER (xoperands[4]));
	output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands);
      }

      /* Now restore the value of %r1 in the delay slot.  We're not
	 optimizing so we know nothing else can be in the delay slot.  */
      return "ldw -16(%%r30),%%r1";

    default:
      abort();
    }
  return buf;
}
4474 /* This routine handles all the branch-on-bit conditional branch sequences we
4475 might need to generate. It handles nullification of delay slots,
4476 varying length branches, negated branches and all combinations of the
4477 above. it returns the appropriate output template to emit the branch. */
4479 char *
4480 output_bb (operands, nullify, length, negated, insn, which)
4481 rtx *operands;
4482 int nullify, length, negated;
4483 rtx insn;
4484 int which;
4486 static char buf[100];
4487 int useskip = 0;
4489 /* A conditional branch to the following instruction (eg the delay slot) is
4490 asking for a disaster. I do not think this can happen as this pattern
4491 is only used when optimizing; jump optimization should eliminate the
4492 jump. But be prepared just in case. */
4494 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4495 return "";
4497 /* If this is a long branch with its delay slot unfilled, set `nullify'
4498 as it can nullify the delay slot and save a nop. */
4499 if (length == 8 && dbr_sequence_length () == 0)
4500 nullify = 1;
4502 /* If this is a short forward conditional branch which did not get
4503 its delay slot filled, the delay slot can still be nullified. */
4504 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4505 nullify = forward_branch_p (insn);
4507 /* A forward branch over a single nullified insn can be done with a
4508 extrs instruction. This avoids a single cycle penalty due to
4509 mis-predicted branch if we fall through (branch not taken). */
4511 if (length == 4
4512 && next_real_insn (insn) != 0
4513 && get_attr_length (next_real_insn (insn)) == 4
4514 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4515 && nullify)
4516 useskip = 1;
4518 switch (length)
4521 /* All short conditional branches except backwards with an unfilled
4522 delay slot. */
4523 case 4:
4524 if (useskip)
4525 strcpy (buf, "extrs,");
4526 else
4527 strcpy (buf, "bb,");
4528 if ((which == 0 && negated)
4529 || (which == 1 && ! negated))
4530 strcat (buf, ">=");
4531 else
4532 strcat (buf, "<");
4533 if (useskip)
4534 strcat (buf, " %0,%1,1,0");
4535 else if (nullify && negated)
4536 strcat (buf, ",n %0,%1,%3");
4537 else if (nullify && ! negated)
4538 strcat (buf, ",n %0,%1,%2");
4539 else if (! nullify && negated)
4540 strcat (buf, "%0,%1,%3");
4541 else if (! nullify && ! negated)
4542 strcat (buf, " %0,%1,%2");
4543 break;
4545 /* All long conditionals. Note an short backward branch with an
4546 unfilled delay slot is treated just like a long backward branch
4547 with an unfilled delay slot. */
4548 case 8:
4549 /* Handle weird backwards branch with a filled delay slot
4550 with is nullified. */
4551 if (dbr_sequence_length () != 0
4552 && ! forward_branch_p (insn)
4553 && nullify)
4555 strcpy (buf, "bb,");
4556 if ((which == 0 && negated)
4557 || (which == 1 && ! negated))
4558 strcat (buf, "<");
4559 else
4560 strcat (buf, ">=");
4561 if (negated)
4562 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4563 else
4564 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4566 /* Handle short backwards branch with an unfilled delay slot.
4567 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4568 taken and untaken branches. */
4569 else if (dbr_sequence_length () == 0
4570 && ! forward_branch_p (insn)
4571 && insn_addresses
4572 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4573 - insn_addresses[INSN_UID (insn)] - 8))
4575 strcpy (buf, "bb,");
4576 if ((which == 0 && negated)
4577 || (which == 1 && ! negated))
4578 strcat (buf, ">=");
4579 else
4580 strcat (buf, "<");
4581 if (negated)
4582 strcat (buf, " %0,%1,%3%#");
4583 else
4584 strcat (buf, " %0,%1,%2%#");
4586 else
4588 strcpy (buf, "extrs,");
4589 if ((which == 0 && negated)
4590 || (which == 1 && ! negated))
4591 strcat (buf, "<");
4592 else
4593 strcat (buf, ">=");
4594 if (nullify && negated)
4595 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4596 else if (nullify && ! negated)
4597 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4598 else if (negated)
4599 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4600 else
4601 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4603 break;
4605 default:
4606 abort();
4608 return buf;
4611 /* This routine handles all the branch-on-variable-bit conditional branch
4612 sequences we might need to generate. It handles nullification of delay
4613 slots, varying length branches, negated branches and all combinations
4614 of the above. it returns the appropriate output template to emit the
4615 branch. */
4617 char *
4618 output_bvb (operands, nullify, length, negated, insn, which)
4619 rtx *operands;
4620 int nullify, length, negated;
4621 rtx insn;
4622 int which;
4624 static char buf[100];
4625 int useskip = 0;
4627 /* A conditional branch to the following instruction (eg the delay slot) is
4628 asking for a disaster. I do not think this can happen as this pattern
4629 is only used when optimizing; jump optimization should eliminate the
4630 jump. But be prepared just in case. */
4632 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4633 return "";
4635 /* If this is a long branch with its delay slot unfilled, set `nullify'
4636 as it can nullify the delay slot and save a nop. */
4637 if (length == 8 && dbr_sequence_length () == 0)
4638 nullify = 1;
4640 /* If this is a short forward conditional branch which did not get
4641 its delay slot filled, the delay slot can still be nullified. */
4642 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4643 nullify = forward_branch_p (insn);
4645 /* A forward branch over a single nullified insn can be done with a
4646 extrs instruction. This avoids a single cycle penalty due to
4647 mis-predicted branch if we fall through (branch not taken). */
4649 if (length == 4
4650 && next_real_insn (insn) != 0
4651 && get_attr_length (next_real_insn (insn)) == 4
4652 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4653 && nullify)
4654 useskip = 1;
4656 switch (length)
4659 /* All short conditional branches except backwards with an unfilled
4660 delay slot. */
4661 case 4:
4662 if (useskip)
4663 strcpy (buf, "vextrs,");
4664 else
4665 strcpy (buf, "bvb,");
4666 if ((which == 0 && negated)
4667 || (which == 1 && ! negated))
4668 strcat (buf, ">=");
4669 else
4670 strcat (buf, "<");
4671 if (useskip)
4672 strcat (buf, " %0,1,0");
4673 else if (nullify && negated)
4674 strcat (buf, ",n %0,%3");
4675 else if (nullify && ! negated)
4676 strcat (buf, ",n %0,%2");
4677 else if (! nullify && negated)
4678 strcat (buf, "%0,%3");
4679 else if (! nullify && ! negated)
4680 strcat (buf, " %0,%2");
4681 break;
4683 /* All long conditionals. Note an short backward branch with an
4684 unfilled delay slot is treated just like a long backward branch
4685 with an unfilled delay slot. */
4686 case 8:
4687 /* Handle weird backwards branch with a filled delay slot
4688 with is nullified. */
4689 if (dbr_sequence_length () != 0
4690 && ! forward_branch_p (insn)
4691 && nullify)
4693 strcpy (buf, "bvb,");
4694 if ((which == 0 && negated)
4695 || (which == 1 && ! negated))
4696 strcat (buf, "<");
4697 else
4698 strcat (buf, ">=");
4699 if (negated)
4700 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4701 else
4702 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4704 /* Handle short backwards branch with an unfilled delay slot.
4705 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4706 taken and untaken branches. */
4707 else if (dbr_sequence_length () == 0
4708 && ! forward_branch_p (insn)
4709 && insn_addresses
4710 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4711 - insn_addresses[INSN_UID (insn)] - 8))
4713 strcpy (buf, "bvb,");
4714 if ((which == 0 && negated)
4715 || (which == 1 && ! negated))
4716 strcat (buf, ">=");
4717 else
4718 strcat (buf, "<");
4719 if (negated)
4720 strcat (buf, " %0,%3%#");
4721 else
4722 strcat (buf, " %0,%2%#");
4724 else
4726 strcpy (buf, "vextrs,");
4727 if ((which == 0 && negated)
4728 || (which == 1 && ! negated))
4729 strcat (buf, "<");
4730 else
4731 strcat (buf, ">=");
4732 if (nullify && negated)
4733 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4734 else if (nullify && ! negated)
4735 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4736 else if (negated)
4737 strcat (buf, " %0,1,0\n\tbl %3,0");
4738 else
4739 strcat (buf, " %0,1,0\n\tbl %2,0");
4741 break;
4743 default:
4744 abort();
4746 return buf;
4749 /* Return the output template for emitting a dbra type insn.
4751 Note it may perform some output operations on its own before
4752 returning the final output string. */
4753 char *
4754 output_dbra (operands, insn, which_alternative)
4755 rtx *operands;
4756 rtx insn;
4757 int which_alternative;
4760 /* A conditional branch to the following instruction (eg the delay slot) is
4761 asking for a disaster. Be prepared! */
4763 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4765 if (which_alternative == 0)
4766 return "ldo %1(%0),%0";
4767 else if (which_alternative == 1)
4769 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4770 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4771 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4772 return "fldws -16(0,%%r30),%0";
4774 else
4776 output_asm_insn ("ldw %0,%4", operands);
4777 return "ldo %1(%4),%4\n\tstw %4,%0";
4781 if (which_alternative == 0)
4783 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4784 int length = get_attr_length (insn);
4786 /* If this is a long branch with its delay slot unfilled, set `nullify'
4787 as it can nullify the delay slot and save a nop. */
4788 if (length == 8 && dbr_sequence_length () == 0)
4789 nullify = 1;
4791 /* If this is a short forward conditional branch which did not get
4792 its delay slot filled, the delay slot can still be nullified. */
4793 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4794 nullify = forward_branch_p (insn);
4796 /* Handle short versions first. */
4797 if (length == 4 && nullify)
4798 return "addib,%C2,n %1,%0,%3";
4799 else if (length == 4 && ! nullify)
4800 return "addib,%C2 %1,%0,%3";
4801 else if (length == 8)
4803 /* Handle weird backwards branch with a fulled delay slot
4804 which is nullified. */
4805 if (dbr_sequence_length () != 0
4806 && ! forward_branch_p (insn)
4807 && nullify)
4808 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4809 /* Handle short backwards branch with an unfilled delay slot.
4810 Using a addb;nop rather than addi;bl saves 1 cycle for both
4811 taken and untaken branches. */
4812 else if (dbr_sequence_length () == 0
4813 && ! forward_branch_p (insn)
4814 && insn_addresses
4815 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4816 - insn_addresses[INSN_UID (insn)] - 8))
4817 return "addib,%C2 %1,%0,%3%#";
4819 /* Handle normal cases. */
4820 if (nullify)
4821 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4822 else
4823 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4825 else
4826 abort();
4828 /* Deal with gross reload from FP register case. */
4829 else if (which_alternative == 1)
4831 /* Move loop counter from FP register to MEM then into a GR,
4832 increment the GR, store the GR into MEM, and finally reload
4833 the FP register from MEM from within the branch's delay slot. */
4834 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4835 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4836 if (get_attr_length (insn) == 24)
4837 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4838 else
4839 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4841 /* Deal with gross reload from memory case. */
4842 else
4844 /* Reload loop counter from memory, the store back to memory
4845 happens in the branch's delay slot. */
4846 output_asm_insn ("ldw %0,%4", operands);
4847 if (get_attr_length (insn) == 12)
4848 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4849 else
4850 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
4854 /* Return the output template for emitting a dbra type insn.
4856 Note it may perform some output operations on its own before
4857 returning the final output string. */
4858 char *
4859 output_movb (operands, insn, which_alternative, reverse_comparison)
4860 rtx *operands;
4861 rtx insn;
4862 int which_alternative;
4863 int reverse_comparison;
4866 /* A conditional branch to the following instruction (eg the delay slot) is
4867 asking for a disaster. Be prepared! */
4869 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4871 if (which_alternative == 0)
4872 return "copy %1,%0";
4873 else if (which_alternative == 1)
4875 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4876 return "fldws -16(0,%%r30),%0";
4878 else if (which_alternative == 2)
4879 return "stw %1,%0";
4880 else
4881 return "mtsar %r1";
4884 /* Support the second variant. */
4885 if (reverse_comparison)
4886 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4888 if (which_alternative == 0)
4890 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4891 int length = get_attr_length (insn);
4893 /* If this is a long branch with its delay slot unfilled, set `nullify'
4894 as it can nullify the delay slot and save a nop. */
4895 if (length == 8 && dbr_sequence_length () == 0)
4896 nullify = 1;
4898 /* If this is a short forward conditional branch which did not get
4899 its delay slot filled, the delay slot can still be nullified. */
4900 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4901 nullify = forward_branch_p (insn);
4903 /* Handle short versions first. */
4904 if (length == 4 && nullify)
4905 return "movb,%C2,n %1,%0,%3";
4906 else if (length == 4 && ! nullify)
4907 return "movb,%C2 %1,%0,%3";
4908 else if (length == 8)
4910 /* Handle weird backwards branch with a filled delay slot
4911 which is nullified. */
4912 if (dbr_sequence_length () != 0
4913 && ! forward_branch_p (insn)
4914 && nullify)
4915 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4917 /* Handle short backwards branch with an unfilled delay slot.
4918 Using a movb;nop rather than or;bl saves 1 cycle for both
4919 taken and untaken branches. */
4920 else if (dbr_sequence_length () == 0
4921 && ! forward_branch_p (insn)
4922 && insn_addresses
4923 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4924 - insn_addresses[INSN_UID (insn)] - 8))
4925 return "movb,%C2 %1,%0,%3%#";
4926 /* Handle normal cases. */
4927 if (nullify)
4928 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4929 else
4930 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4932 else
4933 abort();
4935 /* Deal with gross reload from FP register case. */
4936 else if (which_alternative == 1)
4938 /* Move loop counter from FP register to MEM then into a GR,
4939 increment the GR, store the GR into MEM, and finally reload
4940 the FP register from MEM from within the branch's delay slot. */
4941 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4942 if (get_attr_length (insn) == 12)
4943 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
4944 else
4945 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4947 /* Deal with gross reload from memory case. */
4948 else if (which_alternative == 2)
4950 /* Reload loop counter from memory, the store back to memory
4951 happens in the branch's delay slot. */
4952 if (get_attr_length (insn) == 8)
4953 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
4954 else
4955 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
4957 /* Handle SAR as a destination. */
4958 else
4960 if (get_attr_length (insn) == 8)
4961 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
4962 else
4963 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
4968 /* INSN is a millicode call. It may have an unconditional jump in its delay
4969 slot.
4971 CALL_DEST is the routine we are calling. */
4973 char *
4974 output_millicode_call (insn, call_dest)
4975 rtx insn;
4976 rtx call_dest;
4978 int distance;
4979 rtx xoperands[4];
4980 rtx seq_insn;
4982 /* Handle common case -- empty delay slot or no jump in the delay slot,
4983 and we're sure that the branch will reach the beginning of the $CODE$
4984 subspace. */
4985 if ((dbr_sequence_length () == 0
4986 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
4987 || (dbr_sequence_length () != 0
4988 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4989 && get_attr_length (insn) == 4))
4991 xoperands[0] = call_dest;
4992 output_asm_insn ("bl %0,%%r31%#", xoperands);
4993 return "";
4996 /* This call may not reach the beginning of the $CODE$ subspace. */
4997 if (get_attr_length (insn) > 4)
4999 int delay_insn_deleted = 0;
5000 rtx xoperands[2];
5001 rtx link;
5003 /* We need to emit an inline long-call branch. */
5004 if (dbr_sequence_length () != 0
5005 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5007 /* A non-jump insn in the delay slot. By definition we can
5008 emit this insn before the call. */
5009 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5011 /* Now delete the delay insn. */
5012 PUT_CODE (NEXT_INSN (insn), NOTE);
5013 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5014 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5015 delay_insn_deleted = 1;
5018 /* If we're allowed to use be/ble instructions, then this is the
5019 best sequence to use for a long millicode call. */
5020 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
5021 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
5023 xoperands[0] = call_dest;
5024 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
5025 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
5026 output_asm_insn ("nop", xoperands);
5028 /* Pure portable runtime doesn't allow be/ble; we also don't have
5029 PIC support int he assembler/linker, so this sequence is needed. */
5030 else if (TARGET_PORTABLE_RUNTIME)
5032 xoperands[0] = call_dest;
5033 /* Get the address of our target into %r29. */
5034 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
5035 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
5037 /* Get our return address into %r31. */
5038 output_asm_insn ("blr 0,%%r31", xoperands);
5040 /* Jump to our target address in %r29. */
5041 output_asm_insn ("bv,n 0(%%r29)", xoperands);
5043 /* Empty delay slot. Note this insn gets fetched twice and
5044 executed once. To be safe we use a nop. */
5045 output_asm_insn ("nop", xoperands);
5046 return "";
5048 /* PIC long millicode call sequence. */
5049 else
5051 xoperands[0] = call_dest;
5052 xoperands[1] = gen_label_rtx ();
5053 /* Get our address + 8 into %r1. */
5054 output_asm_insn ("bl .+8,%%r1", xoperands);
5056 /* Add %r1 to the offset of our target from the next insn. */
5057 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
5058 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5059 CODE_LABEL_NUMBER (xoperands[1]));
5060 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
5062 /* Get the return address into %r31. */
5063 output_asm_insn ("blr 0,%%r31", xoperands);
5065 /* Branch to our target which is in %r1. */
5066 output_asm_insn ("bv,n 0(%%r1)", xoperands);
5068 /* Empty delay slot. Note this insn gets fetched twice and
5069 executed once. To be safe we use a nop. */
5070 output_asm_insn ("nop", xoperands);
5073 /* If we had a jump in the call's delay slot, output it now. */
5074 if (dbr_sequence_length () != 0
5075 && !delay_insn_deleted)
5077 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5078 output_asm_insn ("b,n %0", xoperands);
5080 /* Now delete the delay insn. */
5081 PUT_CODE (NEXT_INSN (insn), NOTE);
5082 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5083 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5085 return "";
5088 /* This call has an unconditional jump in its delay slot and the
5089 call is known to reach its target or the beginning of the current
5090 subspace. */
5092 /* Use the containing sequence insn's address. */
5093 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5095 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5096 - insn_addresses[INSN_UID (seq_insn)] - 8;
5098 /* If the branch was too far away, emit a normal call followed
5099 by a nop, followed by the unconditional branch.
5101 If the branch is close, then adjust %r2 from within the
5102 call's delay slot. */
5104 xoperands[0] = call_dest;
5105 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5106 if (! VAL_14_BITS_P (distance))
5107 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
5108 else
5110 xoperands[3] = gen_label_rtx ();
5111 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
5112 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5113 CODE_LABEL_NUMBER (xoperands[3]));
5116 /* Delete the jump. */
5117 PUT_CODE (NEXT_INSN (insn), NOTE);
5118 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5119 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5120 return "";
5123 extern struct obstack permanent_obstack;
5124 extern struct obstack *saveable_obstack;
5125 extern struct obstack *rtl_obstack;
5126 extern struct obstack *current_obstack;
5128 /* INSN is either a function call. It may have an unconditional jump
5129 in its delay slot.
5131 CALL_DEST is the routine we are calling. */
5133 char *
5134 output_call (insn, call_dest)
5135 rtx insn;
5136 rtx call_dest;
5138 int distance;
5139 rtx xoperands[4];
5140 rtx seq_insn;
5142 /* Handle common case -- empty delay slot or no jump in the delay slot,
5143 and we're sure that the branch will reach the beginning of the $CODE$
5144 subspace. */
5145 if ((dbr_sequence_length () == 0
5146 && get_attr_length (insn) == 8)
5147 || (dbr_sequence_length () != 0
5148 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5149 && get_attr_length (insn) == 4))
5151 xoperands[0] = call_dest;
5152 output_asm_insn ("bl %0,%%r2%#", xoperands);
5153 return "";
5156 /* This call may not reach the beginning of the $CODE$ subspace. */
5157 if (get_attr_length (insn) > 8)
5159 int delay_insn_deleted = 0;
5160 rtx xoperands[2];
5161 rtx link;
5163 /* We need to emit an inline long-call branch. Furthermore,
5164 because we're changing a named function call into an indirect
5165 function call well after the parameters have been set up, we
5166 need to make sure any FP args appear in both the integer
5167 and FP registers. Also, we need move any delay slot insn
5168 out of the delay slot. And finally, we can't rely on the linker
5169 being able to fix the call to $$dyncall! -- Yuk!. */
5170 if (dbr_sequence_length () != 0
5171 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5173 /* A non-jump insn in the delay slot. By definition we can
5174 emit this insn before the call (and in fact before argument
5175 relocating. */
5176 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5178 /* Now delete the delay insn. */
5179 PUT_CODE (NEXT_INSN (insn), NOTE);
5180 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5181 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5182 delay_insn_deleted = 1;
5185 /* Now copy any FP arguments into integer registers. */
5186 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5188 int arg_mode, regno;
5189 rtx use = XEXP (link, 0);
5190 if (! (GET_CODE (use) == USE
5191 && GET_CODE (XEXP (use, 0)) == REG
5192 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5193 continue;
5195 arg_mode = GET_MODE (XEXP (use, 0));
5196 regno = REGNO (XEXP (use, 0));
5197 /* Is it a floating point register? */
5198 if (regno >= 32 && regno <= 39)
5200 /* Copy from the FP register into an integer register
5201 (via memory). */
5202 if (arg_mode == SFmode)
5204 xoperands[0] = XEXP (use, 0);
5205 xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
5206 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
5207 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5209 else
5211 xoperands[0] = XEXP (use, 0);
5212 xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
5213 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
5214 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5215 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5220 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5221 we don't have any direct calls in that case. */
5223 int i;
5224 char *name = XSTR (call_dest, 0);
5226 /* See if we have already put this function on the list
5227 of deferred plabels. This list is generally small,
5228 so a liner search is not too ugly. If it proves too
5229 slow replace it with something faster. */
5230 for (i = 0; i < n_deferred_plabels; i++)
5231 if (strcmp (name, deferred_plabels[i].name) == 0)
5232 break;
5234 /* If the deferred plabel list is empty, or this entry was
5235 not found on the list, create a new entry on the list. */
5236 if (deferred_plabels == NULL || i == n_deferred_plabels)
5238 struct obstack *ambient_obstack = current_obstack;
5239 struct obstack *ambient_rtl_obstack = rtl_obstack;
5240 char *real_name;
5242 /* Any RTL we create here needs to live until the end of
5243 the compilation unit and therefore must live on the
5244 permanent obstack. */
5245 current_obstack = &permanent_obstack;
5246 rtl_obstack = &permanent_obstack;
5248 if (deferred_plabels == 0)
5249 deferred_plabels = (struct deferred_plabel *)
5250 xmalloc (1 * sizeof (struct deferred_plabel));
5251 else
5252 deferred_plabels = (struct deferred_plabel *)
5253 xrealloc (deferred_plabels,
5254 ((n_deferred_plabels + 1)
5255 * sizeof (struct deferred_plabel)));
5257 i = n_deferred_plabels++;
5258 deferred_plabels[i].internal_label = gen_label_rtx ();
5259 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
5260 strlen (name) + 1);
5261 strcpy (deferred_plabels[i].name, name);
5263 /* Switch back to normal obstack allocation. */
5264 current_obstack = ambient_obstack;
5265 rtl_obstack = ambient_rtl_obstack;
5267 /* Gross. We have just implicitly taken the address of this
5268 function, mark it as such. */
5269 STRIP_NAME_ENCODING (real_name, name);
5270 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5273 /* We have to load the address of the function using a procedure
5274 label (plabel). Inline plabels can lose for PIC and other
5275 cases, so avoid them by creating a 32bit plabel in the data
5276 segment. */
5277 if (flag_pic)
5279 xoperands[0] = deferred_plabels[i].internal_label;
5280 xoperands[1] = gen_label_rtx ();
5282 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5283 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5284 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5286 /* Get our address + 8 into %r1. */
5287 output_asm_insn ("bl .+8,%%r1", xoperands);
5289 /* Add %r1 to the offset of dyncall from the next insn. */
5290 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5291 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5292 CODE_LABEL_NUMBER (xoperands[1]));
5293 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5295 /* Get the return address into %r31. */
5296 output_asm_insn ("blr 0,%%r31", xoperands);
5298 /* Branch to our target which is in %r1. */
5299 output_asm_insn ("bv 0(%%r1)", xoperands);
5301 /* Copy the return address into %r2 also. */
5302 output_asm_insn ("copy %%r31,%%r2", xoperands);
5304 else
5306 xoperands[0] = deferred_plabels[i].internal_label;
5308 /* Get the address of our target into %r22. */
5309 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
5310 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
5312 /* Get the high part of the address of $dyncall into %r2, then
5313 add in the low part in the branch instruction. */
5314 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5315 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5317 /* Copy the return pointer into both %r31 and %r2. */
5318 output_asm_insn ("copy %%r31,%%r2", xoperands);
5322 /* If we had a jump in the call's delay slot, output it now. */
5323 if (dbr_sequence_length () != 0
5324 && !delay_insn_deleted)
5326 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5327 output_asm_insn ("b,n %0", xoperands);
5329 /* Now delete the delay insn. */
5330 PUT_CODE (NEXT_INSN (insn), NOTE);
5331 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5332 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5334 return "";
5337 /* This call has an unconditional jump in its delay slot and the
5338 call is known to reach its target or the beginning of the current
5339 subspace. */
5341 /* Use the containing sequence insn's address. */
5342 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5344 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5345 - insn_addresses[INSN_UID (seq_insn)] - 8;
5347 /* If the branch was too far away, emit a normal call followed
5348 by a nop, followed by the unconditional branch.
5350 If the branch is close, then adjust %r2 from within the
5351 call's delay slot. */
5353 xoperands[0] = call_dest;
5354 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5355 if (! VAL_14_BITS_P (distance))
5356 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5357 else
5359 xoperands[3] = gen_label_rtx ();
5360 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5361 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5362 CODE_LABEL_NUMBER (xoperands[3]));
5365 /* Delete the jump. */
5366 PUT_CODE (NEXT_INSN (insn), NOTE);
5367 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5368 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5369 return "";
5372 /* In HPUX 8.0's shared library scheme, special relocations are needed
5373 for function labels if they might be passed to a function
5374 in a shared library (because shared libraries don't live in code
5375 space), and special magic is needed to construct their address.
5377 For reasons too disgusting to describe storage for the new name
5378 is allocated either on the saveable_obstack (released at function
5379 exit) or on the permanent_obstack for things that can never change
5380 (libcall names for example). */
5382 void
5383 hppa_encode_label (sym, permanent)
5384 rtx sym;
5385 int permanent;
5387 char *str = XSTR (sym, 0);
5388 int len = strlen (str);
5389 char *newstr;
5391 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5392 len + 2);
5394 if (str[0] == '*')
5395 *newstr++ = *str++;
5396 strcpy (newstr + 1, str);
5397 *newstr = '@';
5398 XSTR (sym,0) = newstr;
5402 function_label_operand (op, mode)
5403 rtx op;
5404 enum machine_mode mode;
5406 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5409 /* Returns 1 if OP is a function label involved in a simple addition
5410 with a constant. Used to keep certain patterns from matching
5411 during instruction combination. */
5413 is_function_label_plus_const (op)
5414 rtx op;
5416 /* Strip off any CONST. */
5417 if (GET_CODE (op) == CONST)
5418 op = XEXP (op, 0);
5420 return (GET_CODE (op) == PLUS
5421 && function_label_operand (XEXP (op, 0), Pmode)
5422 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5425 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5426 use in fmpyadd instructions. */
5428 fmpyaddoperands (operands)
5429 rtx *operands;
5431 enum machine_mode mode = GET_MODE (operands[0]);
5433 /* Must be a floating point mode. */
5434 if (mode != SFmode && mode != DFmode)
5435 return 0;
5437 /* All modes must be the same. */
5438 if (! (mode == GET_MODE (operands[1])
5439 && mode == GET_MODE (operands[2])
5440 && mode == GET_MODE (operands[3])
5441 && mode == GET_MODE (operands[4])
5442 && mode == GET_MODE (operands[5])))
5443 return 0;
5445 /* All operands must be registers. */
5446 if (! (GET_CODE (operands[1]) == REG
5447 && GET_CODE (operands[2]) == REG
5448 && GET_CODE (operands[3]) == REG
5449 && GET_CODE (operands[4]) == REG
5450 && GET_CODE (operands[5]) == REG))
5451 return 0;
5453 /* Only 2 real operands to the addition. One of the input operands must
5454 be the same as the output operand. */
5455 if (! rtx_equal_p (operands[3], operands[4])
5456 && ! rtx_equal_p (operands[3], operands[5]))
5457 return 0;
5459 /* Inout operand of add can not conflict with any operands from multiply. */
5460 if (rtx_equal_p (operands[3], operands[0])
5461 || rtx_equal_p (operands[3], operands[1])
5462 || rtx_equal_p (operands[3], operands[2]))
5463 return 0;
5465 /* multiply can not feed into addition operands. */
5466 if (rtx_equal_p (operands[4], operands[0])
5467 || rtx_equal_p (operands[5], operands[0]))
5468 return 0;
5470 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5471 if (mode == SFmode
5472 && (REGNO (operands[0]) < 57
5473 || REGNO (operands[1]) < 57
5474 || REGNO (operands[2]) < 57
5475 || REGNO (operands[3]) < 57
5476 || REGNO (operands[4]) < 57
5477 || REGNO (operands[5]) < 57))
5478 return 0;
5480 /* Passed. Operands are suitable for fmpyadd. */
5481 return 1;
5484 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5485 use in fmpysub instructions. */
5487 fmpysuboperands (operands)
5488 rtx *operands;
5490 enum machine_mode mode = GET_MODE (operands[0]);
5492 /* Must be a floating point mode. */
5493 if (mode != SFmode && mode != DFmode)
5494 return 0;
5496 /* All modes must be the same. */
5497 if (! (mode == GET_MODE (operands[1])
5498 && mode == GET_MODE (operands[2])
5499 && mode == GET_MODE (operands[3])
5500 && mode == GET_MODE (operands[4])
5501 && mode == GET_MODE (operands[5])))
5502 return 0;
5504 /* All operands must be registers. */
5505 if (! (GET_CODE (operands[1]) == REG
5506 && GET_CODE (operands[2]) == REG
5507 && GET_CODE (operands[3]) == REG
5508 && GET_CODE (operands[4]) == REG
5509 && GET_CODE (operands[5]) == REG))
5510 return 0;
5512 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5513 operation, so operands[4] must be the same as operand[3]. */
5514 if (! rtx_equal_p (operands[3], operands[4]))
5515 return 0;
5517 /* multiply can not feed into subtraction. */
5518 if (rtx_equal_p (operands[5], operands[0]))
5519 return 0;
5521 /* Inout operand of sub can not conflict with any operands from multiply. */
5522 if (rtx_equal_p (operands[3], operands[0])
5523 || rtx_equal_p (operands[3], operands[1])
5524 || rtx_equal_p (operands[3], operands[2]))
5525 return 0;
5527 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
5528 if (mode == SFmode
5529 && (REGNO (operands[0]) < 57
5530 || REGNO (operands[1]) < 57
5531 || REGNO (operands[2]) < 57
5532 || REGNO (operands[3]) < 57
5533 || REGNO (operands[4]) < 57
5534 || REGNO (operands[5]) < 57))
5535 return 0;
5537 /* Passed. Operands are suitable for fmpysub. */
5538 return 1;
5542 plus_xor_ior_operator (op, mode)
5543 rtx op;
5544 enum machine_mode mode;
5546 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5547 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions (shift amounts of 1, 2 and 3).  */
int
shadd_constant_p (val)
     int val;
{
  return (val == 2 || val == 4 || val == 8);
}
5562 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5563 the valid constant for shadd instructions. */
5565 shadd_operand (op, mode)
5566 rtx op;
5567 enum machine_mode mode;
5569 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5572 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5575 basereg_operand (op, mode)
5576 rtx op;
5577 enum machine_mode mode;
5579 /* cse will create some unscaled indexed addresses, however; it
5580 generally isn't a win on the PA, so avoid creating unscaled
5581 indexed addresses until after cse is finished. */
5582 if (!cse_not_expected)
5583 return 0;
5585 /* Once reload has started everything is considered valid. Reload should
5586 only create indexed addresses using the stack/frame pointer, and any
5587 others were checked for validity when created by the combine pass.
5589 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5590 we don't have to worry about the braindamaged implicit space register
5591 selection using the basereg only (rather than effective address)
5592 screwing us over. */
5593 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5594 return (GET_CODE (op) == REG);
5596 /* Stack is always OK for indexing. */
5597 if (op == stack_pointer_rtx)
5598 return 1;
5600 /* While it's always safe to index off the frame pointer, it's not
5601 always profitable, particularly when the frame pointer is being
5602 eliminated. */
5603 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5604 return 1;
5606 /* The only other valid OPs are pseudo registers with
5607 REGNO_POINTER_FLAG set. */
5608 if (GET_CODE (op) != REG
5609 || REGNO (op) < FIRST_PSEUDO_REGISTER
5610 || ! register_operand (op, mode))
5611 return 0;
5613 return REGNO_POINTER_FLAG (REGNO (op));
5616 /* Return 1 if this operand is anything other than a hard register. */
5619 non_hard_reg_operand (op, mode)
5620 rtx op;
5621 enum machine_mode mode;
5623 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5626 /* Return 1 if INSN branches forward. Should be using insn_addresses
5627 to avoid walking through all the insns... */
5629 forward_branch_p (insn)
5630 rtx insn;
5632 rtx label = JUMP_LABEL (insn);
5634 while (insn)
5636 if (insn == label)
5637 break;
5638 else
5639 insn = NEXT_INSN (insn);
5642 return (insn == label);
5645 /* Return 1 if OP is an equality comparison, else return 0. */
5647 eq_neq_comparison_operator (op, mode)
5648 rtx op;
5649 enum machine_mode mode;
5651 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5654 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5656 movb_comparison_operator (op, mode)
5657 rtx op;
5658 enum machine_mode mode;
5660 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5661 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5664 /* Return 1 if INSN is in the delay slot of a call instruction. */
5666 jump_in_call_delay (insn)
5667 rtx insn;
5670 if (GET_CODE (insn) != JUMP_INSN)
5671 return 0;
5673 if (PREV_INSN (insn)
5674 && PREV_INSN (PREV_INSN (insn))
5675 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5677 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5679 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5680 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5683 else
5684 return 0;
5687 /* Output an unconditional move and branch insn. */
5689 char *
5690 output_parallel_movb (operands, length)
5691 rtx *operands;
5692 int length;
5694 /* These are the cases in which we win. */
5695 if (length == 4)
5696 return "mov%I1b,tr %1,%0,%2";
5698 /* None of these cases wins, but they don't lose either. */
5699 if (dbr_sequence_length () == 0)
5701 /* Nothing in the delay slot, fake it by putting the combined
5702 insn (the copy or add) in the delay slot of a bl. */
5703 if (GET_CODE (operands[1]) == CONST_INT)
5704 return "bl %2,0\n\tldi %1,%0";
5705 else
5706 return "bl %2,0\n\tcopy %1,%0";
5708 else
5710 /* Something in the delay slot, but we've got a long branch. */
5711 if (GET_CODE (operands[1]) == CONST_INT)
5712 return "ldi %1,%0\n\tbl %2,0";
5713 else
5714 return "copy %1,%0\n\tbl %2,0";
5718 /* Output an unconditional add and branch insn. */
5720 char *
5721 output_parallel_addb (operands, length)
5722 rtx *operands;
5723 int length;
5725 /* To make life easy we want operand0 to be the shared input/output
5726 operand and operand1 to be the readonly operand. */
5727 if (operands[0] == operands[1])
5728 operands[1] = operands[2];
5730 /* These are the cases in which we win. */
5731 if (length == 4)
5732 return "add%I1b,tr %1,%0,%3";
5734 /* None of these cases win, but they don't lose either. */
5735 if (dbr_sequence_length () == 0)
5737 /* Nothing in the delay slot, fake it by putting the combined
5738 insn (the copy or add) in the delay slot of a bl. */
5739 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5741 else
5743 /* Something in the delay slot, but we've got a long branch. */
5744 return "add%I1 %1,%0,%0\n\tbl %3,0";
5748 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5749 is used to discourage creating parallel movb/addb insns since a jump
5750 which immediately follows a call can execute in the delay slot of the
5751 call. */
5753 following_call (insn)
5754 rtx insn;
5756 /* Find the previous real insn, skipping NOTEs. */
5757 insn = PREV_INSN (insn);
5758 while (insn && GET_CODE (insn) == NOTE)
5759 insn = PREV_INSN (insn);
5761 /* Check for CALL_INSNs and millicode calls. */
5762 if (insn
5763 && (GET_CODE (insn) == CALL_INSN
5764 || (GET_CODE (insn) == INSN
5765 && GET_CODE (PATTERN (insn)) != SEQUENCE
5766 && GET_CODE (PATTERN (insn)) != USE
5767 && GET_CODE (PATTERN (insn)) != CLOBBER
5768 && get_attr_type (insn) == TYPE_MILLI)))
5769 return 1;
5771 return 0;
5774 /* We use this hook to perform a PA specific optimization which is difficult
5775 to do in earlier passes.
5777 We want the delay slots of branches within jump tables to be filled.
5778 None of the compiler passes at the moment even has the notion that a
5779 PA jump table doesn't contain addresses, but instead contains actual
5780 instructions!
5782 Because we actually jump into the table, the addresses of each entry
5783 must stay constant in relation to the beginning of the table (which
5784 itself must stay constant relative to the instruction to jump into
5785 it). I don't believe we can guarantee earlier passes of the compiler
5786 will adhere to those rules.
5788 So, late in the compilation process we find all the jump tables, and
5789 expand them into real code -- eg each entry in the jump table vector
5790 will get an appropriate label followed by a jump to the final target.
5792 Reorg and the final jump pass can then optimize these branches and
5793 fill their delay slots. We end up with smaller, more efficient code.
5795 The jump instructions within the table are special; we must be able
5796 to identify them during assembly output (if the jumps don't get filled
5797 we need to emit a nop rather than nullifying the delay slot)). We
5798 identify jumps in switch tables by marking the SET with DImode. */
5800 pa_reorg (insns)
5801 rtx insns;
5803 rtx insn;
5805 remove_useless_addtr_insns (insns, 1);
5807 pa_combine_instructions (get_insns ());
5809 /* This is fairly cheap, so always run it if optimizing. */
5810 if (optimize > 0 && !TARGET_BIG_SWITCH)
5812 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
5813 insns = get_insns ();
5814 for (insn = insns; insn; insn = NEXT_INSN (insn))
5816 rtx pattern, tmp, location;
5817 unsigned int length, i;
5819 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
5820 if (GET_CODE (insn) != JUMP_INSN
5821 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
5822 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
5823 continue;
5825 /* If needed, emit marker for the beginning of the branch table. */
5826 if (TARGET_GAS)
5827 emit_insn_before (gen_begin_brtab (), insn);
5829 pattern = PATTERN (insn);
5830 location = PREV_INSN (insn);
5831 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
5833 for (i = 0; i < length; i++)
5835 /* Emit a label before each jump to keep jump.c from
5836 removing this code. */
5837 tmp = gen_label_rtx ();
5838 LABEL_NUSES (tmp) = 1;
5839 emit_label_after (tmp, location);
5840 location = NEXT_INSN (location);
5842 if (GET_CODE (pattern) == ADDR_VEC)
5844 /* Emit the jump itself. */
5845 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
5846 tmp = emit_jump_insn_after (tmp, location);
5847 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
5848 LABEL_NUSES (JUMP_LABEL (tmp))++;
5849 location = NEXT_INSN (location);
5851 else
5853 /* Emit the jump itself. */
5854 tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 1, i), 0));
5855 tmp = emit_jump_insn_after (tmp, location);
5856 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
5857 LABEL_NUSES (JUMP_LABEL (tmp))++;
5858 location = NEXT_INSN (location);
5861 /* Emit a BARRIER after the jump. */
5862 emit_barrier_after (location);
5863 location = NEXT_INSN (location);
5866 /* If needed, emit marker for the end of the branch table. */
5867 if (TARGET_GAS)
5869 emit_insn_before (gen_end_brtab (), location);
5870 location = NEXT_INSN (location);
5871 emit_barrier_after (location);
5874 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
5875 delete_insn (insn);
5878 else if (TARGET_GAS)
5880 /* Sill need an end_brtab insn. */
5881 insns = get_insns ();
5882 for (insn = insns; insn; insn = NEXT_INSN (insn))
5884 /* Find an ADDR_VEC insn. */
5885 if (GET_CODE (insn) != JUMP_INSN
5886 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
5887 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
5888 continue;
5890 /* Now generate markers for the beginning and end of the
5891 branc table. */
5892 emit_insn_before (gen_begin_brtab (), insn);
5893 emit_insn_after (gen_end_brtab (), insn);
5898 /* The PA has a number of odd instructions which can perform multiple
5899 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
5900 it may be profitable to combine two instructions into one instruction
5901 with two outputs. It's not profitable PA2.0 machines because the
5902 two outputs would take two slots in the reorder buffers.
5904 This routine finds instructions which can be combined and combines
5905 them. We only support some of the potential combinations, and we
5906 only try common ways to find suitable instructions.
5908 * addb can add two registers or a register and a small integer
5909 and jump to a nearby (+-8k) location. Normally the jump to the
5910 nearby location is conditional on the result of the add, but by
5911 using the "true" condition we can make the jump unconditional.
5912 Thus addb can perform two independent operations in one insn.
5914 * movb is similar to addb in that it can perform a reg->reg
5915 or small immediate->reg copy and jump to a nearby (+-8k location).
5917 * fmpyadd and fmpysub can perform a FP multiply and either an
5918 FP add or FP sub if the operands of the multiply and add/sub are
5919 independent (there are other minor restrictions). Note both
5920 the fmpy and fadd/fsub can in theory move to better spots according
5921 to data dependencies, but for now we require the fmpy stay at a
5922 fixed location.
5924 * Many of the memory operations can perform pre & post updates
5925 of index registers. GCC's pre/post increment/decrement addressing
5926 is far too simple to take advantage of all the possibilities. This
5927 pass may not be suitable since those insns may not be independent.
5929 * comclr can compare two ints or an int and a register, nullify
5930 the following instruction and zero some other register. This
5931 is more difficult to use as it's harder to find an insn which
5932 will generate a comclr than finding something like an unconditional
5933 branch. (conditional moves & long branches create comclr insns).
5935 * Most arithmetic operations can conditionally skip the next
5936 instruction. They can be viewed as "perform this operation
5937 and conditionally jump to this nearby location" (where nearby
5938 is an insns away). These are difficult to use due to the
5939 branch length restrictions. */
5941 pa_combine_instructions (insns)
5942 rtx insns;
5944 rtx anchor, new;
5946 /* This can get expensive since the basic algorithm is on the
5947 order of O(n^2) (or worse). Only do it for -O2 or higher
5948 levels of optimizaton. */
5949 if (optimize < 2)
5950 return;
5952 /* Walk down the list of insns looking for "anchor" insns which
5953 may be combined with "floating" insns. As the name implies,
5954 "anchor" instructions don't move, while "floating" insns may
5955 move around. */
5956 new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
5957 new = make_insn_raw (new);
5959 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
5961 enum attr_pa_combine_type anchor_attr;
5962 enum attr_pa_combine_type floater_attr;
5964 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
5965 Also ignore any special USE insns. */
5966 if (GET_CODE (anchor) != INSN
5967 && GET_CODE (anchor) != JUMP_INSN
5968 && GET_CODE (anchor) != CALL_INSN
5969 || GET_CODE (PATTERN (anchor)) == USE
5970 || GET_CODE (PATTERN (anchor)) == CLOBBER
5971 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
5972 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
5973 continue;
5975 anchor_attr = get_attr_pa_combine_type (anchor);
5976 /* See if anchor is an insn suitable for combination. */
5977 if (anchor_attr == PA_COMBINE_TYPE_FMPY
5978 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
5979 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
5980 && ! forward_branch_p (anchor)))
5982 rtx floater;
5984 for (floater = PREV_INSN (anchor);
5985 floater;
5986 floater = PREV_INSN (floater))
5988 if (GET_CODE (floater) == NOTE
5989 || (GET_CODE (floater) == INSN
5990 && (GET_CODE (PATTERN (floater)) == USE
5991 || GET_CODE (PATTERN (floater)) == CLOBBER)))
5992 continue;
5994 /* Anything except a regular INSN will stop our search. */
5995 if (GET_CODE (floater) != INSN
5996 || GET_CODE (PATTERN (floater)) == ADDR_VEC
5997 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
5999 floater = NULL_RTX;
6000 break;
6003 /* See if FLOATER is suitable for combination with the
6004 anchor. */
6005 floater_attr = get_attr_pa_combine_type (floater);
6006 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6007 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6008 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6009 && floater_attr == PA_COMBINE_TYPE_FMPY))
6011 /* If ANCHOR and FLOATER can be combined, then we're
6012 done with this pass. */
6013 if (pa_can_combine_p (new, anchor, floater, 0,
6014 SET_DEST (PATTERN (floater)),
6015 XEXP (SET_SRC (PATTERN (floater)), 0),
6016 XEXP (SET_SRC (PATTERN (floater)), 1)))
6017 break;
6020 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6021 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
6023 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
6025 if (pa_can_combine_p (new, anchor, floater, 0,
6026 SET_DEST (PATTERN (floater)),
6027 XEXP (SET_SRC (PATTERN (floater)), 0),
6028 XEXP (SET_SRC (PATTERN (floater)), 1)))
6029 break;
6031 else
6033 if (pa_can_combine_p (new, anchor, floater, 0,
6034 SET_DEST (PATTERN (floater)),
6035 SET_SRC (PATTERN (floater)),
6036 SET_SRC (PATTERN (floater))))
6037 break;
6042 /* If we didn't find anything on the backwards scan try forwards. */
6043 if (!floater
6044 && (anchor_attr == PA_COMBINE_TYPE_FMPY
6045 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
6047 for (floater = anchor; floater; floater = NEXT_INSN (floater))
6049 if (GET_CODE (floater) == NOTE
6050 || (GET_CODE (floater) == INSN
6051 && (GET_CODE (PATTERN (floater)) == USE
6052 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6054 continue;
6056 /* Anything except a regular INSN will stop our search. */
6057 if (GET_CODE (floater) != INSN
6058 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6059 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6061 floater = NULL_RTX;
6062 break;
6065 /* See if FLOATER is suitable for combination with the
6066 anchor. */
6067 floater_attr = get_attr_pa_combine_type (floater);
6068 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6069 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6070 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6071 && floater_attr == PA_COMBINE_TYPE_FMPY))
6073 /* If ANCHOR and FLOATER can be combined, then we're
6074 done with this pass. */
6075 if (pa_can_combine_p (new, anchor, floater, 1,
6076 SET_DEST (PATTERN (floater)),
6077 XEXP (SET_SRC (PATTERN(floater)),0),
6078 XEXP(SET_SRC(PATTERN(floater)),1)))
6079 break;
6084 /* FLOATER will be nonzero if we found a suitable floating
6085 insn for combination with ANCHOR. */
6086 if (floater
6087 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6088 || anchor_attr == PA_COMBINE_TYPE_FMPY))
6090 /* Emit the new instruction and delete the old anchor. */
6091 emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
6092 gen_rtvec (2, PATTERN (anchor),
6093 PATTERN (floater))),
6094 anchor);
6095 PUT_CODE (anchor, NOTE);
6096 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6097 NOTE_SOURCE_FILE (anchor) = 0;
6099 /* Emit a special USE insn for FLOATER, then delete
6100 the floating insn. */
6101 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
6102 delete_insn (floater);
6104 continue;
6106 else if (floater
6107 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
6109 rtx temp;
6110 /* Emit the new_jump instruction and delete the old anchor. */
6111 temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
6112 gen_rtvec (2, PATTERN (anchor),
6113 PATTERN (floater))),
6114 anchor);
6115 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
6116 PUT_CODE (anchor, NOTE);
6117 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6118 NOTE_SOURCE_FILE (anchor) = 0;
6120 /* Emit a special USE insn for FLOATER, then delete
6121 the floating insn. */
6122 emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
6123 delete_insn (floater);
6124 continue;
6131 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
6132 rtx new, anchor, floater;
6133 int reversed;
6134 rtx dest, src1, src2;
6136 int insn_code_number;
6137 rtx start, end;
6139 /* Create a PARALLEL with the patterns of ANCHOR and
6140 FLOATER, try to recognize it, then test constraints
6141 for the resulting pattern.
6143 If the pattern doesn't match or the constraints
6144 aren't met keep searching for a suitable floater
6145 insn. */
6146 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
6147 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
6148 INSN_CODE (new) = -1;
6149 insn_code_number = recog_memoized (new);
6150 if (insn_code_number < 0
6151 || !constrain_operands (insn_code_number, 1))
6152 return 0;
6154 if (reversed)
6156 start = anchor;
6157 end = floater;
6159 else
6161 start = floater;
6162 end = anchor;
6165 /* There's up to three operands to consider. One
6166 output and two inputs.
6168 The output must not be used between FLOATER & ANCHOR
6169 exclusive. The inputs must not be set between
6170 FLOATER and ANCHOR exclusive. */
6172 if (reg_used_between_p (dest, start, end))
6173 return 0;
6175 if (reg_set_between_p (src1, start, end))
6176 return 0;
6178 if (reg_set_between_p (src2, start, end))
6179 return 0;
6181 /* If we get here, then everything is good. */
6182 return 1;