/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "libfuncs.h"
#include "reload.h"
#include "c-tree.h"
#include "integrate.h"
#include "function.h"
#include "obstack.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
static void pa_combine_instructions PARAMS ((rtx));
static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
static int forward_branch_p PARAMS ((rtx));
static int shadd_constant_p PARAMS ((int));
static void pa_add_gc_roots PARAMS ((void));
static void mark_deferred_plabels PARAMS ((void *));
static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
static int compute_movstrsi_length PARAMS ((rtx));
static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
static void remove_useless_addtr_insns PARAMS ((rtx, int));
static rtx store_reg PARAMS ((int, int, int));
static rtx load_reg PARAMS ((int, int, int));
static rtx set_reg_plus_d PARAMS ((int, int, int));
static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int pa_adjust_priority PARAMS ((rtx, int));
static int pa_issue_rate PARAMS ((void));
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg PARAMS ((rtx));

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

struct gcc_target targetm = TARGET_INITIALIZER;
void
override_options ()
{
  /* Default to 7100LC scheduling.  */
  if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (pa_cpu_string == NULL
	   || ! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else
    {
      warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Register global variables with the garbage collector.  */
  pa_add_gc_roots ();
}
/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}
/* Return non-zero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand, they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_MODE (op) == word_mode
	  && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}
/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
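/* Illustrative examples: 0x1fff fits in 14 bits, so a single ldo
   suffices; 0x12345800 has its low 11 bits clear, so ldil can load it;
   0xff00 is a left-shifted bit string that zdepi can deposit directly.
   0x12345 satisfies none of these and needs two instructions.  */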
/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);

  /* We consider a LO_SUM DLT reference a move_operand now since it has
     been merged into the normal movsi/movdi patterns.  */
  if (GET_CODE (op) == LO_SUM
      && GET_CODE (XEXP (op, 0)) == REG
      && REG_OK_FOR_BASE_P (XEXP (op, 0))
      && GET_CODE (XEXP (op, 1)) == UNSPEC
      && GET_MODE (op) == Pmode)
    return 1;

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1))
	      == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1))
		 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}
/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}
int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return reg_renumber && FP_REG_P (op);
}
/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}
/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}
/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}
/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}
int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && ((CONST_DOUBLE_HIGH (op) >= 0)
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}
/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}
int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}
int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}
int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}
int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
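/* Worked example: for x = 0xff00, lsb_mask = 0x0100 and
   t = ((0xff00 >> 4) + 0x0100) & ~0x00ff = 0x1000, a power of two,
   so zdepi can build the constant (an 8-bit field of ones deposited
   at bit 8 via the sign-extended 5-bit value -1).  */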
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
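/* Example: MASK = ~0xf (1...10000) gives ~MASK = 0xf; adding its lowest
   set bit yields 0x10, a power of two, so the test accepts it.  MASK =
   0x5 is rejected: ~MASK ends in ...1010 and does not collapse to a
   single bit.  */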
/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
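/* Example: MASK = 0x0ff0 is a single run of ones; adding its lowest set
   bit (0x10) gives 0x1000, a power of two, so depi can set those bits.
   MASK = 0x0f0f has two runs and is rejected.  */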
/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}
int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}
/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
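/* Example: 0x1f3 is accepted since 0x1f3 >> 4 = 0x1f and
   0x1f & 0x20 == 0; 0x2f3 is rejected because 0x2f & 0x30 != 0.  */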
int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}
int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	abort ();

      emit_move_insn (reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, reg,
				       gen_rtx_UNSPEC (Pmode,
						       gen_rtvec (1, orig),
						       0)));

      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
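/* A small worked example (illustrative, using the 0x1f MODE_FLOAT mask
   described above): for memory (X + 0x123), (0x123 & 0x1f) = 3 is below
   halfway, so we round down to Y = 0x120, compute Z = X + 0x120, and
   rewrite the reference as memory (Z + 3), whose displacement fits in
   5 bits.  */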
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }
  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (mode, orig)
     enum machine_mode mode;
     rtx orig;
{
  if (mode == GET_MODE (orig))
    return orig;

  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
    abort ();

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */
int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
					  Pmode,
					  XEXP (XEXP (operand0, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }
  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }
	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
1587 else
1589 rtx temp, set;
1591 if (reload_in_progress || reload_completed)
1593 temp = scratch_reg ? scratch_reg : operand0;
1594 /* TEMP will hold an address and maybe the actual
1595 data. We want it in WORD_MODE regardless of what mode it
1596 was originally given to us. */
1597 temp = force_mode (word_mode, temp);
1599 else
1600 temp = gen_reg_rtx (mode);
1602 /* Loading a SYMBOL_REF into a register makes that register
1603 safe to be used as the base in an indexed address.
1605 Don't mark hard registers though. That loses. */
1606 if (GET_CODE (operand0) == REG
1607 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1608 REG_POINTER (operand0) = 1;
1609 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1610 REG_POINTER (temp) = 1;
1611 if (ishighonly)
1612 set = gen_rtx_SET (mode, operand0, temp);
1613 else
1614 set = gen_rtx_SET (VOIDmode,
1615 operand0,
1616 gen_rtx_LO_SUM (mode, temp, operand1));
1618 emit_insn (gen_rtx_SET (VOIDmode,
1619 temp,
1620 gen_rtx_HIGH (mode, operand1)));
1621 emit_insn (set);
1624 return 1;
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx extend = NULL_RTX;
	  rtx temp;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (val != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  extend = GEN_INT (val >> 32);
#endif
		  operand1 = GEN_INT (nval);
		}
	    }
	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  if (GET_CODE (operand1) == CONST_INT)
	    {
	      /* Directly break constant into low and high parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT value = INTVAL (operand1);
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;
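	      /* For example, VALUE = 0x3fff gives LOW = 0x3fff >= 0x2000,
		 so HIGH is bumped to 0x4000 and LOW becomes -1; the
		 constant is then materialized as the high part 0x4000
		 plus a 14-bit displacement of -1.  */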
	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  emit_move_insn (operands[0], operands[1]);

	  if (extend != NULL_RTX)
	    emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
				 extend));

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	register tree link;
	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
	  if (TREE_VALUE (link) != 0)
	    reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space?  If
   so, SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true.  */

int
read_only_operand (operand, mode)
     rtx operand;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
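/* Illustrative trace: IMM = 0x30 has its least significant set bit at
   position 4 and shifts down to 3, whose bit 4 (0x10) is clear, so the
   result is op[0] = 3, op[1] = 31 - 4 = 27, op[2] = 4: deposit the
   value 3 as a 4-bit field ending at deposit position 27.  */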
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */
void
compute_zdepdi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  HOST_WIDE_INT lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
output_move_double (operands)
     rtx *operands;
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;
  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  Abort if we get one,
     because generating code for these cases is painful.  */

  if (optype0 != REGOP && optype1 != REGOP)
    abort ();

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we teach
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	      return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
	    abort ();

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	      return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	    }
	  else
	    abort ();
	}
    }
1990 if (optype1 == MEMOP)
1992 /* We have to output the address syntax ourselves, since print_operand
1993 doesn't deal with the addresses we want to use. Fix this later. */
1995 rtx addr = XEXP (operands[1], 0);
1996 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1998 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2000 operands[1] = XEXP (addr, 0);
2001 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2002 abort ();
2004 if (!reg_overlap_mentioned_p (high_reg, addr))
2006 /* No overlap between high target register and address
2007 register. (We do this in a non-obvious way to
2008 save a register file writeback) */
2009 if (GET_CODE (addr) == POST_INC)
2010 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2011 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2013 else
2015 /* This is an undefined situation. We should load into the
2016 address register *and* update that register. Probably
2017 we don't need to handle this at all. */
2018 if (GET_CODE (addr) == POST_INC)
2019 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2020 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2023 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2025 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2027 operands[1] = XEXP (addr, 0);
2028 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2029 abort ();
2031 if (!reg_overlap_mentioned_p (high_reg, addr))
2033 /* No overlap between high target register and address
2034 register. (We do this in a non-obvious way to
2035 save a register file writeback) */
2036 if (GET_CODE (addr) == PRE_INC)
2037 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2038 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2040 else
2042 /* This is an undefined situation. We should load into the
2043 address register *and* update that register. Probably
2044 we don't need to handle this at all. */
2045 if (GET_CODE (addr) == PRE_INC)
2046 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2047 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2050 else if (GET_CODE (addr) == PLUS
2051 && GET_CODE (XEXP (addr, 0)) == MULT)
2053 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2055 if (!reg_overlap_mentioned_p (high_reg, addr))
rtx xoperands[4];
2059 xoperands[0] = high_reg;
2060 xoperands[1] = XEXP (addr, 1);
2061 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2062 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2063 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2064 xoperands);
2065 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2067 else
rtx xoperands[4];
2071 xoperands[0] = high_reg;
2072 xoperands[1] = XEXP (addr, 1);
2073 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2074 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2075 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2076 xoperands);
2077 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2082 /* If an operand is an unoffsettable memory ref, find a register
2083 we can increment temporarily to make it refer to the second word. */
2085 if (optype0 == MEMOP)
2086 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2088 if (optype1 == MEMOP)
2089 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2091 /* Ok, we can do one word at a time.
2092 Normally we do the low-numbered word first.
2094 In either case, set up in LATEHALF the operands to use
2095 for the high-numbered word and in some cases alter the
2096 operands in OPERANDS to be suitable for the low-numbered word. */
2098 if (optype0 == REGOP)
2099 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2100 else if (optype0 == OFFSOP)
2101 latehalf[0] = adjust_address (operands[0], SImode, 4);
2102 else
2103 latehalf[0] = operands[0];
2105 if (optype1 == REGOP)
2106 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2107 else if (optype1 == OFFSOP)
2108 latehalf[1] = adjust_address (operands[1], SImode, 4);
2109 else if (optype1 == CNSTOP)
2110 split_double (operands[1], &operands[1], &latehalf[1]);
2111 else
2112 latehalf[1] = operands[1];
2114 /* If the first move would clobber the source of the second one,
2115 do them in the other order.
2117 This can happen in two cases:
2119 mem -> register where the first half of the destination register
2120 is the same register used in the memory's address. Reload
2121 can create such insns.
2123 mem in this case will be either register indirect or register
2124 indirect plus a valid offset.
register -> register move where REGNO (dst) == REGNO (src) + 1.
Someone (Tim/Tege?) claimed this can happen for parameter loads.
2129 Handle mem -> register case first. */
2130 if (optype0 == REGOP
2131 && (optype1 == MEMOP || optype1 == OFFSOP)
2132 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2133 operands[1], 0))
2135 /* Do the late half first. */
2136 if (addreg1)
2137 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2138 output_asm_insn (singlemove_string (latehalf), latehalf);
2140 /* Then clobber. */
2141 if (addreg1)
2142 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2143 return singlemove_string (operands);
2146 /* Now handle register -> register case. */
2147 if (optype0 == REGOP && optype1 == REGOP
2148 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2150 output_asm_insn (singlemove_string (latehalf), latehalf);
2151 return singlemove_string (operands);
2154 /* Normal case: do the two words, low-numbered first. */
2156 output_asm_insn (singlemove_string (operands), operands);
2158 /* Make any unoffsettable addresses point at high-numbered word. */
2159 if (addreg0)
2160 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2161 if (addreg1)
2162 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2164 /* Do that word. */
2165 output_asm_insn (singlemove_string (latehalf), latehalf);
2167 /* Undo the adds we just did. */
2168 if (addreg0)
2169 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2170 if (addreg1)
2171 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2173 return "";
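/* Illustrative example (hypothetical register numbers): for a
   doubleword load r4/r5 <- mem at 8(%r4), the destination's first
   half overlaps the address register, so the clobber check above
   emits the late half first:

	ldw 12(%r4),%r5
	ldw 8(%r4),%r4

   rather than destroying %r4 before the second load.  */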
2176 const char *
2177 output_fp_move_double (operands)
2178 rtx *operands;
2180 if (FP_REG_P (operands[0]))
2182 if (FP_REG_P (operands[1])
2183 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2184 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2185 else
2186 output_asm_insn ("fldd%F1 %1,%0", operands);
2188 else if (FP_REG_P (operands[1]))
2190 output_asm_insn ("fstd%F0 %1,%0", operands);
2192 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2194 if (GET_CODE (operands[0]) == REG)
2196 rtx xoperands[2];
2197 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2198 xoperands[0] = operands[0];
2199 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
/* This is a pain.  You have to be prepared to deal with an
arbitrary address here, including pre/post increment/decrement,
so avoid this in the MD.  */
2205 else
2206 abort ();
2208 else abort ();
2209 return "";
2212 /* Return a REG that occurs in ADDR with coefficient 1.
2213 ADDR can be effectively incremented by incrementing REG. */
2215 static rtx
2216 find_addr_reg (addr)
2217 rtx addr;
2219 while (GET_CODE (addr) == PLUS)
2221 if (GET_CODE (XEXP (addr, 0)) == REG)
2222 addr = XEXP (addr, 0);
2223 else if (GET_CODE (XEXP (addr, 1)) == REG)
2224 addr = XEXP (addr, 1);
2225 else if (CONSTANT_P (XEXP (addr, 0)))
2226 addr = XEXP (addr, 1);
2227 else if (CONSTANT_P (XEXP (addr, 1)))
2228 addr = XEXP (addr, 0);
2229 else
2230 abort ();
2232 if (GET_CODE (addr) == REG)
2233 return addr;
2234 abort ();
2237 /* Emit code to perform a block move.
2239 OPERANDS[0] is the destination pointer as a REG, clobbered.
2240 OPERANDS[1] is the source pointer as a REG, clobbered.
OPERANDS[2] is a register for temporary storage.
OPERANDS[3] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT.
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register.  */
2247 const char *
2248 output_block_move (operands, size_is_constant)
2249 rtx *operands;
2250 int size_is_constant ATTRIBUTE_UNUSED;
2252 int align = INTVAL (operands[5]);
2253 unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than four bytes at a time because the PA
has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2257 if (align > 4)
2258 align = 4;
2260 /* Note that we know each loop below will execute at least twice
2261 (else we would have open-coded the copy). */
2262 switch (align)
2264 case 4:
2265 /* Pre-adjust the loop counter. */
2266 operands[4] = GEN_INT (n_bytes - 8);
2267 output_asm_insn ("ldi %4,%2", operands);
2269 /* Copying loop. */
2270 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2271 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2272 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2273 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2274 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2276 /* Handle the residual. There could be up to 7 bytes of
2277 residual to copy! */
2278 if (n_bytes % 8 != 0)
2280 operands[4] = GEN_INT (n_bytes % 4);
2281 if (n_bytes % 8 >= 4)
2282 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2283 if (n_bytes % 4 != 0)
2284 output_asm_insn ("ldw 0(%1),%6", operands);
2285 if (n_bytes % 8 >= 4)
2286 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2287 if (n_bytes % 4 != 0)
2288 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2290 return "";
2292 case 2:
2293 /* Pre-adjust the loop counter. */
2294 operands[4] = GEN_INT (n_bytes - 4);
2295 output_asm_insn ("ldi %4,%2", operands);
2297 /* Copying loop. */
2298 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2299 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2300 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2301 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2302 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2304 /* Handle the residual. */
2305 if (n_bytes % 4 != 0)
2307 if (n_bytes % 4 >= 2)
2308 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2309 if (n_bytes % 2 != 0)
2310 output_asm_insn ("ldb 0(%1),%6", operands);
2311 if (n_bytes % 4 >= 2)
2312 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2313 if (n_bytes % 2 != 0)
2314 output_asm_insn ("stb %6,0(%0)", operands);
2316 return "";
2318 case 1:
2319 /* Pre-adjust the loop counter. */
2320 operands[4] = GEN_INT (n_bytes - 2);
2321 output_asm_insn ("ldi %4,%2", operands);
2323 /* Copying loop. */
2324 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2325 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2326 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2327 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2328 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2330 /* Handle the residual. */
2331 if (n_bytes % 2 != 0)
2333 output_asm_insn ("ldb 0(%1),%3", operands);
2334 output_asm_insn ("stb %3,0(%0)", operands);
2336 return "";
2338 default:
2339 abort ();
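/* Worked example (editor's sketch): a word-aligned copy of 22 bytes,
   shown in the GAS syntax variant, with hypothetical registers %s,
   %d, %c, %t1 and %t2 standing for operands 1, 0, 2, 3 and 6:

	ldi 14,%c		; loop counter = 22 - 8
	ldw,ma 4(%s),%t1	; copying loop, 8 bytes per iteration
	ldw,ma 4(%s),%t2
	stw,ma %t1,4(%d)
	addib,>= -8,%c,.-12
	stw,ma %t2,4(%d)
	ldw,ma 4(%s),%t1	; residual: 22 % 8 == 6 bytes
	ldw 0(%s),%t2
	stw,ma %t1,4(%d)
	stby,e %t2,2(%d)	; final 22 % 4 == 2 bytes  */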
2343 /* Count the number of insns necessary to handle this block move.
2345 Basic structure is the same as emit_block_move, except that we
2346 count insns rather than emit them. */
2348 static int
2349 compute_movstrsi_length (insn)
2350 rtx insn;
2352 rtx pat = PATTERN (insn);
2353 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2354 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
2355 unsigned int n_insns = 0;
/* We can't move more than four bytes at a time because the PA
has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2359 if (align > 4)
2360 align = 4;
2362 /* The basic copying loop. */
2363 n_insns = 6;
2365 /* Residuals. */
2366 if (n_bytes % (2 * align) != 0)
2368 if ((n_bytes % (2 * align)) >= align)
2369 n_insns += 2;
2371 if ((n_bytes % align) != 0)
2372 n_insns += 2;
2375 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2376 return n_insns * 4;
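/* Worked example: for align == 4 and n_bytes == 22, the basic loop
   costs 6 insns; 22 % 8 == 6 is nonzero and >= 4 (+2 insns), and
   22 % 4 == 2 is nonzero (+2 insns), giving 10 insns, i.e. a length
   of 40 bytes.  This matches the 10-insn sequence sketched after
   output_block_move above.  */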
2380 const char *
2381 output_and (operands)
2382 rtx *operands;
2384 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2386 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2387 int ls0, ls1, ms0, p, len;
2389 for (ls0 = 0; ls0 < 32; ls0++)
2390 if ((mask & (1 << ls0)) == 0)
2391 break;
2393 for (ls1 = ls0; ls1 < 32; ls1++)
2394 if ((mask & (1 << ls1)) != 0)
2395 break;
2397 for (ms0 = ls1; ms0 < 32; ms0++)
2398 if ((mask & (1 << ms0)) == 0)
2399 break;
2401 if (ms0 != 32)
2402 abort ();
2404 if (ls1 == 32)
2406 len = ls0;
2408 if (len == 0)
2409 abort ();
2411 operands[2] = GEN_INT (len);
2412 return "{extru|extrw,u} %1,31,%2,%0";
2414 else
2416 /* We could use this `depi' for the case above as well, but `depi'
2417 requires one more register file access than an `extru'. */
2419 p = 31 - ls0;
2420 len = ls1 - ls0;
2422 operands[2] = GEN_INT (p);
2423 operands[3] = GEN_INT (len);
2424 return "{depi|depwi} 0,%2,%3,%0";
2427 else
2428 return "and %1,%2,%0";
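/* Worked examples: for operands[2] == 0xff the mask is a low-order
   bitstring (ls1 reaches 32), so len == ls0 == 8 and we emit
   "{extru|extrw,u} %1,31,8,%0".  For operands[2] == 0xffff00ff the
   zeros occupy bits 8..15, so p == 31 - 8 == 23 and len == 8, and we
   emit "{depi|depwi} 0,23,8,%0".  */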
2431 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2432 storing the result in operands[0]. */
2433 const char *
2434 output_64bit_and (operands)
2435 rtx *operands;
2437 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2439 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2440 int ls0, ls1, ms0, p, len;
2442 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2443 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2444 break;
2446 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2447 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2448 break;
2450 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2451 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2452 break;
2454 if (ms0 != HOST_BITS_PER_WIDE_INT)
2455 abort ();
2457 if (ls1 == HOST_BITS_PER_WIDE_INT)
2459 len = ls0;
2461 if (len == 0)
2462 abort ();
2464 operands[2] = GEN_INT (len);
2465 return "extrd,u %1,63,%2,%0";
2467 else
/* We could use this `depdi' for the case above as well, but `depdi'
requires one more register file access than an `extrd,u'.  */
2472 p = 63 - ls0;
2473 len = ls1 - ls0;
2475 operands[2] = GEN_INT (p);
2476 operands[3] = GEN_INT (len);
2477 return "depdi 0,%2,%3,%0";
2480 else
2481 return "and %1,%2,%0";
2484 const char *
2485 output_ior (operands)
2486 rtx *operands;
2488 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2489 int bs0, bs1, p, len;
2491 if (INTVAL (operands[2]) == 0)
2492 return "copy %1,%0";
2494 for (bs0 = 0; bs0 < 32; bs0++)
2495 if ((mask & (1 << bs0)) != 0)
2496 break;
2498 for (bs1 = bs0; bs1 < 32; bs1++)
2499 if ((mask & (1 << bs1)) == 0)
2500 break;
2502 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2503 abort ();
2505 p = 31 - bs0;
2506 len = bs1 - bs0;
2508 operands[2] = GEN_INT (p);
2509 operands[3] = GEN_INT (len);
2510 return "{depi|depwi} -1,%2,%3,%0";
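/* Worked example: for operands[2] == 0xff0 the set bits run from
   bs0 == 4 up to bs1 == 12, so p == 31 - 4 == 27 and len == 8,
   yielding "{depi|depwi} -1,27,8,%0".  */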
/* Return a string to perform a bitwise inclusive-or of operands[1]
with operands[2], storing the result in operands[0].  */
2515 const char *
2516 output_64bit_ior (operands)
2517 rtx *operands;
2519 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2520 int bs0, bs1, p, len;
2522 if (INTVAL (operands[2]) == 0)
2523 return "copy %1,%0";
2525 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2526 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2527 break;
2529 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2530 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2531 break;
2533 if (bs1 != HOST_BITS_PER_WIDE_INT
2534 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2535 abort ();
2537 p = 63 - bs0;
2538 len = bs1 - bs0;
2540 operands[2] = GEN_INT (p);
2541 operands[3] = GEN_INT (len);
2542 return "depdi -1,%2,%3,%0";
2545 /* Target hook for assembling integer objects. This code handles
2546 aligned SI and DI integers specially, since function references must
2547 be preceded by P%. */
2549 static bool
2550 pa_assemble_integer (x, size, aligned_p)
2551 rtx x;
2552 unsigned int size;
2553 int aligned_p;
2555 if (size == UNITS_PER_WORD && aligned_p
2556 && function_label_operand (x, VOIDmode))
fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2559 output_addr_const (asm_out_file, x);
2560 fputc ('\n', asm_out_file);
2561 return true;
2563 return default_assemble_integer (x, size, aligned_p);
2566 /* Output an ascii string. */
2567 void
2568 output_ascii (file, p, size)
2569 FILE *file;
2570 const char *p;
2571 int size;
2573 int i;
2574 int chars_output;
2575 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2577 /* The HP assembler can only take strings of 256 characters at one
2578 time. This is a limitation on input line length, *not* the
2579 length of the string. Sigh. Even worse, it seems that the
2580 restriction is in number of input characters (see \xnn &
2581 \whatever). So we have to do this very carefully. */
2583 fputs ("\t.STRING \"", file);
2585 chars_output = 0;
2586 for (i = 0; i < size; i += 4)
2588 int co = 0;
2589 int io = 0;
2590 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2592 register unsigned int c = (unsigned char) p[i + io];
2594 if (c == '\"' || c == '\\')
2595 partial_output[co++] = '\\';
2596 if (c >= ' ' && c < 0177)
2597 partial_output[co++] = c;
2598 else
2600 unsigned int hexd;
2601 partial_output[co++] = '\\';
2602 partial_output[co++] = 'x';
hexd = c / 16 + '0';
2604 if (hexd > '9')
2605 hexd -= '9' - 'a' + 1;
2606 partial_output[co++] = hexd;
hexd = c % 16 + '0';
2608 if (hexd > '9')
2609 hexd -= '9' - 'a' + 1;
2610 partial_output[co++] = hexd;
2613 if (chars_output + co > 243)
2615 fputs ("\"\n\t.STRING \"", file);
2616 chars_output = 0;
2618 fwrite (partial_output, 1, co, file);
2619 chars_output += co;
2620 co = 0;
2622 fputs ("\"\n", file);
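/* Illustrative example: for the three input bytes 'A', '"' and '\n'
   this routine emits

	.STRING "A\"\x0a"

   The quote is backslash-escaped, and the unprintable newline comes
   out as a two-digit lowercase hex escape.  */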
2625 /* Try to rewrite floating point comparisons & branches to avoid
2626 useless add,tr insns.
2628 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2629 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2630 first attempt to remove useless add,tr insns. It is zero
2631 for the second pass as reorg sometimes leaves bogus REG_DEAD
2632 notes lying around.
2634 When CHECK_NOTES is zero we can only eliminate add,tr insns
2635 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2636 instructions. */
2637 static void
2638 remove_useless_addtr_insns (insns, check_notes)
2639 rtx insns;
2640 int check_notes;
2642 rtx insn;
2643 static int pass = 0;
2645 /* This is fairly cheap, so always run it when optimizing. */
2646 if (optimize > 0)
2648 int fcmp_count = 0;
2649 int fbranch_count = 0;
2651 /* Walk all the insns in this function looking for fcmp & fbranch
2652 instructions. Keep track of how many of each we find. */
2653 insns = get_insns ();
2654 for (insn = insns; insn; insn = next_insn (insn))
2656 rtx tmp;
2658 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2659 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2660 continue;
2662 tmp = PATTERN (insn);
2664 /* It must be a set. */
2665 if (GET_CODE (tmp) != SET)
2666 continue;
2668 /* If the destination is CCFP, then we've found an fcmp insn. */
2669 tmp = SET_DEST (tmp);
2670 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2672 fcmp_count++;
2673 continue;
2676 tmp = PATTERN (insn);
2677 /* If this is an fbranch instruction, bump the fbranch counter. */
2678 if (GET_CODE (tmp) == SET
2679 && SET_DEST (tmp) == pc_rtx
2680 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2681 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2682 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2683 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2685 fbranch_count++;
2686 continue;
2691 /* Find all floating point compare + branch insns. If possible,
2692 reverse the comparison & the branch to avoid add,tr insns. */
2693 for (insn = insns; insn; insn = next_insn (insn))
2695 rtx tmp, next;
2697 /* Ignore anything that isn't an INSN. */
2698 if (GET_CODE (insn) != INSN)
2699 continue;
2701 tmp = PATTERN (insn);
2703 /* It must be a set. */
2704 if (GET_CODE (tmp) != SET)
2705 continue;
2707 /* The destination must be CCFP, which is register zero. */
2708 tmp = SET_DEST (tmp);
2709 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2710 continue;
2712 /* INSN should be a set of CCFP.
2714 See if the result of this insn is used in a reversed FP
2715 conditional branch. If so, reverse our condition and
2716 the branch. Doing so avoids useless add,tr insns. */
2717 next = next_insn (insn);
2718 while (next)
2720 /* Jumps, calls and labels stop our search. */
2721 if (GET_CODE (next) == JUMP_INSN
2722 || GET_CODE (next) == CALL_INSN
2723 || GET_CODE (next) == CODE_LABEL)
2724 break;
2726 /* As does another fcmp insn. */
2727 if (GET_CODE (next) == INSN
2728 && GET_CODE (PATTERN (next)) == SET
2729 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2730 && REGNO (SET_DEST (PATTERN (next))) == 0)
2731 break;
2733 next = next_insn (next);
/* Is NEXT a branch?  */
2737 if (next
2738 && GET_CODE (next) == JUMP_INSN)
2740 rtx pattern = PATTERN (next);
/* If it is a reversed fp conditional branch (e.g., uses add,tr)
2743 and CCFP dies, then reverse our conditional and the branch
2744 to avoid the add,tr. */
2745 if (GET_CODE (pattern) == SET
2746 && SET_DEST (pattern) == pc_rtx
2747 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2748 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2749 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2750 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2751 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2752 && (fcmp_count == fbranch_count
2753 || (check_notes
2754 && find_regno_note (next, REG_DEAD, 0))))
2756 /* Reverse the branch. */
2757 tmp = XEXP (SET_SRC (pattern), 1);
2758 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2759 XEXP (SET_SRC (pattern), 2) = tmp;
2760 INSN_CODE (next) = -1;
2762 /* Reverse our condition. */
2763 tmp = PATTERN (insn);
2764 PUT_CODE (XEXP (tmp, 1),
2765 (reverse_condition_maybe_unordered
2766 (GET_CODE (XEXP (tmp, 1)))));
2772 pass = !pass;
2776 /* You may have trouble believing this, but this is the 32 bit HP-PA
2777 stack layout. Wow.
2779 Offset Contents
2781 Variable arguments (optional; any number may be allocated)
2783 SP-(4*(N+9)) arg word N
2785 SP-56 arg word 5
2786 SP-52 arg word 4
2788 Fixed arguments (must be allocated; may remain unused)
2790 SP-48 arg word 3
2791 SP-44 arg word 2
2792 SP-40 arg word 1
2793 SP-36 arg word 0
2795 Frame Marker
2797 SP-32 External Data Pointer (DP)
2798 SP-28 External sr4
2799 SP-24 External/stub RP (RP')
2800 SP-20 Current RP
2801 SP-16 Static Link
2802 SP-12 Clean up
2803 SP-8 Calling Stub RP (RP'')
2804 SP-4 Previous SP
2806 Top of Frame
2808 SP-0 Stack Pointer (points to next available address)
2812 /* This function saves registers as follows. Registers marked with ' are
2813 this function's registers (as opposed to the previous function's).
2814 If a frame_pointer isn't needed, r4 is saved as a general register;
2815 the space for the frame pointer is still allocated, though, to keep
2816 things simple.
2819 Top of Frame
2821 SP (FP') Previous FP
2822 SP + 4 Alignment filler (sigh)
2823 SP + 8 Space for locals reserved here.
SP + n		All call-saved registers used.

SP + o		All call-saved fp registers used.
2835 SP + p (SP') points to next available address.
/* Global variables set by hppa_expand_prologue().  */
2840 /* Size of frame. Need to know this to emit return insns from
2841 leaf procedures. */
2842 static int actual_fsize;
2843 static int local_fsize, save_fregs;
2845 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2846 Handle case where DISP > 8k by using the add_high_const patterns.
Note that in the DISP > 8k case, we will leave the high part of the
address in %r1.  There is code in hppa_expand_{prologue,epilogue} that
knows this.  */
2851 static rtx
2852 store_reg (reg, disp, base)
2853 int reg, disp, base;
2855 rtx i, dest, src, basereg;
2857 src = gen_rtx_REG (word_mode, reg);
2858 basereg = gen_rtx_REG (Pmode, base);
2859 if (VAL_14_BITS_P (disp))
2861 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
2862 i = emit_move_insn (dest, src);
2864 else
2866 rtx delta = GEN_INT (disp);
2867 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
2868 rtx tmpreg = gen_rtx_REG (Pmode, 1);
2869 emit_move_insn (tmpreg, high);
2870 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
2871 i = emit_move_insn (dest, src);
2873 return i;
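/* Illustrative example (hypothetical numbers): store_reg (3, 8, 30)
   emits a single store, "stw %r3,8(%r30)" on a 32-bit target.  With a
   displacement outside 14 bits, say 16384, the HIGH/LO_SUM split
   above would typically assemble to

	addil L'16384,%r30
	stw %r3,R'16384(%r1)

   leaving the high part of the address in %r1 as noted.  */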
2876 /* Emit RTL to set REG to the value specified by BASE+DISP.
2877 Handle case where DISP > 8k by using the add_high_const patterns.
Note that in the DISP > 8k case, we will leave the high part of the
address in %r1.  There is code in hppa_expand_{prologue,epilogue} that
knows this.  */
2882 static rtx
2883 set_reg_plus_d (reg, base, disp)
2884 int reg, base, disp;
2886 rtx i;
2888 if (VAL_14_BITS_P (disp))
2890 i = emit_move_insn (gen_rtx_REG (Pmode, reg),
2891 plus_constant (gen_rtx_REG (Pmode, base), disp));
2893 else
2895 rtx delta = GEN_INT (disp);
2896 emit_move_insn (gen_rtx_REG (Pmode, 1),
2897 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
2898 gen_rtx_HIGH (Pmode, delta)));
2899 i = emit_move_insn (gen_rtx_REG (Pmode, reg),
2900 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
2901 delta));
2903 return i;
int
compute_frame_size (size, fregs_live)
2908 int size;
2909 int *fregs_live;
2911 int i, fsize;
2913 /* Space for frame pointer + filler. If any frame is allocated
2914 we need to add this in because of STARTING_FRAME_OFFSET.
2916 Similar code also appears in hppa_expand_prologue. Change both
2917 of them at the same time. */
2918 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
2920 /* Account for space used by the callee general register saves. */
2921 for (i = 18; i >= 3; i--)
2922 if (regs_ever_live[i])
2923 fsize += UNITS_PER_WORD;
2925 /* Round the stack. */
2926 fsize = (fsize + 7) & ~7;
2928 /* Account for space used by the callee floating point register saves. */
2929 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
2930 if (regs_ever_live[i]
2931 || (! TARGET_64BIT && regs_ever_live[i + 1]))
2933 if (fregs_live)
2934 *fregs_live = 1;
2936 /* We always save both halves of the FP register, so always
2937 increment the frame size by 8 bytes. */
2938 fsize += 8;
2941 /* The various ABIs include space for the outgoing parameters in the
2942 size of the current function's stack frame. */
2943 fsize += current_function_outgoing_args_size;
2945 /* Allocate space for the fixed frame marker. This space must be
2946 allocated for any function that makes calls or otherwise allocates
2947 stack space. */
2948 if (!current_function_is_leaf || fsize)
2949 fsize += TARGET_64BIT ? 16 : 32;
2951 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
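/* Worked example (hedged: assumes a 32-bit target, an 8-byte
   STARTING_FRAME_OFFSET and a 64-byte STACK_BOUNDARY): a non-leaf
   function with 40 bytes of locals, %r3/%r4 live and 16 bytes of
   outgoing arguments starts at 40 + 8 == 48; the two register saves
   make it 56, outgoing arguments 72, the frame marker 104, and the
   final rounding returns 128.  */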
2954 /* Generate the assembly code for function entry. FILE is a stdio
2955 stream to output the code to. SIZE is an int: how many units of
2956 temporary storage to allocate.
2958 Refer to the array `regs_ever_live' to determine which registers to
2959 save; `regs_ever_live[I]' is nonzero if register number I is ever
2960 used in the function. This function is responsible for knowing
2961 which registers should not be saved even if used. */
2963 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
2964 of memory. If any fpu reg is used in the function, we allocate
2965 such a block here, at the bottom of the frame, just in case it's needed.
2967 If this function is a leaf procedure, then we may choose not
2968 to do a "save" insn. The decision about whether or not
2969 to do this is made in regclass.c. */
2971 void
2972 pa_output_function_prologue (file, size)
2973 FILE *file;
2974 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2976 /* The function's label and associated .PROC must never be
2977 separated and must be output *after* any profiling declarations
2978 to avoid changing spaces/subspaces within a procedure. */
2979 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2980 fputs ("\t.PROC\n", file);
2982 /* hppa_expand_prologue does the dirty work now. We just need
2983 to output the assembler directives which denote the start
2984 of a function. */
2985 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2986 if (regs_ever_live[2])
2987 fputs (",CALLS,SAVE_RP", file);
2988 else
2989 fputs (",NO_CALLS", file);
2991 if (frame_pointer_needed)
2992 fputs (",SAVE_SP", file);
2994 /* Pass on information about the number of callee register saves
2995 performed in the prologue.
2997 The compiler is supposed to pass the highest register number
2998 saved, the assembler then has to adjust that number before
2999 entering it into the unwind descriptor (to account for any
3000 caller saved registers with lower register numbers than the
3001 first callee saved register). */
3002 if (gr_saved)
3003 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3005 if (fr_saved)
3006 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3008 fputs ("\n\t.ENTRY\n", file);
3010 /* If we're using GAS and not using the portable runtime model, then
3011 we don't need to accumulate the total number of code bytes. */
3012 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
3013 total_code_bytes = 0;
3014 else if (INSN_ADDRESSES_SET_P ())
3016 unsigned int old_total = total_code_bytes;
3018 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn ()));
3019 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3021 /* Be prepared to handle overflows. */
3022 if (old_total > total_code_bytes)
3023 total_code_bytes = -1;
3025 else
3026 total_code_bytes = -1;
3028 remove_useless_addtr_insns (get_insns (), 0);
3031 #if DO_FRAME_NOTES
3032 #define FRP(INSN) \
3033 do \
3035 rtx insn = INSN; \
3036 RTX_FRAME_RELATED_P (insn) = 1; \
3038 while (0)
3039 #else
3040 #define FRP(INSN) INSN
3041 #endif
3043 void
3044 hppa_expand_prologue ()
3046 extern char call_used_regs[];
3047 int size = get_frame_size ();
3048 int merge_sp_adjust_with_store = 0;
3049 int i, offset;
3050 rtx tmpreg, size_rtx;
3052 gr_saved = 0;
3053 fr_saved = 0;
3054 save_fregs = 0;
3056 /* Allocate space for frame pointer + filler. If any frame is allocated
3057 we need to add this in because of STARTING_FRAME_OFFSET.
3059 Similar code also appears in compute_frame_size. Change both
3060 of them at the same time. */
3061 local_fsize = size + (size || frame_pointer_needed
3062 ? STARTING_FRAME_OFFSET : 0);
3064 actual_fsize = compute_frame_size (size, &save_fregs);
3066 /* Compute a few things we will use often. */
3067 tmpreg = gen_rtx_REG (word_mode, 1);
3068 size_rtx = GEN_INT (actual_fsize);
3070 /* Save RP first. The calling conventions manual states RP will
3071 always be stored into the caller's frame at sp - 20 or sp - 16
3072 depending on which ABI is in use. */
3073 if (regs_ever_live[2])
3074 FRP (store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM));
3076 /* Allocate the local frame and set up the frame pointer if needed. */
3077 if (actual_fsize != 0)
3079 if (frame_pointer_needed)
3081 /* Copy the old frame pointer temporarily into %r1. Set up the
3082 new stack pointer, then store away the saved old frame pointer
3083 into the stack at sp+actual_fsize and at the same time update
3084 the stack pointer by actual_fsize bytes. Two versions, first
3085 handles small (<8k) frames. The second handles large (>=8k)
3086 frames. */
3087 emit_move_insn (tmpreg, frame_pointer_rtx);
3088 FRP (emit_move_insn (frame_pointer_rtx, stack_pointer_rtx));
3089 if (VAL_14_BITS_P (actual_fsize))
3091 rtx insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
3092 size_rtx));
3093 if (DO_FRAME_NOTES)
3095 rtvec vec;
3096 RTX_FRAME_RELATED_P (insn) = 1;
3097 vec = gen_rtvec (2,
3098 gen_rtx_SET (VOIDmode,
3099 gen_rtx_MEM (word_mode,
3100 stack_pointer_rtx),
3101 frame_pointer_rtx),
3102 gen_rtx_SET (VOIDmode,
3103 stack_pointer_rtx,
3104 gen_rtx_PLUS (word_mode,
3105 stack_pointer_rtx,
3106 size_rtx)));
3107 REG_NOTES (insn)
3108 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3109 gen_rtx_SEQUENCE (VOIDmode, vec),
3110 REG_NOTES (insn));
3113 else
3115 /* It is incorrect to store the saved frame pointer at *sp,
3116 then increment sp (writes beyond the current stack boundary).
3118 So instead use stwm to store at *sp and post-increment the
3119 stack pointer as an atomic operation. Then increment sp to
3120 finish allocating the new frame. */
3121 int adjust1 = 8192 - 64;
3122 int adjust2 = actual_fsize - adjust1;
3123 rtx delta = GEN_INT (adjust1);
3124 rtx insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
3125 delta));
3126 if (DO_FRAME_NOTES)
3128 rtvec vec;
3129 RTX_FRAME_RELATED_P (insn) = 1;
3130 vec = gen_rtvec (2,
3131 gen_rtx_SET (VOIDmode,
3132 gen_rtx_MEM (word_mode,
3133 stack_pointer_rtx),
3134 frame_pointer_rtx),
3135 gen_rtx_SET (VOIDmode,
3136 stack_pointer_rtx,
3137 gen_rtx_PLUS (word_mode,
3138 stack_pointer_rtx,
3139 delta)));
3140 REG_NOTES (insn)
3141 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3142 gen_rtx_SEQUENCE (VOIDmode, vec),
3143 REG_NOTES (insn));
3146 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3147 STACK_POINTER_REGNUM,
3148 adjust2));
3150 /* Prevent register spills from being scheduled before the
3151 stack pointer is raised. Necessary as we will be storing
3152 registers using the frame pointer as a base register, and
3153 we happen to set fp before raising sp. */
3154 emit_insn (gen_blockage ());
/* No frame pointer needed.  */
3157 else
3159 /* In some cases we can perform the first callee register save
3160 and allocating the stack frame at the same time. If so, just
3161 make a note of it and defer allocating the frame until saving
3162 the callee registers. */
3163 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3164 merge_sp_adjust_with_store = 1;
/* Cannot optimize.  Adjust the stack frame by actual_fsize
bytes.  */
3167 else
3168 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3169 STACK_POINTER_REGNUM,
3170 actual_fsize));
3174 /* Normal register save.
3176 Do not save the frame pointer in the frame_pointer_needed case. It
3177 was done earlier. */
3178 if (frame_pointer_needed)
3180 for (i = 18, offset = local_fsize; i >= 4; i--)
3181 if (regs_ever_live[i] && ! call_used_regs[i])
3183 FRP (store_reg (i, offset, FRAME_POINTER_REGNUM));
3184 offset += UNITS_PER_WORD;
3185 gr_saved++;
3187 /* Account for %r3 which is saved in a special place. */
3188 gr_saved++;
3190 /* No frame pointer needed. */
3191 else
3193 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3194 if (regs_ever_live[i] && ! call_used_regs[i])
3196 /* If merge_sp_adjust_with_store is nonzero, then we can
3197 optimize the first GR save. */
3198 if (merge_sp_adjust_with_store)
3200 rtx delta = GEN_INT (-offset);
3201 merge_sp_adjust_with_store = 0;
3202 FRP (emit_insn (gen_post_store (stack_pointer_rtx,
3203 gen_rtx_REG (word_mode, i),
3204 delta)));
3206 else
3207 FRP (store_reg (i, offset, STACK_POINTER_REGNUM));
3208 offset += UNITS_PER_WORD;
3209 gr_saved++;
3212 /* If we wanted to merge the SP adjustment with a GR save, but we never
3213 did any GR saves, then just emit the adjustment here. */
3214 if (merge_sp_adjust_with_store)
3215 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3216 STACK_POINTER_REGNUM,
3217 actual_fsize));
3220 /* The hppa calling conventions say that %r19, the pic offset
3221 register, is saved at sp - 32 (in this function's frame)
3222 when generating PIC code. FIXME: What is the correct thing
3223 to do for functions which make no calls and allocate no
3224 frame? Do we need to allocate a frame, or can we just omit
3225 the save? For now we'll just omit the save. */
3226 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3227 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3229 /* Align pointer properly (doubleword boundary). */
3230 offset = (offset + 7) & ~7;
3232 /* Floating point register store. */
3233 if (save_fregs)
3235 /* First get the frame or stack pointer to the start of the FP register
3236 save area. */
3237 if (frame_pointer_needed)
3238 FRP (set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset));
3239 else
3240 FRP (set_reg_plus_d (1, STACK_POINTER_REGNUM, offset));
3242 /* Now actually save the FP registers. */
3243 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3245 if (regs_ever_live[i]
3246 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3248 rtx addr, reg;
3249 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3250 reg = gen_rtx_REG (DFmode, i);
3251 FRP (emit_move_insn (addr, reg));
3252 fr_saved++;
3258 /* ?!? Do we want frame notes in the epilogue yet? */
3259 #undef DO_FRAME_NOTES
3260 #define DO_FRAME_NOTES 0
3261 #undef FRP
3262 #define FRP(INSN) INSN
3264 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3265 Handle case where DISP > 8k by using the add_high_const patterns. */
3267 static rtx
3268 load_reg (reg, disp, base)
3269 int reg, disp, base;
3271 rtx i, src, dest, basereg;
3273 dest = gen_rtx_REG (word_mode, reg);
3274 basereg = gen_rtx_REG (Pmode, base);
3275 if (VAL_14_BITS_P (disp))
3277 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3278 i = emit_move_insn (dest, src);
3280 else
3282 rtx delta = GEN_INT (disp);
3283 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3284 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3285 emit_move_insn (tmpreg, high);
3286 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3287 i = emit_move_insn (dest, src);
3289 return i;
3292 /* This function generates the assembly code for function exit.
3293 Args are as for output_function_prologue ().
3295 The function epilogue should not depend on the current stack
3296 pointer! It should use the frame pointer only. This is mandatory
3297 because of alloca; we also take advantage of it to omit stack
3298 adjustments before returning. */
3300 static void
3301 pa_output_function_epilogue (file, size)
3302 FILE *file;
3303 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3305 rtx insn = get_last_insn ();
3307 /* hppa_expand_epilogue does the dirty work now. We just need
3308 to output the assembler directives which denote the end
3309 of a function.
3311 To make debuggers happy, emit a nop if the epilogue was completely
3312 eliminated due to a volatile call as the last insn in the
3313 current function. That way the return address (in %r2) will
3314 always point to a valid instruction in the current function. */
3316 /* Get the last real insn. */
3317 if (GET_CODE (insn) == NOTE)
3318 insn = prev_real_insn (insn);
3320 /* If it is a sequence, then look inside. */
3321 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3322 insn = XVECEXP (PATTERN (insn), 0, 0);
3324 /* If insn is a CALL_INSN, then it must be a call to a volatile
3325 function (otherwise there would be epilogue insns). */
3326 if (insn && GET_CODE (insn) == CALL_INSN)
3327 fputs ("\tnop\n", file);
3329 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3332 void
3333 hppa_expand_epilogue ()
3335 rtx tmpreg;
3336 int offset, i;
3337 int merge_sp_adjust_with_load = 0;
3338 int ret_off = 0;
3340 /* We will use this often. */
3341 tmpreg = gen_rtx_REG (word_mode, 1);
3343 /* Try to restore RP early to avoid load/use interlocks when
3344 RP gets used in the return (bv) instruction. This appears to still
3345 be necessary even when we schedule the prologue and epilogue. */
3346 if (regs_ever_live [2])
3348 ret_off = TARGET_64BIT ? -16 : -20;
3349 if (frame_pointer_needed)
3351 FRP (load_reg (2, ret_off, FRAME_POINTER_REGNUM));
3352 ret_off = 0;
3354 else
3356 /* No frame pointer, and stack is smaller than 8k. */
3357 if (VAL_14_BITS_P (ret_off - actual_fsize))
3359 FRP (load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM));
3360 ret_off = 0;
3365 /* General register restores. */
3366 if (frame_pointer_needed)
3368 for (i = 18, offset = local_fsize; i >= 4; i--)
3369 if (regs_ever_live[i] && ! call_used_regs[i])
3371 FRP (load_reg (i, offset, FRAME_POINTER_REGNUM));
3372 offset += UNITS_PER_WORD;
3375 else
3377 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3379 if (regs_ever_live[i] && ! call_used_regs[i])
/* Only for the first load.
merge_sp_adjust_with_load holds the register number of the
load with which we will merge the sp adjustment.  */
3384 if (merge_sp_adjust_with_load == 0
3385 && local_fsize == 0
3386 && VAL_14_BITS_P (-actual_fsize))
3387 merge_sp_adjust_with_load = i;
3388 else
3389 FRP (load_reg (i, offset, STACK_POINTER_REGNUM));
3390 offset += UNITS_PER_WORD;
3395 /* Align pointer properly (doubleword boundary). */
3396 offset = (offset + 7) & ~7;
3398 /* FP register restores. */
3399 if (save_fregs)
3401 /* Adjust the register to index off of. */
3402 if (frame_pointer_needed)
3403 FRP (set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset));
3404 else
3405 FRP (set_reg_plus_d (1, STACK_POINTER_REGNUM, offset));
3407 /* Actually do the restores now. */
3408 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3409 if (regs_ever_live[i]
3410 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3412 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3413 rtx dest = gen_rtx_REG (DFmode, i);
3414 FRP (emit_move_insn (dest, src));
3418 /* Emit a blockage insn here to keep these insns from being moved to
3419 an earlier spot in the epilogue, or into the main instruction stream.
3421 This is necessary as we must not cut the stack back before all the
3422 restores are finished. */
3423 emit_insn (gen_blockage ());
3425 /* Reset stack pointer (and possibly frame pointer). The stack
3426 pointer is initially set to fp + 64 to avoid a race condition. */
3427 if (frame_pointer_needed)
3429 rtx delta = GEN_INT (-64);
3430 FRP (set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64));
3431 FRP (emit_insn (gen_pre_load (frame_pointer_rtx,
3432 stack_pointer_rtx,
3433 delta)));
3435 /* If we were deferring a callee register restore, do it now. */
3436 else if (merge_sp_adjust_with_load)
3438 rtx delta = GEN_INT (-actual_fsize);
3439 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3440 FRP (emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)));
3442 else if (actual_fsize != 0)
3443 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3444 STACK_POINTER_REGNUM,
3445 - actual_fsize));
3447 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3448 frame greater than 8k), do so now. */
3449 if (ret_off != 0)
3450 FRP (load_reg (2, ret_off, STACK_POINTER_REGNUM));
rtx
hppa_pic_save_rtx ()
3456 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3459 void
3460 hppa_profile_hook (label_no)
3461 int label_no ATTRIBUTE_UNUSED;
3463 rtx call_insn;
3465 /* No profiling for inline functions. We don't want extra calls to
3466 _mcount when the inline function is expanded. Even if that made
3467 sense, it wouldn't work here as there is no function label for
3468 the inline expansion. */
3469 if (DECL_INLINE (cfun->decl))
3470 return;
3472 if (TARGET_64BIT)
3473 emit_move_insn (arg_pointer_rtx,
3474 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3475 GEN_INT (64)));
3477 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3479 #ifndef NO_PROFILE_COUNTERS
3481 rtx count_label_rtx, addr, r24;
3482 char label_name[16];
3484 ASM_GENERATE_INTERNAL_LABEL (label_name, "LP", label_no);
3485 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (label_name));
3487 if (flag_pic)
3489 rtx tmpreg;
3491 current_function_uses_pic_offset_table = 1;
3492 tmpreg = gen_rtx_REG (Pmode, 1);
3493 emit_move_insn (tmpreg,
3494 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3495 gen_rtx_HIGH (Pmode, count_label_rtx)));
3496 addr = gen_rtx_MEM (Pmode,
3497 gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx));
3499 else
3501 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3502 emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, count_label_rtx));
3503 addr = gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx);
3505 r24 = gen_rtx_REG (Pmode, 24);
3506 emit_move_insn (r24, addr);
3508 /* %r25 is set from within the output pattern. */
3509 call_insn =
3510 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3511 GEN_INT (TARGET_64BIT ? 24 : 12),
3512 XEXP (DECL_RTL (cfun->decl), 0)));
3514 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3516 #else
3517 /* %r25 is set from within the output pattern. */
3518 call_insn =
3519 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3520 GEN_INT (TARGET_64BIT ? 16 : 8),
3521 XEXP (DECL_RTL (cfun->decl), 0)));
3522 #endif
3524 /* Indicate the _mcount call cannot throw, nor will it execute a
3525 non-local goto. */
3526 REG_NOTES (call_insn)
3527 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3529 if (flag_pic)
3531 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3532 if (TARGET_64BIT)
3533 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3535 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3539 /* Fetch the return address for the frame COUNT steps up from
3540 the current frame, after the prologue. FRAMEADDR is the
3541 frame pointer of the COUNT frame.
3543 We want to ignore any export stub remnants here. To handle this,
3544 we examine the code at the return address, and if it is an export
3545 stub, we return a memory rtx for the stub return address stored
3546 at frame-24.
3548 The value returned is used in two different ways:
3550 1. To find a function's caller.
3552 2. To change the return address for a function.
3554 This function handles most instances of case 1; however, it will
3555 fail if there are two levels of stubs to execute on the return
3556 path. The only way I believe that can happen is if the return value
3557 needs a parameter relocation, which never happens for C code.
3559 This function handles most instances of case 2; however, it will
3560 fail if we did not originally have stub code on the return path
3561 but will need stub code on the new return path. This can happen if
3562 the caller & callee are both in the main program, but the new
3563 return location is in a shared library. */
rtx
return_addr_rtx (count, frameaddr)
3567 int count;
3568 rtx frameaddr;
3570 rtx label;
3571 rtx rp;
3572 rtx saved_rp;
3573 rtx ins;
3575 if (count != 0)
3576 return NULL_RTX;
3578 rp = get_hard_reg_initial_val (Pmode, 2);
3580 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3581 return rp;
3583 saved_rp = gen_reg_rtx (Pmode);
3584 emit_move_insn (saved_rp, rp);
3586 /* Get pointer to the instruction stream. We have to mask out the
3587 privilege level from the two low order bits of the return address
3588 pointer here so that ins will point to the start of the first
3589 instruction that would have been executed if we returned. */
3590 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3591 label = gen_label_rtx ();
3593 /* Check the instruction stream at the normal return address for the
3594 export stub:
3596 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3597 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3598 0x00011820 | stub+16: mtsp r1,sr0
3599 0xe0400002 | stub+20: be,n 0(sr0,rp)
If it is an export stub, then our return address is really in
3602 -24[frameaddr]. */
3604 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3605 NULL_RTX, SImode, 1);
3606 emit_jump_insn (gen_bne (label));
3608 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3609 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3610 emit_jump_insn (gen_bne (label));
3612 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3613 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
3614 emit_jump_insn (gen_bne (label));
3616 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3617 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
3619 /* If there is no export stub then just use the value saved from
3620 the return pointer register. */
3622 emit_jump_insn (gen_bne (label));
3624 /* Here we know that our return address points to an export
3625 stub. We don't want to return the address of the export stub,
3626 but rather the return address of the export stub. That return
3627 address is stored at -24[frameaddr]. */
3629 emit_move_insn (saved_rp,
3630 gen_rtx_MEM (Pmode,
3631 memory_address (Pmode,
3632 plus_constant (frameaddr,
3633 -24))));
3635 emit_label (label);
3636 return saved_rp;
3639 /* This is only valid once reload has completed because it depends on
3640 knowing exactly how much (if any) frame there is and...
3642 It's only valid if there is no frame marker to de-allocate and...
3644 It's only valid if %r2 hasn't been saved into the caller's frame
3645 (we're not profiling and %r2 isn't live anywhere). */
int
hppa_can_use_return_insn_p ()
3649 return (reload_completed
3650 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3651 && ! regs_ever_live[2]
3652 && ! frame_pointer_needed);
3655 void
3656 emit_bcond_fp (code, operand0)
3657 enum rtx_code code;
3658 rtx operand0;
3660 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3661 gen_rtx_IF_THEN_ELSE (VOIDmode,
3662 gen_rtx_fmt_ee (code,
3663 VOIDmode,
3664 gen_rtx_REG (CCFPmode, 0),
3665 const0_rtx),
3666 gen_rtx_LABEL_REF (VOIDmode, operand0),
3667 pc_rtx)));
rtx
gen_cmp_fp (code, operand0, operand1)
3673 enum rtx_code code;
3674 rtx operand0, operand1;
3676 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3677 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
/* Adjust the cost of a scheduling dependency.  Return the new cost of
a dependency LINK of INSN on DEP_INSN.  COST is the current cost.  */
3683 static int
3684 pa_adjust_cost (insn, link, dep_insn, cost)
3685 rtx insn;
3686 rtx link;
3687 rtx dep_insn;
3688 int cost;
3690 enum attr_type attr_type;
3692 /* Don't adjust costs for a pa8000 chip. */
3693 if (pa_cpu >= PROCESSOR_8000)
3694 return cost;
3696 if (! recog_memoized (insn))
3697 return 0;
3699 attr_type = get_attr_type (insn);
3701 if (REG_NOTE_KIND (link) == 0)
3703 /* Data dependency; DEP_INSN writes a register that INSN reads some
3704 cycles later. */
3706 if (attr_type == TYPE_FPSTORE)
3708 rtx pat = PATTERN (insn);
3709 rtx dep_pat = PATTERN (dep_insn);
3710 if (GET_CODE (pat) == PARALLEL)
3712 /* This happens for the fstXs,mb patterns. */
3713 pat = XVECEXP (pat, 0, 0);
3715 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3716 /* If this happens, we have to extend this to schedule
3717 optimally. Return 0 for now. */
3718 return 0;
3720 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3722 if (! recog_memoized (dep_insn))
3723 return 0;
3724 /* DEP_INSN is writing its result to the register
3725 being stored in the fpstore INSN. */
3726 switch (get_attr_type (dep_insn))
3728 case TYPE_FPLOAD:
/* This costs 3 cycles, not 2 as the md says, on the
700 and 7100.  */
3731 return cost + 1;
3733 case TYPE_FPALU:
3734 case TYPE_FPMULSGL:
3735 case TYPE_FPMULDBL:
3736 case TYPE_FPDIVSGL:
3737 case TYPE_FPDIVDBL:
3738 case TYPE_FPSQRTSGL:
3739 case TYPE_FPSQRTDBL:
/* In these important cases, we save one cycle compared to
when flop instructions feed each other.  */
3742 return cost - 1;
3744 default:
3745 return cost;
3750 /* For other data dependencies, the default cost specified in the
3751 md is correct. */
3752 return cost;
3754 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3756 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3757 cycles later. */
3759 if (attr_type == TYPE_FPLOAD)
3761 rtx pat = PATTERN (insn);
3762 rtx dep_pat = PATTERN (dep_insn);
3763 if (GET_CODE (pat) == PARALLEL)
3765 /* This happens for the fldXs,mb patterns. */
3766 pat = XVECEXP (pat, 0, 0);
3768 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3769 /* If this happens, we have to extend this to schedule
3770 optimally. Return 0 for now. */
3771 return 0;
3773 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3775 if (! recog_memoized (dep_insn))
3776 return 0;
3777 switch (get_attr_type (dep_insn))
3779 case TYPE_FPALU:
3780 case TYPE_FPMULSGL:
3781 case TYPE_FPMULDBL:
3782 case TYPE_FPDIVSGL:
3783 case TYPE_FPDIVDBL:
3784 case TYPE_FPSQRTSGL:
3785 case TYPE_FPSQRTDBL:
/* An fpload can't be issued until one cycle before a
3787 preceding arithmetic operation has finished if
3788 the target of the fpload is any of the sources
3789 (or destination) of the arithmetic operation. */
3790 return cost - 1;
3792 default:
3793 return 0;
3797 else if (attr_type == TYPE_FPALU)
3799 rtx pat = PATTERN (insn);
3800 rtx dep_pat = PATTERN (dep_insn);
3801 if (GET_CODE (pat) == PARALLEL)
3803 /* This happens for the fldXs,mb patterns. */
3804 pat = XVECEXP (pat, 0, 0);
3806 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3807 /* If this happens, we have to extend this to schedule
3808 optimally. Return 0 for now. */
3809 return 0;
3811 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3813 if (! recog_memoized (dep_insn))
3814 return 0;
3815 switch (get_attr_type (dep_insn))
3817 case TYPE_FPDIVSGL:
3818 case TYPE_FPDIVDBL:
3819 case TYPE_FPSQRTSGL:
3820 case TYPE_FPSQRTDBL:
3821 /* An ALU flop can't be issued until two cycles before a
3822 preceding divide or sqrt operation has finished if
3823 the target of the ALU flop is any of the sources
3824 (or destination) of the divide or sqrt operation. */
3825 return cost - 2;
3827 default:
3828 return 0;
3833 /* For other anti dependencies, the cost is 0. */
3834 return 0;
3836 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3838 /* Output dependency; DEP_INSN writes a register that INSN writes some
3839 cycles later. */
3840 if (attr_type == TYPE_FPLOAD)
3842 rtx pat = PATTERN (insn);
3843 rtx dep_pat = PATTERN (dep_insn);
3844 if (GET_CODE (pat) == PARALLEL)
3846 /* This happens for the fldXs,mb patterns. */
3847 pat = XVECEXP (pat, 0, 0);
3849 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3850 /* If this happens, we have to extend this to schedule
3851 optimally. Return 0 for now. */
3852 return 0;
3854 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3856 if (! recog_memoized (dep_insn))
3857 return 0;
3858 switch (get_attr_type (dep_insn))
3860 case TYPE_FPALU:
3861 case TYPE_FPMULSGL:
3862 case TYPE_FPMULDBL:
3863 case TYPE_FPDIVSGL:
3864 case TYPE_FPDIVDBL:
3865 case TYPE_FPSQRTSGL:
3866 case TYPE_FPSQRTDBL:
/* An fpload can't be issued until one cycle before a
3868 preceding arithmetic operation has finished if
3869 the target of the fpload is the destination of the
3870 arithmetic operation. */
3871 return cost - 1;
3873 default:
3874 return 0;
3878 else if (attr_type == TYPE_FPALU)
3880 rtx pat = PATTERN (insn);
3881 rtx dep_pat = PATTERN (dep_insn);
3882 if (GET_CODE (pat) == PARALLEL)
3884 /* This happens for the fldXs,mb patterns. */
3885 pat = XVECEXP (pat, 0, 0);
3887 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3888 /* If this happens, we have to extend this to schedule
3889 optimally. Return 0 for now. */
3890 return 0;
3892 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3894 if (! recog_memoized (dep_insn))
3895 return 0;
3896 switch (get_attr_type (dep_insn))
3898 case TYPE_FPDIVSGL:
3899 case TYPE_FPDIVDBL:
3900 case TYPE_FPSQRTSGL:
3901 case TYPE_FPSQRTDBL:
3902 /* An ALU flop can't be issued until two cycles before a
3903 preceding divide or sqrt operation has finished if
3904 the target of the ALU flop is also the target of
3905 the divide or sqrt operation. */
3906 return cost - 2;
3908 default:
3909 return 0;
3914 /* For other output dependencies, the cost is 0. */
3915 return 0;
3917 else
3918 abort ();
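/* Illustrative examples of the adjustments above on 700/7100-class
   parts: an fpload whose result feeds an fpstore costs one cycle more
   than the md claims (cost + 1); an FP ALU op feeding an fpstore
   saves a cycle (cost - 1); and anti or output dependencies on a
   divide or square root allow issue up to two cycles early
   (cost - 2).  */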
/* Adjust scheduling priorities.  We use this to try to keep addil
and the next use of %r1 close together.  */
3923 static int
3924 pa_adjust_priority (insn, priority)
3925 rtx insn;
3926 int priority;
3928 rtx set = single_set (insn);
3929 rtx src, dest;
3930 if (set)
3932 src = SET_SRC (set);
3933 dest = SET_DEST (set);
3934 if (GET_CODE (src) == LO_SUM
3935 && symbolic_operand (XEXP (src, 1), VOIDmode)
3936 && ! read_only_operand (XEXP (src, 1), VOIDmode))
3937 priority >>= 3;
3939 else if (GET_CODE (src) == MEM
3940 && GET_CODE (XEXP (src, 0)) == LO_SUM
3941 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
3942 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
3943 priority >>= 1;
3945 else if (GET_CODE (dest) == MEM
3946 && GET_CODE (XEXP (dest, 0)) == LO_SUM
3947 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
3948 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
3949 priority >>= 3;
3951 return priority;
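/* Illustrative sketch of the heuristic above (the insn and priority
   value are hypothetical): for an insn whose single_set looks like

       (set (reg:SI 1) (lo_sum:SI (reg:SI 1) (symbol_ref:SI "foo")))

   where "foo" is not read-only, the priority is shifted right by 3,
   so a priority of 32 drops to 4; that keeps the insn, and hence the
   use of %r1, close to the addil which computed %r1.  */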
3954 /* The 700 can only issue a single insn at a time.
3955 The 7XXX processors can issue two insns at a time.
3956 The 8000 can issue 4 insns at a time. */
3957 static int
3958 pa_issue_rate ()
3960 switch (pa_cpu)
3962 case PROCESSOR_700: return 1;
3963 case PROCESSOR_7100: return 2;
3964 case PROCESSOR_7100LC: return 2;
3965 case PROCESSOR_7200: return 2;
3966 case PROCESSOR_8000: return 4;
3968 default:
3969 abort ();
3975 /* Return any length adjustment needed by INSN which already has its length
3976 computed as LENGTH. Return zero if no adjustment is necessary.
3978 For the PA: function calls, millicode calls, and backwards short
3979 conditional branches with unfilled delay slots need an adjustment by +1
3980 (to account for the NOP which will be inserted into the instruction stream).
3982 Also compute the length of an inline block move here as it is too
3983 complicated to express as a length attribute in pa.md. */
3984 int
3985 pa_adjust_insn_length (insn, length)
3986 rtx insn;
3987 int length;
3989 rtx pat = PATTERN (insn);
3991 /* Call insns which are *not* indirect and have unfilled delay slots. */
3992 if (GET_CODE (insn) == CALL_INSN)
3995 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3996 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3997 return 4;
3998 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3999 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4000 == SYMBOL_REF)
4001 return 4;
4002 else
4003 return 0;
4005 /* Jumps inside switch tables which have unfilled delay slots
4006 also need adjustment. */
4007 else if (GET_CODE (insn) == JUMP_INSN
4008 && simplejump_p (insn)
4009 && GET_MODE (insn) == SImode)
4010 return 4;
4011 /* Millicode insn with an unfilled delay slot. */
4012 else if (GET_CODE (insn) == INSN
4013 && GET_CODE (pat) != SEQUENCE
4014 && GET_CODE (pat) != USE
4015 && GET_CODE (pat) != CLOBBER
4016 && get_attr_type (insn) == TYPE_MILLI)
4017 return 4;
4018 /* Block move pattern. */
4019 else if (GET_CODE (insn) == INSN
4020 && GET_CODE (pat) == PARALLEL
4021 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4022 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4023 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4024 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4025 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4026 return compute_movstrsi_length (insn) - 4;
4027 /* Conditional branch with an unfilled delay slot. */
4028 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4030 /* Adjust a short backwards conditional with an unfilled delay slot. */
4031 if (GET_CODE (pat) == SET
4032 && length == 4
4033 && ! forward_branch_p (insn))
4034 return 4;
4035 else if (GET_CODE (pat) == PARALLEL
4036 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4037 && length == 4)
4038 return 4;
4039 /* Adjust dbra insn with short backwards conditional branch with
4040 unfilled delay slot -- only for case where counter is in a
4041 general register. */
4042 else if (GET_CODE (pat) == PARALLEL
4043 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4044 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4045 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4046 && length == 4
4047 && ! forward_branch_p (insn))
4048 return 4;
4049 else
4050 return 0;
4052 return 0;
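/* A worked example of the adjustment above (the insn is hypothetical):
   a short backwards conditional branch such as "comb,= %r26,%r25,L$2"
   whose delay slot stayed unfilled returns 4, accounting for the nop
   that will be inserted into the slot, so its effective length is
   4 + 4 = 8 bytes.  */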
4055 /* Print operand X (an rtx) in assembler syntax to file FILE.
4056 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4057 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4059 void
4060 print_operand (file, x, code)
4061 FILE *file;
4062 rtx x;
4063 int code;
4065 switch (code)
4067 case '#':
4068 /* Output a 'nop' if there's nothing for the delay slot. */
4069 if (dbr_sequence_length () == 0)
4070 fputs ("\n\tnop", file);
4071 return;
4072 case '*':
4073 /* Output a nullification completer if there's nothing for the
4074 delay slot or nullification is requested. */
4075 if (dbr_sequence_length () == 0
4076 || (final_sequence
4077 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4078 fputs (",n", file);
4079 return;
4080 case 'R':
4081 /* Print out the second register name of a register pair.
4082 I.e., R (6) => 7. */
4083 fputs (reg_names[REGNO (x) + 1], file);
4084 return;
4085 case 'r':
4086 /* A register or zero. */
4087 if (x == const0_rtx
4088 || (x == CONST0_RTX (DFmode))
4089 || (x == CONST0_RTX (SFmode)))
4091 fputs ("%r0", file);
4092 return;
4094 else
4095 break;
4096 case 'f':
4097 /* A register or zero (floating point). */
4098 if (x == const0_rtx
4099 || (x == CONST0_RTX (DFmode))
4100 || (x == CONST0_RTX (SFmode)))
4102 fputs ("%fr0", file);
4103 return;
4105 else
4106 break;
4107 case 'A':
4109 rtx xoperands[2];
4111 xoperands[0] = XEXP (XEXP (x, 0), 0);
4112 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4113 output_global_address (file, xoperands[1], 0);
4114 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4115 return;
4118 case 'C': /* Plain (C)ondition */
4119 case 'X':
4120 switch (GET_CODE (x))
4122 case EQ:
4123 fputs ("=", file); break;
4124 case NE:
4125 fputs ("<>", file); break;
4126 case GT:
4127 fputs (">", file); break;
4128 case GE:
4129 fputs (">=", file); break;
4130 case GEU:
4131 fputs (">>=", file); break;
4132 case GTU:
4133 fputs (">>", file); break;
4134 case LT:
4135 fputs ("<", file); break;
4136 case LE:
4137 fputs ("<=", file); break;
4138 case LEU:
4139 fputs ("<<=", file); break;
4140 case LTU:
4141 fputs ("<<", file); break;
4142 default:
4143 abort ();
4145 return;
4146 case 'N': /* Condition, (N)egated */
4147 switch (GET_CODE (x))
4149 case EQ:
4150 fputs ("<>", file); break;
4151 case NE:
4152 fputs ("=", file); break;
4153 case GT:
4154 fputs ("<=", file); break;
4155 case GE:
4156 fputs ("<", file); break;
4157 case GEU:
4158 fputs ("<<", file); break;
4159 case GTU:
4160 fputs ("<<=", file); break;
4161 case LT:
4162 fputs (">=", file); break;
4163 case LE:
4164 fputs (">", file); break;
4165 case LEU:
4166 fputs (">>", file); break;
4167 case LTU:
4168 fputs (">>=", file); break;
4169 default:
4170 abort ();
4172 return;
4173 /* For floating point comparisons. Note that the output
4174 predicates are the complement of the desired comparison. */
4175 case 'Y':
4176 switch (GET_CODE (x))
4178 case EQ:
4179 fputs ("!=", file); break;
4180 case NE:
4181 fputs ("=", file); break;
4182 case GT:
4183 fputs ("!>", file); break;
4184 case GE:
4185 fputs ("!>=", file); break;
4186 case LT:
4187 fputs ("!<", file); break;
4188 case LE:
4189 fputs ("!<=", file); break;
4190 case LTGT:
4191 fputs ("!<>", file); break;
4192 case UNLE:
4193 fputs (">", file); break;
4194 case UNLT:
4195 fputs (">=", file); break;
4196 case UNGE:
4197 fputs ("<", file); break;
4198 case UNGT:
4199 fputs ("<=", file); break;
4200 case UNEQ:
4201 fputs ("<>", file); break;
4202 case UNORDERED:
4203 fputs ("<=>", file); break;
4204 case ORDERED:
4205 fputs ("!<=>", file); break;
4206 default:
4207 abort ();
4209 return;
4210 case 'S': /* Condition, operands are (S)wapped. */
4211 switch (GET_CODE (x))
4213 case EQ:
4214 fputs ("=", file); break;
4215 case NE:
4216 fputs ("<>", file); break;
4217 case GT:
4218 fputs ("<", file); break;
4219 case GE:
4220 fputs ("<=", file); break;
4221 case GEU:
4222 fputs ("<<=", file); break;
4223 case GTU:
4224 fputs ("<<", file); break;
4225 case LT:
4226 fputs (">", file); break;
4227 case LE:
4228 fputs (">=", file); break;
4229 case LEU:
4230 fputs (">>=", file); break;
4231 case LTU:
4232 fputs (">>", file); break;
4233 default:
4234 abort ();
4236 return;
4237 case 'B': /* Condition, (B)oth swapped and negated. */
4238 switch (GET_CODE (x))
4240 case EQ:
4241 fputs ("<>", file); break;
4242 case NE:
4243 fputs ("=", file); break;
4244 case GT:
4245 fputs (">=", file); break;
4246 case GE:
4247 fputs (">", file); break;
4248 case GEU:
4249 fputs (">>", file); break;
4250 case GTU:
4251 fputs (">>=", file); break;
4252 case LT:
4253 fputs ("<=", file); break;
4254 case LE:
4255 fputs ("<", file); break;
4256 case LEU:
4257 fputs ("<<", file); break;
4258 case LTU:
4259 fputs ("<<=", file); break;
4260 default:
4261 abort ();
4263 return;
4264 case 'k':
4265 if (GET_CODE (x) == CONST_INT)
4267 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4268 return;
4270 abort ();
4271 case 'Q':
4272 if (GET_CODE (x) == CONST_INT)
4274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4275 return;
4277 abort ();
4278 case 'L':
4279 if (GET_CODE (x) == CONST_INT)
4281 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4282 return;
4284 abort ();
4285 case 'O':
4286 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4288 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4289 return;
4291 abort ();
4292 case 'p':
4293 if (GET_CODE (x) == CONST_INT)
4295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4296 return;
4298 abort ();
4299 case 'P':
4300 if (GET_CODE (x) == CONST_INT)
4302 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4303 return;
4305 abort ();
4306 case 'I':
4307 if (GET_CODE (x) == CONST_INT)
4308 fputs ("i", file);
4309 return;
4310 case 'M':
4311 case 'F':
4312 switch (GET_CODE (XEXP (x, 0)))
4314 case PRE_DEC:
4315 case PRE_INC:
4316 if (ASSEMBLER_DIALECT == 0)
4317 fputs ("s,mb", file);
4318 else
4319 fputs (",mb", file);
4320 break;
4321 case POST_DEC:
4322 case POST_INC:
4323 if (ASSEMBLER_DIALECT == 0)
4324 fputs ("s,ma", file);
4325 else
4326 fputs (",ma", file);
4327 break;
4328 case PLUS:
4329 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4330 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4332 if (ASSEMBLER_DIALECT == 0)
4333 fputs ("x,s", file);
4334 else
4335 fputs (",s", file);
4337 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4338 fputs ("s", file);
4339 break;
4340 default:
4341 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4342 fputs ("s", file);
4343 break;
4345 return;
4346 case 'G':
4347 output_global_address (file, x, 0);
4348 return;
4349 case 'H':
4350 output_global_address (file, x, 1);
4351 return;
4352 case 0: /* Don't do anything special */
4353 break;
4354 case 'Z':
4356 unsigned op[3];
4357 compute_zdepwi_operands (INTVAL (x), op);
4358 fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
4359 return;
4361 case 'z':
4363 unsigned op[3];
4364 compute_zdepdi_operands (INTVAL (x), op);
4365 fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
4366 return;
4368 case 'c':
4369 /* We can get here from a .vtable_inherit due to our
4370 CONSTANT_ADDRESS_P rejecting perfectly good constant
4371 addresses. */
4372 break;
4373 default:
4374 abort ();
4376 if (GET_CODE (x) == REG)
4378 fputs (reg_names [REGNO (x)], file);
4379 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4381 fputs ("R", file);
4382 return;
4384 if (FP_REG_P (x)
4385 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4386 && (REGNO (x) & 1) == 0)
4387 fputs ("L", file);
4389 else if (GET_CODE (x) == MEM)
4391 int size = GET_MODE_SIZE (GET_MODE (x));
4392 rtx base = NULL_RTX;
4393 switch (GET_CODE (XEXP (x, 0)))
4395 case PRE_DEC:
4396 case POST_DEC:
4397 base = XEXP (XEXP (x, 0), 0);
4398 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4399 break;
4400 case PRE_INC:
4401 case POST_INC:
4402 base = XEXP (XEXP (x, 0), 0);
4403 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4404 break;
4405 default:
4406 if (GET_CODE (XEXP (x, 0)) == PLUS
4407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4408 fprintf (file, "%s(%s)",
4409 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4410 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4411 else if (GET_CODE (XEXP (x, 0)) == PLUS
4412 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4413 fprintf (file, "%s(%s)",
4414 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4415 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4416 else
4417 output_address (XEXP (x, 0));
4418 break;
4421 else
4422 output_addr_const (file, x);
4425 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4427 void
4428 output_global_address (file, x, round_constant)
4429 FILE *file;
4430 rtx x;
4431 int round_constant;
4434 /* Imagine (high (const (plus ...))). */
4435 if (GET_CODE (x) == HIGH)
4436 x = XEXP (x, 0);
4438 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4439 assemble_name (file, XSTR (x, 0));
4440 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4442 assemble_name (file, XSTR (x, 0));
4443 fputs ("-$global$", file);
4445 else if (GET_CODE (x) == CONST)
4447 const char *sep = "";
4448 int offset = 0; /* assembler wants -$global$ at end */
4449 rtx base = NULL_RTX;
4451 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4453 base = XEXP (XEXP (x, 0), 0);
4454 output_addr_const (file, base);
4456 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4457 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4458 else abort ();
4460 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4462 base = XEXP (XEXP (x, 0), 1);
4463 output_addr_const (file, base);
4465 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4466 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4467 else abort ();
4469 /* How bogus. The compiler is apparently responsible for
4470 rounding the constant if it uses an LR field selector.
4472 The linker and/or assembler seem a better place since
4473 they have to do this kind of thing already.
4475 If we fail to do this, HP's optimizing linker may eliminate
4476 an addil, but not update the ldw/stw/ldo instruction that
4477 uses the result of the addil. */
4478 if (round_constant)
4479 offset = ((offset + 0x1000) & ~0x1fff);
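/* For example (values are illustrative), an offset of 0x1234 becomes
   (0x1234 + 0x1000) & ~0x1fff == 0x2000, i.e. the offset is rounded
   to the nearest multiple of 0x2000 as the LR field selector
   expects.  */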
4481 if (GET_CODE (XEXP (x, 0)) == PLUS)
4483 if (offset < 0)
4485 offset = -offset;
4486 sep = "-";
4488 else
4489 sep = "+";
4491 else if (GET_CODE (XEXP (x, 0)) == MINUS
4492 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4493 sep = "-";
4494 else abort ();
4496 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4497 fputs ("-$global$", file);
4498 if (offset)
4499 fprintf (file, "%s%d", sep, offset);
4501 else
4502 output_addr_const (file, x);
4505 void
4506 output_deferred_plabels (file)
4507 FILE *file;
4509 int i;
4510 /* If we have deferred plabels, then we need to switch into the data
4511 section and align it to a 4 byte boundary before we output the
4512 deferred plabels. */
4513 if (n_deferred_plabels)
4515 data_section ();
4516 ASM_OUTPUT_ALIGN (file, 2);
4519 /* Now output the deferred plabels. */
4520 for (i = 0; i < n_deferred_plabels; i++)
4522 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4523 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4524 4, 32, 1);
4528 /* HP's millicode routines mean something special to the assembler.
4529 Keep track of which ones we have used. */
4531 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
4532 static void import_milli PARAMS ((enum millicodes));
4533 static char imported[(int) end1000];
4534 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
4535 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4536 #define MILLI_START 10
4538 static void
4539 import_milli (code)
4540 enum millicodes code;
4542 char str[sizeof (import_string)];
4544 if (!imported[(int) code])
4546 imported[(int) code] = 1;
4547 strcpy (str, import_string);
4548 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4549 output_asm_insn (str, 0);
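/* For example (a sketch of the above), import_milli (mulI) overwrites
   the "...." in import_string and emits

       .IMPORT $$mulI,MILLICODE

   exactly once per translation unit, courtesy of the imported[]
   array.  */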
4553 /* The register constraints have put the operands and return value in
4554 the proper registers. */
4556 const char *
4557 output_mul_insn (unsignedp, insn)
4558 int unsignedp ATTRIBUTE_UNUSED;
4559 rtx insn;
4561 import_milli (mulI);
4562 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4565 /* Emit the rtl for doing a division by a constant. */
4567 /* Do magic division millicodes exist for this value? */
4568 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4569 1, 1};
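/* Reading the table above: magic_milli[n] is nonzero for
   n = 3, 5, 6, 7, 9, 10, 12, 14 and 15. So, for example, a division
   by 7 can use the specialized $$divI_7/$$divU_7 millicode entry
   points (see output_div_insn below), while a division by 11 must
   fall back to the generic $$divI/$$divU routines.  */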
4571 /* We'll use an array to keep track of the magic millicodes and
4572 whether or not we've used them already. [n][0] is signed, [n][1] is
4573 unsigned. */
4575 static int div_milli[16][2];
4577 int
4578 div_operand (op, mode)
4579 rtx op;
4580 enum machine_mode mode;
4582 return (mode == SImode
4583 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4584 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4585 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4588 int
4589 emit_hpdiv_const (operands, unsignedp)
4590 rtx *operands;
4591 int unsignedp;
4593 if (GET_CODE (operands[2]) == CONST_INT
4594 && INTVAL (operands[2]) > 0
4595 && INTVAL (operands[2]) < 16
4596 && magic_milli[INTVAL (operands[2])])
4598 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
4600 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4601 emit
4602 (gen_rtx
4603 (PARALLEL, VOIDmode,
4604 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4605 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4606 SImode,
4607 gen_rtx_REG (SImode, 26),
4608 operands[2])),
4609 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4610 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4611 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4612 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4613 gen_rtx_CLOBBER (VOIDmode, ret))));
4614 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4615 return 1;
4617 return 0;
4620 const char *
4621 output_div_insn (operands, unsignedp, insn)
4622 rtx *operands;
4623 int unsignedp;
4624 rtx insn;
4626 int divisor;
4628 /* If the divisor is a constant, try to use one of the special
4629 opcodes. */
4630 if (GET_CODE (operands[0]) == CONST_INT)
4632 static char buf[100];
4633 divisor = INTVAL (operands[0]);
4634 if (!div_milli[divisor][unsignedp])
4636 div_milli[divisor][unsignedp] = 1;
4637 if (unsignedp)
4638 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4639 else
4640 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4642 if (unsignedp)
4644 sprintf (buf, "$$divU_");
4645 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4646 return output_millicode_call (insn,
4647 gen_rtx_SYMBOL_REF (SImode, buf));
4649 else
4651 sprintf (buf, "$$divI_");
4652 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4653 return output_millicode_call (insn,
4654 gen_rtx_SYMBOL_REF (SImode, buf));
4657 /* Divisor isn't a special constant. */
4658 else
4660 if (unsignedp)
4662 import_milli (divU);
4663 return output_millicode_call (insn,
4664 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4666 else
4668 import_milli (divI);
4669 return output_millicode_call (insn,
4670 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
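/* Sketch of the interplay above (illustrative): the first signed
   division by 7 in a file emits

       .IMPORT $$divI_7,MILLICODE

   and the division itself becomes a millicode call to $$divI_7;
   divisors without a magic millicode call plain $$divI or $$divU
   instead.  */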
4675 /* Output a $$rem millicode to do mod. */
4677 const char *
4678 output_mod_insn (unsignedp, insn)
4679 int unsignedp;
4680 rtx insn;
4682 if (unsignedp)
4684 import_milli (remU);
4685 return output_millicode_call (insn,
4686 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4688 else
4690 import_milli (remI);
4691 return output_millicode_call (insn,
4692 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4696 void
4697 output_arg_descriptor (call_insn)
4698 rtx call_insn;
4700 const char *arg_regs[4];
4701 enum machine_mode arg_mode;
4702 rtx link;
4703 int i, output_flag = 0;
4704 int regno;
4706 /* We neither need nor want argument location descriptors for the
4707 64bit runtime environment or the ELF32 environment. */
4708 if (TARGET_64BIT || TARGET_ELF32)
4709 return;
4711 for (i = 0; i < 4; i++)
4712 arg_regs[i] = 0;
4714 /* Specify explicitly that no argument relocations should take place
4715 if using the portable runtime calling conventions. */
4716 if (TARGET_PORTABLE_RUNTIME)
4718 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4719 asm_out_file);
4720 return;
4723 if (GET_CODE (call_insn) != CALL_INSN)
4724 abort ();
4725 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4727 rtx use = XEXP (link, 0);
4729 if (! (GET_CODE (use) == USE
4730 && GET_CODE (XEXP (use, 0)) == REG
4731 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4732 continue;
4734 arg_mode = GET_MODE (XEXP (use, 0));
4735 regno = REGNO (XEXP (use, 0));
4736 if (regno >= 23 && regno <= 26)
4738 arg_regs[26 - regno] = "GR";
4739 if (arg_mode == DImode)
4740 arg_regs[25 - regno] = "GR";
4742 else if (regno >= 32 && regno <= 39)
4744 if (arg_mode == SFmode)
4745 arg_regs[(regno - 32) / 2] = "FR";
4746 else
4748 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4749 arg_regs[(regno - 34) / 2] = "FR";
4750 arg_regs[(regno - 34) / 2 + 1] = "FU";
4751 #else
4752 arg_regs[(regno - 34) / 2] = "FU";
4753 arg_regs[(regno - 34) / 2 + 1] = "FR";
4754 #endif
4758 fputs ("\t.CALL ", asm_out_file);
4759 for (i = 0; i < 4; i++)
4761 if (arg_regs[i])
4763 if (output_flag++)
4764 fputc (',', asm_out_file);
4765 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4768 fputc ('\n', asm_out_file);
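/* An illustrative descriptor (the call is hypothetical): for a call
   passing two integer arguments in %r26 and %r25 the loop above
   emits

       .CALL ARGW0=GR,ARGW1=GR

   and a double-precision FP argument would instead fill two words
   with FR/FU (or FU/FR when HP_FP_ARG_DESCRIPTOR_REVERSED is
   defined).  */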
4771 /* Return the class of any secondary reload register that is needed to
4772 move IN into a register in class CLASS using mode MODE.
4774 Profiling has shown this routine and its descendants account for
4775 a significant amount of compile time (~7%). So it has been
4776 optimized to reduce redundant computations and eliminate useless
4777 function calls.
4779 It might be worthwhile to try and make this a leaf function too. */
4781 enum reg_class
4782 secondary_reload_class (class, mode, in)
4783 enum reg_class class;
4784 enum machine_mode mode;
4785 rtx in;
4787 int regno, is_symbolic;
4789 /* Trying to load a constant into a FP register during PIC code
4790 generation will require %r1 as a scratch register. */
4791 if (flag_pic
4792 && GET_MODE_CLASS (mode) == MODE_INT
4793 && FP_REG_CLASS_P (class)
4794 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4795 return R1_REGS;
4797 /* Profiling showed the PA port spends about 1.3% of its compilation
4798 time in true_regnum from calls inside secondary_reload_class. */
4800 if (GET_CODE (in) == REG)
4802 regno = REGNO (in);
4803 if (regno >= FIRST_PSEUDO_REGISTER)
4804 regno = true_regnum (in);
4806 else if (GET_CODE (in) == SUBREG)
4807 regno = true_regnum (in);
4808 else
4809 regno = -1;
4811 /* If we have something like (mem (mem (...))), we can safely assume the
4812 inner MEM will end up in a general register after reloading, so there's
4813 no need for a secondary reload. */
4814 if (GET_CODE (in) == MEM
4815 && GET_CODE (XEXP (in, 0)) == MEM)
4816 return NO_REGS;
4818 /* Handle out of range displacement for integer mode loads/stores of
4819 FP registers. */
4820 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4821 && GET_MODE_CLASS (mode) == MODE_INT
4822 && FP_REG_CLASS_P (class))
4823 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4824 return GENERAL_REGS;
4826 /* A SAR<->FP register copy requires a secondary register (GPR) as
4827 well as secondary memory. */
4828 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
4829 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
4830 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
4831 return GENERAL_REGS;
4833 if (GET_CODE (in) == HIGH)
4834 in = XEXP (in, 0);
4836 /* Profiling has shown GCC spends about 2.6% of its compilation
4837 time in symbolic_operand from calls inside secondary_reload_class.
4839 We use an inline copy and only compute its return value once to avoid
4840 useless work. */
4841 switch (GET_CODE (in))
4843 rtx tmp;
4845 case SYMBOL_REF:
4846 case LABEL_REF:
4847 is_symbolic = 1;
4848 break;
4849 case CONST:
4850 tmp = XEXP (in, 0);
4851 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4852 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4853 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4854 break;
4856 default:
4857 is_symbolic = 0;
4858 break;
4861 if (!flag_pic
4862 && is_symbolic
4863 && read_only_operand (in, VOIDmode))
4864 return NO_REGS;
4866 if (class != R1_REGS && is_symbolic)
4867 return R1_REGS;
4869 return NO_REGS;
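/* A sketch of the common cases above (illustrative, not exhaustive):
   loading the address of a non-read-only SYMBOL_REF needs %r1
   (R1_REGS) as a scratch, since the addil used to form the address
   writes %r1; an integer-mode access to an FP register that may have
   an out-of-range displacement is staged through GENERAL_REGS.  */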
4872 enum direction
4873 function_arg_padding (mode, type)
4874 enum machine_mode mode;
4875 tree type;
4877 int size;
4879 if (mode == BLKmode)
4881 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4882 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4883 else
4884 return upward; /* Don't know if this is right, but
4885 same as old definition. */
4887 else
4888 size = GET_MODE_BITSIZE (mode);
4889 if (size < PARM_BOUNDARY)
4890 return downward;
4891 else if (size % PARM_BOUNDARY)
4892 return upward;
4893 else
4894 return none;
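/* Worked examples of the rule above (PARM_BOUNDARY is the parameter
   word size): a 1-byte argument has size < PARM_BOUNDARY and pads
   downward; on a 32-bit target a 12-byte BLKmode argument has
   96 % 32 == 0 and needs no padding (none).  */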
4898 /* Do what is necessary for `va_start'. We look at the current function
4899 to determine if stdargs or varargs is used and fill in an initial
4900 va_list. A pointer to this constructor is returned. */
4902 struct rtx_def *
4903 hppa_builtin_saveregs ()
4905 rtx offset, dest;
4906 tree fntype = TREE_TYPE (current_function_decl);
4907 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4908 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4909 != void_type_node)))
4910 ? UNITS_PER_WORD : 0);
4912 if (argadj)
4913 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4914 else
4915 offset = current_function_arg_offset_rtx;
4917 if (TARGET_64BIT)
4919 int i, off;
4921 /* Adjust for varargs/stdarg differences. */
4922 if (argadj)
4923 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
4924 else
4925 offset = current_function_arg_offset_rtx;
4927 /* We need to save %r26 .. %r19 inclusive starting at offset -64
4928 from the incoming arg pointer and growing to larger addresses. */
4929 for (i = 26, off = -64; i >= 19; i--, off += 8)
4930 emit_move_insn (gen_rtx_MEM (word_mode,
4931 plus_constant (arg_pointer_rtx, off)),
4932 gen_rtx_REG (word_mode, i));
4934 /* The incoming args pointer points just beyond the flushback area;
4935 normally this is not a serious concern. However, when we are doing
4936 varargs/stdargs we want to make the arg pointer point to the start
4937 of the incoming argument area. */
4938 emit_move_insn (virtual_incoming_args_rtx,
4939 plus_constant (arg_pointer_rtx, -64));
4941 /* Now return a pointer to the first anonymous argument. */
4942 return copy_to_reg (expand_binop (Pmode, add_optab,
4943 virtual_incoming_args_rtx,
4944 offset, 0, 0, OPTAB_LIB_WIDEN));
4947 /* Store general registers on the stack. */
4948 dest = gen_rtx_MEM (BLKmode,
4949 plus_constant (current_function_internal_arg_pointer,
4950 -16));
4951 set_mem_alias_set (dest, get_varargs_alias_set ());
4952 set_mem_align (dest, BITS_PER_WORD);
4953 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
4955 /* move_block_from_reg will emit code to store the argument registers
4956 individually as scalar stores.
4958 However, other insns may later load from the same addresses for
4959 a structure load (passing a struct to a varargs routine).
4961 The alias code assumes that such aliasing can never happen, so we
4962 have to keep memory referencing insns from moving up beyond the
4963 last argument register store. So we emit a blockage insn here. */
4964 emit_insn (gen_blockage ());
4966 return copy_to_reg (expand_binop (Pmode, add_optab,
4967 current_function_internal_arg_pointer,
4968 offset, 0, 0, OPTAB_LIB_WIDEN));
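/* A sketch of the 32-bit layout built above: %r23..%r26 are dumped
   into the 16 bytes just below the internal arg pointer (offset -16),
   so a va_list that walks down through memory finds the register
   arguments exactly where stack-passed arguments would have been.  */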
4971 void
4972 hppa_va_start (stdarg_p, valist, nextarg)
4973 int stdarg_p ATTRIBUTE_UNUSED;
4974 tree valist;
4975 rtx nextarg;
4977 nextarg = expand_builtin_saveregs ();
4978 std_expand_builtin_va_start (1, valist, nextarg);
4981 struct rtx_def *
4982 hppa_va_arg (valist, type)
4983 tree valist, type;
4985 HOST_WIDE_INT align, size, ofs;
4986 tree t, ptr, pptr;
4988 if (TARGET_64BIT)
4990 /* Every argument in PA64 is passed by value (including large structs).
4991 Arguments with size greater than 8 must be aligned 0 MOD 16. */
4993 size = int_size_in_bytes (type);
4994 if (size > UNITS_PER_WORD)
4996 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4997 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
4998 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4999 build_int_2 (-2 * UNITS_PER_WORD, -1));
5000 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5001 TREE_SIDE_EFFECTS (t) = 1;
5002 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5004 return std_expand_builtin_va_arg (valist, type);
5007 /* Compute the rounded size of the type. */
5008 align = PARM_BOUNDARY / BITS_PER_UNIT;
5009 size = int_size_in_bytes (type);
5011 ptr = build_pointer_type (type);
5013 /* "Large" types are passed by reference. */
5014 if (size > 8)
5016 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5017 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5018 TREE_SIDE_EFFECTS (t) = 1;
5020 pptr = build_pointer_type (ptr);
5021 t = build1 (NOP_EXPR, pptr, t);
5022 TREE_SIDE_EFFECTS (t) = 1;
5024 t = build1 (INDIRECT_REF, ptr, t);
5025 TREE_SIDE_EFFECTS (t) = 1;
5027 else
5029 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5030 build_int_2 (-size, -1));
5032 /* Copied from va-pa.h, but we probably don't need to align
5033 to word size, since we generate and preserve that invariant. */
5034 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5035 build_int_2 ((size > 4 ? -8 : -4), -1));
5037 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5038 TREE_SIDE_EFFECTS (t) = 1;
5040 ofs = (8 - size) % 4;
5041 if (ofs)
5043 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
5044 TREE_SIDE_EFFECTS (t) = 1;
5047 t = build1 (NOP_EXPR, ptr, t);
5048 TREE_SIDE_EFFECTS (t) = 1;
5051 /* Calculate! */
5052 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
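/* Worked example of the 32-bit arithmetic above for a hypothetical
   1-byte type: valist is decremented by 1 and rounded down to a
   4-byte boundary, then ofs = (8 - 1) % 4 = 3 advances the pointer to
   the last byte of the word -- where a small argument lives on a
   big-endian stack that grows toward lower addresses.  */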
5057 /* This routine handles all the normal conditional branch sequences we
5058 might need to generate. It handles compare immediate vs compare
5059 register, nullification of delay slots, varying length branches,
5060 negated branches, and all combinations of the above. It returns the
5061 output appropriate to emit the branch corresponding to all given
5062 parameters. */
5064 const char *
5065 output_cbranch (operands, nullify, length, negated, insn)
5066 rtx *operands;
5067 int nullify, length, negated;
5068 rtx insn;
5070 static char buf[100];
5071 int useskip = 0;
5073 /* A conditional branch to the following instruction (e.g. the delay slot) is
5074 asking for a disaster. This can happen when not optimizing.
5076 In such cases it is safe to emit nothing. */
5078 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5079 return "";
5081 /* If this is a long branch with its delay slot unfilled, set `nullify'
5082 as it can nullify the delay slot and save a nop. */
5083 if (length == 8 && dbr_sequence_length () == 0)
5084 nullify = 1;
5086 /* If this is a short forward conditional branch which did not get
5087 its delay slot filled, the delay slot can still be nullified. */
5088 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5089 nullify = forward_branch_p (insn);
5091 /* A forward branch over a single nullified insn can be done with a
5092 comclr instruction. This avoids a single cycle penalty due to a
5093 mis-predicted branch if we fall through (branch not taken). */
5094 if (length == 4
5095 && next_real_insn (insn) != 0
5096 && get_attr_length (next_real_insn (insn)) == 4
5097 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5098 && nullify)
5099 useskip = 1;
5101 switch (length)
5103 /* All short conditional branches except backwards with an unfilled
5104 delay slot. */
5105 case 4:
5106 if (useskip)
5107 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5108 else
5109 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5110 if (GET_MODE (operands[1]) == DImode)
5111 strcat (buf, "*");
5112 if (negated)
5113 strcat (buf, "%B3");
5114 else
5115 strcat (buf, "%S3");
5116 if (useskip)
5117 strcat (buf, " %2,%r1,%%r0");
5118 else if (nullify)
5119 strcat (buf, ",n %2,%r1,%0");
5120 else
5121 strcat (buf, " %2,%r1,%0");
5122 break;
5124 /* All long conditionals. Note a short backward branch with an
5125 unfilled delay slot is treated just like a long backward branch
5126 with an unfilled delay slot. */
5127 case 8:
5128 /* Handle weird backwards branch with a filled delay slot
5129 which is nullified. */
5130 if (dbr_sequence_length () != 0
5131 && ! forward_branch_p (insn)
5132 && nullify)
5134 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5135 if (GET_MODE (operands[1]) == DImode)
5136 strcat (buf, "*");
5137 if (negated)
5138 strcat (buf, "%S3");
5139 else
5140 strcat (buf, "%B3");
5141 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5143 /* Handle short backwards branch with an unfilled delay slot.
5144 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5145 taken and untaken branches. */
5146 else if (dbr_sequence_length () == 0
5147 && ! forward_branch_p (insn)
5148 && INSN_ADDRESSES_SET_P ()
5149 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5150 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5152 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5153 if (GET_MODE (operands[1]) == DImode)
5154 strcat (buf, "*");
5155 if (negated)
5156 strcat (buf, "%B3 %2,%r1,%0%#");
5157 else
5158 strcat (buf, "%S3 %2,%r1,%0%#");
5160 else
5162 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5163 if (GET_MODE (operands[1]) == DImode)
5164 strcat (buf, "*");
5165 if (negated)
5166 strcat (buf, "%S3");
5167 else
5168 strcat (buf, "%B3");
5169 if (nullify)
5170 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5171 else
5172 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5174 break;
5176 case 20:
5177 /* Very long branch. Right now we only handle these when not
5178 optimizing. See "jump" pattern in pa.md for details. */
5179 if (optimize)
5180 abort ();
5182 /* Create a reversed conditional branch which branches around
5183 the following insns. */
5184 if (negated)
5185 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5186 else
5187 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5188 if (GET_MODE (operands[1]) == DImode)
5190 if (negated)
5191 strcpy (buf,
5192 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5193 else
5194 strcpy (buf,
5195 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5197 output_asm_insn (buf, operands);
5199 /* Output an insn to save %r1. */
5200 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5202 /* Now output a very long branch to the original target. */
5203 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5205 /* Now restore the value of %r1 in the delay slot. We're not
5206 optimizing so we know nothing else can be in the delay slot. */
5207 return "ldw -16(%%r30),%%r1";
5209 case 28:
5210 /* Very long branch when generating PIC code. Right now we only
5211 handle these when not optimizing. See "jump" pattern in pa.md
5212 for details. */
5213 if (optimize)
5214 abort ();
5216 /* Create a reversed conditional branch which branches around
5217 the following insns. */
5218 if (negated)
5219 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5220 else
5221 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5222 if (GET_MODE (operands[1]) == DImode)
5224 if (negated)
5225 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5226 else
5227 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5229 output_asm_insn (buf, operands);
5231 /* Output an insn to save %r1. */
5232 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5234 /* Now output a very long PIC branch to the original target. */
5236 rtx xoperands[5];
5238 xoperands[0] = operands[0];
5239 xoperands[1] = operands[1];
5240 xoperands[2] = operands[2];
5241 xoperands[3] = operands[3];
5242 xoperands[4] = gen_label_rtx ();
5244 output_asm_insn ("{bl|b,l} .+8,%%r1\n\taddil L'%l0-%l4,%%r1",
5245 xoperands);
5246 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5247 CODE_LABEL_NUMBER (xoperands[4]));
5248 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv %%r0(%%r1)",
5249 xoperands);
5252 /* Now restore the value of %r1 in the delay slot. We're not
5253 optimizing so we know nothing else can be in the delay slot. */
5254 return "ldw -16(%%r30),%%r1";
5256 default:
5257 abort ();
5259 return buf;
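/* Illustrative expansion of the length 4 template above (operands
   are hypothetical): with nullification and the PA 1.x dialect,
   "{com%I2b,|cmp%I2b,}" plus "%S3" plus ",n %2,%r1,%0" might
   assemble as

       comb,=,n %r4,%r25,L$0004

   The longer cases wrap the same comparison around explicit b/be
   sequences, as built piecewise above.  */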
5262 /* This routine handles all the branch-on-bit conditional branch sequences we
5263 might need to generate. It handles nullification of delay slots,
5264 varying length branches, negated branches and all combinations of the
5265 above. It returns the appropriate output template to emit the branch. */
5267 const char *
5268 output_bb (operands, nullify, length, negated, insn, which)
5269 rtx *operands ATTRIBUTE_UNUSED;
5270 int nullify, length, negated;
5271 rtx insn;
5272 int which;
5274 static char buf[100];
5275 int useskip = 0;
5277 /* A conditional branch to the following instruction (e.g. the delay slot) is
5278 asking for a disaster. I do not think this can happen as this pattern
5279 is only used when optimizing; jump optimization should eliminate the
5280 jump. But be prepared just in case. */
5282 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5283 return "";
5285 /* If this is a long branch with its delay slot unfilled, set `nullify'
5286 as it can nullify the delay slot and save a nop. */
5287 if (length == 8 && dbr_sequence_length () == 0)
5288 nullify = 1;
5290 /* If this is a short forward conditional branch which did not get
5291 its delay slot filled, the delay slot can still be nullified. */
5292 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5293 nullify = forward_branch_p (insn);
5295 /* A forward branch over a single nullified insn can be done with an
5296 extrs instruction. This avoids a single cycle penalty due to a
5297 mis-predicted branch if we fall through (branch not taken). */
5299 if (length == 4
5300 && next_real_insn (insn) != 0
5301 && get_attr_length (next_real_insn (insn)) == 4
5302 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5303 && nullify)
5304 useskip = 1;
5306 switch (length)
5309 /* All short conditional branches except backwards with an unfilled
5310 delay slot. */
5311 case 4:
5312 if (useskip)
5313 strcpy (buf, "{extrs,|extrw,s,}");
5314 else
5315 strcpy (buf, "bb,");
5316 if (useskip && GET_MODE (operands[0]) == DImode)
5317 strcpy (buf, "extrd,s,*");
5318 else if (GET_MODE (operands[0]) == DImode)
5319 strcpy (buf, "bb,*");
5320 if ((which == 0 && negated)
5321 || (which == 1 && ! negated))
5322 strcat (buf, ">=");
5323 else
5324 strcat (buf, "<");
5325 if (useskip)
5326 strcat (buf, " %0,%1,1,%%r0");
5327 else if (nullify && negated)
5328 strcat (buf, ",n %0,%1,%3");
5329 else if (nullify && ! negated)
5330 strcat (buf, ",n %0,%1,%2");
5331 else if (! nullify && negated)
5332 strcat (buf, " %0,%1,%3");
5333 else if (! nullify && ! negated)
5334 strcat (buf, " %0,%1,%2");
5335 break;
5337 /* All long conditionals. Note a short backward branch with an
5338 unfilled delay slot is treated just like a long backward branch
5339 with an unfilled delay slot. */
5340 case 8:
5341 /* Handle weird backwards branch with a filled delay slot
5342 which is nullified. */
5343 if (dbr_sequence_length () != 0
5344 && ! forward_branch_p (insn)
5345 && nullify)
5347 strcpy (buf, "bb,");
5348 if (GET_MODE (operands[0]) == DImode)
5349 strcat (buf, "*");
5350 if ((which == 0 && negated)
5351 || (which == 1 && ! negated))
5352 strcat (buf, "<");
5353 else
5354 strcat (buf, ">=");
5355 if (negated)
5356 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5357 else
5358 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5360 /* Handle short backwards branch with an unfilled delay slot.
5361 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5362 taken and untaken branches. */
5363 else if (dbr_sequence_length () == 0
5364 && ! forward_branch_p (insn)
5365 && INSN_ADDRESSES_SET_P ()
5366 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5367 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5369 strcpy (buf, "bb,");
5370 if (GET_MODE (operands[0]) == DImode)
5371 strcat (buf, "*");
5372 if ((which == 0 && negated)
5373 || (which == 1 && ! negated))
5374 strcat (buf, ">=");
5375 else
5376 strcat (buf, "<");
5377 if (negated)
5378 strcat (buf, " %0,%1,%3%#");
5379 else
5380 strcat (buf, " %0,%1,%2%#");
5382 else
5384 strcpy (buf, "{extrs,|extrw,s,}");
5385 if (GET_MODE (operands[0]) == DImode)
5386 strcpy (buf, "extrd,s,*");
5387 if ((which == 0 && negated)
5388 || (which == 1 && ! negated))
5389 strcat (buf, "<");
5390 else
5391 strcat (buf, ">=");
5392 if (nullify && negated)
5393 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5394 else if (nullify && ! negated)
5395 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5396 else if (negated)
5397 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5398 else
5399 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5401 break;
5403 default:
5404 abort ();
5406 return buf;
5409 /* This routine handles all the branch-on-variable-bit conditional branch
5410 sequences we might need to generate. It handles nullification of delay
5411 slots, varying length branches, negated branches and all combinations
5412 of the above. It returns the appropriate output template to emit the
5413 branch. */
5415 const char *
5416 output_bvb (operands, nullify, length, negated, insn, which)
5417 rtx *operands ATTRIBUTE_UNUSED;
5418 int nullify, length, negated;
5419 rtx insn;
5420 int which;
5422 static char buf[100];
5423 int useskip = 0;
5425 /* A conditional branch to the following instruction (e.g. the delay slot) is
5426 asking for a disaster. I do not think this can happen as this pattern
5427 is only used when optimizing; jump optimization should eliminate the
5428 jump. But be prepared just in case. */
5430 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5431 return "";
5433 /* If this is a long branch with its delay slot unfilled, set `nullify'
5434 as it can nullify the delay slot and save a nop. */
5435 if (length == 8 && dbr_sequence_length () == 0)
5436 nullify = 1;
5438 /* If this is a short forward conditional branch which did not get
5439 its delay slot filled, the delay slot can still be nullified. */
5440 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5441 nullify = forward_branch_p (insn);
5443 /* A forward branch over a single nullified insn can be done with an
5444 extrs instruction. This avoids a single cycle penalty due to a
5445 mis-predicted branch if we fall through (branch not taken). */
5447 if (length == 4
5448 && next_real_insn (insn) != 0
5449 && get_attr_length (next_real_insn (insn)) == 4
5450 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5451 && nullify)
5452 useskip = 1;
5454 switch (length)
5457 /* All short conditional branches except backwards with an unfilled
5458 delay slot. */
5459 case 4:
5460 if (useskip)
5461 strcpy (buf, "{vextrs,|extrw,s,}");
5462 else
5463 strcpy (buf, "{bvb,|bb,}");
5464 if (useskip && GET_MODE (operands[0]) == DImode)
5465 strcpy (buf, "extrd,s,*");
5466 else if (GET_MODE (operands[0]) == DImode)
5467 strcpy (buf, "bb,*");
5468 if ((which == 0 && negated)
5469 || (which == 1 && ! negated))
5470 strcat (buf, ">=");
5471 else
5472 strcat (buf, "<");
5473 if (useskip)
5474 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5475 else if (nullify && negated)
5476 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5477 else if (nullify && ! negated)
5478 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5479 else if (! nullify && negated)
5480 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
5481 else if (! nullify && ! negated)
5482 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5483 break;
5485 /* All long conditionals. Note a short backward branch with an
5486 unfilled delay slot is treated just like a long backward branch
5487 with an unfilled delay slot. */
5488 case 8:
5489 /* Handle weird backwards branch with a filled delay slot
5490 which is nullified. */
5491 if (dbr_sequence_length () != 0
5492 && ! forward_branch_p (insn)
5493 && nullify)
5495 strcpy (buf, "{bvb,|bb,}");
5496 if (GET_MODE (operands[0]) == DImode)
5497 strcat (buf, "*");
5498 if ((which == 0 && negated)
5499 || (which == 1 && ! negated))
5500 strcat (buf, "<");
5501 else
5502 strcat (buf, ">=");
5503 if (negated)
5504 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5505 else
5506 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5508 /* Handle short backwards branch with an unfilled delay slot.
5509 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5510 taken and untaken branches. */
5511 else if (dbr_sequence_length () == 0
5512 && ! forward_branch_p (insn)
5513 && INSN_ADDRESSES_SET_P ()
5514 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5515 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5517 strcpy (buf, "{bvb,|bb,}");
5518 if (GET_MODE (operands[0]) == DImode)
5519 strcat (buf, "*");
5520 if ((which == 0 && negated)
5521 || (which == 1 && ! negated))
5522 strcat (buf, ">=");
5523 else
5524 strcat (buf, "<");
5525 if (negated)
5526 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5527 else
5528 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5530 else
5532 strcpy (buf, "{vextrs,|extrw,s,}");
5533 if (GET_MODE (operands[0]) == DImode)
5534 strcpy (buf, "extrd,s,*");
5535 if ((which == 0 && negated)
5536 || (which == 1 && ! negated))
5537 strcat (buf, "<");
5538 else
5539 strcat (buf, ">=");
5540 if (nullify && negated)
5541 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5542 else if (nullify && ! negated)
5543 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5544 else if (negated)
5545 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5546 else
5547 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5549 break;
5551 default:
5552 abort ();
5554 return buf;
5557 /* Return the output template for emitting a dbra type insn.
5559 Note it may perform some output operations on its own before
5560 returning the final output string. */
5561 const char *
5562 output_dbra (operands, insn, which_alternative)
5563 rtx *operands;
5564 rtx insn;
5565 int which_alternative;
5568 /* A conditional branch to the following instruction (e.g. the delay slot) is
5569 asking for a disaster. Be prepared! */
5571 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5573 if (which_alternative == 0)
5574 return "ldo %1(%0),%0";
5575 else if (which_alternative == 1)
5577 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
5578 output_asm_insn ("ldw -16(%%r30),%4", operands);
5579 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5580 return "{fldws|fldw} -16(%%r30),%0";
5582 else
5584 output_asm_insn ("ldw %0,%4", operands);
5585 return "ldo %1(%4),%4\n\tstw %4,%0";
5589 if (which_alternative == 0)
5591 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5592 int length = get_attr_length (insn);
5594 /* If this is a long branch with its delay slot unfilled, set `nullify'
5595 as it can nullify the delay slot and save a nop. */
5596 if (length == 8 && dbr_sequence_length () == 0)
5597 nullify = 1;
5599 /* If this is a short forward conditional branch which did not get
5600 its delay slot filled, the delay slot can still be nullified. */
5601 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5602 nullify = forward_branch_p (insn);
5604 /* Handle short versions first. */
5605 if (length == 4 && nullify)
5606 return "addib,%C2,n %1,%0,%3";
5607 else if (length == 4 && ! nullify)
5608 return "addib,%C2 %1,%0,%3";
5609 else if (length == 8)
5611 /* Handle weird backwards branch with a filled delay slot
5612 which is nullified. */
5613 if (dbr_sequence_length () != 0
5614 && ! forward_branch_p (insn)
5615 && nullify)
5616 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5617 /* Handle short backwards branch with an unfilled delay slot.
5618 Using a addb;nop rather than addi;bl saves 1 cycle for both
5619 taken and untaken branches. */
5620 else if (dbr_sequence_length () == 0
5621 && ! forward_branch_p (insn)
5622 && INSN_ADDRESSES_SET_P ()
5623 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5624 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5625 return "addib,%C2 %1,%0,%3%#";
5627 /* Handle normal cases. */
5628 if (nullify)
5629 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5630 else
5631 return "addi,%N2 %1,%0,%0\n\tb %3";
5633 else
5634 abort ();
5636 /* Deal with gross reload from FP register case. */
5637 else if (which_alternative == 1)
5639 /* Move loop counter from FP register to MEM then into a GR,
5640 increment the GR, store the GR into MEM, and finally reload
5641 the FP register from MEM from within the branch's delay slot. */
5642 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
5643 operands);
5644 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5645 if (get_attr_length (insn) == 24)
5646 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5647 else
5648 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5650 /* Deal with gross reload from memory case. */
5651 else
5653 /* Reload loop counter from memory, the store back to memory
5654 happens in the branch's delay slot. */
5655 output_asm_insn ("ldw %0,%4", operands);
5656 if (get_attr_length (insn) == 12)
5657 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5658 else
5659 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5663 /* Return the output template for emitting a movb type insn.
5665 Note it may perform some output operations on its own before
5666 returning the final output string. */
5667 const char *
5668 output_movb (operands, insn, which_alternative, reverse_comparison)
5669 rtx *operands;
5670 rtx insn;
5671 int which_alternative;
5672 int reverse_comparison;
5675 /* A conditional branch to the following instruction (e.g. the delay slot) is
5676 asking for a disaster. Be prepared! */
5678 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5680 if (which_alternative == 0)
5681 return "copy %1,%0";
5682 else if (which_alternative == 1)
5684 output_asm_insn ("stw %1,-16(%%r30)", operands);
5685 return "{fldws|fldw} -16(%%r30),%0";
5687 else if (which_alternative == 2)
5688 return "stw %1,%0";
5689 else
5690 return "mtsar %r1";
5693 /* Support the second variant. */
5694 if (reverse_comparison)
5695 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5697 if (which_alternative == 0)
5699 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5700 int length = get_attr_length (insn);
5702 /* If this is a long branch with its delay slot unfilled, set `nullify'
5703 as it can nullify the delay slot and save a nop. */
5704 if (length == 8 && dbr_sequence_length () == 0)
5705 nullify = 1;
5707 /* If this is a short forward conditional branch which did not get
5708 its delay slot filled, the delay slot can still be nullified. */
5709 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5710 nullify = forward_branch_p (insn);
5712 /* Handle short versions first. */
5713 if (length == 4 && nullify)
5714 return "movb,%C2,n %1,%0,%3";
5715 else if (length == 4 && ! nullify)
5716 return "movb,%C2 %1,%0,%3";
5717 else if (length == 8)
5719 /* Handle weird backwards branch with a filled delay slot
5720 which is nullified. */
5721 if (dbr_sequence_length () != 0
5722 && ! forward_branch_p (insn)
5723 && nullify)
5724 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5726 /* Handle short backwards branch with an unfilled delay slot.
5727 Using a movb;nop rather than or;bl saves 1 cycle for both
5728 taken and untaken branches. */
5729 else if (dbr_sequence_length () == 0
5730 && ! forward_branch_p (insn)
5731 && INSN_ADDRESSES_SET_P ()
5732 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5733 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5734 return "movb,%C2 %1,%0,%3%#";
5735 /* Handle normal cases. */
5736 if (nullify)
5737 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5738 else
5739 return "or,%N2 %1,%%r0,%0\n\tb %3";
5741 else
5742 abort ();
5744 /* Deal with gross reload from FP register case. */
5745 else if (which_alternative == 1)
5747 /* Move loop counter from FP register to MEM then into a GR,
5748 increment the GR, store the GR into MEM, and finally reload
5749 the FP register from MEM from within the branch's delay slot. */
5750 output_asm_insn ("stw %1,-16(%%r30)", operands);
5751 if (get_attr_length (insn) == 12)
5752 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5753 else
5754 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5756 /* Deal with gross reload from memory case. */
5757 else if (which_alternative == 2)
5759 /* Reload loop counter from memory, the store back to memory
5760 happens in the branch's delay slot. */
5761 if (get_attr_length (insn) == 8)
5762 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5763 else
5764 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5766 /* Handle SAR as a destination. */
5767 else
5769 if (get_attr_length (insn) == 8)
5770 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
5771 else
5772 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
5777 /* INSN is a millicode call. It may have an unconditional jump in its delay
5778 slot.
5780 CALL_DEST is the routine we are calling. */
5782 const char *
5783 output_millicode_call (insn, call_dest)
5784 rtx insn;
5785 rtx call_dest;
5787 int distance;
5788 rtx xoperands[4];
5789 rtx seq_insn;
5791 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
5793 /* Handle common case -- empty delay slot or no jump in the delay slot,
5794 and we're sure that the branch will reach the beginning of the $CODE$
5795 subspace. */
5796 if ((dbr_sequence_length () == 0
5797 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
5798 || (dbr_sequence_length () != 0
5799 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5800 && get_attr_length (insn) == 4))
5802 xoperands[0] = call_dest;
5803 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
5804 return "";
5807 /* This call may not reach the beginning of the $CODE$ subspace. */
5808 if (get_attr_length (insn) > 4)
5810 int delay_insn_deleted = 0;
5812 /* We need to emit an inline long-call branch. */
5813 if (dbr_sequence_length () != 0
5814 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5816 /* A non-jump insn in the delay slot. By definition we can
5817 emit this insn before the call. */
5818 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5820 /* Now delete the delay insn. */
5821 PUT_CODE (NEXT_INSN (insn), NOTE);
5822 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5823 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5824 delay_insn_deleted = 1;
5827 /* PIC long millicode call sequence. */
5828 if (flag_pic)
5830 xoperands[0] = call_dest;
5831 xoperands[1] = gen_label_rtx ();
5832 /* Get our address + 8 into %r1. */
5833 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5835 /* Add %r1 to the offset of our target from the next insn. */
5836 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
5837 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5838 CODE_LABEL_NUMBER (xoperands[1]));
5839 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
5841 /* Get the return address into %r31. */
5842 output_asm_insn ("blr 0,%3", xoperands);
5844 /* Branch to our target which is in %r1. */
5845 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5847 /* Empty delay slot. Note this insn gets fetched twice and
5848 executed once. To be safe we use a nop. */
5849 output_asm_insn ("nop", xoperands);
5851 /* Pure portable runtime doesn't allow be/ble; we also don't have
5852 PIC support in the assembler/linker, so this sequence is needed. */
5853 else if (TARGET_PORTABLE_RUNTIME)
5855 xoperands[0] = call_dest;
5856 /* Get the address of our target into %r29. */
5857 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
5858 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
5860 /* Get our return address into %r31. */
5861 output_asm_insn ("blr %%r0,%3", xoperands);
5863 /* Jump to our target address in %r29. */
5864 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
5866 /* Empty delay slot. Note this insn gets fetched twice and
5867 executed once. To be safe we use a nop. */
5868 output_asm_insn ("nop", xoperands);
5870 /* If we're allowed to use be/ble instructions, then this is the
5871 best sequence to use for a long millicode call. */
5872 else
5874 xoperands[0] = call_dest;
5875 output_asm_insn ("ldil L%%%0,%3", xoperands);
5876 output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
5877 output_asm_insn ("nop", xoperands);
5880 /* If we had a jump in the call's delay slot, output it now. */
5881 if (dbr_sequence_length () != 0
5882 && !delay_insn_deleted)
5884 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5885 output_asm_insn ("b,n %0", xoperands);
5887 /* Now delete the delay insn. */
5888 PUT_CODE (NEXT_INSN (insn), NOTE);
5889 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5890 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5892 return "";
5895 /* This call has an unconditional jump in its delay slot and the
5896 call is known to reach its target or the beginning of the current
5897 subspace. */
5899 /* Use the containing sequence insn's address. */
5900 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5902 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
5903 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
5905 /* If the branch was too far away, emit a normal call followed
5906 by a nop, followed by the unconditional branch.
5908 If the branch is close, then adjust %r2 from within the
5909 call's delay slot. */
5911 xoperands[0] = call_dest;
5912 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5913 if (! VAL_14_BITS_P (distance))
5914 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
5915 else
5917 xoperands[2] = gen_label_rtx ();
5918 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
5919 xoperands);
5920 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5921 CODE_LABEL_NUMBER (xoperands[2]));
5924 /* Delete the jump. */
5925 PUT_CODE (NEXT_INSN (insn), NOTE);
5926 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5927 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5928 return "";
5931 extern struct obstack permanent_obstack;
5933 /* INSN is a function call.  It may have an unconditional jump
5934 in its delay slot.
5936 CALL_DEST is the routine we are calling. */
5938 const char *
5939 output_call (insn, call_dest, sibcall)
5940 rtx insn;
5941 rtx call_dest;
5942 int sibcall;
5944 int distance;
5945 rtx xoperands[4];
5946 rtx seq_insn;
5948 /* Handle common case -- empty delay slot or no jump in the delay slot,
5949 and we're sure that the branch will reach the beginning of the $CODE$
5950 subspace. */
5951 if ((dbr_sequence_length () == 0
5952 && get_attr_length (insn) == 8)
5953 || (dbr_sequence_length () != 0
5954 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5955 && get_attr_length (insn) == 4))
5957 xoperands[0] = call_dest;
5958 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
5959 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
5960 return "";
5963 /* This call may not reach the beginning of the $CODE$ subspace. */
5964 if (get_attr_length (insn) > 8)
5966 int delay_insn_deleted = 0;
5967 rtx xoperands[2];
5968 rtx link;
5970 /* We need to emit an inline long-call branch. Furthermore,
5971 because we're changing a named function call into an indirect
5972 function call well after the parameters have been set up, we
5973 need to make sure any FP args appear in both the integer
5974 and FP registers.  Also, we need to move any delay slot insn
5975 out of the delay slot. And finally, we can't rely on the linker
5976 being able to fix the call to $$dyncall -- yuk!  */
5977 if (dbr_sequence_length () != 0
5978 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5980 /* A non-jump insn in the delay slot. By definition we can
5981 emit this insn before the call (and in fact before argument
5982 relocating).  */
5983 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5985 /* Now delete the delay insn. */
5986 PUT_CODE (NEXT_INSN (insn), NOTE);
5987 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5988 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5989 delay_insn_deleted = 1;
5992 /* Now copy any FP arguments into integer registers. */
5993 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5995 int arg_mode, regno;
5996 rtx use = XEXP (link, 0);
5997 if (! (GET_CODE (use) == USE
5998 && GET_CODE (XEXP (use, 0)) == REG
5999 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6000 continue;
6002 arg_mode = GET_MODE (XEXP (use, 0));
6003 regno = REGNO (XEXP (use, 0));
6004 /* Is it a floating point register? */
6005 if (regno >= 32 && regno <= 39)
6007 /* Copy from the FP register into an integer register
6008 (via memory). */
6009 if (arg_mode == SFmode)
6011 xoperands[0] = XEXP (use, 0);
6012 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6013 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
6014 xoperands);
6015 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6017 else
6019 xoperands[0] = XEXP (use, 0);
6020 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6021 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
6022 xoperands);
6023 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6024 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6029 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
6030 we don't have any direct calls in that case. */
6032 int i;
6033 const char *name = XSTR (call_dest, 0);
6035 /* See if we have already put this function on the list
6036 of deferred plabels. This list is generally small,
6037 so a linear search is not too ugly.  If it proves too
6038 slow, replace it with something faster.  */
6039 for (i = 0; i < n_deferred_plabels; i++)
6040 if (strcmp (name, deferred_plabels[i].name) == 0)
6041 break;
6043 /* If the deferred plabel list is empty, or this entry was
6044 not found on the list, create a new entry on the list. */
6045 if (deferred_plabels == NULL || i == n_deferred_plabels)
6047 const char *real_name;
6049 if (deferred_plabels == 0)
6050 deferred_plabels = (struct deferred_plabel *)
6051 xmalloc (1 * sizeof (struct deferred_plabel));
6052 else
6053 deferred_plabels = (struct deferred_plabel *)
6054 xrealloc (deferred_plabels,
6055 ((n_deferred_plabels + 1)
6056 * sizeof (struct deferred_plabel)));
6058 i = n_deferred_plabels++;
6059 deferred_plabels[i].internal_label = gen_label_rtx ();
6060 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
6061 strlen (name) + 1);
6062 strcpy (deferred_plabels[i].name, name);
6064 /* Gross. We have just implicitly taken the address of this
6065 function; mark it as such.  */
6066 STRIP_NAME_ENCODING (real_name, name);
6067 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
6070 /* We have to load the address of the function using a procedure
6071 label (plabel). Inline plabels can lose for PIC and other
6072 cases, so avoid them by creating a 32bit plabel in the data
6073 segment. */
6074 if (flag_pic)
6076 xoperands[0] = deferred_plabels[i].internal_label;
6077 xoperands[1] = gen_label_rtx ();
6079 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
6080 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
6081 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
6083 /* Get our address + 8 into %r1. */
6084 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6086 /* Add %r1 to the offset of dyncall from the next insn. */
6087 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
6088 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6089 CODE_LABEL_NUMBER (xoperands[1]));
6090 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
6092 /* Get the return address into %r31. */
6093 output_asm_insn ("blr %%r0,%%r31", xoperands);
6095 /* Branch to our target which is in %r1. */
6096 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6098 if (sibcall)
6100 /* This call never returns, so we do not need to fix the
6101 return pointer. */
6102 output_asm_insn ("nop", xoperands);
6104 else
6106 /* Copy the return address into %r2 also. */
6107 output_asm_insn ("copy %%r31,%%r2", xoperands);
6110 else
6112 xoperands[0] = deferred_plabels[i].internal_label;
6114 /* Get the address of our target into %r22. */
6115 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
6116 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
6118 /* Get the high part of the address of $dyncall into %r2, then
6119 add in the low part in the branch instruction. */
6120 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
6121 output_asm_insn ("{ble|be,l} R%%$$dyncall(%%sr4,%%r2)",
6122 xoperands);
6124 if (sibcall)
6126 /* This call never returns, so we do not need to fix the
6127 return pointer. */
6128 output_asm_insn ("nop", xoperands);
6130 else
6132 /* Copy the return address into %r2 also. */
6133 output_asm_insn ("copy %%r31,%%r2", xoperands);
6138 /* If we had a jump in the call's delay slot, output it now. */
6139 if (dbr_sequence_length () != 0
6140 && !delay_insn_deleted)
6142 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6143 output_asm_insn ("b,n %0", xoperands);
6145 /* Now delete the delay insn. */
6146 PUT_CODE (NEXT_INSN (insn), NOTE);
6147 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6148 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6150 return "";
6153 /* This call has an unconditional jump in its delay slot and the
6154 call is known to reach its target or the beginning of the current
6155 subspace. */
6157 /* Use the containing sequence insn's address. */
6158 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6160 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6161 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6163 /* If the branch was too far away, emit a normal call followed
6164 by a nop, followed by the unconditional branch.
6166 If the branch is close, then adjust %r2 from within the
6167 call's delay slot. */
6169 xoperands[0] = call_dest;
6170 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6171 if (! VAL_14_BITS_P (distance))
6172 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
6173 else
6175 xoperands[3] = gen_label_rtx ();
6176 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
6177 xoperands);
6178 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6179 CODE_LABEL_NUMBER (xoperands[3]));
6182 /* Delete the jump. */
6183 PUT_CODE (NEXT_INSN (insn), NOTE);
6184 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6185 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6186 return "";
6189 /* In HPUX 8.0's shared library scheme, special relocations are needed
6190 for function labels if they might be passed to a function
6191 in a shared library (because shared libraries don't live in code
6192 space), and special magic is needed to construct their address. */
6194 void
6195 hppa_encode_label (sym)
6196 rtx sym;
6198 const char *str = XSTR (sym, 0);
6199 int len = strlen (str) + 1;
6200 char *newstr, *p;
6202 p = newstr = alloca (len + 1);
6203 if (str[0] == '*')
6205 str++;
6206 len--;
6208 *p++ = '@';
6209 strcpy (p, str);
6211 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
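/* Illustration only, not part of the compiler: the encoding above just
   prefixes the name with '@' (dropping any leading '*'), so "foo" and
   "*foo" both become "@foo", which FUNCTION_NAME_P can later test for.
   A minimal standalone sketch of the same transformation, using
   libiberty's xmalloc as the rest of this file does:  */
#if 0
static char *
encode_label_example (const char *str)
{
  size_t len = strlen (str) + 1;
  char *newstr = xmalloc (len + 1);

  /* Skip a leading '*' so that it is replaced by the '@' marker.  */
  if (str[0] == '*')
    str++;
  newstr[0] = '@';
  strcpy (newstr + 1, str);
  return newstr;
}
#endif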
6215 function_label_operand (op, mode)
6216 rtx op;
6217 enum machine_mode mode ATTRIBUTE_UNUSED;
6219 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6222 /* Returns 1 if OP is a function label involved in a simple addition
6223 with a constant. Used to keep certain patterns from matching
6224 during instruction combination. */
6226 is_function_label_plus_const (op)
6227 rtx op;
6229 /* Strip off any CONST. */
6230 if (GET_CODE (op) == CONST)
6231 op = XEXP (op, 0);
6233 return (GET_CODE (op) == PLUS
6234 && function_label_operand (XEXP (op, 0), Pmode)
6235 && GET_CODE (XEXP (op, 1)) == CONST_INT);
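/* For example (illustrative): after hppa_encode_label has run, the
   address of function "foo" plus 4 appears in RTL as

       (const (plus (symbol_ref "@foo") (const_int 4)))

   and is_function_label_plus_const accepts exactly that shape, with or
   without the outer CONST wrapper.  */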
6238 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6239 use in fmpyadd instructions. */
6241 fmpyaddoperands (operands)
6242 rtx *operands;
6244 enum machine_mode mode = GET_MODE (operands[0]);
6246 /* Must be a floating point mode. */
6247 if (mode != SFmode && mode != DFmode)
6248 return 0;
6250 /* All modes must be the same. */
6251 if (! (mode == GET_MODE (operands[1])
6252 && mode == GET_MODE (operands[2])
6253 && mode == GET_MODE (operands[3])
6254 && mode == GET_MODE (operands[4])
6255 && mode == GET_MODE (operands[5])))
6256 return 0;
6258 /* All operands must be registers. */
6259 if (! (GET_CODE (operands[1]) == REG
6260 && GET_CODE (operands[2]) == REG
6261 && GET_CODE (operands[3]) == REG
6262 && GET_CODE (operands[4]) == REG
6263 && GET_CODE (operands[5]) == REG))
6264 return 0;
6266 /* Only 2 real operands to the addition. One of the input operands must
6267 be the same as the output operand. */
6268 if (! rtx_equal_p (operands[3], operands[4])
6269 && ! rtx_equal_p (operands[3], operands[5]))
6270 return 0;
6272 /* The inout operand of the add cannot conflict with any operands from the multiply.  */
6273 if (rtx_equal_p (operands[3], operands[0])
6274 || rtx_equal_p (operands[3], operands[1])
6275 || rtx_equal_p (operands[3], operands[2]))
6276 return 0;
6279 /* The multiply result cannot feed into the addition operands.  */
6279 if (rtx_equal_p (operands[4], operands[0])
6280 || rtx_equal_p (operands[5], operands[0]))
6281 return 0;
6283 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6284 if (mode == SFmode
6285 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6286 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6287 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6288 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6289 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6290 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6291 return 0;
6293 /* Passed. Operands are suitable for fmpyadd. */
6294 return 1;
6297 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6298 use in fmpysub instructions. */
6300 fmpysuboperands (operands)
6301 rtx *operands;
6303 enum machine_mode mode = GET_MODE (operands[0]);
6305 /* Must be a floating point mode. */
6306 if (mode != SFmode && mode != DFmode)
6307 return 0;
6309 /* All modes must be the same. */
6310 if (! (mode == GET_MODE (operands[1])
6311 && mode == GET_MODE (operands[2])
6312 && mode == GET_MODE (operands[3])
6313 && mode == GET_MODE (operands[4])
6314 && mode == GET_MODE (operands[5])))
6315 return 0;
6317 /* All operands must be registers. */
6318 if (! (GET_CODE (operands[1]) == REG
6319 && GET_CODE (operands[2]) == REG
6320 && GET_CODE (operands[3]) == REG
6321 && GET_CODE (operands[4]) == REG
6322 && GET_CODE (operands[5]) == REG))
6323 return 0;
6325 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6326 operation, so operands[4] must be the same as operands[3].  */
6327 if (! rtx_equal_p (operands[3], operands[4]))
6328 return 0;
6331 /* The multiply result cannot feed into the subtraction.  */
6331 if (rtx_equal_p (operands[5], operands[0]))
6332 return 0;
6335 /* The inout operand of the sub cannot conflict with any operands from the multiply.  */
6335 if (rtx_equal_p (operands[3], operands[0])
6336 || rtx_equal_p (operands[3], operands[1])
6337 || rtx_equal_p (operands[3], operands[2]))
6338 return 0;
6340 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6341 if (mode == SFmode
6342 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6343 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6344 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6345 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6346 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6347 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6348 return 0;
6350 /* Passed. Operands are suitable for fmpysub. */
6351 return 1;
6355 plus_xor_ior_operator (op, mode)
6356 rtx op;
6357 enum machine_mode mode ATTRIBUTE_UNUSED;
6359 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6360 || GET_CODE (op) == IOR);
6363 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
6364 constants for shadd instructions. */
6365 static int
6366 shadd_constant_p (val)
6367 int val;
6369 if (val == 2 || val == 4 || val == 8)
6370 return 1;
6371 else
6372 return 0;
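/* Illustration only: 2, 4 and 8 are the scale factors implemented by
   the sh1add, sh2add and sh3add instructions, which compute
   (index << k) + base for k in {1, 2, 3}.  A sketch in plain C of the
   operation a shNadd performs:  */
#if 0
static int
shadd_example (int index, int base, int k /* 1, 2 or 3 */)
{
  /* The shadd constant is 1 << k, i.e. 2, 4 or 8.  */
  return (index << k) + base;
}
#endif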
6375 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6376 the valid constants for shadd instructions.  */
6378 shadd_operand (op, mode)
6379 rtx op;
6380 enum machine_mode mode ATTRIBUTE_UNUSED;
6382 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6385 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6388 basereg_operand (op, mode)
6389 rtx op;
6390 enum machine_mode mode;
6392 /* cse will create some unscaled indexed addresses; however, it
6393 generally isn't a win on the PA, so avoid creating unscaled
6394 indexed addresses until after cse is finished. */
6395 if (!cse_not_expected)
6396 return 0;
6398 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6399 we don't have to worry about the braindamaged implicit space
6400 register selection from the basereg. */
6401 if (TARGET_NO_SPACE_REGS)
6402 return (GET_CODE (op) == REG);
6404 /* While it's always safe to index off the frame pointer, it's not
6405 always profitable, particularly when the frame pointer is being
6406 eliminated. */
6407 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6408 return 1;
6410 return (GET_CODE (op) == REG
6411 && REG_POINTER (op)
6412 && register_operand (op, mode));
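/* For example (illustrative RTL): a scaled indexed address has the shape

       (plus (mult (reg index) (const_int 4)) (reg base))

   while the unscaled form cse likes to create is simply

       (plus (reg index) (reg base))

   where nothing syntactically distinguishes the base from the index;
   the REG_POINTER check above is what picks out a plausible base
   register for the implicit space register selection.  */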
6415 /* Return 1 if this operand is anything other than a hard register. */
6418 non_hard_reg_operand (op, mode)
6419 rtx op;
6420 enum machine_mode mode ATTRIBUTE_UNUSED;
6422 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6425 /* Return 1 if INSN branches forward. Should be using insn_addresses
6426 to avoid walking through all the insns... */
6427 static int
6428 forward_branch_p (insn)
6429 rtx insn;
6431 rtx label = JUMP_LABEL (insn);
6433 while (insn)
6435 if (insn == label)
6436 break;
6437 else
6438 insn = NEXT_INSN (insn);
6441 return (insn == label);
6444 /* Return 1 if OP is an equality comparison, else return 0. */
6446 eq_neq_comparison_operator (op, mode)
6447 rtx op;
6448 enum machine_mode mode ATTRIBUTE_UNUSED;
6450 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6453 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6455 movb_comparison_operator (op, mode)
6456 rtx op;
6457 enum machine_mode mode ATTRIBUTE_UNUSED;
6459 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6460 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6463 /* Return 1 if INSN is in the delay slot of a call instruction. */
6465 jump_in_call_delay (insn)
6466 rtx insn;
6469 if (GET_CODE (insn) != JUMP_INSN)
6470 return 0;
6472 if (PREV_INSN (insn)
6473 && PREV_INSN (PREV_INSN (insn))
6474 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6476 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6478 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6479 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6482 else
6483 return 0;
6486 /* Output an unconditional move and branch insn. */
6488 const char *
6489 output_parallel_movb (operands, length)
6490 rtx *operands;
6491 int length;
6493 /* These are the cases in which we win. */
6494 if (length == 4)
6495 return "mov%I1b,tr %1,%0,%2";
6497 /* None of these cases wins, but they don't lose either. */
6498 if (dbr_sequence_length () == 0)
6500 /* Nothing in the delay slot, fake it by putting the combined
6501 insn (the copy or add) in the delay slot of a bl. */
6502 if (GET_CODE (operands[1]) == CONST_INT)
6503 return "b %2\n\tldi %1,%0";
6504 else
6505 return "b %2\n\tcopy %1,%0";
6507 else
6509 /* Something in the delay slot, but we've got a long branch. */
6510 if (GET_CODE (operands[1]) == CONST_INT)
6511 return "ldi %1,%0\n\tb %2";
6512 else
6513 return "copy %1,%0\n\tb %2";
6517 /* Output an unconditional add and branch insn. */
6519 const char *
6520 output_parallel_addb (operands, length)
6521 rtx *operands;
6522 int length;
6524 /* To make life easy we want operand0 to be the shared input/output
6525 operand and operand1 to be the readonly operand. */
6526 if (operands[0] == operands[1])
6527 operands[1] = operands[2];
6529 /* These are the cases in which we win. */
6530 if (length == 4)
6531 return "add%I1b,tr %1,%0,%3";
6533 /* None of these cases win, but they don't lose either. */
6534 if (dbr_sequence_length () == 0)
6536 /* Nothing in the delay slot, fake it by putting the combined
6537 insn (the copy or add) in the delay slot of a bl. */
6538 return "b %3\n\tadd%I1 %1,%0,%0";
6540 else
6542 /* Something in the delay slot, but we've got a long branch. */
6543 return "add%I1 %1,%0,%0\n\tb %3";
6547 /* Return nonzero if INSN (a jump insn) immediately follows a call to
6548 a named function. This is used to discourage creating parallel movb/addb
6549 insns since a jump which immediately follows a call can execute in the
6550 delay slot of the call.
6552 It is also used to avoid filling the delay slot of a jump which
6553 immediately follows a call since the jump can usually be eliminated
6554 completely by modifying RP in the delay slot of the call. */
6557 following_call (insn)
6558 rtx insn;
6560 /* We do not parallel movb,addb or place jumps into call delay slots when
6561 optimizing for the PA8000. */
6562 if (pa_cpu != PROCESSOR_8000)
6563 return 0;
6565 /* Find the previous real insn, skipping NOTEs. */
6566 insn = PREV_INSN (insn);
6567 while (insn && GET_CODE (insn) == NOTE)
6568 insn = PREV_INSN (insn);
6570 /* Check for CALL_INSNs and millicode calls. */
6571 if (insn
6572 && ((GET_CODE (insn) == CALL_INSN
6573 && get_attr_type (insn) != TYPE_DYNCALL)
6574 || (GET_CODE (insn) == INSN
6575 && GET_CODE (PATTERN (insn)) != SEQUENCE
6576 && GET_CODE (PATTERN (insn)) != USE
6577 && GET_CODE (PATTERN (insn)) != CLOBBER
6578 && get_attr_type (insn) == TYPE_MILLI)))
6579 return 1;
6581 return 0;
6584 /* We use this hook to perform a PA specific optimization which is difficult
6585 to do in earlier passes.
6587 We want the delay slots of branches within jump tables to be filled.
6588 None of the compiler passes at the moment even has the notion that a
6589 PA jump table doesn't contain addresses, but instead contains actual
6590 instructions!
6592 Because we actually jump into the table, the addresses of each entry
6593 must stay constant in relation to the beginning of the table (which
6594 itself must stay constant relative to the instruction to jump into
6595 it). I don't believe we can guarantee earlier passes of the compiler
6596 will adhere to those rules.
6598 So, late in the compilation process we find all the jump tables, and
6599 expand them into real code -- e.g. each entry in the jump table vector
6600 will get an appropriate label followed by a jump to the final target.
6602 Reorg and the final jump pass can then optimize these branches and
6603 fill their delay slots. We end up with smaller, more efficient code.
6605 The jump instructions within the table are special; we must be able
6606 to identify them during assembly output (if the jumps don't get filled
6607 we need to emit a nop rather than nullifying the delay slot).  We
6608 identify jumps in switch tables by marking the SET with DImode.
6610 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6611 insns. This serves two purposes, first it prevents jump.c from
6612 noticing that the last N entries in the table jump to the instruction
6613 immediately after the table and deleting the jumps. Second, those
6614 insns mark where we should emit .begin_brtab and .end_brtab directives
6615 when using GAS (allows for better link time optimizations). */
6617 void
6618 pa_reorg (insns)
6619 rtx insns;
6621 rtx insn;
6623 remove_useless_addtr_insns (insns, 1);
6625 if (pa_cpu < PROCESSOR_8000)
6626 pa_combine_instructions (get_insns ());
6629 /* This is fairly cheap, so always run it if optimizing. */
6630 if (optimize > 0 && !TARGET_BIG_SWITCH)
6632 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6633 insns = get_insns ();
6634 for (insn = insns; insn; insn = NEXT_INSN (insn))
6636 rtx pattern, tmp, location;
6637 unsigned int length, i;
6639 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6640 if (GET_CODE (insn) != JUMP_INSN
6641 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6642 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6643 continue;
6645 /* Emit marker for the beginning of the branch table. */
6646 emit_insn_before (gen_begin_brtab (), insn);
6648 pattern = PATTERN (insn);
6649 location = PREV_INSN (insn);
6650 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
6652 for (i = 0; i < length; i++)
6654 /* Emit a label before each jump to keep jump.c from
6655 removing this code. */
6656 tmp = gen_label_rtx ();
6657 LABEL_NUSES (tmp) = 1;
6658 emit_label_after (tmp, location);
6659 location = NEXT_INSN (location);
6661 if (GET_CODE (pattern) == ADDR_VEC)
6663 /* Emit the jump itself. */
6664 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
6665 tmp = emit_jump_insn_after (tmp, location);
6666 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
6667 /* It is easy to rely on the branch table markers
6668 during assembly output to trigger the correct code
6669 for a switch table jump with an unfilled delay slot.
6671 However, that requires state and assumes that we look
6672 at insns in order.
6674 We can't make such assumptions when computing the length
6675 of instructions. Ugh. We could walk the insn chain to
6676 determine if this instruction is in a branch table, but
6677 that can get rather expensive, particularly during the
6678 branch shortening phase of the compiler.
6680 So instead we mark this jump as being special. This is
6681 far from ideal and knows that no code after this will
6682 muck around with the mode of the JUMP_INSN itself. */
6683 PUT_MODE (tmp, SImode);
6684 LABEL_NUSES (JUMP_LABEL (tmp))++;
6685 location = NEXT_INSN (location);
6687 else
6689 /* Emit the jump itself. */
6690 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
6691 tmp = emit_jump_insn_after (tmp, location);
6692 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
6693 /* It is easy to rely on the branch table markers
6694 during assembly output to trigger the correct code
6695 for a switch table jump with an unfilled delay slot.
6697 However, that requires state and assumes that we look
6698 at insns in order.
6700 We can't make such assumptions when computing the length
6701 of instructions. Ugh. We could walk the insn chain to
6702 determine if this instruction is in a branch table, but
6703 that can get rather expensive, particularly during the
6704 branch shortening phase of the compiler.
6706 So instead we mark this jump as being special. This is
6707 far from ideal and knows that no code after this will
6708 muck around with the mode of the JUMP_INSN itself. */
6709 PUT_MODE (tmp, SImode);
6710 LABEL_NUSES (JUMP_LABEL (tmp))++;
6711 location = NEXT_INSN (location);
6714 /* Emit a BARRIER after the jump. */
6715 emit_barrier_after (location);
6716 location = NEXT_INSN (location);
6719 /* Emit marker for the end of the branch table. */
6720 emit_insn_before (gen_end_brtab (), location);
6721 location = NEXT_INSN (location);
6722 emit_barrier_after (location);
6724 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
6725 delete_insn (insn);
6728 else
6730 /* Still need begin_brtab and end_brtab insns.  */
6731 insns = get_insns ();
6732 for (insn = insns; insn; insn = NEXT_INSN (insn))
6734 /* Find an ADDR_VEC insn. */
6735 if (GET_CODE (insn) != JUMP_INSN
6736 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6737 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6738 continue;
6740 /* Now generate markers for the beginning and end of the
6741 branch table. */
6742 emit_insn_before (gen_begin_brtab (), insn);
6743 emit_insn_after (gen_end_brtab (), insn);
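/* To illustrate the expansion performed above (a sketch, not literal
   compiler output): a jump table such as

       (addr_vec [(label_ref L10) (label_ref L20) (label_ref L30)])

   is replaced by real, delay-slot-fillable branches

       L100:  b,n L10
       L101:  b,n L20
       L102:  b,n L30

   bracketed by begin_brtab/end_brtab markers, each new label keeping a
   fixed offset from the start of the table so that jumping into the
   table still works.  */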
6748 /* The PA has a number of odd instructions which can perform multiple
6749 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6750 it may be profitable to combine two instructions into one instruction
6751 with two outputs. It's not profitable PA2.0 machines because the
6752 two outputs would take two slots in the reorder buffers.
6754 This routine finds instructions which can be combined and combines
6755 them. We only support some of the potential combinations, and we
6756 only try common ways to find suitable instructions.
6758 * addb can add two registers or a register and a small integer
6759 and jump to a nearby (+-8k) location. Normally the jump to the
6760 nearby location is conditional on the result of the add, but by
6761 using the "true" condition we can make the jump unconditional.
6762 Thus addb can perform two independent operations in one insn.
6764 * movb is similar to addb in that it can perform a reg->reg
6765 or small immediate->reg copy and jump to a nearby (+-8k) location.
6767 * fmpyadd and fmpysub can perform a FP multiply and either an
6768 FP add or FP sub if the operands of the multiply and add/sub are
6769 independent (there are other minor restrictions). Note both
6770 the fmpy and fadd/fsub can in theory move to better spots according
6771 to data dependencies, but for now we require the fmpy stay at a
6772 fixed location.
6774 * Many of the memory operations can perform pre & post updates
6775 of index registers. GCC's pre/post increment/decrement addressing
6776 is far too simple to take advantage of all the possibilities. This
6777 pass may not be suitable since those insns may not be independent.
6779 * comclr can compare two integer registers, or a register and a small
6780 integer, nullify the following instruction and zero some other register.  This
6781 is more difficult to use as it's harder to find an insn which
6782 will generate a comclr than finding something like an unconditional
6783 branch. (conditional moves & long branches create comclr insns).
6785 * Most arithmetic operations can conditionally skip the next
6786 instruction. They can be viewed as "perform this operation
6787 and conditionally jump to this nearby location" (where nearby
6788 is an insns away). These are difficult to use due to the
6789 branch length restrictions. */
6791 static void
6792 pa_combine_instructions (insns)
6793 rtx insns ATTRIBUTE_UNUSED;
6795 rtx anchor, new;
6797 /* This can get expensive since the basic algorithm is on the
6798 order of O(n^2) (or worse). Only do it for -O2 or higher
6799 levels of optimization. */
6800 if (optimize < 2)
6801 return;
6803 /* Walk down the list of insns looking for "anchor" insns which
6804 may be combined with "floating" insns. As the name implies,
6805 "anchor" instructions don't move, while "floating" insns may
6806 move around. */
6807 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
6808 new = make_insn_raw (new);
6810 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
6812 enum attr_pa_combine_type anchor_attr;
6813 enum attr_pa_combine_type floater_attr;
6815 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
6816 Also ignore any special USE insns. */
6817 if ((GET_CODE (anchor) != INSN
6818 && GET_CODE (anchor) != JUMP_INSN
6819 && GET_CODE (anchor) != CALL_INSN)
6820 || GET_CODE (PATTERN (anchor)) == USE
6821 || GET_CODE (PATTERN (anchor)) == CLOBBER
6822 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
6823 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
6824 continue;
6826 anchor_attr = get_attr_pa_combine_type (anchor);
6827 /* See if anchor is an insn suitable for combination. */
6828 if (anchor_attr == PA_COMBINE_TYPE_FMPY
6829 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
6830 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6831 && ! forward_branch_p (anchor)))
6833 rtx floater;
6835 for (floater = PREV_INSN (anchor);
6836 floater;
6837 floater = PREV_INSN (floater))
6839 if (GET_CODE (floater) == NOTE
6840 || (GET_CODE (floater) == INSN
6841 && (GET_CODE (PATTERN (floater)) == USE
6842 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6843 continue;
6845 /* Anything except a regular INSN will stop our search. */
6846 if (GET_CODE (floater) != INSN
6847 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6848 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6850 floater = NULL_RTX;
6851 break;
6854 /* See if FLOATER is suitable for combination with the
6855 anchor. */
6856 floater_attr = get_attr_pa_combine_type (floater);
6857 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6858 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6859 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6860 && floater_attr == PA_COMBINE_TYPE_FMPY))
6862 /* If ANCHOR and FLOATER can be combined, then we're
6863 done with this pass. */
6864 if (pa_can_combine_p (new, anchor, floater, 0,
6865 SET_DEST (PATTERN (floater)),
6866 XEXP (SET_SRC (PATTERN (floater)), 0),
6867 XEXP (SET_SRC (PATTERN (floater)), 1)))
6868 break;
6871 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6872 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
6874 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
6876 if (pa_can_combine_p (new, anchor, floater, 0,
6877 SET_DEST (PATTERN (floater)),
6878 XEXP (SET_SRC (PATTERN (floater)), 0),
6879 XEXP (SET_SRC (PATTERN (floater)), 1)))
6880 break;
6882 else
6884 if (pa_can_combine_p (new, anchor, floater, 0,
6885 SET_DEST (PATTERN (floater)),
6886 SET_SRC (PATTERN (floater)),
6887 SET_SRC (PATTERN (floater))))
6888 break;
6893 /* If we didn't find anything on the backwards scan try forwards. */
6894 if (!floater
6895 && (anchor_attr == PA_COMBINE_TYPE_FMPY
6896 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
6898 for (floater = anchor; floater; floater = NEXT_INSN (floater))
6900 if (GET_CODE (floater) == NOTE
6901 || (GET_CODE (floater) == INSN
6902 && (GET_CODE (PATTERN (floater)) == USE
6903 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6905 continue;
6907 /* Anything except a regular INSN will stop our search. */
6908 if (GET_CODE (floater) != INSN
6909 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6910 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6912 floater = NULL_RTX;
6913 break;
6916 /* See if FLOATER is suitable for combination with the
6917 anchor. */
6918 floater_attr = get_attr_pa_combine_type (floater);
6919 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6920 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6921 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6922 && floater_attr == PA_COMBINE_TYPE_FMPY))
6924 /* If ANCHOR and FLOATER can be combined, then we're
6925 done with this pass. */
6926 if (pa_can_combine_p (new, anchor, floater, 1,
6927 SET_DEST (PATTERN (floater)),
6928 XEXP (SET_SRC (PATTERN (floater)),
6930 XEXP (SET_SRC (PATTERN (floater)),
6931 1)))
6932 break;
6937 /* FLOATER will be nonzero if we found a suitable floating
6938 insn for combination with ANCHOR. */
6939 if (floater
6940 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6941 || anchor_attr == PA_COMBINE_TYPE_FMPY))
6943 /* Emit the new instruction and delete the old anchor. */
6944 emit_insn_before (gen_rtx_PARALLEL
6945 (VOIDmode,
6946 gen_rtvec (2, PATTERN (anchor),
6947 PATTERN (floater))),
6948 anchor);
6950 PUT_CODE (anchor, NOTE);
6951 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6952 NOTE_SOURCE_FILE (anchor) = 0;
6954 /* Emit a special USE insn for FLOATER, then delete
6955 the floating insn. */
6956 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6957 delete_insn (floater);
6959 continue;
6961 else if (floater
6962 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
6964 rtx temp;
6965 /* Emit the new_jump instruction and delete the old anchor. */
6966 temp
6967 = emit_jump_insn_before (gen_rtx_PARALLEL
6968 (VOIDmode,
6969 gen_rtvec (2, PATTERN (anchor),
6970 PATTERN (floater))),
6971 anchor);
6973 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
6974 PUT_CODE (anchor, NOTE);
6975 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6976 NOTE_SOURCE_FILE (anchor) = 0;
6978 /* Emit a special USE insn for FLOATER, then delete
6979 the floating insn. */
6980 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6981 delete_insn (floater);
6982 continue;
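/* To illustrate the combination above (a sketch): given an anchor
   fadd insn

       (set (reg A) (plus (reg A) (reg B)))

   and a floating fmpy insn

       (set (reg F) (mult (reg X) (reg Y)))

   with no conflicting uses or sets between them, the two patterns are
   rewrapped as

       (parallel [(set (reg A) (plus ...)) (set (reg F) (mult ...))])

   and, if that PARALLEL is recognized as an fmpyadd pattern and its
   constraints are satisfied (see pa_can_combine_p below), it replaces
   both original insns.  */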
6988 static int
6989 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
6990 rtx new, anchor, floater;
6991 int reversed;
6992 rtx dest, src1, src2;
6994 int insn_code_number;
6995 rtx start, end;
6997 /* Create a PARALLEL with the patterns of ANCHOR and
6998 FLOATER, try to recognize it, then test constraints
6999 for the resulting pattern.
7001 If the pattern doesn't match or the constraints
7002 aren't met, keep searching for a suitable floater
7003 insn. */
7004 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7005 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
7006 INSN_CODE (new) = -1;
7007 insn_code_number = recog_memoized (new);
7008 if (insn_code_number < 0
7009 || !constrain_operands (1))
7010 return 0;
7012 if (reversed)
7014 start = anchor;
7015 end = floater;
7017 else
7019 start = floater;
7020 end = anchor;
7023 /* There are up to three operands to consider: one
7024 output and two inputs.
7026 The output must not be used between FLOATER & ANCHOR
7027 exclusive. The inputs must not be set between
7028 FLOATER and ANCHOR exclusive. */
7030 if (reg_used_between_p (dest, start, end))
7031 return 0;
7033 if (reg_set_between_p (src1, start, end))
7034 return 0;
7036 if (reg_set_between_p (src2, start, end))
7037 return 0;
7039 /* If we get here, then everything is good. */
7040 return 1;
7043 /* Return nonzero if references for INSN are delayed.
7045 Millicode insns are actually function calls with some special
7046 constraints on arguments and register usage.
7048 Millicode calls always expect their arguments in the integer argument
7049 registers, and always return their result in %r29 (ret1). They
7050 are expected to clobber their arguments, %r1, %r29, and the return
7051 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
7053 This function tells reorg that the argument references made by
7054 millicode calls do not appear to happen until after the millicode call.
7055 This allows reorg to put insns which set the argument registers into the
7056 delay slot of the millicode call -- thus they act more like traditional
7057 CALL_INSNs.
7059 Note we cannot consider side effects of the insn to be delayed because
7060 the branch and link insn will clobber the return pointer. If we happened
7061 to use the return pointer in the delay slot of the call, then we lose.
7063 get_attr_type will try to recognize the given insn, so make sure to
7064 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
7065 in particular. */
7067 insn_refs_are_delayed (insn)
7068 rtx insn;
7070 return ((GET_CODE (insn) == INSN
7071 && GET_CODE (PATTERN (insn)) != SEQUENCE
7072 && GET_CODE (PATTERN (insn)) != USE
7073 && GET_CODE (PATTERN (insn)) != CLOBBER
7074 && get_attr_type (insn) == TYPE_MILLI));
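/* For example (an illustrative sketch): in

       ldi 10,%r26          ; load a millicode argument
       bl $$mulI,%r31       ; millicode call

   reorg may move the "ldi 10,%r26" into the delay slot of the call
   precisely because this hook reports that the call's references to
   its argument registers do not take effect until after the branch.  */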
7077 /* Return the location of a parameter that is passed in a register or NULL
7078 if the parameter has any component that is passed in memory.
7080 This is new code and will be pushed into the net sources after
7081 further testing.
7083 ??? We might want to restructure this so that it looks more like other
7084 ports. */
7086 function_arg (cum, mode, type, named, incoming)
7087 CUMULATIVE_ARGS *cum;
7088 enum machine_mode mode;
7089 tree type;
7090 int named ATTRIBUTE_UNUSED;
7091 int incoming;
7093 int max_arg_words = (TARGET_64BIT ? 8 : 4);
7094 int fpr_reg_base;
7095 int gpr_reg_base;
7096 rtx retval;
7098 if (! TARGET_64BIT)
7100 /* If this arg would be passed partially or totally on the stack, then
7101 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
7102 handle arguments which are split between regs and stack slots if
7103 the ABI mandates split arguments. */
7104 if (cum->words + FUNCTION_ARG_SIZE (mode, type) > max_arg_words
7105 || mode == VOIDmode)
7106 return NULL_RTX;
7108 else
7110 int offset = 0;
7111 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7112 offset = 1;
7113 if (cum->words + offset >= max_arg_words
7114 || mode == VOIDmode)
7115 return NULL_RTX;
7118 /* The 32bit ABIs and the 64bit ABIs are rather different,
7119 particularly in their handling of FP registers. We might
7120 be able to cleverly share code between them, but I'm not
7121 going to bother in the hope that splitting them up results
7122 in code that is more easily understood.
7124 The 64bit code probably is very wrong for structure passing. */
7125 if (TARGET_64BIT)
7127 /* Advance the base registers to their current locations.
7129 Remember, gprs grow towards smaller register numbers while
7130 fprs grow to higher register numbers. Also remember FP regs
7131 are always 4 bytes wide, while the size of an integer register
7132 varies based on the size of the target word. */
7133 gpr_reg_base = 26 - cum->words;
7134 fpr_reg_base = 32 + cum->words;
7136 /* If the argument is more than a word long, then we need to align
7137 the base registers. Same caveats as above. */
7138 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7140 if (mode != BLKmode)
7142 /* First deal with alignment of the doubleword. */
7143 gpr_reg_base -= (cum->words & 1);
7145 /* This seems backwards, but it is what HP specifies. We need
7146 gpr_reg_base to point to the smaller numbered register of
7147 the integer register pair. So if we have an even register
7148 number, then decrement the gpr base. */
7149 gpr_reg_base -= ((gpr_reg_base % 2) == 0);
7151 /* FP values behave sanely, except that each FP reg is only
7152 half of a word.  */
7153 fpr_reg_base += ((fpr_reg_base % 2) == 0);
7155 else
7157 rtx loc[8];
7158 int i, offset = 0, ub;
7159 ub = FUNCTION_ARG_SIZE (mode, type);
7160 ub = MIN (ub,
7161 MAX (0, max_arg_words - cum->words - (cum->words & 1)));
7162 gpr_reg_base -= (cum->words & 1);
7163 for (i = 0; i < ub; i++)
7165 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7166 gen_rtx_REG (DImode,
7167 gpr_reg_base),
7168 GEN_INT (offset));
7169 gpr_reg_base -= 1;
7170 offset += 8;
7172 if (ub == 0)
7173 return NULL_RTX;
7174 else if (ub == 1)
7175 return XEXP (loc[0], 0);
7176 else
7177 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
7181 else
7183 /* If the argument is larger than a word, then we know precisely
7184 which registers we must use. */
7185 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7187 if (cum->words)
7189 gpr_reg_base = 23;
7190 fpr_reg_base = 38;
7192 else
7194 gpr_reg_base = 25;
7195 fpr_reg_base = 34;
7198 else
7200 /* We have a single word (32 bits). A simple computation
7201 will get us the register #s we need. */
7202 gpr_reg_base = 26 - cum->words;
7203 fpr_reg_base = 32 + 2 * cum->words;
7207 if (TARGET_64BIT && mode == TFmode)
7209 return
7210 gen_rtx_PARALLEL
7211 (mode,
7212 gen_rtvec (2,
7213 gen_rtx_EXPR_LIST (VOIDmode,
7214 gen_rtx_REG (DImode, gpr_reg_base + 1),
7215 const0_rtx),
7216 gen_rtx_EXPR_LIST (VOIDmode,
7217 gen_rtx_REG (DImode, gpr_reg_base),
7218 GEN_INT (8))));
7220 /* Determine if the argument needs to be passed in both general and
7221 floating point registers. */
7222 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
7223 /* If we are doing soft-float with portable runtime, then there
7224 is no need to worry about FP regs. */
7225 && ! TARGET_SOFT_FLOAT
7226 /* The parameter must be some kind of float, else we can just
7227 pass it in integer registers. */
7228 && FLOAT_MODE_P (mode)
7229 /* The target function must not have a prototype. */
7230 && cum->nargs_prototype <= 0
7231 /* libcalls do not need to pass items in both FP and general
7232 registers. */
7233 && type != NULL_TREE
7234 /* All this hair applies to outgoing args only. */
7235 && ! incoming)
7236 /* Also pass outgoing floating arguments in both registers in indirect
7237 calls with the 32 bit ABI and the HP assembler since there is no
7238 way to specify argument locations in static functions.  */
7239 || (! TARGET_64BIT
7240 && ! TARGET_GAS
7241 && ! incoming
7242 && cum->indirect
7243 && FLOAT_MODE_P (mode)))
7245 retval
7246 = gen_rtx_PARALLEL
7247 (mode,
7248 gen_rtvec (2,
7249 gen_rtx_EXPR_LIST (VOIDmode,
7250 gen_rtx_REG (mode, fpr_reg_base),
7251 const0_rtx),
7252 gen_rtx_EXPR_LIST (VOIDmode,
7253 gen_rtx_REG (mode, gpr_reg_base),
7254 const0_rtx)));
7256 else
7258 /* See if we should pass this parameter in a general register. */
7259 if (TARGET_SOFT_FLOAT
7260 /* Indirect calls in the normal 32bit ABI require all arguments
7261 to be passed in general registers. */
7262 || (!TARGET_PORTABLE_RUNTIME
7263 && !TARGET_64BIT
7264 && cum->indirect)
7265 /* If the parameter is not a floating point parameter, then
7266 it belongs in GPRs. */
7267 || !FLOAT_MODE_P (mode))
7268 retval = gen_rtx_REG (mode, gpr_reg_base);
7269 else
7270 retval = gen_rtx_REG (mode, fpr_reg_base);
7272 return retval;
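/* Worked example for the 32-bit case above (illustrative): the first
   word-sized argument arrives with cum->words == 0, giving
   gpr_reg_base = 26 (%r26) and fpr_reg_base = 32 (the first FP
   argument register, %fr4); the second gets %r25, and so on down to
   %r23.  An argument wider than one word instead starts at the
   %r25 or %r23 register pair, as coded above.  */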
7276 /* If this arg would be passed totally in registers or totally on the stack,
7277 then this routine should return zero. It is currently called only for
7278 the 64-bit target. */
7280 function_arg_partial_nregs (cum, mode, type, named)
7281 CUMULATIVE_ARGS *cum;
7282 enum machine_mode mode;
7283 tree type;
7284 int named ATTRIBUTE_UNUSED;
7286 unsigned int max_arg_words = 8;
7287 unsigned int offset = 0;
7289 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7290 offset = 1;
7292 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7293 /* Arg fits fully into registers. */
7294 return 0;
7295 else if (cum->words + offset >= max_arg_words)
7296 /* Arg fully on the stack. */
7297 return 0;
7298 else
7299 /* Arg is split. */
7300 return max_arg_words - cum->words - offset;
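/* Worked example (illustrative), using the 8-word register area from
   above: a 2-word argument seen at cum->words == 6 fits entirely in
   registers (6 + 2 <= 8, return 0); at cum->words == 7 it is first
   aligned to an even word (offset == 1) and then lies fully on the
   stack (return 0); a 4-word argument at cum->words == 6 is split,
   with 8 - 6 = 2 words passed in registers.  */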
7304 /* Return 1 if this is a comparison operator. This allows the use of
7305 MATCH_OPERATOR to recognize all the branch insns. */
7308 cmpib_comparison_operator (op, mode)
7309 register rtx op;
7310 enum machine_mode mode;
7312 return ((mode == VOIDmode || GET_MODE (op) == mode)
7313 && (GET_CODE (op) == EQ
7314 || GET_CODE (op) == NE
7315 || GET_CODE (op) == GT
7316 || GET_CODE (op) == GTU
7317 || GET_CODE (op) == GE
7318 || GET_CODE (op) == LT
7319 || GET_CODE (op) == LE
7320 || GET_CODE (op) == LEU));
7323 /* Mark ARG (which is really a struct deferred_plabel **) for GC. */
7325 static void
7326 mark_deferred_plabels (arg)
7327 void *arg;
7329 struct deferred_plabel *dp = *(struct deferred_plabel **) arg;
7330 int i;
7332 for (i = 0; i < n_deferred_plabels; ++i)
7333 ggc_mark_rtx (dp[i].internal_label);
7336 /* Called to register all of our global variables with the garbage
7337 collector. */
7339 static void
7340 pa_add_gc_roots ()
7342 ggc_add_rtx_root (&hppa_compare_op0, 1);
7343 ggc_add_rtx_root (&hppa_compare_op1, 1);
7344 ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
7345 &mark_deferred_plabels);