/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "libfuncs.h"
#include "reload.h"
#include "c-tree.h"
#include "integrate.h"
#include "function.h"
#include "obstack.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
static int hppa_use_dfa_pipeline_interface PARAMS ((void));

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface

static int
hppa_use_dfa_pipeline_interface ()
{
  return 1;
}

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (out_insn, in_insn)
     rtx out_insn, in_insn;
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
static void pa_combine_instructions PARAMS ((rtx));
static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
static int forward_branch_p PARAMS ((rtx));
static int shadd_constant_p PARAMS ((int));
static void pa_add_gc_roots PARAMS ((void));
static void mark_deferred_plabels PARAMS ((void *));
static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
static int compute_movstrsi_length PARAMS ((rtx));
static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
static void remove_useless_addtr_insns PARAMS ((rtx, int));
static void store_reg PARAMS ((int, int, int));
static void store_reg_modify PARAMS ((int, int, int));
static void load_reg PARAMS ((int, int, int));
static void set_reg_plus_d PARAMS ((int, int, int, int));
static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int pa_adjust_priority PARAMS ((rtx, int));
static int pa_issue_rate PARAMS ((void));
static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info PARAMS ((tree, int));

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg PARAMS ((rtx));

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info

struct gcc_target targetm = TARGET_INITIALIZER;
void
override_options ()
{
  /* Default to 8000 scheduling.  */
  if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7300"))
    {
      pa_cpu_string = "7300";
      pa_cpu = PROCESSOR_7300;
    }
  else if (pa_cpu_string == NULL
	   || ! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else
    {
      warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Register global variables with the garbage collector.  */
  pa_add_gc_roots ();
}
/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}
/* Return non-zero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_MODE (op) == word_mode
	  && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is a register operand or a non-symbolic memory
   operand after reload.  This predicate is used for branch patterns that
   internally handle register reloading.  We need to accept non-symbolic
   memory operands after reload to ensure that the pattern is still valid
   if reload didn't find a hard register for the operand.  */

int
reg_before_reload_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* Don't accept a SUBREG since it will need a reload.  */
  if (GET_CODE (op) == SUBREG)
    return 0;

  if (register_operand (op, mode))
    return 1;

  if (reload_completed
      && memory_operand (op, mode)
      && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
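/* A hedged gloss on the constraint letters above, assuming the usual
   pa.h definitions rather than quoting them: 'J' would be a 14-bit
   signed immediate (one ldo), 'N' a constant loadable with a single
   ldil (roughly, low 11 bits zero), and 'K' anything zdepi_cint_p
   accepts.  Under that reading, 0x1fff moves in one instruction,
   while something like 0x12345678 fails all three tests and needs
   the two-instruction ldil/ldo sequence.  */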
/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);

  /* We consider a LO_SUM DLT reference a move_operand now since it has
     been merged into the normal movsi/movdi patterns.  */
  if (GET_CODE (op) == LO_SUM
      && GET_CODE (XEXP (op, 0)) == REG
      && REG_OK_FOR_BASE_P (XEXP (op, 0))
      && GET_CODE (XEXP (op, 1)) == UNSPEC
      && GET_MODE (op) == Pmode)
    return 1;

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1))
	      == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1))
		 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}
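/* For instance, the scaled-index test above accepts an SImode source
   address of the form (plus (mult (reg) (const_int 4)) (reg)) because
   the scale factor matches GET_MODE_SIZE (SImode); the same address
   is rejected for DImode, where the mode size is 8.  */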
/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}

int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return reg_renumber && FP_REG_P (op);
}



/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && ((CONST_DOUBLE_HIGH (op) >= 0)
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
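/* A worked instance of the test above (an illustration, not from the
   sources): for x = 0x3c00, lsb_mask = x & -x = 0x400 isolates the
   lowest set bit, and t = ((0x3c00 >> 4) + 0x400) & ~0x3ff = 0x400 is
   a power of two, so 0x3c00 (a contiguous 4-bit field of ones) is
   accepted.  For x = 0x101 the set bits are too far apart: t = 0x11
   is not a power of two, so it is rejected and zdepi cannot build it.  */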
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
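/* The trick: all three accepted patterns have a complement ~mask that
   is a single contiguous run of ones.  Adding the run's lowest bit to
   it carries all the way through, leaving at most one set bit.
   Example (illustration): mask = 0xfffff00f gives ~mask = 0x00000ff0,
   and 0xff0 + 0x10 = 0x1000 is a power of two, so it is accepted.  */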
/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
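/* Same carry trick as and_mask_p, but applied to MASK directly: here
   the set bits themselves must form one contiguous run.  Illustrative
   values: 0x00000ff0 is accepted, 0x00000f0f is not.  */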
/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
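/* After dropping the low four bits (the "xxxx"), what remains must be
   of the form 0...01...1, which is exactly what (x & (x + 1)) == 0
   tests.  Example (illustration): 0x3ff >> 4 = 0x3f and
   0x3f & 0x40 == 0, so 0x3ff qualifies; 0x5ff >> 4 = 0x5f and
   0x5f & 0x60 != 0, so it does not.  */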
int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	abort ();

      emit_move_insn (reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, reg,
				       gen_rtx_UNSPEC (Pmode,
						       gen_rtvec (1, orig),
						       0)));

      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
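/* A worked instance of the transformation above (an illustration, not
   from the sources): for a word load at X + 0x12345 the mask is
   0x3fff, so 0x12345 & 0x3fff == 0x2345 >= 0x2000 and we round up to
   Y = 0x14000.  We emit Z = X + 0x14000 and rewrite the reference as
   memory (Z + -0x1cbb); the residual -0x1cbb fits the signed 14-bit
   displacement of an ldw, and nearby references can CSE the same Z.  */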
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */
  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, a REG+REG or REG+CONST address (PLUS or LO_SUM) is
   cheapest at cost 1, an address involving a HIGH symbolic constant
   costs 2, and anything else costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (mode, orig)
     enum machine_mode mode;
     rtx orig;
{
  if (mode == GET_MODE (orig))
    return orig;

  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
    abort ();

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */
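/* A sketch of the expected call site (the movM expanders in pa.md are
   assumed to look roughly like this; it is not quoted from that file):

     if (emit_move_sequence (operands, SImode, 0))
       DONE;

   i.e. a nonzero return means this routine emitted everything itself,
   while zero lets the normal move pattern match the operands as-is.  */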
int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }
  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0), 0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								       0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}


	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }
	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (operand0) = 1;
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (temp) = 1;
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }

	  return 1;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx extend = NULL_RTX;
	  rtx temp;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
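	      /* Illustration (not from the sources): on a 64-bit host
		 with val = 0x123456789, the mask/xor/subtract below
		 yields nval = 0x23456789 (the sign-extended low 32
		 bits), so val != nval and we first load 0x23456789,
		 then insert the high word 0x1 with the insv pattern at
		 the end of this block.  */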
	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (val != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  extend = GEN_INT (val >> 32);
#endif
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  if (GET_CODE (operand1) == CONST_INT)
	    {
	      /* Directly break constant into low and high parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
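	      /* For example (illustration only): 0x12345 splits into
		 low = 0x2345 and high = 0x10000; since low >= 0x2000 we
		 move high up to 0x14000 and recompute low = 0x12345 -
		 0x14000 = -0x1cbb, which still fits in 14 signed bits.
		 The 0x7fffc000 special case keeps the +0x4000
		 adjustment from overflowing the positive high part.  */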
	      HOST_WIDE_INT value = INTVAL (operand1);
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  emit_move_insn (operands[0], operands[1]);

	  if (extend != NULL_RTX)
	    emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
				 extend));

	  return 1;
	}
    }

  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	register tree link;
	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
	  if (TREE_VALUE (link) != 0)
	    reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space?
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
   will be true.  */

int
read_only_operand (operand, mode)
     rtx operand;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
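/* Hedged examples of the template selection above (illustrative
   constants, not from the sources): 0x1234 fits 14 bits and gets
   "ldi"; 0x12345800 has its low 11 bits clear and gets a single
   "ldil"; 0x3c00 is a contiguous bit field and gets the zdepi/depwi
   form; 0x12345678 fails every test and falls through to the
   two-instruction "ldil/ldo" pair.  */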
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
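/* Worked example (illustration): imm = 0x3c00 has its lowest set bit
   at position 10, so the first loop leaves imm = 0xf and lsb = 10.
   Bit 4 of the shifted value is clear, so len = 4, giving
   op[] = {0xf, 21, 4}: deposit the 4-bit field 0xf at position
   31 - lsb = 21 (PA deposit instructions count bit positions from
   the most-significant end), reconstructing 0xf << 10.  */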
1944 /* Compute position (in OP[1]) and width (in OP[2])
1945 useful for copying IMM to a register using the depdi,z
1946 instructions. Store the immediate value to insert in OP[0]. */
1947 void
1948 compute_zdepdi_operands (imm, op)
1949 unsigned HOST_WIDE_INT imm;
1950 unsigned *op;
1952 HOST_WIDE_INT lsb, len;
1954 /* Find the least significant set bit in IMM. */
1955 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
1957 if ((imm & 1) != 0)
1958 break;
1959 imm >>= 1;
1962 /* Choose variants based on *sign* of the 5-bit field. */
1963 if ((imm & 0x10) == 0)
1964 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
1965 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
1966 else
1968 /* Find the width of the bitstring in IMM. */
1969 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
1971 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
1972 break;
1975 /* Sign extend IMM as a 5-bit value. */
1976 imm = (imm & 0xf) - 0x10;
1979 op[0] = imm;
1980 op[1] = 63 - lsb;
1981 op[2] = len;
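/* The 64-bit case is analogous: IMM == 0x3c00 (again an illustrative
   value) yields op[0] == 15, op[1] == 63 - 10 == 53 and op[2] == 4,
   matching a "depdi,z 15,53,4,t" instruction.  */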
1984 /* Output assembler code to perform a doubleword move insn
1985 with operands OPERANDS. */
1987 const char *
1988 output_move_double (operands)
1989 rtx *operands;
1991 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1992 rtx latehalf[2];
1993 rtx addreg0 = 0, addreg1 = 0;
1995 /* First classify both operands. */
1997 if (REG_P (operands[0]))
1998 optype0 = REGOP;
1999 else if (offsettable_memref_p (operands[0]))
2000 optype0 = OFFSOP;
2001 else if (GET_CODE (operands[0]) == MEM)
2002 optype0 = MEMOP;
2003 else
2004 optype0 = RNDOP;
2006 if (REG_P (operands[1]))
2007 optype1 = REGOP;
2008 else if (CONSTANT_P (operands[1]))
2009 optype1 = CNSTOP;
2010 else if (offsettable_memref_p (operands[1]))
2011 optype1 = OFFSOP;
2012 else if (GET_CODE (operands[1]) == MEM)
2013 optype1 = MEMOP;
2014 else
2015 optype1 = RNDOP;
2017 /* Check for the cases that the operand constraints are not
2018 supposed to allow to happen. Abort if we get one,
2019 because generating code for these cases is painful. */
2021 if (optype0 != REGOP && optype1 != REGOP)
2022 abort ();
2024 /* Handle auto decrementing and incrementing loads and stores
2025 specifically, since the structure of the function doesn't work
2026 for them without major modification. Do it better when we teach
2027 this port about the general inc/dec addressing of PA.
2028 (This was written by tege. Chide him if it doesn't work.) */
2030 if (optype0 == MEMOP)
2032 /* We have to output the address syntax ourselves, since print_operand
2033 doesn't deal with the addresses we want to use. Fix this later. */
2035 rtx addr = XEXP (operands[0], 0);
2036 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2038 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2040 operands[0] = XEXP (addr, 0);
2041 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2042 abort ();
2044 if (!reg_overlap_mentioned_p (high_reg, addr))
2046 /* No overlap between high target register and address
2047 register. (We do this in a non-obvious way to
2048 save a register file writeback) */
2049 if (GET_CODE (addr) == POST_INC)
2050 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2051 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2053 else
2054 abort ();
2056 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2058 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2060 operands[0] = XEXP (addr, 0);
2061 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2062 abort ();
2064 if (!reg_overlap_mentioned_p (high_reg, addr))
2066 /* No overlap between high target register and address
2067 register. (We do this in a non-obvious way to
2068 save a register file writeback) */
2069 if (GET_CODE (addr) == PRE_INC)
2070 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2071 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2073 else
2074 abort ();
2077 if (optype1 == MEMOP)
2079 /* We have to output the address syntax ourselves, since print_operand
2080 doesn't deal with the addresses we want to use. Fix this later. */
2082 rtx addr = XEXP (operands[1], 0);
2083 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2085 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2087 operands[1] = XEXP (addr, 0);
2088 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2089 abort ();
2091 if (!reg_overlap_mentioned_p (high_reg, addr))
2093 /* No overlap between high target register and address
2094 register. (We do this in a non-obvious way to
2095 save a register file writeback) */
2096 if (GET_CODE (addr) == POST_INC)
2097 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2098 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2100 else
2102 /* This is an undefined situation. We should load into the
2103 address register *and* update that register. Probably
2104 we don't need to handle this at all. */
2105 if (GET_CODE (addr) == POST_INC)
2106 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2107 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2110 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2112 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2114 operands[1] = XEXP (addr, 0);
2115 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2116 abort ();
2118 if (!reg_overlap_mentioned_p (high_reg, addr))
2120 /* No overlap between high target register and address
2121 register. (We do this in a non-obvious way to
2122 save a register file writeback) */
2123 if (GET_CODE (addr) == PRE_INC)
2124 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2125 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2127 else
2129 /* This is an undefined situation. We should load into the
2130 address register *and* update that register. Probably
2131 we don't need to handle this at all. */
2132 if (GET_CODE (addr) == PRE_INC)
2133 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2134 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2137 else if (GET_CODE (addr) == PLUS
2138 && GET_CODE (XEXP (addr, 0)) == MULT)
2140 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2142 if (!reg_overlap_mentioned_p (high_reg, addr))
2144 rtx xoperands[4];
2146 xoperands[0] = high_reg;
2147 xoperands[1] = XEXP (addr, 1);
2148 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2149 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2150 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2151 xoperands);
2152 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2154 else
2156 rtx xoperands[4];
2158 xoperands[0] = high_reg;
2159 xoperands[1] = XEXP (addr, 1);
2160 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2161 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2162 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2163 xoperands);
2164 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2169 /* If an operand is an unoffsettable memory ref, find a register
2170 we can increment temporarily to make it refer to the second word. */
2172 if (optype0 == MEMOP)
2173 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2175 if (optype1 == MEMOP)
2176 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2178 /* Ok, we can do one word at a time.
2179 Normally we do the low-numbered word first.
2181 In either case, set up in LATEHALF the operands to use
2182 for the high-numbered word and in some cases alter the
2183 operands in OPERANDS to be suitable for the low-numbered word. */
2185 if (optype0 == REGOP)
2186 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2187 else if (optype0 == OFFSOP)
2188 latehalf[0] = adjust_address (operands[0], SImode, 4);
2189 else
2190 latehalf[0] = operands[0];
2192 if (optype1 == REGOP)
2193 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2194 else if (optype1 == OFFSOP)
2195 latehalf[1] = adjust_address (operands[1], SImode, 4);
2196 else if (optype1 == CNSTOP)
2197 split_double (operands[1], &operands[1], &latehalf[1]);
2198 else
2199 latehalf[1] = operands[1];
2201 /* If the first move would clobber the source of the second one,
2202 do them in the other order.
2204 This can happen in two cases:
2206 mem -> register where the first half of the destination register
2207 is the same register used in the memory's address. Reload
2208 can create such insns.
2210 mem in this case will be either register indirect or register
2211 indirect plus a valid offset.
2213 register -> register move where REGNO(dst) == REGNO(src + 1)
2214 someone (Tim/Tege?) claimed this can happen for parameter loads.
2216 Handle mem -> register case first. */
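/* For example (illustrative registers), a naive doubleword load
   through %r4 would be

   ldw 0(%r4),%r4
   ldw 4(%r4),%r5

   where the first load clobbers the address register before the
   second load uses it; doing the late half first avoids this.  */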
2217 if (optype0 == REGOP
2218 && (optype1 == MEMOP || optype1 == OFFSOP)
2219 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2220 operands[1], 0))
2222 /* Do the late half first. */
2223 if (addreg1)
2224 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2225 output_asm_insn (singlemove_string (latehalf), latehalf);
2227 /* Then clobber. */
2228 if (addreg1)
2229 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2230 return singlemove_string (operands);
2233 /* Now handle register -> register case. */
2234 if (optype0 == REGOP && optype1 == REGOP
2235 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2237 output_asm_insn (singlemove_string (latehalf), latehalf);
2238 return singlemove_string (operands);
2241 /* Normal case: do the two words, low-numbered first. */
2243 output_asm_insn (singlemove_string (operands), operands);
2245 /* Make any unoffsettable addresses point at high-numbered word. */
2246 if (addreg0)
2247 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2248 if (addreg1)
2249 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2251 /* Do that word. */
2252 output_asm_insn (singlemove_string (latehalf), latehalf);
2254 /* Undo the adds we just did. */
2255 if (addreg0)
2256 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2257 if (addreg1)
2258 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2260 return "";
2263 const char *
2264 output_fp_move_double (operands)
2265 rtx *operands;
2267 if (FP_REG_P (operands[0]))
2269 if (FP_REG_P (operands[1])
2270 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2271 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2272 else
2273 output_asm_insn ("fldd%F1 %1,%0", operands);
2275 else if (FP_REG_P (operands[1]))
2277 output_asm_insn ("fstd%F0 %1,%0", operands);
2279 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2281 if (GET_CODE (operands[0]) == REG)
2283 rtx xoperands[2];
2284 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2285 xoperands[0] = operands[0];
2286 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2288 /* This is a pain. You have to be prepared to deal with an
2289 arbitrary address here including pre/post increment/decrement.
2291 So avoid this in the MD. */
2292 else
2293 abort ();
2295 else abort ();
2296 return "";
2299 /* Return a REG that occurs in ADDR with coefficient 1.
2300 ADDR can be effectively incremented by incrementing REG. */
2302 static rtx
2303 find_addr_reg (addr)
2304 rtx addr;
2306 while (GET_CODE (addr) == PLUS)
2308 if (GET_CODE (XEXP (addr, 0)) == REG)
2309 addr = XEXP (addr, 0);
2310 else if (GET_CODE (XEXP (addr, 1)) == REG)
2311 addr = XEXP (addr, 1);
2312 else if (CONSTANT_P (XEXP (addr, 0)))
2313 addr = XEXP (addr, 1);
2314 else if (CONSTANT_P (XEXP (addr, 1)))
2315 addr = XEXP (addr, 0);
2316 else
2317 abort ();
2319 if (GET_CODE (addr) == REG)
2320 return addr;
2321 abort ();
2324 /* Emit code to perform a block move.
2326 OPERANDS[0] is the destination pointer as a REG, clobbered.
2327 OPERANDS[1] is the source pointer as a REG, clobbered.
2328 OPERANDS[2] is a register for temporary storage.
2329 OPERANDS[3] is a register for temporary storage.
2330 OPERANDS[4] is the size as a CONST_INT.
2331 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2332 OPERANDS[6] is another temporary register. */
2334 const char *
2335 output_block_move (operands, size_is_constant)
2336 rtx *operands;
2337 int size_is_constant ATTRIBUTE_UNUSED;
2339 int align = INTVAL (operands[5]);
2340 unsigned long n_bytes = INTVAL (operands[4]);
2342 /* We can't move more than four bytes at a time because the PA
2343 has no integer move insns longer than a word. (Could use fp mem ops?) */
2344 if (align > 4)
2345 align = 4;
2347 /* Note that we know each loop below will execute at least twice
2348 (else we would have open-coded the copy). */
2349 switch (align)
2351 case 4:
2352 /* Pre-adjust the loop counter. */
2353 operands[4] = GEN_INT (n_bytes - 8);
2354 output_asm_insn ("ldi %4,%2", operands);
2356 /* Copying loop. */
2357 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2358 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2359 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2360 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2361 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2363 /* Handle the residual. There could be up to 7 bytes of
2364 residual to copy! */
2365 if (n_bytes % 8 != 0)
2367 operands[4] = GEN_INT (n_bytes % 4);
2368 if (n_bytes % 8 >= 4)
2369 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2370 if (n_bytes % 4 != 0)
2371 output_asm_insn ("ldw 0(%1),%6", operands);
2372 if (n_bytes % 8 >= 4)
2373 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2374 if (n_bytes % 4 != 0)
2375 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2377 return "";
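/* As an illustration (not captured compiler output), a 23-byte
   word-aligned copy runs the loop above twice (16 bytes); the
   residual code then copies one more word with the ,ma load/store
   pair and the final 3 bytes with ldw plus stby,e, which stores
   only the leftover bytes.  */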
2379 case 2:
2380 /* Pre-adjust the loop counter. */
2381 operands[4] = GEN_INT (n_bytes - 4);
2382 output_asm_insn ("ldi %4,%2", operands);
2384 /* Copying loop. */
2385 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2386 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2387 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2388 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2389 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2391 /* Handle the residual. */
2392 if (n_bytes % 4 != 0)
2394 if (n_bytes % 4 >= 2)
2395 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2396 if (n_bytes % 2 != 0)
2397 output_asm_insn ("ldb 0(%1),%6", operands);
2398 if (n_bytes % 4 >= 2)
2399 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2400 if (n_bytes % 2 != 0)
2401 output_asm_insn ("stb %6,0(%0)", operands);
2403 return "";
2405 case 1:
2406 /* Pre-adjust the loop counter. */
2407 operands[4] = GEN_INT (n_bytes - 2);
2408 output_asm_insn ("ldi %4,%2", operands);
2410 /* Copying loop. */
2411 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2412 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2413 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2414 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2415 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2417 /* Handle the residual. */
2418 if (n_bytes % 2 != 0)
2420 output_asm_insn ("ldb 0(%1),%3", operands);
2421 output_asm_insn ("stb %3,0(%0)", operands);
2423 return "";
2425 default:
2426 abort ();
2430 /* Count the number of insns necessary to handle this block move.
2432 Basic structure is the same as output_block_move, except that we
2433 count insns rather than emit them. */
2435 static int
2436 compute_movstrsi_length (insn)
2437 rtx insn;
2439 rtx pat = PATTERN (insn);
2440 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2441 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2442 unsigned int n_insns = 0;
2444 /* We can't move more than four bytes at a time because the PA
2445 has no integer move insns longer than a word. (Could use fp mem ops?) */
2446 if (align > 4)
2447 align = 4;
2449 /* The basic copying loop. */
2450 n_insns = 6;
2452 /* Residuals. */
2453 if (n_bytes % (2 * align) != 0)
2455 if ((n_bytes % (2 * align)) >= align)
2456 n_insns += 2;
2458 if ((n_bytes % align) != 0)
2459 n_insns += 2;
2462 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2463 return n_insns * 4;
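/* Worked example: for a 23-byte copy with word alignment the loop
   costs 6 insns; 23 % 8 == 7 >= 4 adds 2 more, and 23 % 4 == 3 adds
   another 2, for 10 insns, i.e. a length of 40 bytes.  */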
2467 const char *
2468 output_and (operands)
2469 rtx *operands;
2471 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2473 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2474 int ls0, ls1, ms0, p, len;
2476 for (ls0 = 0; ls0 < 32; ls0++)
2477 if ((mask & (1 << ls0)) == 0)
2478 break;
2480 for (ls1 = ls0; ls1 < 32; ls1++)
2481 if ((mask & (1 << ls1)) != 0)
2482 break;
2484 for (ms0 = ls1; ms0 < 32; ms0++)
2485 if ((mask & (1 << ms0)) == 0)
2486 break;
2488 if (ms0 != 32)
2489 abort ();
2491 if (ls1 == 32)
2493 len = ls0;
2495 if (len == 0)
2496 abort ();
2498 operands[2] = GEN_INT (len);
2499 return "{extru|extrw,u} %1,31,%2,%0";
2501 else
2503 /* We could use this `depi' for the case above as well, but `depi'
2504 requires one more register file access than an `extru'. */
2506 p = 31 - ls0;
2507 len = ls1 - ls0;
2509 operands[2] = GEN_INT (p);
2510 operands[3] = GEN_INT (len);
2511 return "{depi|depwi} 0,%2,%3,%0";
2514 else
2515 return "and %1,%2,%0";
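/* Two illustrative masks: 0x1ff (ones in bits 0-8) gives ls1 == 32
   and len == 9, so we emit "{extru|extrw,u} %1,31,9,%0"; 0xfffffc0f
   (zeros only in bits 4-9) gives p == 27 and len == 6, so we emit
   "{depi|depwi} 0,27,6,%0" to clear just that field.  */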
2518 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2519 storing the result in operands[0]. */
2520 const char *
2521 output_64bit_and (operands)
2522 rtx *operands;
2524 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2526 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2527 int ls0, ls1, ms0, p, len;
2529 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2530 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2531 break;
2533 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2534 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2535 break;
2537 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2538 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2539 break;
2541 if (ms0 != HOST_BITS_PER_WIDE_INT)
2542 abort ();
2544 if (ls1 == HOST_BITS_PER_WIDE_INT)
2546 len = ls0;
2548 if (len == 0)
2549 abort ();
2551 operands[2] = GEN_INT (len);
2552 return "extrd,u %1,63,%2,%0";
2554 else
2556 /* We could use this `depi' for the case above as well, but `depi'
2557 requires one more register file access than an `extru'. */
2559 p = 63 - ls0;
2560 len = ls1 - ls0;
2562 operands[2] = GEN_INT (p);
2563 operands[3] = GEN_INT (len);
2564 return "depdi 0,%2,%3,%0";
2567 else
2568 return "and %1,%2,%0";
2571 const char *
2572 output_ior (operands)
2573 rtx *operands;
2575 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2576 int bs0, bs1, p, len;
2578 if (INTVAL (operands[2]) == 0)
2579 return "copy %1,%0";
2581 for (bs0 = 0; bs0 < 32; bs0++)
2582 if ((mask & (1 << bs0)) != 0)
2583 break;
2585 for (bs1 = bs0; bs1 < 32; bs1++)
2586 if ((mask & (1 << bs1)) == 0)
2587 break;
2589 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2590 abort ();
2592 p = 31 - bs0;
2593 len = bs1 - bs0;
2595 operands[2] = GEN_INT (p);
2596 operands[3] = GEN_INT (len);
2597 return "{depi|depwi} -1,%2,%3,%0";
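/* For example, a mask of 0x7e0 (ones in bits 5-10) yields bs0 == 5,
   bs1 == 11, p == 26 and len == 6, producing
   "{depi|depwi} -1,26,6,%0".  */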
2600 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
2601 storing the result in operands[0]. */
2602 const char *
2603 output_64bit_ior (operands)
2604 rtx *operands;
2606 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2607 int bs0, bs1, p, len;
2609 if (INTVAL (operands[2]) == 0)
2610 return "copy %1,%0";
2612 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2613 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2614 break;
2616 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2617 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2618 break;
2620 if (bs1 != HOST_BITS_PER_WIDE_INT
2621 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2622 abort ();
2624 p = 63 - bs0;
2625 len = bs1 - bs0;
2627 operands[2] = GEN_INT (p);
2628 operands[3] = GEN_INT (len);
2629 return "depdi -1,%2,%3,%0";
2632 /* Target hook for assembling integer objects. This code handles
2633 aligned SI and DI integers specially, since function references must
2634 be preceded by P%. */
2636 static bool
2637 pa_assemble_integer (x, size, aligned_p)
2638 rtx x;
2639 unsigned int size;
2640 int aligned_p;
2642 if (size == UNITS_PER_WORD && aligned_p
2643 && function_label_operand (x, VOIDmode))
2645 fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2646 output_addr_const (asm_out_file, x);
2647 fputc ('\n', asm_out_file);
2648 return true;
2650 return default_assemble_integer (x, size, aligned_p);
2653 /* Output an ascii string. */
2654 void
2655 output_ascii (file, p, size)
2656 FILE *file;
2657 const char *p;
2658 int size;
2660 int i;
2661 int chars_output;
2662 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2664 /* The HP assembler can only take strings of 256 characters at one
2665 time. This is a limitation on input line length, *not* the
2666 length of the string. Sigh. Even worse, it seems that the
2667 restriction is in number of input characters (see \xnn &
2668 \whatever). So we have to do this very carefully. */
2670 fputs ("\t.STRING \"", file);
2672 chars_output = 0;
2673 for (i = 0; i < size; i += 4)
2675 int co = 0;
2676 int io = 0;
2677 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2679 register unsigned int c = (unsigned char) p[i + io];
2681 if (c == '\"' || c == '\\')
2682 partial_output[co++] = '\\';
2683 if (c >= ' ' && c < 0177)
2684 partial_output[co++] = c;
2685 else
2687 unsigned int hexd;
2688 partial_output[co++] = '\\';
2689 partial_output[co++] = 'x';
2690 hexd = c / 16 + '0';
2691 if (hexd > '9')
2692 hexd -= '9' - 'a' + 1;
2693 partial_output[co++] = hexd;
2694 hexd = c % 16 + '0';
2695 if (hexd > '9')
2696 hexd -= '9' - 'a' + 1;
2697 partial_output[co++] = hexd;
2700 if (chars_output + co > 243)
2702 fputs ("\"\n\t.STRING \"", file);
2703 chars_output = 0;
2705 fwrite (partial_output, 1, (size_t) co, file);
2706 chars_output += co;
2707 co = 0;
2709 fputs ("\"\n", file);
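/* For instance, the 4-byte input "a\"b\n" would come out (pieces
   permitting) as

   .STRING "a\"b\x0a"

   with quote and backslash escaped and non-printing characters
   emitted as two lowercase hex digits.  This is a sketch of the
   output, not captured assembler text.  */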
2712 /* Try to rewrite floating point comparisons & branches to avoid
2713 useless add,tr insns.
2715 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2716 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2717 first attempt to remove useless add,tr insns. It is zero
2718 for the second pass as reorg sometimes leaves bogus REG_DEAD
2719 notes lying around.
2721 When CHECK_NOTES is zero we can only eliminate add,tr insns
2722 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2723 instructions. */
2724 static void
2725 remove_useless_addtr_insns (insns, check_notes)
2726 rtx insns;
2727 int check_notes;
2729 rtx insn;
2730 static int pass = 0;
2732 /* This is fairly cheap, so always run it when optimizing. */
2733 if (optimize > 0)
2735 int fcmp_count = 0;
2736 int fbranch_count = 0;
2738 /* Walk all the insns in this function looking for fcmp & fbranch
2739 instructions. Keep track of how many of each we find. */
2740 insns = get_insns ();
2741 for (insn = insns; insn; insn = next_insn (insn))
2743 rtx tmp;
2745 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2746 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2747 continue;
2749 tmp = PATTERN (insn);
2751 /* It must be a set. */
2752 if (GET_CODE (tmp) != SET)
2753 continue;
2755 /* If the destination is CCFP, then we've found an fcmp insn. */
2756 tmp = SET_DEST (tmp);
2757 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2759 fcmp_count++;
2760 continue;
2763 tmp = PATTERN (insn);
2764 /* If this is an fbranch instruction, bump the fbranch counter. */
2765 if (GET_CODE (tmp) == SET
2766 && SET_DEST (tmp) == pc_rtx
2767 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2768 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2769 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2770 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2772 fbranch_count++;
2773 continue;
2778 /* Find all floating point compare + branch insns. If possible,
2779 reverse the comparison & the branch to avoid add,tr insns. */
2780 for (insn = insns; insn; insn = next_insn (insn))
2782 rtx tmp, next;
2784 /* Ignore anything that isn't an INSN. */
2785 if (GET_CODE (insn) != INSN)
2786 continue;
2788 tmp = PATTERN (insn);
2790 /* It must be a set. */
2791 if (GET_CODE (tmp) != SET)
2792 continue;
2794 /* The destination must be CCFP, which is register zero. */
2795 tmp = SET_DEST (tmp);
2796 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2797 continue;
2799 /* INSN should be a set of CCFP.
2801 See if the result of this insn is used in a reversed FP
2802 conditional branch. If so, reverse our condition and
2803 the branch. Doing so avoids useless add,tr insns. */
2804 next = next_insn (insn);
2805 while (next)
2807 /* Jumps, calls and labels stop our search. */
2808 if (GET_CODE (next) == JUMP_INSN
2809 || GET_CODE (next) == CALL_INSN
2810 || GET_CODE (next) == CODE_LABEL)
2811 break;
2813 /* As does another fcmp insn. */
2814 if (GET_CODE (next) == INSN
2815 && GET_CODE (PATTERN (next)) == SET
2816 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2817 && REGNO (SET_DEST (PATTERN (next))) == 0)
2818 break;
2820 next = next_insn (next);
2823 /* Is NEXT a branch? */
2824 if (next
2825 && GET_CODE (next) == JUMP_INSN)
2827 rtx pattern = PATTERN (next);
2829 /* If it is a reversed FP conditional branch (e.g., one using
2830 add,tr) and CCFP dies, then reverse our condition and the
2831 branch to avoid the add,tr. */
2832 if (GET_CODE (pattern) == SET
2833 && SET_DEST (pattern) == pc_rtx
2834 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2835 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2836 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2837 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2838 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2839 && (fcmp_count == fbranch_count
2840 || (check_notes
2841 && find_regno_note (next, REG_DEAD, 0))))
2843 /* Reverse the branch. */
2844 tmp = XEXP (SET_SRC (pattern), 1);
2845 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2846 XEXP (SET_SRC (pattern), 2) = tmp;
2847 INSN_CODE (next) = -1;
2849 /* Reverse our condition. */
2850 tmp = PATTERN (insn);
2851 PUT_CODE (XEXP (tmp, 1),
2852 (reverse_condition_maybe_unordered
2853 (GET_CODE (XEXP (tmp, 1)))));
2859 pass = !pass;
2863 /* You may have trouble believing this, but this is the 32-bit HP-PA
2864 stack layout. Wow.
2866 Offset Contents
2868 Variable arguments (optional; any number may be allocated)
2870 SP-(4*(N+9)) arg word N
2872 SP-56 arg word 5
2873 SP-52 arg word 4
2875 Fixed arguments (must be allocated; may remain unused)
2877 SP-48 arg word 3
2878 SP-44 arg word 2
2879 SP-40 arg word 1
2880 SP-36 arg word 0
2882 Frame Marker
2884 SP-32 External Data Pointer (DP)
2885 SP-28 External sr4
2886 SP-24 External/stub RP (RP')
2887 SP-20 Current RP
2888 SP-16 Static Link
2889 SP-12 Clean up
2890 SP-8 Calling Stub RP (RP'')
2891 SP-4 Previous SP
2893 Top of Frame
2895 SP-0 Stack Pointer (points to next available address)
2899 /* This function saves registers as follows. Registers marked with ' are
2900 this function's registers (as opposed to the previous function's).
2901 If a frame_pointer isn't needed, r4 is saved as a general register;
2902 the space for the frame pointer is still allocated, though, to keep
2903 things simple.
2906 Top of Frame
2908 SP (FP') Previous FP
2909 SP + 4 Alignment filler (sigh)
2910 SP + 8 Space for locals reserved here.
2914 SP + n All call saved registers used.
2918 SP + o All call saved fp registers used.
2922 SP + p (SP') points to next available address.
2926 /* Global variables set by output_function_prologue(). */
2927 /* Size of frame. Need to know this to emit return insns from
2928 leaf procedures. */
2929 static int actual_fsize;
2930 static int local_fsize, save_fregs;
2932 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2933 Handle case where DISP > 8k by using the add_high_const patterns.
2935 Note that in the DISP > 8k case, we leave the high part of the address
2936 in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
2938 static void
2939 store_reg (reg, disp, base)
2940 int reg, disp, base;
2942 rtx insn, dest, src, basereg;
2944 src = gen_rtx_REG (word_mode, reg);
2945 basereg = gen_rtx_REG (Pmode, base);
2946 if (VAL_14_BITS_P (disp))
2948 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
2949 insn = emit_move_insn (dest, src);
2951 else
2953 rtx delta = GEN_INT (disp);
2954 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
2955 rtx tmpreg = gen_rtx_REG (Pmode, 1);
2956 emit_move_insn (tmpreg, high);
2957 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
2958 insn = emit_move_insn (dest, src);
2959 if (DO_FRAME_NOTES)
2961 REG_NOTES (insn)
2962 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2963 gen_rtx_SET (VOIDmode,
2964 gen_rtx_MEM (word_mode,
2965 gen_rtx_PLUS (word_mode, basereg,
2966 delta)),
2967 src),
2968 REG_NOTES (insn));
2972 if (DO_FRAME_NOTES)
2973 RTX_FRAME_RELATED_P (insn) = 1;
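/* Sketch of the large-displacement path (illustrative numbers):
   store_reg (3, 20000, STACK_POINTER_REGNUM) cannot encode 20000 in
   14 bits, so it builds roughly

   %r1 = %r30 + HIGH (20000)
   *(LO_SUM (%r1, 20000)) = %r3

   which the move patterns render as an addil/stw pair.  */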
2976 /* Emit RTL to store REG at the memory location specified by BASE and then
2977 add MOD to BASE. MOD must be <= 8k. */
2979 static void
2980 store_reg_modify (base, reg, mod)
2981 int base, reg, mod;
2983 rtx insn, basereg, srcreg, delta;
2985 if (! VAL_14_BITS_P (mod))
2986 abort ();
2988 basereg = gen_rtx_REG (Pmode, base);
2989 srcreg = gen_rtx_REG (word_mode, reg);
2990 delta = GEN_INT (mod);
2992 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
2993 if (DO_FRAME_NOTES)
2995 RTX_FRAME_RELATED_P (insn) = 1;
2997 /* RTX_FRAME_RELATED_P must be set on each frame related set
2998 in a parallel with more than one element. Don't set
2999 RTX_FRAME_RELATED_P in the first set if reg is temporary
3000 register 1. The effect of this operation is recorded in
3001 the initial copy. */
3002 if (reg != 1)
3004 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3005 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3007 else
3009 /* The first element of a PARALLEL is always processed if it is
3010 a SET. Thus, we need an expression list for this case. */
3011 REG_NOTES (insn)
3012 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3013 gen_rtx_SET (VOIDmode, basereg,
3014 gen_rtx_PLUS (word_mode, basereg, delta)),
3015 REG_NOTES (insn));
3020 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3021 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3022 whether to add a frame note or not.
3024 In the DISP > 8k case, we leave the high part of the address in %r1.
3025 There is code in hppa_expand_{prologue,epilogue} that knows about this. */
3027 static void
3028 set_reg_plus_d (reg, base, disp, note)
3029 int reg, base, disp, note;
3031 rtx insn;
3033 if (VAL_14_BITS_P (disp))
3035 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3036 plus_constant (gen_rtx_REG (Pmode, base), disp));
3038 else
3040 rtx basereg = gen_rtx_REG (Pmode, base);
3041 rtx delta = GEN_INT (disp);
3043 emit_move_insn (gen_rtx_REG (Pmode, 1),
3044 gen_rtx_PLUS (Pmode, basereg,
3045 gen_rtx_HIGH (Pmode, delta)));
3046 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3047 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3048 delta));
3051 if (DO_FRAME_NOTES && note)
3052 RTX_FRAME_RELATED_P (insn) = 1;
3055 int
3056 compute_frame_size (size, fregs_live)
3057 int size;
3058 int *fregs_live;
3060 int i, fsize;
3062 /* Space for frame pointer + filler. If any frame is allocated
3063 we need to add this in because of STARTING_FRAME_OFFSET.
3065 Similar code also appears in hppa_expand_prologue. Change both
3066 of them at the same time. */
3067 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
3069 /* If the current function calls __builtin_eh_return, then we need
3070 to allocate stack space for registers that will hold data for
3071 the exception handler. */
3072 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3074 unsigned int i;
3076 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3077 continue;
3078 fsize += i * UNITS_PER_WORD;
3081 /* Account for space used by the callee general register saves. */
3082 for (i = 18; i >= 3; i--)
3083 if (regs_ever_live[i])
3084 fsize += UNITS_PER_WORD;
3086 /* Round the stack. */
3087 fsize = (fsize + 7) & ~7;
3089 /* Account for space used by the callee floating point register saves. */
3090 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3091 if (regs_ever_live[i]
3092 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3094 if (fregs_live)
3095 *fregs_live = 1;
3097 /* We always save both halves of the FP register, so always
3098 increment the frame size by 8 bytes. */
3099 fsize += 8;
3102 /* The various ABIs include space for the outgoing parameters in the
3103 size of the current function's stack frame. */
3104 fsize += current_function_outgoing_args_size;
3106 /* Allocate space for the fixed frame marker. This space must be
3107 allocated for any function that makes calls or otherwise allocates
3108 stack space. */
3109 if (!current_function_is_leaf || fsize)
3110 fsize += TARGET_64BIT ? 16 : 32;
3112 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
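/* A worked example under illustrative 32-bit assumptions
   (STARTING_FRAME_OFFSET == 8, UNITS_PER_WORD == 4, 64-byte stack
   alignment): 40 bytes of locals plus live %r3-%r5 give
   40 + 8 + 12 == 60, rounded to 64; a non-leaf function then adds
   the 32-byte frame marker for 96, and the final rounding to the
   stack boundary yields 128.  */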
3115 /* Generate the assembly code for function entry. FILE is a stdio
3116 stream to output the code to. SIZE is an int: how many units of
3117 temporary storage to allocate.
3119 Refer to the array `regs_ever_live' to determine which registers to
3120 save; `regs_ever_live[I]' is nonzero if register number I is ever
3121 used in the function. This function is responsible for knowing
3122 which registers should not be saved even if used. */
3124 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3125 of memory. If any fpu reg is used in the function, we allocate
3126 such a block here, at the bottom of the frame, just in case it's needed.
3128 If this function is a leaf procedure, then we may choose not
3129 to do a "save" insn. The decision about whether or not
3130 to do this is made in regclass.c. */
3132 void
3133 pa_output_function_prologue (file, size)
3134 FILE *file;
3135 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3137 /* The function's label and associated .PROC must never be
3138 separated and must be output *after* any profiling declarations
3139 to avoid changing spaces/subspaces within a procedure. */
3140 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3141 fputs ("\t.PROC\n", file);
3143 /* hppa_expand_prologue does the dirty work now. We just need
3144 to output the assembler directives which denote the start
3145 of a function. */
3146 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3147 if (regs_ever_live[2])
3148 fputs (",CALLS,SAVE_RP", file);
3149 else
3150 fputs (",NO_CALLS", file);
3152 if (frame_pointer_needed)
3153 fputs (",SAVE_SP", file);
3155 /* Pass on information about the number of callee register saves
3156 performed in the prologue.
3158 The compiler is supposed to pass the highest register number
3159 saved, the assembler then has to adjust that number before
3160 entering it into the unwind descriptor (to account for any
3161 caller saved registers with lower register numbers than the
3162 first callee saved register). */
3163 if (gr_saved)
3164 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3166 if (fr_saved)
3167 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3169 fputs ("\n\t.ENTRY\n", file);
3171 /* If we're using GAS and SOM, and not using the portable runtime model,
3172 then we don't need to accumulate the total number of code bytes. */
3173 if ((TARGET_GAS && TARGET_SOM && ! TARGET_PORTABLE_RUNTIME)
3174 /* FIXME: we can't handle long calls for TARGET_64BIT. */
3175 || TARGET_64BIT)
3176 total_code_bytes = 0;
3177 else if (INSN_ADDRESSES_SET_P ())
3179 unsigned int old_total = total_code_bytes;
3181 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn ()));
3182 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3184 /* Be prepared to handle overflows. */
3185 if (old_total > total_code_bytes)
3186 total_code_bytes = -1;
3188 else
3189 total_code_bytes = -1;
3191 remove_useless_addtr_insns (get_insns (), 0);
3194 void
3195 hppa_expand_prologue ()
3197 int size = get_frame_size ();
3198 int merge_sp_adjust_with_store = 0;
3199 int i, offset;
3200 rtx insn, tmpreg;
3202 gr_saved = 0;
3203 fr_saved = 0;
3204 save_fregs = 0;
3206 /* Allocate space for frame pointer + filler. If any frame is allocated
3207 we need to add this in because of STARTING_FRAME_OFFSET.
3209 Similar code also appears in compute_frame_size. Change both
3210 of them at the same time. */
3211 local_fsize = size + (size || frame_pointer_needed
3212 ? STARTING_FRAME_OFFSET : 0);
3214 actual_fsize = compute_frame_size (size, &save_fregs);
3216 /* Compute a few things we will use often. */
3217 tmpreg = gen_rtx_REG (word_mode, 1);
3219 /* Save RP first. The calling conventions manual states RP will
3220 always be stored into the caller's frame at sp - 20 or sp - 16
3221 depending on which ABI is in use. */
3222 if (regs_ever_live[2] || current_function_calls_eh_return)
3223 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3225 /* Allocate the local frame and set up the frame pointer if needed. */
3226 if (actual_fsize != 0)
3228 if (frame_pointer_needed)
3230 /* Copy the old frame pointer temporarily into %r1. Set up the
3231 new stack pointer, then store away the saved old frame pointer
3232 into the stack at sp and at the same time update the stack
3233 pointer by actual_fsize bytes. Two versions, first
3234 handles small (<8k) frames. The second handles large (>=8k)
3235 frames. */
3236 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3237 if (DO_FRAME_NOTES)
3239 /* We need to record the frame pointer save here since the
3240 new frame pointer is set in the following insn. */
3241 RTX_FRAME_RELATED_P (insn) = 1;
3242 REG_NOTES (insn)
3243 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3244 gen_rtx_SET (VOIDmode,
3245 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3246 frame_pointer_rtx),
3247 REG_NOTES (insn));
3250 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3251 if (DO_FRAME_NOTES)
3252 RTX_FRAME_RELATED_P (insn) = 1;
3254 if (VAL_14_BITS_P (actual_fsize))
3255 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3256 else
3258 /* It is incorrect to store the saved frame pointer at *sp,
3259 then increment sp (writes beyond the current stack boundary).
3261 So instead use stwm to store at *sp and post-increment the
3262 stack pointer as an atomic operation. Then increment sp to
3263 finish allocating the new frame. */
3264 int adjust1 = 8192 - 64;
3265 int adjust2 = actual_fsize - adjust1;
3267 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3268 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3269 adjust2, 1);
3272 /* Prevent register spills from being scheduled before the
3273 stack pointer is raised. Necessary as we will be storing
3274 registers using the frame pointer as a base register, and
3275 we happen to set fp before raising sp. */
3276 emit_insn (gen_blockage ());
3278 /* no frame pointer needed. */
3279 else
3281 /* In some cases we can perform the first callee register save
3282 and allocating the stack frame at the same time. If so, just
3283 make a note of it and defer allocating the frame until saving
3284 the callee registers. */
3285 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3286 merge_sp_adjust_with_store = 1;
3287 /* Can not optimize. Adjust the stack frame by actual_fsize
3288 bytes. */
3289 else
3290 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3291 actual_fsize, 1);
3295 /* Normal register save.
3297 Do not save the frame pointer in the frame_pointer_needed case. It
3298 was done earlier. */
3299 if (frame_pointer_needed)
3301 offset = local_fsize;
3303 /* Saving the EH return data registers in the frame is the simplest
3304 way to get the frame unwind information emitted. We put them
3305 just before the general registers. */
3306 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3308 unsigned int i, regno;
3310 for (i = 0; ; ++i)
3312 regno = EH_RETURN_DATA_REGNO (i);
3313 if (regno == INVALID_REGNUM)
3314 break;
3316 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3317 offset += UNITS_PER_WORD;
3321 for (i = 18; i >= 4; i--)
3322 if (regs_ever_live[i] && ! call_used_regs[i])
3324 store_reg (i, offset, FRAME_POINTER_REGNUM);
3325 offset += UNITS_PER_WORD;
3326 gr_saved++;
3328 /* Account for %r3 which is saved in a special place. */
3329 gr_saved++;
3331 /* No frame pointer needed. */
3332 else
3334 offset = local_fsize - actual_fsize;
3336 /* Saving the EH return data registers in the frame is the simplest
3337 way to get the frame unwind information emitted. */
3338 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3340 unsigned int i, regno;
3342 for (i = 0; ; ++i)
3344 regno = EH_RETURN_DATA_REGNO (i);
3345 if (regno == INVALID_REGNUM)
3346 break;
3348 /* If merge_sp_adjust_with_store is nonzero, then we can
3349 optimize the first save. */
3350 if (merge_sp_adjust_with_store)
3352 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3353 merge_sp_adjust_with_store = 0;
3355 else
3356 store_reg (regno, offset, STACK_POINTER_REGNUM);
3357 offset += UNITS_PER_WORD;
3361 for (i = 18; i >= 3; i--)
3362 if (regs_ever_live[i] && ! call_used_regs[i])
3364 /* If merge_sp_adjust_with_store is nonzero, then we can
3365 optimize the first GR save. */
3366 if (merge_sp_adjust_with_store)
3368 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3369 merge_sp_adjust_with_store = 0;
3371 else
3372 store_reg (i, offset, STACK_POINTER_REGNUM);
3373 offset += UNITS_PER_WORD;
3374 gr_saved++;
3377 /* If we wanted to merge the SP adjustment with a GR save, but we never
3378 did any GR saves, then just emit the adjustment here. */
3379 if (merge_sp_adjust_with_store)
3380 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3381 actual_fsize, 1);
3384 /* The hppa calling conventions say that %r19, the pic offset
3385 register, is saved at sp - 32 (in this function's frame)
3386 when generating PIC code. FIXME: What is the correct thing
3387 to do for functions which make no calls and allocate no
3388 frame? Do we need to allocate a frame, or can we just omit
3389 the save? For now we'll just omit the save. */
3390 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3391 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3393 /* Align pointer properly (doubleword boundary). */
3394 offset = (offset + 7) & ~7;
3396 /* Floating point register store. */
3397 if (save_fregs)
3399 rtx base;
3401 /* First get the frame or stack pointer to the start of the FP register
3402 save area. */
3403 if (frame_pointer_needed)
3405 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3406 base = frame_pointer_rtx;
3408 else
3410 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3411 base = stack_pointer_rtx;
3414 /* Now actually save the FP registers. */
3415 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3417 if (regs_ever_live[i]
3418 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3420 rtx addr, insn, reg;
3421 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3422 reg = gen_rtx_REG (DFmode, i);
3423 insn = emit_move_insn (addr, reg);
3424 if (DO_FRAME_NOTES)
3426 RTX_FRAME_RELATED_P (insn) = 1;
3427 if (TARGET_64BIT)
3429 rtx mem = gen_rtx_MEM (DFmode,
3430 plus_constant (base, offset));
3431 REG_NOTES (insn)
3432 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3433 gen_rtx_SET (VOIDmode, mem, reg),
3434 REG_NOTES (insn));
3436 else
3438 rtx meml = gen_rtx_MEM (SFmode,
3439 plus_constant (base, offset));
3440 rtx memr = gen_rtx_MEM (SFmode,
3441 plus_constant (base, offset + 4));
3442 rtx regl = gen_rtx_REG (SFmode, i);
3443 rtx regr = gen_rtx_REG (SFmode, i + 1);
3444 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3445 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3446 rtvec vec;
3448 RTX_FRAME_RELATED_P (setl) = 1;
3449 RTX_FRAME_RELATED_P (setr) = 1;
3450 vec = gen_rtvec (2, setl, setr);
3451 REG_NOTES (insn)
3452 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3453 gen_rtx_SEQUENCE (VOIDmode, vec),
3454 REG_NOTES (insn));
3457 offset += GET_MODE_SIZE (DFmode);
3458 fr_saved++;
3463 /* FIXME: expand_call and expand_millicode_call need to be fixed to
3464 prevent insns with frame notes being scheduled in the delay slot
3465 of calls. This causes problems because the dwarf2 output code
3466 processes the insn list serially. For now, limit the migration
3467 of prologue insns with a blockage. */
3468 if (DO_FRAME_NOTES)
3469 emit_insn (gen_blockage ());
3472 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3473 Handle case where DISP > 8k by using the add_high_const patterns. */
3475 static void
3476 load_reg (reg, disp, base)
3477 int reg, disp, base;
3479 rtx src, dest, basereg;
3481 dest = gen_rtx_REG (word_mode, reg);
3482 basereg = gen_rtx_REG (Pmode, base);
3483 if (VAL_14_BITS_P (disp))
3485 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3486 emit_move_insn (dest, src);
3488 else
3490 rtx delta = GEN_INT (disp);
3491 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3492 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3493 emit_move_insn (tmpreg, high);
3494 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3495 emit_move_insn (dest, src);
3499 /* This function generates the assembly code for function exit.
3500 Args are as for output_function_prologue ().
3502 The function epilogue should not depend on the current stack
3503 pointer! It should use the frame pointer only. This is mandatory
3504 because of alloca; we also take advantage of it to omit stack
3505 adjustments before returning. */
3507 static void
3508 pa_output_function_epilogue (file, size)
3509 FILE *file;
3510 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3512 rtx insn = get_last_insn ();
3514 /* hppa_expand_epilogue does the dirty work now. We just need
3515 to output the assembler directives which denote the end
3516 of a function.
3518 To make debuggers happy, emit a nop if the epilogue was completely
3519 eliminated due to a volatile call as the last insn in the
3520 current function. That way the return address (in %r2) will
3521 always point to a valid instruction in the current function. */
3523 /* Get the last real insn. */
3524 if (GET_CODE (insn) == NOTE)
3525 insn = prev_real_insn (insn);
3527 /* If it is a sequence, then look inside. */
3528 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3529 insn = XVECEXP (PATTERN (insn), 0, 0);
3531 /* If insn is a CALL_INSN, then it must be a call to a volatile
3532 function (otherwise there would be epilogue insns). */
3533 if (insn && GET_CODE (insn) == CALL_INSN)
3534 fputs ("\tnop\n", file);
3536 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3539 void
3540 hppa_expand_epilogue ()
3542 rtx tmpreg;
3543 int offset, i;
3544 int merge_sp_adjust_with_load = 0;
3545 int ret_off = 0;
3547 /* We will use this often. */
3548 tmpreg = gen_rtx_REG (word_mode, 1);
3550 /* Try to restore RP early to avoid load/use interlocks when
3551 RP gets used in the return (bv) instruction. This appears to still
3552 be necessary even when we schedule the prologue and epilogue. */
3553 if (regs_ever_live [2] || current_function_calls_eh_return)
3555 ret_off = TARGET_64BIT ? -16 : -20;
3556 if (frame_pointer_needed)
3558 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3559 ret_off = 0;
3561 else
3563 /* No frame pointer, and stack is smaller than 8k. */
3564 if (VAL_14_BITS_P (ret_off - actual_fsize))
3566 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3567 ret_off = 0;
3572 /* General register restores. */
3573 if (frame_pointer_needed)
3575 offset = local_fsize;
3577 /* If the current function calls __builtin_eh_return, then we need
3578 to restore the saved EH data registers. */
3579 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3581 unsigned int i, regno;
3583 for (i = 0; ; ++i)
3585 regno = EH_RETURN_DATA_REGNO (i);
3586 if (regno == INVALID_REGNUM)
3587 break;
3589 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3590 offset += UNITS_PER_WORD;
3594 for (i = 18; i >= 4; i--)
3595 if (regs_ever_live[i] && ! call_used_regs[i])
3597 load_reg (i, offset, FRAME_POINTER_REGNUM);
3598 offset += UNITS_PER_WORD;
3601 else
3603 offset = local_fsize - actual_fsize;
3605 /* If the current function calls __builtin_eh_return, then we need
3606 to restore the saved EH data registers. */
3607 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3609 unsigned int i, regno;
3611 for (i = 0; ; ++i)
3613 regno = EH_RETURN_DATA_REGNO (i);
3614 if (regno == INVALID_REGNUM)
3615 break;
3617 /* Only for the first load.
3618 merge_sp_adjust_with_load holds the register load
3619 with which we will merge the sp adjustment. */
3620 if (merge_sp_adjust_with_load == 0
3621 && local_fsize == 0
3622 && VAL_14_BITS_P (-actual_fsize))
3623 merge_sp_adjust_with_load = regno;
3624 else
3625 load_reg (regno, offset, STACK_POINTER_REGNUM);
3626 offset += UNITS_PER_WORD;
3630 for (i = 18; i >= 3; i--)
3632 if (regs_ever_live[i] && ! call_used_regs[i])
3634 /* Only for the first load.
3635 merge_sp_adjust_with_load holds the register load
3636 with which we will merge the sp adjustment. */
3637 if (merge_sp_adjust_with_load == 0
3638 && local_fsize == 0
3639 && VAL_14_BITS_P (-actual_fsize))
3640 merge_sp_adjust_with_load = i;
3641 else
3642 load_reg (i, offset, STACK_POINTER_REGNUM);
3643 offset += UNITS_PER_WORD;
3648 /* Align pointer properly (doubleword boundary). */
3649 offset = (offset + 7) & ~7;
3651 /* FP register restores. */
3652 if (save_fregs)
3654 /* Adjust the register to index off of. */
3655 if (frame_pointer_needed)
3656 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3657 else
3658 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3660 /* Actually do the restores now. */
3661 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3662 if (regs_ever_live[i]
3663 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3665 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3666 rtx dest = gen_rtx_REG (DFmode, i);
3667 emit_move_insn (dest, src);
3671 /* Emit a blockage insn here to keep these insns from being moved to
3672 an earlier spot in the epilogue, or into the main instruction stream.
3674 This is necessary as we must not cut the stack back before all the
3675 restores are finished. */
3676 emit_insn (gen_blockage ());
3678 /* Reset stack pointer (and possibly frame pointer). The stack
3679 pointer is initially set to fp + 64 to avoid a race condition. */
3680 if (frame_pointer_needed)
3682 rtx delta = GEN_INT (-64);
3684 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
3685 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3687 /* If we were deferring a callee register restore, do it now. */
3688 else if (merge_sp_adjust_with_load)
3690 rtx delta = GEN_INT (-actual_fsize);
3691 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3693 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3695 else if (actual_fsize != 0)
3696 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3697 - actual_fsize, 0);
3699 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3700 frame greater than 8k), do so now. */
3701 if (ret_off != 0)
3702 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3704 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3706 rtx sa = EH_RETURN_STACKADJ_RTX;
3708 emit_insn (gen_blockage ());
3709 emit_insn (TARGET_64BIT
3710 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3711 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
3715 rtx
3716 hppa_pic_save_rtx ()
3718 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3721 void
3722 hppa_profile_hook (label_no)
3723 int label_no;
3725 rtx begin_label_rtx, call_insn;
3726 char begin_label_name[16];
3728 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3729 label_no);
3730 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3732 if (TARGET_64BIT)
3733 emit_move_insn (arg_pointer_rtx,
3734 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3735 GEN_INT (64)));
3737 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3739 #ifndef NO_PROFILE_COUNTERS
3741 rtx count_label_rtx, addr, r24;
3742 char count_label_name[16];
3744 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3745 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
3747 addr = force_reg (Pmode, count_label_rtx);
3748 r24 = gen_rtx_REG (Pmode, 24);
3749 emit_move_insn (r24, addr);
3751 /* %r25 is set from within the output pattern. */
3752 call_insn =
3753 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3754 GEN_INT (TARGET_64BIT ? 24 : 12),
3755 begin_label_rtx));
3757 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3759 #else
3760 /* %r25 is set from within the output pattern. */
3761 call_insn =
3762 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3763 GEN_INT (TARGET_64BIT ? 16 : 8),
3764 begin_label_rtx));
3765 #endif
3767 /* Indicate the _mcount call cannot throw, nor will it execute a
3768 non-local goto. */
3769 REG_NOTES (call_insn)
3770 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3772 if (flag_pic)
3774 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3775 if (TARGET_64BIT)
3776 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3778 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3782 /* Fetch the return address for the frame COUNT steps up from
3783 the current frame, after the prologue. FRAMEADDR is the
3784 frame pointer of the COUNT frame.
3786 We want to ignore any export stub remnants here. To handle this,
3787 we examine the code at the return address, and if it is an export
3788 stub, we return a memory rtx for the stub return address stored
3789 at frame-24.
3791 The value returned is used in two different ways:
3793 1. To find a function's caller.
3795 2. To change the return address for a function.
3797 This function handles most instances of case 1; however, it will
3798 fail if there are two levels of stubs to execute on the return
3799 path. The only way I believe that can happen is if the return value
3800 needs a parameter relocation, which never happens for C code.
3802 This function handles most instances of case 2; however, it will
3803 fail if we did not originally have stub code on the return path
3804 but will need stub code on the new return path. This can happen if
3805 the caller & callee are both in the main program, but the new
3806 return location is in a shared library. */
3808 rtx
3809 return_addr_rtx (count, frameaddr)
3810 int count;
3811 rtx frameaddr;
3813 rtx label;
3814 rtx rp;
3815 rtx saved_rp;
3816 rtx ins;
3818 if (count != 0)
3819 return NULL_RTX;
3821 rp = get_hard_reg_initial_val (Pmode, 2);
3823 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3824 return rp;
3826 saved_rp = gen_reg_rtx (Pmode);
3827 emit_move_insn (saved_rp, rp);
3829 /* Get pointer to the instruction stream. We have to mask out the
3830 privilege level from the two low order bits of the return address
3831 pointer here so that ins will point to the start of the first
3832 instruction that would have been executed if we returned. */
3833 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3834 label = gen_label_rtx ();
3836 /* Check the instruction stream at the normal return address for the
3837 export stub:
3839 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3840 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3841 0x00011820 | stub+16: mtsp r1,sr0
3842 0xe0400002 | stub+20: be,n 0(sr0,rp)
3844 If it is an export stub, then our return address is really in
3845 -24[frameaddr]. */
3847 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3848 NULL_RTX, SImode, 1);
3849 emit_jump_insn (gen_bne (label));
3851 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3852 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3853 emit_jump_insn (gen_bne (label));
3855 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3856 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
3857 emit_jump_insn (gen_bne (label));
3859 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3860 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
3862 /* If there is no export stub then just use the value saved from
3863 the return pointer register. */
3865 emit_jump_insn (gen_bne (label));
3867 /* Here we know that our return address points to an export
3868 stub. We don't want to return the address of the export stub,
3869 but rather the return address of the export stub. That return
3870 address is stored at -24[frameaddr]. */
3872 emit_move_insn (saved_rp,
3873 gen_rtx_MEM (Pmode,
3874 memory_address (Pmode,
3875 plus_constant (frameaddr,
3876 -24))));
3878 emit_label (label);
3879 return saved_rp;
3882 /* This is only valid once reload has completed because it depends on
3883 knowing exactly how much (if any) frame there is and...
3885 It's only valid if there is no frame marker to de-allocate and...
3887 It's only valid if %r2 hasn't been saved into the caller's frame
3888 (we're not profiling and %r2 isn't live anywhere). */
3889 int
3890 hppa_can_use_return_insn_p ()
3892 return (reload_completed
3893 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3894 && ! regs_ever_live[2]
3895 && ! frame_pointer_needed);
3898 void
3899 emit_bcond_fp (code, operand0)
3900 enum rtx_code code;
3901 rtx operand0;
3903 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3904 gen_rtx_IF_THEN_ELSE (VOIDmode,
3905 gen_rtx_fmt_ee (code,
3906 VOIDmode,
3907 gen_rtx_REG (CCFPmode, 0),
3908 const0_rtx),
3909 gen_rtx_LABEL_REF (VOIDmode, operand0),
3910 pc_rtx)));
3914 rtx
3915 gen_cmp_fp (code, operand0, operand1)
3916 enum rtx_code code;
3917 rtx operand0, operand1;
3919 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3920 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3923 /* Adjust the cost of a scheduling dependency. Return the new cost of
3924 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
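/* Worked example (the latency is hypothetical): for an anti dependence
   where INSN is an fpload whose target register is read by DEP_INSN, an
   fpalu insn with a default latency of 3, the code below returns a cost
   of 3 - 1 == 2; true dependencies and the pa8000 are left untouched.  */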
3926 static int
3927 pa_adjust_cost (insn, link, dep_insn, cost)
3928 rtx insn;
3929 rtx link;
3930 rtx dep_insn;
3931 int cost;
3933 enum attr_type attr_type;
3935 /* Don't adjust costs for a pa8000 chip; also don't adjust any
3936 true dependencies, as they are described with bypasses now. */
3937 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
3938 return cost;
3940 if (! recog_memoized (insn))
3941 return 0;
3943 attr_type = get_attr_type (insn);
3945 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3947 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3948 cycles later. */
3950 if (attr_type == TYPE_FPLOAD)
3952 rtx pat = PATTERN (insn);
3953 rtx dep_pat = PATTERN (dep_insn);
3954 if (GET_CODE (pat) == PARALLEL)
3956 /* This happens for the fldXs,mb patterns. */
3957 pat = XVECEXP (pat, 0, 0);
3959 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3960 /* If this happens, we have to extend this to schedule
3961 optimally. Return 0 for now. */
3962 return 0;
3964 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3966 if (! recog_memoized (dep_insn))
3967 return 0;
3968 switch (get_attr_type (dep_insn))
3970 case TYPE_FPALU:
3971 case TYPE_FPMULSGL:
3972 case TYPE_FPMULDBL:
3973 case TYPE_FPDIVSGL:
3974 case TYPE_FPDIVDBL:
3975 case TYPE_FPSQRTSGL:
3976 case TYPE_FPSQRTDBL:
3977 /* An fpload can't be issued until one cycle before a
3978 preceding arithmetic operation has finished if
3979 the target of the fpload is any of the sources
3980 (or destination) of the arithmetic operation. */
3981 return insn_default_latency (dep_insn) - 1;
3983 default:
3984 return 0;
3988 else if (attr_type == TYPE_FPALU)
3990 rtx pat = PATTERN (insn);
3991 rtx dep_pat = PATTERN (dep_insn);
3992 if (GET_CODE (pat) == PARALLEL)
3994 /* This happens for the fldXs,mb patterns. */
3995 pat = XVECEXP (pat, 0, 0);
3997 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3998 /* If this happens, we have to extend this to schedule
3999 optimally. Return 0 for now. */
4000 return 0;
4002 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4004 if (! recog_memoized (dep_insn))
4005 return 0;
4006 switch (get_attr_type (dep_insn))
4008 case TYPE_FPDIVSGL:
4009 case TYPE_FPDIVDBL:
4010 case TYPE_FPSQRTSGL:
4011 case TYPE_FPSQRTDBL:
4012 /* An ALU flop can't be issued until two cycles before a
4013 preceding divide or sqrt operation has finished if
4014 the target of the ALU flop is any of the sources
4015 (or destination) of the divide or sqrt operation. */
4016 return insn_default_latency (dep_insn) - 2;
4018 default:
4019 return 0;
4024 /* For other anti dependencies, the cost is 0. */
4025 return 0;
4027 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4029 /* Output dependency; DEP_INSN writes a register that INSN writes some
4030 cycles later. */
4031 if (attr_type == TYPE_FPLOAD)
4033 rtx pat = PATTERN (insn);
4034 rtx dep_pat = PATTERN (dep_insn);
4035 if (GET_CODE (pat) == PARALLEL)
4037 /* This happens for the fldXs,mb patterns. */
4038 pat = XVECEXP (pat, 0, 0);
4040 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4041 /* If this happens, we have to extend this to schedule
4042 optimally. Return 0 for now. */
4043 return 0;
4045 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4047 if (! recog_memoized (dep_insn))
4048 return 0;
4049 switch (get_attr_type (dep_insn))
4051 case TYPE_FPALU:
4052 case TYPE_FPMULSGL:
4053 case TYPE_FPMULDBL:
4054 case TYPE_FPDIVSGL:
4055 case TYPE_FPDIVDBL:
4056 case TYPE_FPSQRTSGL:
4057 case TYPE_FPSQRTDBL:
4058 /* An fpload can't be issued until one cycle before a
4059 preceding arithmetic operation has finished if
4060 the target of the fpload is the destination of the
4061 arithmetic operation.
4063 Exception: For PA7100LC, PA7200 and PA7300, the cost
4064 is 3 cycles, unless they bundle together. We also
4065 pay the penalty if the second insn is an fpload. */
4066 return insn_default_latency (dep_insn) - 1;
4068 default:
4069 return 0;
4073 else if (attr_type == TYPE_FPALU)
4075 rtx pat = PATTERN (insn);
4076 rtx dep_pat = PATTERN (dep_insn);
4077 if (GET_CODE (pat) == PARALLEL)
4079 /* This happens for the fldXs,mb patterns. */
4080 pat = XVECEXP (pat, 0, 0);
4082 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4083 /* If this happens, we have to extend this to schedule
4084 optimally. Return 0 for now. */
4085 return 0;
4087 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4089 if (! recog_memoized (dep_insn))
4090 return 0;
4091 switch (get_attr_type (dep_insn))
4093 case TYPE_FPDIVSGL:
4094 case TYPE_FPDIVDBL:
4095 case TYPE_FPSQRTSGL:
4096 case TYPE_FPSQRTDBL:
4097 /* An ALU flop can't be issued until two cycles before a
4098 preceding divide or sqrt operation has finished if
4099 the target of the ALU flop is also the target of
4100 the divide or sqrt operation. */
4101 return insn_default_latency (dep_insn) - 2;
4103 default:
4104 return 0;
4109 /* For other output dependencies, the cost is 0. */
4110 return 0;
4112 else
4113 abort ();
4116 /* Adjust scheduling priorities. We use this to try and keep addil
4117 and the next use of %r1 close together. */
4118 static int
4119 pa_adjust_priority (insn, priority)
4120 rtx insn;
4121 int priority;
4123 rtx set = single_set (insn);
4124 rtx src, dest;
4125 if (set)
4127 src = SET_SRC (set);
4128 dest = SET_DEST (set);
4129 if (GET_CODE (src) == LO_SUM
4130 && symbolic_operand (XEXP (src, 1), VOIDmode)
4131 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4132 priority >>= 3;
4134 else if (GET_CODE (src) == MEM
4135 && GET_CODE (XEXP (src, 0)) == LO_SUM
4136 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4137 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4138 priority >>= 1;
4140 else if (GET_CODE (dest) == MEM
4141 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4142 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4143 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4144 priority >>= 3;
4146 return priority;
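/* Worked example (the priority value is hypothetical): a set whose source
   is a LO_SUM of a writable symbolic operand and that entered with
   priority 24 is requeued with priority 24 >> 3 == 3, pulling it toward
   the addil that computed %r1.  */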
4149 /* The 700 can only issue a single insn at a time.
4150 The 7XXX processors can issue two insns at a time.
4151 The 8000 can issue 4 insns at a time. */
4152 static int
4153 pa_issue_rate ()
4155 switch (pa_cpu)
4157 case PROCESSOR_700: return 1;
4158 case PROCESSOR_7100: return 2;
4159 case PROCESSOR_7100LC: return 2;
4160 case PROCESSOR_7200: return 2;
4161 case PROCESSOR_7300: return 2;
4162 case PROCESSOR_8000: return 4;
4164 default:
4165 abort ();
4171 /* Return any length adjustment needed by INSN which already has its length
4172 computed as LENGTH. Return zero if no adjustment is necessary.
4174 For the PA: function calls, millicode calls, and backwards short
4175 conditional branches with unfilled delay slots need an adjustment by one
4176 instruction (+4 bytes, the NOP which will be inserted into the instruction stream).
4178 Also compute the length of an inline block move here as it is too
4179 complicated to express as a length attribute in pa.md. */
4180 int
4181 pa_adjust_insn_length (insn, length)
4182 rtx insn;
4183 int length;
4185 rtx pat = PATTERN (insn);
4187 /* Call insns which are *not* indirect and have unfilled delay slots. */
4188 if (GET_CODE (insn) == CALL_INSN)
4191 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4192 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
4193 return 4;
4194 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4195 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4196 == SYMBOL_REF)
4197 return 4;
4198 else
4199 return 0;
4201 /* Jumps inside switch tables which have unfilled delay slots
4202 also need adjustment. */
4203 else if (GET_CODE (insn) == JUMP_INSN
4204 && simplejump_p (insn)
4205 && GET_MODE (insn) == SImode)
4206 return 4;
4207 /* Millicode insn with an unfilled delay slot. */
4208 else if (GET_CODE (insn) == INSN
4209 && GET_CODE (pat) != SEQUENCE
4210 && GET_CODE (pat) != USE
4211 && GET_CODE (pat) != CLOBBER
4212 && get_attr_type (insn) == TYPE_MILLI)
4213 return 4;
4214 /* Block move pattern. */
4215 else if (GET_CODE (insn) == INSN
4216 && GET_CODE (pat) == PARALLEL
4217 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4218 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4219 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4220 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4221 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4222 return compute_movstrsi_length (insn) - 4;
4223 /* Conditional branch with an unfilled delay slot. */
4224 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4226 /* Adjust a short backwards conditional with an unfilled delay slot. */
4227 if (GET_CODE (pat) == SET
4228 && length == 4
4229 && ! forward_branch_p (insn))
4230 return 4;
4231 else if (GET_CODE (pat) == PARALLEL
4232 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4233 && length == 4)
4234 return 4;
4235 /* Adjust dbra insn with short backwards conditional branch with
4236 unfilled delay slot -- only for case where counter is in a
4237 general register. */
4238 else if (GET_CODE (pat) == PARALLEL
4239 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4240 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4241 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4242 && length == 4
4243 && ! forward_branch_p (insn))
4244 return 4;
4245 else
4246 return 0;
4248 return 0;
4251 /* Print operand X (an rtx) in assembler syntax to file FILE.
4252 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4253 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4255 void
4256 print_operand (file, x, code)
4257 FILE *file;
4258 rtx x;
4259 int code;
4261 switch (code)
4263 case '#':
4264 /* Output a 'nop' if there's nothing for the delay slot. */
4265 if (dbr_sequence_length () == 0)
4266 fputs ("\n\tnop", file);
4267 return;
4268 case '*':
4269 /* Output a nullification completer if there's nothing for the
4270 delay slot or nullification is requested. */
4271 if (dbr_sequence_length () == 0 ||
4272 (final_sequence &&
4273 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4274 fputs (",n", file);
4275 return;
4276 case 'R':
4277 /* Print out the second register name of a register pair.
4278 I.e., R (6) => 7. */
4279 fputs (reg_names[REGNO (x) + 1], file);
4280 return;
4281 case 'r':
4282 /* A register or zero. */
4283 if (x == const0_rtx
4284 || (x == CONST0_RTX (DFmode))
4285 || (x == CONST0_RTX (SFmode)))
4287 fputs ("%r0", file);
4288 return;
4290 else
4291 break;
4292 case 'f':
4293 /* A register or zero (floating point). */
4294 if (x == const0_rtx
4295 || (x == CONST0_RTX (DFmode))
4296 || (x == CONST0_RTX (SFmode)))
4298 fputs ("%fr0", file);
4299 return;
4301 else
4302 break;
4303 case 'A':
4305 rtx xoperands[2];
4307 xoperands[0] = XEXP (XEXP (x, 0), 0);
4308 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4309 output_global_address (file, xoperands[1], 0);
4310 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4311 return;
4314 case 'C': /* Plain (C)ondition */
4315 case 'X':
4316 switch (GET_CODE (x))
4318 case EQ:
4319 fputs ("=", file); break;
4320 case NE:
4321 fputs ("<>", file); break;
4322 case GT:
4323 fputs (">", file); break;
4324 case GE:
4325 fputs (">=", file); break;
4326 case GEU:
4327 fputs (">>=", file); break;
4328 case GTU:
4329 fputs (">>", file); break;
4330 case LT:
4331 fputs ("<", file); break;
4332 case LE:
4333 fputs ("<=", file); break;
4334 case LEU:
4335 fputs ("<<=", file); break;
4336 case LTU:
4337 fputs ("<<", file); break;
4338 default:
4339 abort ();
4341 return;
4342 case 'N': /* Condition, (N)egated */
4343 switch (GET_CODE (x))
4345 case EQ:
4346 fputs ("<>", file); break;
4347 case NE:
4348 fputs ("=", file); break;
4349 case GT:
4350 fputs ("<=", file); break;
4351 case GE:
4352 fputs ("<", file); break;
4353 case GEU:
4354 fputs ("<<", file); break;
4355 case GTU:
4356 fputs ("<<=", file); break;
4357 case LT:
4358 fputs (">=", file); break;
4359 case LE:
4360 fputs (">", file); break;
4361 case LEU:
4362 fputs (">>", file); break;
4363 case LTU:
4364 fputs (">>=", file); break;
4365 default:
4366 abort ();
4368 return;
4369 /* For floating-point comparisons. Note that the output
4370 predicates are the complement of the desired condition. */
4371 case 'Y':
4372 switch (GET_CODE (x))
4374 case EQ:
4375 fputs ("!=", file); break;
4376 case NE:
4377 fputs ("=", file); break;
4378 case GT:
4379 fputs ("!>", file); break;
4380 case GE:
4381 fputs ("!>=", file); break;
4382 case LT:
4383 fputs ("!<", file); break;
4384 case LE:
4385 fputs ("!<=", file); break;
4386 case LTGT:
4387 fputs ("!<>", file); break;
4388 case UNLE:
4389 fputs (">", file); break;
4390 case UNLT:
4391 fputs (">=", file); break;
4392 case UNGE:
4393 fputs ("<", file); break;
4394 case UNGT:
4395 fputs ("<=", file); break;
4396 case UNEQ:
4397 fputs ("<>", file); break;
4398 case UNORDERED:
4399 fputs ("<=>", file); break;
4400 case ORDERED:
4401 fputs ("!<=>", file); break;
4402 default:
4403 abort ();
4405 return;
4406 case 'S': /* Condition, operands are (S)wapped. */
4407 switch (GET_CODE (x))
4409 case EQ:
4410 fputs ("=", file); break;
4411 case NE:
4412 fputs ("<>", file); break;
4413 case GT:
4414 fputs ("<", file); break;
4415 case GE:
4416 fputs ("<=", file); break;
4417 case GEU:
4418 fputs ("<<=", file); break;
4419 case GTU:
4420 fputs ("<<", file); break;
4421 case LT:
4422 fputs (">", file); break;
4423 case LE:
4424 fputs (">=", file); break;
4425 case LEU:
4426 fputs (">>=", file); break;
4427 case LTU:
4428 fputs (">>", file); break;
4429 default:
4430 abort ();
4432 return;
4433 case 'B': /* Condition, (B)oth swapped and negate. */
4434 switch (GET_CODE (x))
4436 case EQ:
4437 fputs ("<>", file); break;
4438 case NE:
4439 fputs ("=", file); break;
4440 case GT:
4441 fputs (">=", file); break;
4442 case GE:
4443 fputs (">", file); break;
4444 case GEU:
4445 fputs (">>", file); break;
4446 case GTU:
4447 fputs (">>=", file); break;
4448 case LT:
4449 fputs ("<=", file); break;
4450 case LE:
4451 fputs ("<", file); break;
4452 case LEU:
4453 fputs ("<<", file); break;
4454 case LTU:
4455 fputs ("<<=", file); break;
4456 default:
4457 abort ();
4459 return;
4460 case 'k':
4461 if (GET_CODE (x) == CONST_INT)
4463 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4464 return;
4466 abort ();
4467 case 'Q':
4468 if (GET_CODE (x) == CONST_INT)
4470 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4471 return;
4473 abort ();
4474 case 'L':
4475 if (GET_CODE (x) == CONST_INT)
4477 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4478 return;
4480 abort ();
4481 case 'O':
4482 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4484 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4485 return;
4487 abort ();
4488 case 'p':
4489 if (GET_CODE (x) == CONST_INT)
4491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4492 return;
4494 abort ();
4495 case 'P':
4496 if (GET_CODE (x) == CONST_INT)
4498 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4499 return;
4501 abort ();
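    /* Worked examples for the constant codes above (INTVAL (x) == 5 is a
       hypothetical operand): 'k' prints ~5 == -6, 'Q' prints 64 - 5 == 59,
       'L' prints 32 - 5 == 27, 'p' prints 63 - 5 == 58, and 'P' prints
       31 - 5 == 26.  */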
4502 case 'I':
4503 if (GET_CODE (x) == CONST_INT)
4504 fputs ("i", file);
4505 return;
4506 case 'M':
4507 case 'F':
4508 switch (GET_CODE (XEXP (x, 0)))
4510 case PRE_DEC:
4511 case PRE_INC:
4512 if (ASSEMBLER_DIALECT == 0)
4513 fputs ("s,mb", file);
4514 else
4515 fputs (",mb", file);
4516 break;
4517 case POST_DEC:
4518 case POST_INC:
4519 if (ASSEMBLER_DIALECT == 0)
4520 fputs ("s,ma", file);
4521 else
4522 fputs (",ma", file);
4523 break;
4524 case PLUS:
4525 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4526 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4528 if (ASSEMBLER_DIALECT == 0)
4529 fputs ("x,s", file);
4530 else
4531 fputs (",s", file);
4533 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4534 fputs ("s", file);
4535 break;
4536 default:
4537 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4538 fputs ("s", file);
4539 break;
4541 return;
4542 case 'G':
4543 output_global_address (file, x, 0);
4544 return;
4545 case 'H':
4546 output_global_address (file, x, 1);
4547 return;
4548 case 0: /* Don't do anything special */
4549 break;
4550 case 'Z':
4552 unsigned op[3];
4553 compute_zdepwi_operands (INTVAL (x), op);
4554 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4555 return;
4557 case 'z':
4559 unsigned op[3];
4560 compute_zdepdi_operands (INTVAL (x), op);
4561 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4562 return;
4564 case 'c':
4565 /* We can get here from a .vtable_inherit due to our
4566 CONSTANT_ADDRESS_P rejecting perfectly good constant
4567 addresses. */
4568 break;
4569 default:
4570 abort ();
4572 if (GET_CODE (x) == REG)
4574 fputs (reg_names [REGNO (x)], file);
4575 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4577 fputs ("R", file);
4578 return;
4580 if (FP_REG_P (x)
4581 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4582 && (REGNO (x) & 1) == 0)
4583 fputs ("L", file);
4585 else if (GET_CODE (x) == MEM)
4587 int size = GET_MODE_SIZE (GET_MODE (x));
4588 rtx base = NULL_RTX;
4589 switch (GET_CODE (XEXP (x, 0)))
4591 case PRE_DEC:
4592 case POST_DEC:
4593 base = XEXP (XEXP (x, 0), 0);
4594 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4595 break;
4596 case PRE_INC:
4597 case POST_INC:
4598 base = XEXP (XEXP (x, 0), 0);
4599 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4600 break;
4601 default:
4602 if (GET_CODE (XEXP (x, 0)) == PLUS
4603 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4604 fprintf (file, "%s(%s)",
4605 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4606 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4607 else if (GET_CODE (XEXP (x, 0)) == PLUS
4608 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4609 fprintf (file, "%s(%s)",
4610 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4611 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4612 else
4613 output_address (XEXP (x, 0));
4614 break;
4617 else
4618 output_addr_const (file, x);
4621 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4623 void
4624 output_global_address (file, x, round_constant)
4625 FILE *file;
4626 rtx x;
4627 int round_constant;
4630 /* Imagine (high (const (plus ...))). */
4631 if (GET_CODE (x) == HIGH)
4632 x = XEXP (x, 0);
4634 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4635 assemble_name (file, XSTR (x, 0));
4636 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4638 assemble_name (file, XSTR (x, 0));
4639 fputs ("-$global$", file);
4641 else if (GET_CODE (x) == CONST)
4643 const char *sep = "";
4644 int offset = 0; /* assembler wants -$global$ at end */
4645 rtx base = NULL_RTX;
4647 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4649 base = XEXP (XEXP (x, 0), 0);
4650 output_addr_const (file, base);
4652 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4653 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4654 else abort ();
4656 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4658 base = XEXP (XEXP (x, 0), 1);
4659 output_addr_const (file, base);
4661 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4662 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4663 else abort ();
4665 /* How bogus. The compiler is apparently responsible for
4666 rounding the constant if it uses an LR field selector.
4668 The linker and/or assembler seem a better place since
4669 they have to do this kind of thing already.
4671 If we fail to do this, HP's optimizing linker may eliminate
4672 an addil, but not update the ldw/stw/ldo instruction that
4673 uses the result of the addil. */
4674 if (round_constant)
4675 offset = ((offset + 0x1000) & ~0x1fff);
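      /* E.g., a hypothetical offset of 0x2345 becomes
         (0x2345 + 0x1000) & ~0x1fff == 0x2000, i.e. 0 MOD 8192.  */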
4677 if (GET_CODE (XEXP (x, 0)) == PLUS)
4679 if (offset < 0)
4681 offset = -offset;
4682 sep = "-";
4684 else
4685 sep = "+";
4687 else if (GET_CODE (XEXP (x, 0)) == MINUS
4688 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4689 sep = "-";
4690 else abort ();
4692 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4693 fputs ("-$global$", file);
4694 if (offset)
4695 fprintf (file, "%s%d", sep, offset);
4697 else
4698 output_addr_const (file, x);
4701 void
4702 output_deferred_plabels (file)
4703 FILE *file;
4705 int i;
4706 /* If we have deferred plabels, then we need to switch into the data
4707 section and align it to a 4 byte boundary before we output the
4708 deferred plabels. */
4709 if (n_deferred_plabels)
4711 data_section ();
4712 ASM_OUTPUT_ALIGN (file, 2);
4715 /* Now output the deferred plabels. */
4716 for (i = 0; i < n_deferred_plabels; i++)
4718 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4719 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4720 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4724 /* HP's millicode routines mean something special to the assembler.
4725 Keep track of which ones we have used. */
4727 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
4728 static void import_milli PARAMS ((enum millicodes));
4729 static char imported[(int) end1000];
4730 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
4731 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4732 #define MILLI_START 10
4734 static void
4735 import_milli (code)
4736 enum millicodes code;
4738 char str[sizeof (import_string)];
4740 if (!imported[(int) code])
4742 imported[(int) code] = 1;
4743 strcpy (str, import_string);
4744 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4745 output_asm_insn (str, 0);
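/* For example, import_milli (mulI) copies "mulI" over the "...."
   placeholder at MILLI_START and emits ".IMPORT $$mulI,MILLICODE".  */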
4749 /* The register constraints have put the operands and return value in
4750 the proper registers. */
4752 const char *
4753 output_mul_insn (unsignedp, insn)
4754 int unsignedp ATTRIBUTE_UNUSED;
4755 rtx insn;
4757 import_milli (mulI);
4758 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4761 /* Emit the rtl for doing a division by a constant. */
4763 /* Do magic division millicodes exist for this value? */
4764 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4765 1, 1};
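/* Per the table above, magic divide millicodes exist for the divisors
   3, 5, 6, 7, 9, 10, 12, 14 and 15 (the nonzero entries).  */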
4767 /* We'll use an array to keep track of the magic millicodes and
4768 whether or not we've used them already. [n][0] is signed, [n][1] is
4769 unsigned. */
4771 static int div_milli[16][2];
4773 int
4774 div_operand (op, mode)
4775 rtx op;
4776 enum machine_mode mode;
4778 return (mode == SImode
4779 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4780 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4781 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4784 int
4785 emit_hpdiv_const (operands, unsignedp)
4786 rtx *operands;
4787 int unsignedp;
4789 if (GET_CODE (operands[2]) == CONST_INT
4790 && INTVAL (operands[2]) > 0
4791 && INTVAL (operands[2]) < 16
4792 && magic_milli[INTVAL (operands[2])])
4794 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
4796 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4797 emit
4798 (gen_rtx
4799 (PARALLEL, VOIDmode,
4800 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4801 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4802 SImode,
4803 gen_rtx_REG (SImode, 26),
4804 operands[2])),
4805 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4806 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4807 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4808 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4809 gen_rtx_CLOBBER (VOIDmode, ret))));
4810 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4811 return 1;
4813 return 0;
4816 const char *
4817 output_div_insn (operands, unsignedp, insn)
4818 rtx *operands;
4819 int unsignedp;
4820 rtx insn;
4822 int divisor;
4824 /* If the divisor is a constant, try to use one of the special
4825 opcodes. */
4826 if (GET_CODE (operands[0]) == CONST_INT)
4828 static char buf[100];
4829 divisor = INTVAL (operands[0]);
4830 if (!div_milli[divisor][unsignedp])
4832 div_milli[divisor][unsignedp] = 1;
4833 if (unsignedp)
4834 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4835 else
4836 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4838 if (unsignedp)
4840 sprintf (buf, "$$divU_");
4841 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4842 return output_millicode_call (insn,
4843 gen_rtx_SYMBOL_REF (SImode, buf));
4845 else
4847 sprintf (buf, "$$divI_");
4848 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4849 return output_millicode_call (insn,
4850 gen_rtx_SYMBOL_REF (SImode, buf));
4853 /* Divisor isn't a special constant. */
4854 else
4856 if (unsignedp)
4858 import_milli (divU);
4859 return output_millicode_call (insn,
4860 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4862 else
4864 import_milli (divI);
4865 return output_millicode_call (insn,
4866 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
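/* For instance, a signed division by the constant 7 imports (on first
   use) and calls the $$divI_7 millicode routine, while a division by a
   register operand uses the generic $$divI above.  */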
4871 /* Output a $$rem millicode to do mod. */
4873 const char *
4874 output_mod_insn (unsignedp, insn)
4875 int unsignedp;
4876 rtx insn;
4878 if (unsignedp)
4880 import_milli (remU);
4881 return output_millicode_call (insn,
4882 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4884 else
4886 import_milli (remI);
4887 return output_millicode_call (insn,
4888 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4892 void
4893 output_arg_descriptor (call_insn)
4894 rtx call_insn;
4896 const char *arg_regs[4];
4897 enum machine_mode arg_mode;
4898 rtx link;
4899 int i, output_flag = 0;
4900 int regno;
4902 /* We neither need nor want argument location descriptors for the
4903 64bit runtime environment or the ELF32 environment. */
4904 if (TARGET_64BIT || TARGET_ELF32)
4905 return;
4907 for (i = 0; i < 4; i++)
4908 arg_regs[i] = 0;
4910 /* Specify explicitly that no argument relocations should take place
4911 if using the portable runtime calling conventions. */
4912 if (TARGET_PORTABLE_RUNTIME)
4914 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4915 asm_out_file);
4916 return;
4919 if (GET_CODE (call_insn) != CALL_INSN)
4920 abort ();
4921 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4923 rtx use = XEXP (link, 0);
4925 if (! (GET_CODE (use) == USE
4926 && GET_CODE (XEXP (use, 0)) == REG
4927 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4928 continue;
4930 arg_mode = GET_MODE (XEXP (use, 0));
4931 regno = REGNO (XEXP (use, 0));
4932 if (regno >= 23 && regno <= 26)
4934 arg_regs[26 - regno] = "GR";
4935 if (arg_mode == DImode)
4936 arg_regs[25 - regno] = "GR";
4938 else if (regno >= 32 && regno <= 39)
4940 if (arg_mode == SFmode)
4941 arg_regs[(regno - 32) / 2] = "FR";
4942 else
4944 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4945 arg_regs[(regno - 34) / 2] = "FR";
4946 arg_regs[(regno - 34) / 2 + 1] = "FU";
4947 #else
4948 arg_regs[(regno - 34) / 2] = "FU";
4949 arg_regs[(regno - 34) / 2 + 1] = "FR";
4950 #endif
4954 fputs ("\t.CALL ", asm_out_file);
4955 for (i = 0; i < 4; i++)
4957 if (arg_regs[i])
4959 if (output_flag++)
4960 fputc (',', asm_out_file);
4961 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4964 fputc ('\n', asm_out_file);
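/* Example descriptors (for a hypothetical call): a word argument in %r26
   yields "ARGW0=GR"; a DImode argument in the %r25/%r26 pair (the USE
   names %r25) marks both ARGW0 and ARGW1 as GR.  */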
4967 /* Return the class of any secondary reload register that is needed to
4968 move IN into a register in class CLASS using mode MODE.
4970 Profiling has shown this routine and its descendants account for
4971 a significant amount of compile time (~7%). So it has been
4972 optimized to reduce redundant computations and eliminate useless
4973 function calls.
4975 It might be worthwhile to try and make this a leaf function too. */
4977 enum reg_class
4978 secondary_reload_class (class, mode, in)
4979 enum reg_class class;
4980 enum machine_mode mode;
4981 rtx in;
4983 int regno, is_symbolic;
4985 /* Trying to load a constant into a FP register during PIC code
4986 generation will require %r1 as a scratch register. */
4987 if (flag_pic
4988 && GET_MODE_CLASS (mode) == MODE_INT
4989 && FP_REG_CLASS_P (class)
4990 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4991 return R1_REGS;
4993 /* Profiling showed the PA port spends about 1.3% of its compilation
4994 time in true_regnum from calls inside secondary_reload_class. */
4996 if (GET_CODE (in) == REG)
4998 regno = REGNO (in);
4999 if (regno >= FIRST_PSEUDO_REGISTER)
5000 regno = true_regnum (in);
5002 else if (GET_CODE (in) == SUBREG)
5003 regno = true_regnum (in);
5004 else
5005 regno = -1;
5007 /* If we have something like (mem (mem (...))), we can safely assume the
5008 inner MEM will end up in a general register after reloading, so there's
5009 no need for a secondary reload. */
5010 if (GET_CODE (in) == MEM
5011 && GET_CODE (XEXP (in, 0)) == MEM)
5012 return NO_REGS;
5014 /* Handle out of range displacement for integer mode loads/stores of
5015 FP registers. */
5016 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5017 && GET_MODE_CLASS (mode) == MODE_INT
5018 && FP_REG_CLASS_P (class))
5019 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5020 return GENERAL_REGS;
5022 /* A SAR<->FP register copy requires a secondary register (GPR) as
5023 well as secondary memory. */
5024 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5025 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5026 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5027 return GENERAL_REGS;
5029 if (GET_CODE (in) == HIGH)
5030 in = XEXP (in, 0);
5032 /* Profiling has shown GCC spends about 2.6% of its compilation
5033 time in symbolic_operand from calls inside secondary_reload_class.
5035 We use an inline copy and only compute its return value once to avoid
5036 useless work. */
5037 switch (GET_CODE (in))
5039 rtx tmp;
5041 case SYMBOL_REF:
5042 case LABEL_REF:
5043 is_symbolic = 1;
5044 break;
5045 case CONST:
5046 tmp = XEXP (in, 0);
5047 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5048 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5049 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5050 break;
5052 default:
5053 is_symbolic = 0;
5054 break;
5057 if (!flag_pic
5058 && is_symbolic
5059 && read_only_operand (in, VOIDmode))
5060 return NO_REGS;
5062 if (class != R1_REGS && is_symbolic)
5063 return R1_REGS;
5065 return NO_REGS;
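/* E.g., loading a SYMBOL_REF naming a writable object into GENERAL_REGS
   reaches the R1_REGS return above, since %r1 is the scratch used to
   build the symbolic address.  This is a sketch of the common case, not
   an exhaustive enumeration.  */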
5068 enum direction
5069 function_arg_padding (mode, type)
5070 enum machine_mode mode;
5071 tree type;
5073 int size;
5075 if (mode == BLKmode)
5077 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
5078 size = int_size_in_bytes (type) * BITS_PER_UNIT;
5079 else
5080 return upward; /* Don't know if this is right, but
5081 it's the same as the old definition. */
5083 else
5084 size = GET_MODE_BITSIZE (mode);
5085 if (size < PARM_BOUNDARY)
5086 return downward;
5087 else if (size % PARM_BOUNDARY)
5088 return upward;
5089 else
5090 return none;
5094 /* Do what is necessary for `va_start'. We look at the current function
5095 to determine if stdargs or varargs is used and fill in an initial
5096 va_list. A pointer to this constructor is returned. */
5098 struct rtx_def *
5099 hppa_builtin_saveregs ()
5101 rtx offset, dest;
5102 tree fntype = TREE_TYPE (current_function_decl);
5103 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5104 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5105 != void_type_node)))
5106 ? UNITS_PER_WORD : 0);
5108 if (argadj)
5109 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5110 else
5111 offset = current_function_arg_offset_rtx;
5113 if (TARGET_64BIT)
5115 int i, off;
5117 /* Adjust for varargs/stdarg differences. */
5118 if (argadj)
5119 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5120 else
5121 offset = current_function_arg_offset_rtx;
5123 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5124 from the incoming arg pointer and growing to larger addresses. */
5125 for (i = 26, off = -64; i >= 19; i--, off += 8)
5126 emit_move_insn (gen_rtx_MEM (word_mode,
5127 plus_constant (arg_pointer_rtx, off)),
5128 gen_rtx_REG (word_mode, i));
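  /* So %r26 is stored at -64, %r25 at -56, and so on through %r19 at -8,
     relative to the incoming arg pointer.  */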
5130 /* The incoming args pointer points just beyond the flushback area;
5131 normally this is not a serious concern. However, when we are doing
5132 varargs/stdargs we want to make the arg pointer point to the start
5133 of the incoming argument area. */
5134 emit_move_insn (virtual_incoming_args_rtx,
5135 plus_constant (arg_pointer_rtx, -64));
5137 /* Now return a pointer to the first anonymous argument. */
5138 return copy_to_reg (expand_binop (Pmode, add_optab,
5139 virtual_incoming_args_rtx,
5140 offset, 0, 0, OPTAB_LIB_WIDEN));
5143 /* Store general registers on the stack. */
5144 dest = gen_rtx_MEM (BLKmode,
5145 plus_constant (current_function_internal_arg_pointer,
5146 -16));
5147 set_mem_alias_set (dest, get_varargs_alias_set ());
5148 set_mem_align (dest, BITS_PER_WORD);
5149 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
5151 /* move_block_from_reg will emit code to store the argument registers
5152 individually as scalar stores.
5154 However, other insns may later load from the same addresses for
5155 a structure load (passing a struct to a varargs routine).
5157 The alias code assumes that such aliasing can never happen, so we
5158 have to keep memory referencing insns from moving up beyond the
5159 last argument register store. So we emit a blockage insn here. */
5160 emit_insn (gen_blockage ());
5162 return copy_to_reg (expand_binop (Pmode, add_optab,
5163 current_function_internal_arg_pointer,
5164 offset, 0, 0, OPTAB_LIB_WIDEN));
5167 void
5168 hppa_va_start (stdarg_p, valist, nextarg)
5169 int stdarg_p ATTRIBUTE_UNUSED;
5170 tree valist;
5171 rtx nextarg;
5173 nextarg = expand_builtin_saveregs ();
5174 std_expand_builtin_va_start (1, valist, nextarg);
5177 struct rtx_def *
5178 hppa_va_arg (valist, type)
5179 tree valist, type;
5181 HOST_WIDE_INT align, size, ofs;
5182 tree t, ptr, pptr;
5184 if (TARGET_64BIT)
5186 /* Every argument in PA64 is passed by value (including large structs).
5187 Arguments with size greater than 8 must be aligned 0 MOD 16. */
5189 size = int_size_in_bytes (type);
5190 if (size > UNITS_PER_WORD)
5192 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5193 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5194 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5195 build_int_2 (-2 * UNITS_PER_WORD, -1));
5196 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5197 TREE_SIDE_EFFECTS (t) = 1;
5198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
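      /* E.g., for a hypothetical 24-byte struct, valist is rounded up to
         the next 16-byte boundary, (valist + 15) & -16, before the
         standard expansion below runs.  */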
5200 return std_expand_builtin_va_arg (valist, type);
5203 /* Compute the rounded size of the type. */
5204 align = PARM_BOUNDARY / BITS_PER_UNIT;
5205 size = int_size_in_bytes (type);
5207 ptr = build_pointer_type (type);
5209 /* "Large" types are passed by reference. */
5210 if (size > 8)
5212 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5213 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5214 TREE_SIDE_EFFECTS (t) = 1;
5216 pptr = build_pointer_type (ptr);
5217 t = build1 (NOP_EXPR, pptr, t);
5218 TREE_SIDE_EFFECTS (t) = 1;
5220 t = build1 (INDIRECT_REF, ptr, t);
5221 TREE_SIDE_EFFECTS (t) = 1;
5223 else
5225 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5226 build_int_2 (-size, -1));
5228 /* Copied from va-pa.h, but we probably don't need to align
5229 to word size, since we generate and preserve that invariant. */
5230 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5231 build_int_2 ((size > 4 ? -8 : -4), -1));
5233 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5234 TREE_SIDE_EFFECTS (t) = 1;
5236 ofs = (8 - size) % 4;
5237 if (ofs)
5239 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
5240 TREE_SIDE_EFFECTS (t) = 1;
5243 t = build1 (NOP_EXPR, ptr, t);
5244 TREE_SIDE_EFFECTS (t) = 1;
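      /* E.g., a hypothetical 2-byte argument: valist backs up by 2, is
         rounded down to a 4-byte boundary, and ofs == (8 - 2) % 4 == 2
         then bumps the pointer so the value sits right-justified in its
         word.  */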
5247 /* Calculate! */
5248 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
5253 /* This routine handles all the normal conditional branch sequences we
5254 might need to generate. It handles compare immediate vs compare
5255 register, nullification of delay slots, varying length branches,
5256 negated branches, and all combinations of the above. It returns the
5257 output template appropriate for emitting the branch described by the
5258 given parameters. */
5260 const char *
5261 output_cbranch (operands, nullify, length, negated, insn)
5262 rtx *operands;
5263 int nullify, length, negated;
5264 rtx insn;
5266 static char buf[100];
5267 int useskip = 0;
5269 /* A conditional branch to the following instruction (e.g., the delay slot) is
5270 asking for a disaster. This can happen when not optimizing.
5272 In such cases it is safe to emit nothing. */
5274 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5275 return "";
5277 /* If this is a long branch with its delay slot unfilled, set `nullify'
5278 as it can nullify the delay slot and save a nop. */
5279 if (length == 8 && dbr_sequence_length () == 0)
5280 nullify = 1;
5282 /* If this is a short forward conditional branch which did not get
5283 its delay slot filled, the delay slot can still be nullified. */
5284 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5285 nullify = forward_branch_p (insn);
5287 /* A forward branch over a single nullified insn can be done with a
5288 comclr instruction. This avoids a single cycle penalty due to a
5289 mis-predicted branch if we fall through (branch not taken). */
5290 if (length == 4
5291 && next_real_insn (insn) != 0
5292 && get_attr_length (next_real_insn (insn)) == 4
5293 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5294 && nullify)
5295 useskip = 1;
5297 switch (length)
5299 /* All short conditional branches except backwards with an unfilled
5300 delay slot. */
5301 case 4:
5302 if (useskip)
5303 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5304 else
5305 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5306 if (GET_MODE (operands[1]) == DImode)
5307 strcat (buf, "*");
5308 if (negated)
5309 strcat (buf, "%B3");
5310 else
5311 strcat (buf, "%S3");
5312 if (useskip)
5313 strcat (buf, " %2,%r1,%%r0");
5314 else if (nullify)
5315 strcat (buf, ",n %2,%r1,%0");
5316 else
5317 strcat (buf, " %2,%r1,%0");
5318 break;
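      /* E.g., the nullified, non-negated, word-mode short branch built
         above assembles to the template "{com%I2b,|cmp%I2b,}%S3,n %2,%r1,%0".  */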
5320 /* All long conditionals. Note a short backward branch with an
5321 unfilled delay slot is treated just like a long backward branch
5322 with an unfilled delay slot. */
5323 case 8:
5324 /* Handle weird backwards branch with a filled delay slot
5325 which is nullified. */
5326 if (dbr_sequence_length () != 0
5327 && ! forward_branch_p (insn)
5328 && nullify)
5330 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5331 if (GET_MODE (operands[1]) == DImode)
5332 strcat (buf, "*");
5333 if (negated)
5334 strcat (buf, "%S3");
5335 else
5336 strcat (buf, "%B3");
5337 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5339 /* Handle short backwards branch with an unfilled delay slot.
5340 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5341 taken and untaken branches. */
5342 else if (dbr_sequence_length () == 0
5343 && ! forward_branch_p (insn)
5344 && INSN_ADDRESSES_SET_P ()
5345 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5346 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5348 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5349 if (GET_MODE (operands[1]) == DImode)
5350 strcat (buf, "*");
5351 if (negated)
5352 strcat (buf, "%B3 %2,%r1,%0%#");
5353 else
5354 strcat (buf, "%S3 %2,%r1,%0%#");
5356 else
5358 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5359 if (GET_MODE (operands[1]) == DImode)
5360 strcat (buf, "*");
5361 if (negated)
5362 strcat (buf, "%S3");
5363 else
5364 strcat (buf, "%B3");
5365 if (nullify)
5366 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5367 else
5368 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5370 break;
5372 case 20:
5373 /* Very long branch. Right now we only handle these when not
5374 optimizing. See "jump" pattern in pa.md for details. */
5375 if (optimize)
5376 abort ();
5378 /* Create a reversed conditional branch which branches around
5379 the following insns. */
5380 if (negated)
5381 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5382 else
5383 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5384 if (GET_MODE (operands[1]) == DImode)
5386 if (negated)
5387 strcpy (buf,
5388 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5389 else
5390 strcpy (buf,
5391 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5393 output_asm_insn (buf, operands);
5395 /* Output an insn to save %r1. */
5396 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5398 /* Now output a very long branch to the original target. */
5399 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5401 /* Now restore the value of %r1 in the delay slot. We're not
5402 optimizing so we know nothing else can be in the delay slot. */
5403 return "ldw -16(%%r30),%%r1";
5405 case 28:
5406 /* Very long branch when generating PIC code. Right now we only
5407 handle these when not optimizing. See "jump" pattern in pa.md
5408 for details. */
5409 if (optimize)
5410 abort ();
5412 /* Create a reversed conditional branch which branches around
5413 the following insns. */
5414 if (negated)
5415 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5416 else
5417 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5418 if (GET_MODE (operands[1]) == DImode)
5420 if (negated)
5421 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5422 else
5423 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5425 output_asm_insn (buf, operands);
5427 /* Output an insn to save %r1. */
5428 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5430 /* Now output a very long PIC branch to the original target. */
5432 rtx xoperands[5];
5434 xoperands[0] = operands[0];
5435 xoperands[1] = operands[1];
5436 xoperands[2] = operands[2];
5437 xoperands[3] = operands[3];
5438 if (TARGET_SOM || ! TARGET_GAS)
5439 xoperands[4] = gen_label_rtx ();
5441 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5442 if (TARGET_SOM || ! TARGET_GAS)
5444 output_asm_insn ("addil L'%l0-%l4,%%r1", xoperands);
5445 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5446 CODE_LABEL_NUMBER (xoperands[4]));
5447 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1", xoperands);
5449 else
5451 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5452 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1",
5453 xoperands);
5455 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5458 /* Now restore the value of %r1 in the delay slot. We're not
5459 optimizing so we know nothing else can be in the delay slot. */
5460 return "ldw -16(%%r30),%%r1";
5462 default:
5463 abort ();
5465 return buf;
5468 /* This routine handles all the branch-on-bit conditional branch sequences we
5469 might need to generate. It handles nullification of delay slots,
5470 varying length branches, negated branches and all combinations of the
5471 above. It returns the appropriate output template to emit the branch. */
5473 const char *
5474 output_bb (operands, nullify, length, negated, insn, which)
5475 rtx *operands ATTRIBUTE_UNUSED;
5476 int nullify, length, negated;
5477 rtx insn;
5478 int which;
5480 static char buf[100];
5481 int useskip = 0;
5483 /* A conditional branch to the following instruction (e.g., the delay slot) is
5484 asking for a disaster. I do not think this can happen as this pattern
5485 is only used when optimizing; jump optimization should eliminate the
5486 jump. But be prepared just in case. */
5488 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5489 return "";
5491 /* If this is a long branch with its delay slot unfilled, set `nullify'
5492 as it can nullify the delay slot and save a nop. */
5493 if (length == 8 && dbr_sequence_length () == 0)
5494 nullify = 1;
5496 /* If this is a short forward conditional branch which did not get
5497 its delay slot filled, the delay slot can still be nullified. */
5498 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5499 nullify = forward_branch_p (insn);
5501 /* A forward branch over a single nullified insn can be done with an
5502 extrs instruction. This avoids a single cycle penalty due to a
5503 mis-predicted branch if we fall through (branch not taken). */
5505 if (length == 4
5506 && next_real_insn (insn) != 0
5507 && get_attr_length (next_real_insn (insn)) == 4
5508 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5509 && nullify)
5510 useskip = 1;
5512 switch (length)
5515 /* All short conditional branches except backwards with an unfilled
5516 delay slot. */
5517 case 4:
5518 if (useskip)
5519 strcpy (buf, "{extrs,|extrw,s,}");
5520 else
5521 strcpy (buf, "bb,");
5522 if (useskip && GET_MODE (operands[0]) == DImode)
5523 strcpy (buf, "extrd,s,*");
5524 else if (GET_MODE (operands[0]) == DImode)
5525 strcpy (buf, "bb,*");
5526 if ((which == 0 && negated)
5527 || (which == 1 && ! negated))
5528 strcat (buf, ">=");
5529 else
5530 strcat (buf, "<");
5531 if (useskip)
5532 strcat (buf, " %0,%1,1,%%r0");
5533 else if (nullify && negated)
5534 strcat (buf, ",n %0,%1,%3");
5535 else if (nullify && ! negated)
5536 strcat (buf, ",n %0,%1,%2");
5537 else if (! nullify && negated)
5538 strcat (buf, "%0,%1,%3");
5539 else if (! nullify && ! negated)
5540 strcat (buf, " %0,%1,%2");
5541 break;
5543 /* All long conditionals. Note a short backward branch with an
5544 unfilled delay slot is treated just like a long backward branch
5545 with an unfilled delay slot. */
5546 case 8:
5547 /* Handle weird backwards branch with a filled delay slot
5548 which is nullified. */
5549 if (dbr_sequence_length () != 0
5550 && ! forward_branch_p (insn)
5551 && nullify)
5553 strcpy (buf, "bb,");
5554 if (GET_MODE (operands[0]) == DImode)
5555 strcat (buf, "*");
5556 if ((which == 0 && negated)
5557 || (which == 1 && ! negated))
5558 strcat (buf, "<");
5559 else
5560 strcat (buf, ">=");
5561 if (negated)
5562 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5563 else
5564 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5566 /* Handle short backwards branch with an unfilled delay slot.
5567 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5568 taken and untaken branches. */
5569 else if (dbr_sequence_length () == 0
5570 && ! forward_branch_p (insn)
5571 && INSN_ADDRESSES_SET_P ()
5572 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5573 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5575 strcpy (buf, "bb,");
5576 if (GET_MODE (operands[0]) == DImode)
5577 strcat (buf, "*");
5578 if ((which == 0 && negated)
5579 || (which == 1 && ! negated))
5580 strcat (buf, ">=");
5581 else
5582 strcat (buf, "<");
5583 if (negated)
5584 strcat (buf, " %0,%1,%3%#");
5585 else
5586 strcat (buf, " %0,%1,%2%#");
5588 else
5590 strcpy (buf, "{extrs,|extrw,s,}");
5591 if (GET_MODE (operands[0]) == DImode)
5592 strcpy (buf, "extrd,s,*");
5593 if ((which == 0 && negated)
5594 || (which == 1 && ! negated))
5595 strcat (buf, "<");
5596 else
5597 strcat (buf, ">=");
5598 if (nullify && negated)
5599 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5600 else if (nullify && ! negated)
5601 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5602 else if (negated)
5603 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5604 else
5605 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5607 break;
5609 default:
5610 abort ();
5612 return buf;
5615 /* This routine handles all the branch-on-variable-bit conditional branch
5616 sequences we might need to generate. It handles nullification of delay
5617 slots, varying length branches, negated branches and all combinations
5618 of the above. It returns the appropriate output template to emit the
5619 branch. */
5621 const char *
5622 output_bvb (operands, nullify, length, negated, insn, which)
5623 rtx *operands ATTRIBUTE_UNUSED;
5624 int nullify, length, negated;
5625 rtx insn;
5626 int which;
5628 static char buf[100];
5629 int useskip = 0;
5631 /* A conditional branch to the following instruction (e.g., the delay slot) is
5632 asking for a disaster. I do not think this can happen as this pattern
5633 is only used when optimizing; jump optimization should eliminate the
5634 jump. But be prepared just in case. */
5636 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5637 return "";
5639 /* If this is a long branch with its delay slot unfilled, set `nullify'
5640 as it can nullify the delay slot and save a nop. */
5641 if (length == 8 && dbr_sequence_length () == 0)
5642 nullify = 1;
5644 /* If this is a short forward conditional branch which did not get
5645 its delay slot filled, the delay slot can still be nullified. */
5646 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5647 nullify = forward_branch_p (insn);
5649 /* A forward branch over a single nullified insn can be done with an
5650 extrs instruction. This avoids a single cycle penalty due to a
5651 mis-predicted branch if we fall through (branch not taken). */
5653 if (length == 4
5654 && next_real_insn (insn) != 0
5655 && get_attr_length (next_real_insn (insn)) == 4
5656 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5657 && nullify)
5658 useskip = 1;
5660 switch (length)
5663 /* All short conditional branches except backwards with an unfilled
5664 delay slot. */
5665 case 4:
5666 if (useskip)
5667 strcpy (buf, "{vextrs,|extrw,s,}");
5668 else
5669 strcpy (buf, "{bvb,|bb,}");
5670 if (useskip && GET_MODE (operands[0]) == DImode)
5671 strcpy (buf, "extrd,s,*");
5672 else if (GET_MODE (operands[0]) == DImode)
5673 strcpy (buf, "bb,*");
5674 if ((which == 0 && negated)
5675 || (which == 1 && ! negated))
5676 strcat (buf, ">=");
5677 else
5678 strcat (buf, "<");
5679 if (useskip)
5680 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5681 else if (nullify && negated)
5682 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5683 else if (nullify && ! negated)
5684 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5685 else if (! nullify && negated)
5686 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5687 else if (! nullify && ! negated)
5688 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5689 break;
5691 /* All long conditionals. Note a short backward branch with an
5692 unfilled delay slot is treated just like a long backward branch
5693 with an unfilled delay slot. */
5694 case 8:
5695 /* Handle weird backwards branch with a filled delay slot
5696 which is nullified. */
5697 if (dbr_sequence_length () != 0
5698 && ! forward_branch_p (insn)
5699 && nullify)
5701 strcpy (buf, "{bvb,|bb,}");
5702 if (GET_MODE (operands[0]) == DImode)
5703 strcat (buf, "*");
5704 if ((which == 0 && negated)
5705 || (which == 1 && ! negated))
5706 strcat (buf, "<");
5707 else
5708 strcat (buf, ">=");
5709 if (negated)
5710 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5711 else
5712 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5714 /* Handle short backwards branch with an unfilled delay slot.
5715 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5716 taken and untaken branches. */
5717 else if (dbr_sequence_length () == 0
5718 && ! forward_branch_p (insn)
5719 && INSN_ADDRESSES_SET_P ()
5720 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5721 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5723 strcpy (buf, "{bvb,|bb,}");
5724 if (GET_MODE (operands[0]) == DImode)
5725 strcat (buf, "*");
5726 if ((which == 0 && negated)
5727 || (which == 1 && ! negated))
5728 strcat (buf, ">=");
5729 else
5730 strcat (buf, "<");
5731 if (negated)
5732 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5733 else
5734 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5736 else
5738 strcpy (buf, "{vextrs,|extrw,s,}");
5739 if (GET_MODE (operands[0]) == DImode)
5740 strcpy (buf, "extrd,s,*");
5741 if ((which == 0 && negated)
5742 || (which == 1 && ! negated))
5743 strcat (buf, "<");
5744 else
5745 strcat (buf, ">=");
5746 if (nullify && negated)
5747 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5748 else if (nullify && ! negated)
5749 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5750 else if (negated)
5751 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5752 else
5753 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5755 break;
5757 default:
5758 abort ();
5760 return buf;
5763 /* Return the output template for emitting a dbra type insn.
5765 Note it may perform some output operations on its own before
5766 returning the final output string. */
5767 const char *
5768 output_dbra (operands, insn, which_alternative)
5769 rtx *operands;
5770 rtx insn;
5771 int which_alternative;
5774 /* A conditional branch to the following instruction (e.g., the delay slot) is
5775 asking for a disaster. Be prepared! */
5777 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5779 if (which_alternative == 0)
5780 return "ldo %1(%0),%0";
5781 else if (which_alternative == 1)
5783 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
5784 output_asm_insn ("ldw -16(%%r30),%4", operands);
5785 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5786 return "{fldws|fldw} -16(%%r30),%0";
5788 else
5790 output_asm_insn ("ldw %0,%4", operands);
5791 return "ldo %1(%4),%4\n\tstw %4,%0";
5795 if (which_alternative == 0)
5797 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5798 int length = get_attr_length (insn);
5800 /* If this is a long branch with its delay slot unfilled, set `nullify'
5801 as it can nullify the delay slot and save a nop. */
5802 if (length == 8 && dbr_sequence_length () == 0)
5803 nullify = 1;
5805 /* If this is a short forward conditional branch which did not get
5806 its delay slot filled, the delay slot can still be nullified. */
5807 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5808 nullify = forward_branch_p (insn);
5810 /* Handle short versions first. */
5811 if (length == 4 && nullify)
5812 return "addib,%C2,n %1,%0,%3";
5813 else if (length == 4 && ! nullify)
5814 return "addib,%C2 %1,%0,%3";
5815 else if (length == 8)
5817 /* Handle weird backwards branch with a filled delay slot
5818 which is nullified. */
5819 if (dbr_sequence_length () != 0
5820 && ! forward_branch_p (insn)
5821 && nullify)
5822 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5823 /* Handle short backwards branch with an unfilled delay slot.
5824 Using a addb;nop rather than addi;bl saves 1 cycle for both
5825 taken and untaken branches. */
5826 else if (dbr_sequence_length () == 0
5827 && ! forward_branch_p (insn)
5828 && INSN_ADDRESSES_SET_P ()
5829 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5830 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5831 return "addib,%C2 %1,%0,%3%#";
5833 /* Handle normal cases. */
5834 if (nullify)
5835 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5836 else
5837 return "addi,%N2 %1,%0,%0\n\tb %3";
5839 else
5840 abort ();
5842 /* Deal with gross reload from FP register case. */
5843 else if (which_alternative == 1)
5845 /* Move loop counter from FP register to MEM then into a GR,
5846 increment the GR, store the GR into MEM, and finally reload
5847 the FP register from MEM from within the branch's delay slot. */
5848 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
5849 operands);
5850 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5851 if (get_attr_length (insn) == 24)
5852 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5853 else
5854 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5856 /* Deal with gross reload from memory case. */
5857 else
5859 /* Reload loop counter from memory, the store back to memory
5860 happens in the branch's delay slot. */
5861 output_asm_insn ("ldw %0,%4", operands);
5862 if (get_attr_length (insn) == 12)
5863 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5864 else
5865 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5869 /* Return the output template for emitting a movb type insn.
5871 Note it may perform some output operations on its own before
5872 returning the final output string. */
5873 const char *
5874 output_movb (operands, insn, which_alternative, reverse_comparison)
5875 rtx *operands;
5876 rtx insn;
5877 int which_alternative;
5878 int reverse_comparison;
5881 /* A conditional branch to the following instruction (e.g., the delay slot) is
5882 asking for a disaster. Be prepared! */
5884 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5886 if (which_alternative == 0)
5887 return "copy %1,%0";
5888 else if (which_alternative == 1)
5890 output_asm_insn ("stw %1,-16(%%r30)", operands);
5891 return "{fldws|fldw} -16(%%r30),%0";
5893 else if (which_alternative == 2)
5894 return "stw %1,%0";
5895 else
5896 return "mtsar %r1";
5899 /* Support the second variant. */
5900 if (reverse_comparison)
5901 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5903 if (which_alternative == 0)
5905 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5906 int length = get_attr_length (insn);
5908 /* If this is a long branch with its delay slot unfilled, set `nullify'
5909 as it can nullify the delay slot and save a nop. */
5910 if (length == 8 && dbr_sequence_length () == 0)
5911 nullify = 1;
5913 /* If this is a short forward conditional branch which did not get
5914 its delay slot filled, the delay slot can still be nullified. */
5915 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5916 nullify = forward_branch_p (insn);
5918 /* Handle short versions first. */
5919 if (length == 4 && nullify)
5920 return "movb,%C2,n %1,%0,%3";
5921 else if (length == 4 && ! nullify)
5922 return "movb,%C2 %1,%0,%3";
5923 else if (length == 8)
5925 /* Handle weird backwards branch with a filled delay slot
5926 which is nullified. */
5927 if (dbr_sequence_length () != 0
5928 && ! forward_branch_p (insn)
5929 && nullify)
5930 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5932 /* Handle short backwards branch with an unfilled delay slot.
5933 Using a movb;nop rather than an or;bl saves 1 cycle for both
5934 taken and untaken branches. */
5935 else if (dbr_sequence_length () == 0
5936 && ! forward_branch_p (insn)
5937 && INSN_ADDRESSES_SET_P ()
5938 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5939 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5940 return "movb,%C2 %1,%0,%3%#";
5941 /* Handle normal cases. */
5942 if (nullify)
5943 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5944 else
5945 return "or,%N2 %1,%%r0,%0\n\tb %3";
5947 else
5948 abort ();
5950 /* Deal with gross reload from FP register case. */
5951 else if (which_alternative == 1)
5953 /* Move loop counter from FP register to MEM then into a GR,
5954 increment the GR, store the GR into MEM, and finally reload
5955 the FP register from MEM within the branch's delay slot. */
5956 output_asm_insn ("stw %1,-16(%%r30)", operands);
5957 if (get_attr_length (insn) == 12)
5958 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5959 else
5960 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5962 /* Deal with gross reload from memory case. */
5963 else if (which_alternative == 2)
5965 /* Reload the loop counter from memory; the store back to memory
5966 happens in the branch's delay slot. */
5967 if (get_attr_length (insn) == 8)
5968 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5969 else
5970 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5972 /* Handle SAR as a destination. */
5973 else
5975 if (get_attr_length (insn) == 8)
5976 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
5977 else
5978 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
5983 /* INSN is a millicode call. It may have an unconditional jump in its delay
5984 slot.
5986 CALL_DEST is the routine we are calling. */
5988 const char *
5989 output_millicode_call (insn, call_dest)
5990 rtx insn;
5991 rtx call_dest;
5993 int attr_length = get_attr_length (insn);
5994 int seq_length = dbr_sequence_length ();
5995 int distance;
5996 rtx xoperands[4];
5997 rtx seq_insn;
5999 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6001 /* Handle common case -- empty delay slot or no jump in the delay slot,
6002 and we're sure that the branch will reach the beginning of the $CODE$
6003 subspace. The within-reach form of the $$sh_func_adrs call has
6004 a length of 28 and an attribute type of multi. This length is the
6005 same as the maximum length of an out-of-reach PIC call to $$div. */
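   /* An attr_length of 8 is the short form: the {bl|b,l} plus an
      unfilled (nop) delay slot.  With a filled delay slot only the
      4-byte call insn itself counts, hence the check for 4.  */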
6006 if ((seq_length == 0
6007 && (attr_length == 8
6008 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6009 || (seq_length != 0
6010 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6011 && attr_length == 4))
6013 xoperands[0] = call_dest;
6014 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
6015 return "";
6018 /* This call may not reach the beginning of the $CODE$ subspace. */
6019 if (attr_length > 8)
6021 int delay_insn_deleted = 0;
6023 /* We need to emit an inline long-call branch. */
6024 if (seq_length != 0
6025 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6027 /* A non-jump insn in the delay slot. By definition we can
6028 emit this insn before the call. */
6029 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6031 /* Now delete the delay insn. */
6032 PUT_CODE (NEXT_INSN (insn), NOTE);
6033 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6034 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6035 delay_insn_deleted = 1;
6038 /* PIC long millicode call sequence. */
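	  /* For SOM (or a non-GAS assembler) the sequence emitted
	     below is roughly:

			bl	.+8,%r1
			addil	L%target-L$nn,%r1
		L$nn:	ldo	R%target-L$nn(%r1),%r1
			blr	0,%r31		; %r2 for TARGET_64BIT
			bv,n	%r0(%r1)
			nop

	     with %r1 ending up holding the address of L$nn.  */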
6039 if (flag_pic)
6041 xoperands[0] = call_dest;
6042 if (TARGET_SOM || ! TARGET_GAS)
6043 xoperands[1] = gen_label_rtx ();
6045 /* Get our address + 8 into %r1. */
6046 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6048 if (TARGET_SOM || ! TARGET_GAS)
6050 /* Add %r1 to the offset of our target from the next insn. */
6051 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
6052 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6053 CODE_LABEL_NUMBER (xoperands[1]));
6054 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
6056 else
6058 output_asm_insn ("addil L%%%0-$PIC_pcrel$0+4,%%r1", xoperands);
6059 output_asm_insn ("ldo R%%%0-$PIC_pcrel$0+8(%%r1),%%r1",
6060 xoperands);
6063 /* Get the return address into %r31. */
6064 output_asm_insn ("blr 0,%3", xoperands);
6066 /* Branch to our target which is in %r1. */
6067 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
6069 /* Empty delay slot. Note this insn gets fetched twice and
6070 executed once. To be safe we use a nop. */
6071 output_asm_insn ("nop", xoperands);
6073 /* Pure portable runtime doesn't allow be/ble; we also don't have
6074 PIC support in the assembler/linker, so this sequence is needed. */
6075 else if (TARGET_PORTABLE_RUNTIME)
6077 xoperands[0] = call_dest;
6078 /* Get the address of our target into %r29. */
6079 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
6080 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
6082 /* Get our return address into %r31. */
6083 output_asm_insn ("blr %%r0,%3", xoperands);
6085 /* Jump to our target address in %r29. */
6086 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
6088 /* Empty delay slot. Note this insn gets fetched twice and
6089 executed once. To be safe we use a nop. */
6090 output_asm_insn ("nop", xoperands);
6092 /* If we're allowed to use be/ble instructions, then this is the
6093 best sequence to use for a long millicode call. */
6094 else
6096 xoperands[0] = call_dest;
6097 output_asm_insn ("ldil L%%%0,%3", xoperands);
6098 output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
6099 output_asm_insn ("nop", xoperands);
6102 /* If we had a jump in the call's delay slot, output it now. */
6103 if (seq_length != 0 && !delay_insn_deleted)
6105 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6106 output_asm_insn ("b,n %0", xoperands);
6108 /* Now delete the delay insn. */
6109 PUT_CODE (NEXT_INSN (insn), NOTE);
6110 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6111 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6113 return "";
6116 /* This call has an unconditional jump in its delay slot and the
6117 call is known to reach its target or the beginning of the current
6118 subspace. */
6120 /* Use the containing sequence insn's address. */
6121 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6123 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6124 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
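  /* The trailing -8 accounts for the PA branch offset convention:
     pc-relative displacements are measured from the address of the
     branch plus 8.  */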
6126 /* If the branch was too far away, emit a normal call followed
6127 by a nop, followed by the unconditional branch.
6129 If the branch is close, then adjust %r2 from within the
6130 call's delay slot. */
6132 xoperands[0] = call_dest;
6133 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6134 if (! VAL_14_BITS_P (distance))
6135 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
6136 else
6138 xoperands[2] = gen_label_rtx ();
6139 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
6140 xoperands);
6141 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6142 CODE_LABEL_NUMBER (xoperands[2]));
6145 /* Delete the jump. */
6146 PUT_CODE (NEXT_INSN (insn), NOTE);
6147 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6148 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6149 return "";
6152 extern struct obstack permanent_obstack;
6154 /* INSN is a function call. It may have an unconditional jump
6155 in its delay slot.
6157 CALL_DEST is the routine we are calling. */
6159 const char *
6160 output_call (insn, call_dest, sibcall)
6161 rtx insn;
6162 rtx call_dest;
6163 int sibcall;
6165 int attr_length = get_attr_length (insn);
6166 int seq_length = dbr_sequence_length ();
6167 int distance;
6168 rtx xoperands[4];
6169 rtx seq_insn;
6171 /* Handle common case -- empty delay slot or no jump in the delay slot,
6172 and we're sure that the branch will reach the beginning of the $CODE$
6173 subspace. */
6174 if ((seq_length == 0 && attr_length == 12)
6175 || (seq_length != 0
6176 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6177 && attr_length == 8))
6179 xoperands[0] = call_dest;
6180 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6181 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
6182 return "";
6185 /* This call may not reach the beginning of the $CODE$ subspace. */
6186 if (attr_length > 12)
6188 int delay_insn_deleted = 0;
6189 rtx xoperands[2];
6190 rtx link;
6192 /* We need to emit an inline long-call branch. Furthermore,
6193 because we're changing a named function call into an indirect
6194 function call well after the parameters have been set up, we
6195 need to make sure any FP args appear in both the integer
6196 and FP registers. Also, we need to move any delay slot insn
6197 out of the delay slot. And finally, we can't rely on the linker
6198 being able to fix the call to $$dyncall! -- Yuk! */
6199 if (seq_length != 0
6200 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6202 /* A non-jump insn in the delay slot. By definition we can
6203 emit this insn before the call (and in fact before argument
6204 relocation). */
6205 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6207 /* Now delete the delay insn. */
6208 PUT_CODE (NEXT_INSN (insn), NOTE);
6209 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6210 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6211 delay_insn_deleted = 1;
6214 /* Now copy any FP arguments into integer registers. */
6215 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6217 int arg_mode, regno;
6218 rtx use = XEXP (link, 0);
6219 if (! (GET_CODE (use) == USE
6220 && GET_CODE (XEXP (use, 0)) == REG
6221 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6222 continue;
6224 arg_mode = GET_MODE (XEXP (use, 0));
6225 regno = REGNO (XEXP (use, 0));
6226 /* Is it a floating point register? */
6227 if (regno >= 32 && regno <= 39)
6229 /* Copy from the FP register into an integer register
6230 (via memory). */
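	      /* For example, an SFmode argument in FP regno 34 is
		 copied into %r25 (26 - (34 - 32) / 2 == 25).  */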
6231 if (arg_mode == SFmode)
6233 xoperands[0] = XEXP (use, 0);
6234 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6235 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
6236 xoperands);
6237 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6239 else
6241 xoperands[0] = XEXP (use, 0);
6242 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6243 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
6244 xoperands);
6245 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6246 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6251 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
6252 we don't have any direct calls in that case. */
6254 int i;
6255 const char *name = XSTR (call_dest, 0);
6257 /* See if we have already put this function on the list
6258 of deferred plabels. This list is generally small,
6259 so a linear search is not too ugly. If it proves too
6260 slow replace it with something faster. */
6261 for (i = 0; i < n_deferred_plabels; i++)
6262 if (strcmp (name, deferred_plabels[i].name) == 0)
6263 break;
6265 /* If the deferred plabel list is empty, or this entry was
6266 not found on the list, create a new entry on the list. */
6267 if (deferred_plabels == NULL || i == n_deferred_plabels)
6269 const char *real_name;
6271 if (deferred_plabels == 0)
6272 deferred_plabels = (struct deferred_plabel *)
6273 xmalloc (1 * sizeof (struct deferred_plabel));
6274 else
6275 deferred_plabels = (struct deferred_plabel *)
6276 xrealloc (deferred_plabels,
6277 ((n_deferred_plabels + 1)
6278 * sizeof (struct deferred_plabel)));
6280 i = n_deferred_plabels++;
6281 deferred_plabels[i].internal_label = gen_label_rtx ();
6282 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
6283 strlen (name) + 1);
6284 strcpy (deferred_plabels[i].name, name);
6286 /* Gross. We have just implicitly taken the address of this
6287 function; mark it as such. */
6288 STRIP_NAME_ENCODING (real_name, name);
6289 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
6292 /* We have to load the address of the function using a procedure
6293 label (plabel). Inline plabels can lose for PIC and other
6294 cases, so avoid them by creating a 32bit plabel in the data
6295 segment. */
6296 if (flag_pic)
6298 xoperands[0] = deferred_plabels[i].internal_label;
6299 if (TARGET_SOM || ! TARGET_GAS)
6300 xoperands[1] = gen_label_rtx ();
6302 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
6303 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
6304 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
6306 /* Get our address + 8 into %r1. */
6307 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6309 if (TARGET_SOM || ! TARGET_GAS)
6311 /* Add %r1 to the offset of dyncall from the next insn. */
6312 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
6313 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6314 CODE_LABEL_NUMBER (xoperands[1]));
6315 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
6317 else
6319 output_asm_insn ("addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1",
6320 xoperands);
6321 output_asm_insn ("ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
6322 xoperands);
6325 /* Get the return address into %r31. */
6326 output_asm_insn ("blr %%r0,%%r31", xoperands);
6328 /* Branch to our target which is in %r1. */
6329 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6331 if (sibcall)
6333 /* This call never returns, so we do not need to fix the
6334 return pointer. */
6335 output_asm_insn ("nop", xoperands);
6337 else
6339 /* Copy the return address into %r2 also. */
6340 output_asm_insn ("copy %%r31,%%r2", xoperands);
6343 else
6345 xoperands[0] = deferred_plabels[i].internal_label;
6347 /* Get the address of our target into %r22. */
6348 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
6349 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
6351 /* Get the high part of the address of $dyncall into %r2, then
6352 add in the low part in the branch instruction. */
6353 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
6354 output_asm_insn ("{ble|be,l} R%%$$dyncall(%%sr4,%%r2)",
6355 xoperands);
6357 if (sibcall)
6359 /* This call never returns, so we do not need to fix the
6360 return pointer. */
6361 output_asm_insn ("nop", xoperands);
6363 else
6365 /* Copy the return address into %r2 also. */
6366 output_asm_insn ("copy %%r31,%%r2", xoperands);
6371 /* If we had a jump in the call's delay slot, output it now. */
6372 if (seq_length != 0 && !delay_insn_deleted)
6374 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6375 output_asm_insn ("b,n %0", xoperands);
6377 /* Now delete the delay insn. */
6378 PUT_CODE (NEXT_INSN (insn), NOTE);
6379 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6380 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6382 return "";
6385 /* This call has an unconditional jump in its delay slot and the
6386 call is known to reach its target or the beginning of the current
6387 subspace. */
6389 /* Use the containing sequence insn's address. */
6390 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6392 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6393 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6395 /* If the branch is too far away, emit a normal call followed
6396 by a nop, followed by the unconditional branch. If the branch
6397 is close, then adjust %r2 in the call's delay slot. */
6399 xoperands[0] = call_dest;
6400 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6401 if (! VAL_14_BITS_P (distance))
6402 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
6403 else
6405 xoperands[3] = gen_label_rtx ();
6406 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
6407 xoperands);
6408 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6409 CODE_LABEL_NUMBER (xoperands[3]));
6412 /* Delete the jump. */
6413 PUT_CODE (NEXT_INSN (insn), NOTE);
6414 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6415 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6416 return "";
6419 /* In HPUX 8.0's shared library scheme, special relocations are needed
6420 for function labels if they might be passed to a function
6421 in a shared library (because shared libraries don't live in code
6422 space), and special magic is needed to construct their address. */
6424 void
6425 hppa_encode_label (sym)
6426 rtx sym;
6428 const char *str = XSTR (sym, 0);
6429 int len = strlen (str) + 1;
6430 char *newstr, *p;
6432 p = newstr = alloca (len + 1);
6433 if (str[0] == '*')
6435 str++;
6436 len--;
6438 *p++ = '@';
6439 strcpy (p, str);
6441 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
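  /* Thus the label "foo" becomes "@foo"; FUNCTION_NAME_P recognizes
     function labels by this '@' prefix.  */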
6444 static void
6445 pa_encode_section_info (decl, first)
6446 tree decl;
6447 int first;
6449 if (first && TEXT_SPACE_P (decl))
6451 rtx rtl;
6452 if (TREE_CODE (decl) == FUNCTION_DECL
6453 || TREE_CODE (decl) == VAR_DECL)
6454 rtl = DECL_RTL (decl);
6455 else
6456 rtl = TREE_CST_RTL (decl);
6457 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
6458 if (TREE_CODE (decl) == FUNCTION_DECL)
6459 hppa_encode_label (XEXP (DECL_RTL (decl), 0));
6464 function_label_operand (op, mode)
6465 rtx op;
6466 enum machine_mode mode ATTRIBUTE_UNUSED;
6468 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6471 /* Returns 1 if OP is a function label involved in a simple addition
6472 with a constant. Used to keep certain patterns from matching
6473 during instruction combination. */
6475 is_function_label_plus_const (op)
6476 rtx op;
6478 /* Strip off any CONST. */
6479 if (GET_CODE (op) == CONST)
6480 op = XEXP (op, 0);
6482 return (GET_CODE (op) == PLUS
6483 && function_label_operand (XEXP (op, 0), Pmode)
6484 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6487 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6488 use in fmpyadd instructions. */
6490 fmpyaddoperands (operands)
6491 rtx *operands;
6493 enum machine_mode mode = GET_MODE (operands[0]);
6495 /* Must be a floating point mode. */
6496 if (mode != SFmode && mode != DFmode)
6497 return 0;
6499 /* All modes must be the same. */
6500 if (! (mode == GET_MODE (operands[1])
6501 && mode == GET_MODE (operands[2])
6502 && mode == GET_MODE (operands[3])
6503 && mode == GET_MODE (operands[4])
6504 && mode == GET_MODE (operands[5])))
6505 return 0;
6507 /* All operands must be registers. */
6508 if (! (GET_CODE (operands[1]) == REG
6509 && GET_CODE (operands[2]) == REG
6510 && GET_CODE (operands[3]) == REG
6511 && GET_CODE (operands[4]) == REG
6512 && GET_CODE (operands[5]) == REG))
6513 return 0;
6515 /* Only 2 real operands to the addition. One of the input operands must
6516 be the same as the output operand. */
6517 if (! rtx_equal_p (operands[3], operands[4])
6518 && ! rtx_equal_p (operands[3], operands[5]))
6519 return 0;
6521 /* The inout operand of the add can not conflict with any operand of the multiply. */
6522 if (rtx_equal_p (operands[3], operands[0])
6523 || rtx_equal_p (operands[3], operands[1])
6524 || rtx_equal_p (operands[3], operands[2]))
6525 return 0;
6527 /* The multiply result can not feed into the addition operands. */
6528 if (rtx_equal_p (operands[4], operands[0])
6529 || rtx_equal_p (operands[5], operands[0]))
6530 return 0;
6532 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6533 if (mode == SFmode
6534 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6535 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6536 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6537 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6538 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6539 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6540 return 0;
6542 /* Passed. Operands are suitable for fmpyadd. */
6543 return 1;
6546 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6547 use in fmpysub instructions. */
6549 fmpysuboperands (operands)
6550 rtx *operands;
6552 enum machine_mode mode = GET_MODE (operands[0]);
6554 /* Must be a floating point mode. */
6555 if (mode != SFmode && mode != DFmode)
6556 return 0;
6558 /* All modes must be the same. */
6559 if (! (mode == GET_MODE (operands[1])
6560 && mode == GET_MODE (operands[2])
6561 && mode == GET_MODE (operands[3])
6562 && mode == GET_MODE (operands[4])
6563 && mode == GET_MODE (operands[5])))
6564 return 0;
6566 /* All operands must be registers. */
6567 if (! (GET_CODE (operands[1]) == REG
6568 && GET_CODE (operands[2]) == REG
6569 && GET_CODE (operands[3]) == REG
6570 && GET_CODE (operands[4]) == REG
6571 && GET_CODE (operands[5]) == REG))
6572 return 0;
6574 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6575 operation, so operands[4] must be the same as operands[3]. */
6576 if (! rtx_equal_p (operands[3], operands[4]))
6577 return 0;
6579 /* The multiply result can not feed into the subtraction. */
6580 if (rtx_equal_p (operands[5], operands[0]))
6581 return 0;
6583 /* Inout operand of sub can not conflict with any operands from multiply. */
6584 if (rtx_equal_p (operands[3], operands[0])
6585 || rtx_equal_p (operands[3], operands[1])
6586 || rtx_equal_p (operands[3], operands[2]))
6587 return 0;
6589 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6590 if (mode == SFmode
6591 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6592 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6593 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6594 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6595 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6596 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6597 return 0;
6599 /* Passed. Operands are suitable for fmpysub. */
6600 return 1;
6604 plus_xor_ior_operator (op, mode)
6605 rtx op;
6606 enum machine_mode mode ATTRIBUTE_UNUSED;
6608 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6609 || GET_CODE (op) == IOR);
6612 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
6613 constants for shadd instructions. */
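/* For example, x + 4 * y can be computed with a single sh2add insn.  */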
6614 static int
6615 shadd_constant_p (val)
6616 int val;
6618 if (val == 2 || val == 4 || val == 8)
6619 return 1;
6620 else
6621 return 0;
6624 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6625 the valid constants for shadd instructions. */
6627 shadd_operand (op, mode)
6628 rtx op;
6629 enum machine_mode mode ATTRIBUTE_UNUSED;
6631 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6634 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6637 basereg_operand (op, mode)
6638 rtx op;
6639 enum machine_mode mode;
6641 /* cse will create some unscaled indexed addresses; however, it
6642 generally isn't a win on the PA, so avoid creating unscaled
6643 indexed addresses until after cse is finished. */
6644 if (!cse_not_expected)
6645 return 0;
6647 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6648 we don't have to worry about the braindamaged implicit space
6649 register selection from the basereg. */
6650 if (TARGET_NO_SPACE_REGS)
6651 return (GET_CODE (op) == REG);
6653 /* While it's always safe to index off the frame pointer, it's not
6654 always profitable, particularly when the frame pointer is being
6655 eliminated. */
6656 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6657 return 1;
6659 return (GET_CODE (op) == REG
6660 && REG_POINTER (op)
6661 && register_operand (op, mode));
6664 /* Return 1 if this operand is anything other than a hard register. */
6667 non_hard_reg_operand (op, mode)
6668 rtx op;
6669 enum machine_mode mode ATTRIBUTE_UNUSED;
6671 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6674 /* Return 1 if INSN branches forward. Should be using insn_addresses
6675 to avoid walking through all the insns... */
6676 static int
6677 forward_branch_p (insn)
6678 rtx insn;
6680 rtx label = JUMP_LABEL (insn);
6682 while (insn)
6684 if (insn == label)
6685 break;
6686 else
6687 insn = NEXT_INSN (insn);
6690 return (insn == label);
6693 /* Return 1 if OP is an equality comparison, else return 0. */
6695 eq_neq_comparison_operator (op, mode)
6696 rtx op;
6697 enum machine_mode mode ATTRIBUTE_UNUSED;
6699 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6702 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6704 movb_comparison_operator (op, mode)
6705 rtx op;
6706 enum machine_mode mode ATTRIBUTE_UNUSED;
6708 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6709 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6712 /* Return 1 if INSN is in the delay slot of a call instruction. */
6714 jump_in_call_delay (insn)
6715 rtx insn;
6718 if (GET_CODE (insn) != JUMP_INSN)
6719 return 0;
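  /* A filled delay slot is represented by a SEQUENCE whose element 0
     is the branch or call itself and element 1 is the delay insn;
     that is what the test below looks for.  */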
6721 if (PREV_INSN (insn)
6722 && PREV_INSN (PREV_INSN (insn))
6723 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6725 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6727 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6728 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6731 else
6732 return 0;
6735 /* Output an unconditional move and branch insn. */
6737 const char *
6738 output_parallel_movb (operands, length)
6739 rtx *operands;
6740 int length;
6742 /* These are the cases in which we win. */
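  /* The ",tr" completer is the always-true condition, so a single movb
     performs the copy and branches unconditionally.  */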
6743 if (length == 4)
6744 return "mov%I1b,tr %1,%0,%2";
6746 /* None of these cases win, but they don't lose either. */
6747 if (dbr_sequence_length () == 0)
6749 /* Nothing in the delay slot, fake it by putting the combined
6750 insn (the copy or add) in the delay slot of a bl. */
6751 if (GET_CODE (operands[1]) == CONST_INT)
6752 return "b %2\n\tldi %1,%0";
6753 else
6754 return "b %2\n\tcopy %1,%0";
6756 else
6758 /* Something in the delay slot, but we've got a long branch. */
6759 if (GET_CODE (operands[1]) == CONST_INT)
6760 return "ldi %1,%0\n\tb %2";
6761 else
6762 return "copy %1,%0\n\tb %2";
6766 /* Output an unconditional add and branch insn. */
6768 const char *
6769 output_parallel_addb (operands, length)
6770 rtx *operands;
6771 int length;
6773 /* To make life easy we want operand0 to be the shared input/output
6774 operand and operand1 to be the readonly operand. */
6775 if (operands[0] == operands[1])
6776 operands[1] = operands[2];
6778 /* These are the cases in which we win. */
6779 if (length == 4)
6780 return "add%I1b,tr %1,%0,%3";
6782 /* None of these cases win, but they don't lose either. */
6783 if (dbr_sequence_length () == 0)
6785 /* Nothing in the delay slot, fake it by putting the combined
6786 insn (the copy or add) in the delay slot of a bl. */
6787 return "b %3\n\tadd%I1 %1,%0,%0";
6789 else
6791 /* Something in the delay slot, but we've got a long branch. */
6792 return "add%I1 %1,%0,%0\n\tb %3";
6796 /* Return nonzero if INSN (a jump insn) immediately follows a call
6797 to a named function. This is used to avoid filling the delay slot
6798 of the jump since it can usually be eliminated by modifying RP in
6799 the delay slot of the call. */
6802 following_call (insn)
6803 rtx insn;
6805 /* We do not place jumps into call delay slots when optimizing for the
6806 PA8000 processor or when generating dwarf2 call frame information. */
6807 if (pa_cpu >= PROCESSOR_8000
6808 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
6809 || flag_unwind_tables)
6810 return 0;
6812 /* Find the previous real insn, skipping NOTEs. */
6813 insn = PREV_INSN (insn);
6814 while (insn && GET_CODE (insn) == NOTE)
6815 insn = PREV_INSN (insn);
6817 /* Check for CALL_INSNs and millicode calls. */
6818 if (insn
6819 && ((GET_CODE (insn) == CALL_INSN
6820 && get_attr_type (insn) != TYPE_DYNCALL)
6821 || (GET_CODE (insn) == INSN
6822 && GET_CODE (PATTERN (insn)) != SEQUENCE
6823 && GET_CODE (PATTERN (insn)) != USE
6824 && GET_CODE (PATTERN (insn)) != CLOBBER
6825 && get_attr_type (insn) == TYPE_MILLI)))
6826 return 1;
6828 return 0;
6831 /* We use this hook to perform a PA specific optimization which is difficult
6832 to do in earlier passes.
6834 We want the delay slots of branches within jump tables to be filled.
6835 None of the compiler passes at the moment even has the notion that a
6836 PA jump table doesn't contain addresses, but instead contains actual
6837 instructions!
6839 Because we actually jump into the table, the addresses of each entry
6840 must stay constant in relation to the beginning of the table (which
6841 itself must stay constant relative to the instruction to jump into
6842 it). I don't believe we can guarantee earlier passes of the compiler
6843 will adhere to those rules.
6845 So, late in the compilation process we find all the jump tables, and
6846 expand them into real code -- e.g., each entry in the jump table vector
6847 will get an appropriate label followed by a jump to the final target.
6849 Reorg and the final jump pass can then optimize these branches and
6850 fill their delay slots. We end up with smaller, more efficient code.
6852 The jump instructions within the table are special; we must be able
6853 to identify them during assembly output (if the jumps don't get filled
6854 we need to emit a nop rather than nullifying the delay slot). We
6855 identify jumps in switch tables by marking the SET with SImode.
6857 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6858 insns. This serves two purposes, first it prevents jump.c from
6859 noticing that the last N entries in the table jump to the instruction
6860 immediately after the table and deleting the jumps. Second, those
6861 insns mark where we should emit .begin_brtab and .end_brtab directives
6862 when using GAS (allows for better link time optimizations). */
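/* Roughly, a branch table of the form

	.word L$1
	.word L$2

   is rewritten as

   L$a:	b L$1
	nop
   L$b:	b L$2
	nop

   bracketed by the begin/end brtab markers.  */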
6864 void
6865 pa_reorg (insns)
6866 rtx insns;
6868 rtx insn;
6870 remove_useless_addtr_insns (insns, 1);
6872 if (pa_cpu < PROCESSOR_8000)
6873 pa_combine_instructions (get_insns ());
6876 /* This is fairly cheap, so always run it if optimizing. */
6877 if (optimize > 0 && !TARGET_BIG_SWITCH)
6879 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6880 insns = get_insns ();
6881 for (insn = insns; insn; insn = NEXT_INSN (insn))
6883 rtx pattern, tmp, location;
6884 unsigned int length, i;
6886 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6887 if (GET_CODE (insn) != JUMP_INSN
6888 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6889 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6890 continue;
6892 /* Emit marker for the beginning of the branch table. */
6893 emit_insn_before (gen_begin_brtab (), insn);
6895 pattern = PATTERN (insn);
6896 location = PREV_INSN (insn);
6897 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
6899 for (i = 0; i < length; i++)
6901 /* Emit a label before each jump to keep jump.c from
6902 removing this code. */
6903 tmp = gen_label_rtx ();
6904 LABEL_NUSES (tmp) = 1;
6905 emit_label_after (tmp, location);
6906 location = NEXT_INSN (location);
6908 if (GET_CODE (pattern) == ADDR_VEC)
6910 /* Emit the jump itself. */
6911 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
6912 tmp = emit_jump_insn_after (tmp, location);
6913 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
6914 /* It is easy to rely on the branch table markers
6915 during assembly output to trigger the correct code
6916 for a switch table jump with an unfilled delay slot.
6918 However, that requires state and assumes that we look
6919 at insns in order.
6921 We can't make such assumptions when computing the length
6922 of instructions. Ugh. We could walk the insn chain to
6923 determine if this instruction is in a branch table, but
6924 that can get rather expensive, particularly during the
6925 branch shortening phase of the compiler.
6927 So instead we mark this jump as being special. This is
6928 far from ideal and assumes that no code after this will
6929 muck around with the mode of the JUMP_INSN itself. */
6930 PUT_MODE (tmp, SImode);
6931 LABEL_NUSES (JUMP_LABEL (tmp))++;
6932 location = NEXT_INSN (location);
6934 else
6936 /* Emit the jump itself. */
6937 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
6938 tmp = emit_jump_insn_after (tmp, location);
6939 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
6940 /* It is easy to rely on the branch table markers
6941 during assembly output to trigger the correct code
6942 for a switch table jump with an unfilled delay slot.
6944 However, that requires state and assumes that we look
6945 at insns in order.
6947 We can't make such assumptions when computing the length
6948 of instructions. Ugh. We could walk the insn chain to
6949 determine if this instruction is in a branch table, but
6950 that can get rather expensive, particularly during the
6951 branch shortening phase of the compiler.
6953 So instead we mark this jump as being special. This is
6954 far from ideal and assumes that no code after this will
6955 muck around with the mode of the JUMP_INSN itself. */
6956 PUT_MODE (tmp, SImode);
6957 LABEL_NUSES (JUMP_LABEL (tmp))++;
6958 location = NEXT_INSN (location);
6961 /* Emit a BARRIER after the jump. */
6962 emit_barrier_after (location);
6963 location = NEXT_INSN (location);
6966 /* Emit marker for the end of the branch table. */
6967 emit_insn_before (gen_end_brtab (), location);
6968 location = NEXT_INSN (location);
6969 emit_barrier_after (location);
6971 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
6972 delete_insn (insn);
6975 else
6978 /* Still need an end_brtab insn. */
6978 insns = get_insns ();
6979 for (insn = insns; insn; insn = NEXT_INSN (insn))
6981 /* Find an ADDR_VEC insn. */
6982 if (GET_CODE (insn) != JUMP_INSN
6983 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6984 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6985 continue;
6987 /* Now generate markers for the beginning and end of the
6988 branch table. */
6989 emit_insn_before (gen_begin_brtab (), insn);
6990 emit_insn_after (gen_end_brtab (), insn);
6995 /* The PA has a number of odd instructions which can perform multiple
6996 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6997 it may be profitable to combine two instructions into one instruction
6998 with two outputs. It's not profitable on PA2.0 machines because the
6999 two outputs would take two slots in the reorder buffers.
7001 This routine finds instructions which can be combined and combines
7002 them. We only support some of the potential combinations, and we
7003 only try common ways to find suitable instructions.
7005 * addb can add two registers or a register and a small integer
7006 and jump to a nearby (+-8k) location. Normally the jump to the
7007 nearby location is conditional on the result of the add, but by
7008 using the "true" condition we can make the jump unconditional.
7009 Thus addb can perform two independent operations in one insn.
7011 * movb is similar to addb in that it can perform a reg->reg
7012 or small immediate->reg copy and jump to a nearby (+-8k) location.
7014 * fmpyadd and fmpysub can perform a FP multiply and either an
7015 FP add or FP sub if the operands of the multiply and add/sub are
7016 independent (there are other minor restrictions). Note both
7017 the fmpy and fadd/fsub can in theory move to better spots according
7018 to data dependencies, but for now we require the fmpy stay at a
7019 fixed location.
7021 * Many of the memory operations can perform pre & post updates
7022 of index registers. GCC's pre/post increment/decrement addressing
7023 is far too simple to take advantage of all the possibilities. This
7024 pass may not be suitable since those insns may not be independent.
7026 * comclr can compare two integer registers or a register and a small integer, nullify
7027 the following instruction and zero some other register. This
7028 is more difficult to use as it's harder to find an insn which
7029 will generate a comclr than finding something like an unconditional
7030 branch. (Conditional moves & long branches create comclr insns.)
7032 * Most arithmetic operations can conditionally skip the next
7033 instruction. They can be viewed as "perform this operation
7034 and conditionally jump to this nearby location" (where nearby
7035 is an insn away). These are difficult to use due to the
7036 branch length restrictions. */
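/* For example, "add %r4,%r5,%r5" followed by an unconditional branch
   to L$x can be combined into the single insn "addb,tr %r4,%r5,L$x"
   (see output_parallel_addb above).  */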
7038 static void
7039 pa_combine_instructions (insns)
7040 rtx insns ATTRIBUTE_UNUSED;
7042 rtx anchor, new;
7044 /* This can get expensive since the basic algorithm is on the
7045 order of O(n^2) (or worse). Only do it for -O2 or higher
7046 levels of optimization. */
7047 if (optimize < 2)
7048 return;
7050 /* Walk down the list of insns looking for "anchor" insns which
7051 may be combined with "floating" insns. As the name implies,
7052 "anchor" instructions don't move, while "floating" insns may
7053 move around. */
7054 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7055 new = make_insn_raw (new);
7057 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7059 enum attr_pa_combine_type anchor_attr;
7060 enum attr_pa_combine_type floater_attr;
7062 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7063 Also ignore any special USE insns. */
7064 if ((GET_CODE (anchor) != INSN
7065 && GET_CODE (anchor) != JUMP_INSN
7066 && GET_CODE (anchor) != CALL_INSN)
7067 || GET_CODE (PATTERN (anchor)) == USE
7068 || GET_CODE (PATTERN (anchor)) == CLOBBER
7069 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
7070 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
7071 continue;
7073 anchor_attr = get_attr_pa_combine_type (anchor);
7074 /* See if anchor is an insn suitable for combination. */
7075 if (anchor_attr == PA_COMBINE_TYPE_FMPY
7076 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
7077 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7078 && ! forward_branch_p (anchor)))
7080 rtx floater;
7082 for (floater = PREV_INSN (anchor);
7083 floater;
7084 floater = PREV_INSN (floater))
7086 if (GET_CODE (floater) == NOTE
7087 || (GET_CODE (floater) == INSN
7088 && (GET_CODE (PATTERN (floater)) == USE
7089 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7090 continue;
7092 /* Anything except a regular INSN will stop our search. */
7093 if (GET_CODE (floater) != INSN
7094 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7095 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7097 floater = NULL_RTX;
7098 break;
7101 /* See if FLOATER is suitable for combination with the
7102 anchor. */
7103 floater_attr = get_attr_pa_combine_type (floater);
7104 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7105 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7106 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7107 && floater_attr == PA_COMBINE_TYPE_FMPY))
7109 /* If ANCHOR and FLOATER can be combined, then we're
7110 done with this pass. */
7111 if (pa_can_combine_p (new, anchor, floater, 0,
7112 SET_DEST (PATTERN (floater)),
7113 XEXP (SET_SRC (PATTERN (floater)), 0),
7114 XEXP (SET_SRC (PATTERN (floater)), 1)))
7115 break;
7118 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7119 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
7121 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
7123 if (pa_can_combine_p (new, anchor, floater, 0,
7124 SET_DEST (PATTERN (floater)),
7125 XEXP (SET_SRC (PATTERN (floater)), 0),
7126 XEXP (SET_SRC (PATTERN (floater)), 1)))
7127 break;
7129 else
7131 if (pa_can_combine_p (new, anchor, floater, 0,
7132 SET_DEST (PATTERN (floater)),
7133 SET_SRC (PATTERN (floater)),
7134 SET_SRC (PATTERN (floater))))
7135 break;
7140 /* If we didn't find anything on the backwards scan, try forwards. */
7141 if (!floater
7142 && (anchor_attr == PA_COMBINE_TYPE_FMPY
7143 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
7145 for (floater = anchor; floater; floater = NEXT_INSN (floater))
7147 if (GET_CODE (floater) == NOTE
7148 || (GET_CODE (floater) == INSN
7149 && (GET_CODE (PATTERN (floater)) == USE
7150 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7152 continue;
7154 /* Anything except a regular INSN will stop our search. */
7155 if (GET_CODE (floater) != INSN
7156 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7157 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7159 floater = NULL_RTX;
7160 break;
7163 /* See if FLOATER is suitable for combination with the
7164 anchor. */
7165 floater_attr = get_attr_pa_combine_type (floater);
7166 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7167 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7168 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7169 && floater_attr == PA_COMBINE_TYPE_FMPY))
7171 /* If ANCHOR and FLOATER can be combined, then we're
7172 done with this pass. */
7173 if (pa_can_combine_p (new, anchor, floater, 1,
7174 SET_DEST (PATTERN (floater)),
7175 XEXP (SET_SRC (PATTERN (floater)),
7176 0),
7177 XEXP (SET_SRC (PATTERN (floater)),
7178 1)))
7179 break;
7184 /* FLOATER will be nonzero if we found a suitable floating
7185 insn for combination with ANCHOR. */
7186 if (floater
7187 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7188 || anchor_attr == PA_COMBINE_TYPE_FMPY))
7190 /* Emit the new instruction and delete the old anchor. */
7191 emit_insn_before (gen_rtx_PARALLEL
7192 (VOIDmode,
7193 gen_rtvec (2, PATTERN (anchor),
7194 PATTERN (floater))),
7195 anchor);
7197 PUT_CODE (anchor, NOTE);
7198 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7199 NOTE_SOURCE_FILE (anchor) = 0;
7201 /* Emit a special USE insn for FLOATER, then delete
7202 the floating insn. */
7203 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7204 delete_insn (floater);
7206 continue;
7208 else if (floater
7209 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
7211 rtx temp;
7212 /* Emit the new_jump instruction and delete the old anchor. */
7213 temp
7214 = emit_jump_insn_before (gen_rtx_PARALLEL
7215 (VOIDmode,
7216 gen_rtvec (2, PATTERN (anchor),
7217 PATTERN (floater))),
7218 anchor);
7220 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
7221 PUT_CODE (anchor, NOTE);
7222 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7223 NOTE_SOURCE_FILE (anchor) = 0;
7225 /* Emit a special USE insn for FLOATER, then delete
7226 the floating insn. */
7227 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7228 delete_insn (floater);
7229 continue;
7235 static int
7236 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
7237 rtx new, anchor, floater;
7238 int reversed;
7239 rtx dest, src1, src2;
7241 int insn_code_number;
7242 rtx start, end;
7244 /* Create a PARALLEL with the patterns of ANCHOR and
7245 FLOATER, try to recognize it, then test constraints
7246 for the resulting pattern.
7248 If the pattern doesn't match or the constraints
7249 aren't met keep searching for a suitable floater
7250 insn. */
7251 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7252 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
7253 INSN_CODE (new) = -1;
7254 insn_code_number = recog_memoized (new);
7255 if (insn_code_number < 0
7256 || (extract_insn (new), ! constrain_operands (1)))
7257 return 0;
7259 if (reversed)
7261 start = anchor;
7262 end = floater;
7264 else
7266 start = floater;
7267 end = anchor;
7270 /* There are up to three operands to consider: one
7271 output and two inputs.
7273 The output must not be used between FLOATER & ANCHOR
7274 exclusive. The inputs must not be set between
7275 FLOATER and ANCHOR exclusive. */
7277 if (reg_used_between_p (dest, start, end))
7278 return 0;
7280 if (reg_set_between_p (src1, start, end))
7281 return 0;
7283 if (reg_set_between_p (src2, start, end))
7284 return 0;
7286 /* If we get here, then everything is good. */
7287 return 1;
7290 /* Return nonzero if references for INSN are delayed.
7292 Millicode insns are actually function calls with some special
7293 constraints on arguments and register usage.
7295 Millicode calls always expect their arguments in the integer argument
7296 registers, and always return their result in %r29 (ret1). They
7297 are expected to clobber their arguments, %r1, %r29, and the return
7298 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
7300 This function tells reorg that the references to arguments and
7301 millicode calls do not appear to happen until after the millicode call.
7302 This allows reorg to put insns which set the argument registers into the
7303 delay slot of the millicode call -- thus they act more like traditional
7304 CALL_INSNs.
7306 Note we can not consider side effects of the insn to be delayed because
7307 the branch and link insn will clobber the return pointer. If we happened
7308 to use the return pointer in the delay slot of the call, then we lose.
7310 get_attr_type will try to recognize the given insn, so make sure to
7311 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
7312 in particular. */
7314 insn_refs_are_delayed (insn)
7315 rtx insn;
7317 return ((GET_CODE (insn) == INSN
7318 && GET_CODE (PATTERN (insn)) != SEQUENCE
7319 && GET_CODE (PATTERN (insn)) != USE
7320 && GET_CODE (PATTERN (insn)) != CLOBBER
7321 && get_attr_type (insn) == TYPE_MILLI));
7324 /* Return the location of a parameter that is passed in a register or NULL
7325 if the parameter has any component that is passed in memory.
7327 This is new code and will be pushed into the net sources after
7328 further testing.
7330 ??? We might want to restructure this so that it looks more like other
7331 ports. */
7333 function_arg (cum, mode, type, named, incoming)
7334 CUMULATIVE_ARGS *cum;
7335 enum machine_mode mode;
7336 tree type;
7337 int named ATTRIBUTE_UNUSED;
7338 int incoming;
7340 int max_arg_words = (TARGET_64BIT ? 8 : 4);
7341 int fpr_reg_base;
7342 int gpr_reg_base;
7343 rtx retval;
7345 if (! TARGET_64BIT)
7347 /* If this arg would be passed partially or totally on the stack, then
7348 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
7349 handle arguments which are split between regs and stack slots if
7350 the ABI mandates split arguments. */
7351 if (cum->words + FUNCTION_ARG_SIZE (mode, type) > max_arg_words
7352 || mode == VOIDmode)
7353 return NULL_RTX;
7355 else
7357 int offset = 0;
7358 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7359 offset = 1;
7360 if (cum->words + offset >= max_arg_words
7361 || mode == VOIDmode)
7362 return NULL_RTX;
7365 /* The 32bit ABIs and the 64bit ABIs are rather different,
7366 particularly in their handling of FP registers. We might
7367 be able to cleverly share code between them, but I'm not
7368 going to bother in the hope that splitting them up results
7369 in code that is more easily understood.
7371 The 64bit code probably is very wrong for structure passing. */
7372 if (TARGET_64BIT)
7374 /* Advance the base registers to their current locations.
7376 Remember, gprs grow towards smaller register numbers while
7377 fprs grow to higher register numbers. Also remember FP regs
7378 are always 4 bytes wide, while the size of an integer register
7379 varies based on the size of the target word. */
7380 gpr_reg_base = 26 - cum->words;
7381 fpr_reg_base = 32 + cum->words;
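      /* For example, after three argument words have been consumed,
	 gpr_reg_base is 23 (%r23) and fpr_reg_base is 35.  */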
7383 /* If the argument is more than a word long, then we need to align
7384 the base registers. Same caveats as above. */
7385 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7387 if (mode != BLKmode)
7389 /* First deal with alignment of the doubleword. */
7390 gpr_reg_base -= (cum->words & 1);
7392 /* This seems backwards, but it is what HP specifies. We need
7393 gpr_reg_base to point to the smaller numbered register of
7394 the integer register pair. So if we have an even register
7395 number, then decrement the gpr base. */
7396 gpr_reg_base -= ((gpr_reg_base % 2) == 0);
7398 /* FP values behave sanely, except that each FP reg is only
7399 half of a word. */
7400 fpr_reg_base += ((fpr_reg_base % 2) == 0);
7402 else
7404 rtx loc[8];
7405 int i, offset = 0, ub;
7406 ub = FUNCTION_ARG_SIZE (mode, type);
7407 ub = MIN (ub,
7408 MAX (0, max_arg_words - cum->words - (cum->words & 1)));
7409 gpr_reg_base -= (cum->words & 1);
7410 for (i = 0; i < ub; i++)
7412 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7413 gen_rtx_REG (DImode,
7414 gpr_reg_base),
7415 GEN_INT (offset));
7416 gpr_reg_base -= 1;
7417 offset += 8;
7419 if (ub == 0)
7420 return NULL_RTX;
7421 else if (ub == 1)
7422 return XEXP (loc[0], 0);
7423 else
7424 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
7428 else
7430 /* If the argument is larger than a word, then we know precisely
7431 which registers we must use. */
7432 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7434 if (cum->words)
7436 gpr_reg_base = 23;
7437 fpr_reg_base = 38;
7439 else
7441 gpr_reg_base = 25;
7442 fpr_reg_base = 34;
7445 else
7447 /* We have a single word (32 bits). A simple computation
7448 will get us the register #s we need. */
7449 gpr_reg_base = 26 - cum->words;
7450 fpr_reg_base = 32 + 2 * cum->words;
7454 if (TARGET_64BIT && mode == TFmode)
7456 return
7457 gen_rtx_PARALLEL
7458 (mode,
7459 gen_rtvec (2,
7460 gen_rtx_EXPR_LIST (VOIDmode,
7461 gen_rtx_REG (DImode, gpr_reg_base + 1),
7462 const0_rtx),
7463 gen_rtx_EXPR_LIST (VOIDmode,
7464 gen_rtx_REG (DImode, gpr_reg_base),
7465 GEN_INT (8))));
7467 /* Determine if the argument needs to be passed in both general and
7468 floating point registers. */
7469 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
7470 /* If we are doing soft-float with portable runtime, then there
7471 is no need to worry about FP regs. */
7472 && ! TARGET_SOFT_FLOAT
7473 /* The parameter must be some kind of float, else we can just
7474 pass it in integer registers. */
7475 && FLOAT_MODE_P (mode)
7476 /* The target function must not have a prototype. */
7477 && cum->nargs_prototype <= 0
7478 /* libcalls do not need to pass items in both FP and general
7479 registers. */
7480 && type != NULL_TREE
7481 /* All this hair applies to outgoing args only. */
7482 && ! incoming)
7483 /* Also pass outgoing floating arguments in both registers in indirect
7484 calls with the 32 bit ABI and the HP assembler since there is no
7485 way to specify the argument locations in static functions.
7486 || (! TARGET_64BIT
7487 && ! TARGET_GAS
7488 && ! incoming
7489 && cum->indirect
7490 && FLOAT_MODE_P (mode)))
7492 retval
7493 = gen_rtx_PARALLEL
7494 (mode,
7495 gen_rtvec (2,
7496 gen_rtx_EXPR_LIST (VOIDmode,
7497 gen_rtx_REG (mode, fpr_reg_base),
7498 const0_rtx),
7499 gen_rtx_EXPR_LIST (VOIDmode,
7500 gen_rtx_REG (mode, gpr_reg_base),
7501 const0_rtx)));
7503 else
7505 /* See if we should pass this parameter in a general register. */
7506 if (TARGET_SOFT_FLOAT
7507 /* Indirect calls in the normal 32bit ABI require all arguments
7508 to be passed in general registers. */
7509 || (!TARGET_PORTABLE_RUNTIME
7510 && !TARGET_64BIT
7511 && !TARGET_ELF32
7512 && cum->indirect)
7513 /* If the parameter is not a floating point parameter, then
7514 it belongs in GPRs. */
7515 || !FLOAT_MODE_P (mode))
7516 retval = gen_rtx_REG (mode, gpr_reg_base);
7517 else
7518 retval = gen_rtx_REG (mode, fpr_reg_base);
7520 return retval;
7524 /* If this arg would be passed totally in registers or totally on the stack,
7525 then this routine should return zero. It is currently called only for
7526 the 64-bit target. */
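/* For example, a 4-word argument starting at cum->words == 6 (no
   alignment padding needed) is split: 8 - 6 - 0 == 2 words go in
   registers and the remainder goes on the stack.  */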
7528 function_arg_partial_nregs (cum, mode, type, named)
7529 CUMULATIVE_ARGS *cum;
7530 enum machine_mode mode;
7531 tree type;
7532 int named ATTRIBUTE_UNUSED;
7534 unsigned int max_arg_words = 8;
7535 unsigned int offset = 0;
7537 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7538 offset = 1;
7540 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7541 /* Arg fits fully into registers. */
7542 return 0;
7543 else if (cum->words + offset >= max_arg_words)
7544 /* Arg fully on the stack. */
7545 return 0;
7546 else
7547 /* Arg is split. */
7548 return max_arg_words - cum->words - offset;
7552 /* Return 1 if this is a comparison operator. This allows the use of
7553 MATCH_OPERATOR to recognize all the branch insns. */
7556 cmpib_comparison_operator (op, mode)
7557 register rtx op;
7558 enum machine_mode mode;
7560 return ((mode == VOIDmode || GET_MODE (op) == mode)
7561 && (GET_CODE (op) == EQ
7562 || GET_CODE (op) == NE
7563 || GET_CODE (op) == GT
7564 || GET_CODE (op) == GTU
7565 || GET_CODE (op) == GE
7566 || GET_CODE (op) == LT
7567 || GET_CODE (op) == LE
7568 || GET_CODE (op) == LEU));
7571 /* Mark ARG (which is really a struct deferred_plabel **) for GC. */
7573 static void
7574 mark_deferred_plabels (arg)
7575 void *arg;
7577 struct deferred_plabel *dp = *(struct deferred_plabel **) arg;
7578 int i;
7580 for (i = 0; i < n_deferred_plabels; ++i)
7581 ggc_mark_rtx (dp[i].internal_label);
7584 /* Called to register all of our global variables with the garbage
7585 collector. */
7587 static void
7588 pa_add_gc_roots ()
7590 ggc_add_rtx_root (&hppa_compare_op0, 1);
7591 ggc_add_rtx_root (&hppa_compare_op1, 1);
7592 ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
7593 &mark_deferred_plabels);
7596 /* On hpux10, the linker will give an error if we have a reference
7597 in the read-only data section to a symbol defined in a shared
7598 library. Therefore, expressions that might require a reloc can
7599 not be placed in the read-only data section. */
7601 static void
7602 pa_select_section (exp, reloc, align)
7603 tree exp;
7604 int reloc;
7605 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
7607 if (TREE_CODE (exp) == VAR_DECL
7608 && TREE_READONLY (exp)
7609 && !TREE_THIS_VOLATILE (exp)
7610 && DECL_INITIAL (exp)
7611 && (DECL_INITIAL (exp) == error_mark_node
7612 || TREE_CONSTANT (DECL_INITIAL (exp)))
7613 && !reloc)
7614 readonly_data_section ();
7615 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
7616 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
7617 && !reloc)
7618 readonly_data_section ();
7619 else
7620 data_section ();