/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "libfuncs.h"
#include "reload.h"
#include "c-tree.h"
#include "integrate.h"
#include "function.h"
#include "obstack.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
static int hppa_use_dfa_pipeline_interface PARAMS ((void));

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface

static int
hppa_use_dfa_pipeline_interface ()
{
  return 1;
}
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (out_insn, in_insn)
     rtx out_insn, in_insn;
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
static void pa_combine_instructions PARAMS ((rtx));
static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
static int forward_branch_p PARAMS ((rtx));
static int shadd_constant_p PARAMS ((int));
static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
static int compute_movstrsi_length PARAMS ((rtx));
static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
static void remove_useless_addtr_insns PARAMS ((rtx, int));
static void store_reg PARAMS ((int, int, int));
static void store_reg_modify PARAMS ((int, int, int));
static void load_reg PARAMS ((int, int, int));
static void set_reg_plus_d PARAMS ((int, int, int, int));
static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int pa_adjust_priority PARAMS ((rtx, int));
static int pa_issue_rate PARAMS ((void));
static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info PARAMS ((tree, int));
static const char *pa_strip_name_encoding PARAMS ((const char *));
static void pa_globalize_label PARAMS ((FILE *, const char *))
     ATTRIBUTE_UNUSED;
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg PARAMS ((rtx));

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel GTY(())
{
  rtx internal_label;
  const char *name;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

struct gcc_target targetm = TARGET_INITIALIZER;
void
override_options ()
{
  if (pa_cpu_string == NULL)
    pa_cpu_string = TARGET_SCHED_DEFAULT;

  if (! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else if (! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (! strcmp (pa_cpu_string, "7300"))
    {
      pa_cpu_string = "7300";
      pa_cpu = PROCESSOR_7300;
    }
  else
    {
      warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
}
/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}
/* Return non-zero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_MODE (op) == word_mode
	  && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}
/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}
/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Return 1 if the operand is a register operand or a non-symbolic memory
   operand after reload.  This predicate is used for branch patterns that
   internally handle register reloading.  We need to accept non-symbolic
   memory operands after reload to ensure that the pattern is still valid
   if reload didn't find a hard register for the operand.  */

int
reg_before_reload_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* Don't accept a SUBREG since it will need a reload.  */
  if (GET_CODE (op) == SUBREG)
    return 0;

  if (register_operand (op, mode))
    return 1;

  if (reload_completed
      && memory_operand (op, mode)
      && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
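
/* Illustrative sketch (not part of the original source), assuming the
   constraint letters keep their usual PA meanings: 'J' a 14-bit signed
   immediate (ldo/ldi), 'N' an ldil-style constant (low 11 bits zero,
   consistent with the (intval & 0x7ff) == 0 test in singlemove_string
   below), and 'K' a zdepi constant.  For example:

     5000        -> ldi 5000,%rD            (fits in 14 signed bits)
     0x12345000  -> ldil L'0x12345000,%rD   (low 11 bits are zero)
     0x001f0000  -> zdepi/depwi,z           (contiguous run of ones)

   The authoritative conditions live in CONST_OK_FOR_LETTER_P.  */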
/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);

  /* We consider a LO_SUM DLT reference a move_operand now since it has
     been merged into the normal movsi/movdi patterns.  */
  if (GET_CODE (op) == LO_SUM
      && GET_CODE (XEXP (op, 0)) == REG
      && REG_OK_FOR_BASE_P (XEXP (op, 0))
      && GET_CODE (XEXP (op, 1)) == UNSPEC
      && GET_MODE (op) == Pmode)
    return 1;

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1))
	      == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1))
		 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}
/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}
int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return reg_renumber && FP_REG_P (op);
}
/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}
/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && ((CONST_DOUBLE_HIGH (op) >= 0)
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
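
/* Illustrative walk-through (not part of the original source): take
   x = 0x88 = 1000 1000.  Shifted down to its lowest set bit the value
   is 10001, a plausible 5-bit zdepi field.
     lsb_mask = x & -x = 0x08                 (isolates the lowest set bit)
     t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1)
       = (0x08 + 0x08) & ~0x07 = 0x10         (a power of two, so accept)
   For x = 0x108 = 1 0000 1000 the same computation yields t = 0x18,
   which is not a power of two, so the constant is rejected; no 5-bit
   sign-extended field shifted into zeros can produce it.  */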
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
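
/* Illustrative walk-through (not part of the original source): the test
   accepts masks whose complement is a single contiguous run of ones.
   For mask = 0xfffff00f (pattern 1...10...01...1):
     ~mask          = 0x00000ff0   (one run of ones)
     ~mask & -~mask = 0x00000010   (lowest set bit)
     sum            = 0x00001000   (the carry collapses the run)
   0x1000 is a power of two, so the mask is accepted.  A mask such as
   0xfffee00f fails: its complement 0x00011ff0 has two runs, and the
   carry does not collapse it to a single bit.  */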
/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
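
/* Illustrative example (not part of the original source): here the mask
   itself, rather than its complement, must be one contiguous run of
   ones.  mask = 0x0ff0 gives 0x0ff0 + 0x0010 = 0x1000, a power of two,
   so depi can deposit the run of ones in place.  */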
/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}
/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
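
/* Illustrative example (not part of the original source): after
   discarding the low four bits, the remainder must look like 0...01...1.
   op = 0x1f7 gives x = 0x1f; 0x1f & 0x20 == 0, so it is accepted.
   op = 0x2f7 gives x = 0x2f; 0x2f & 0x30 != 0, so it is rejected.  */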
int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	abort ();

      emit_move_insn (reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, reg,
				       gen_rtx_UNSPEC (Pmode,
						       gen_rtvec (1, orig),
						       0)));

      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
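
/* Illustrative example (not part of the original source), for a MODE_INT
   reference with mask 0x3fff: legitimizing memory (X + 0x12345) rounds
   the large offset to the nearest 0x4000 boundary.
     0x12345 & 0x3fff = 0x2345 >= 0x2000, so round up:
     Y = (0x12345 & ~0x3fff) + 0x4000 = 0x14000
     Z = X + 0x14000
     memory (Z - 0x1cbb)	since 0x12345 - 0x14000 = -0x1cbb
   The residual fits in 14 bits, and other references near X + 0x12345
   can share the same Z.  */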
rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }
  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */
  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (mode, orig)
     enum machine_mode mode;
     rtx orig;
{
  if (mode == GET_MODE (orig))
    return orig;

  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
    abort ();

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0), 0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }
  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}


	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (operand0) = 1;
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (temp) = 1;
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }
	  return 1;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx extend = NULL_RTX;
	  rtx temp;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
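	      /* Illustrative example (not part of the original source):
		 val = 0x123456789.  The mask (2 << 31) - 1 keeps the low
		 32 bits, 0x23456789; XORing with 1 << 31 and subtracting
		 1 << 31 sign extends them, here leaving 0x23456789
		 unchanged since bit 31 is clear.  val != nval, so the
		 high-order bits (val >> 32 = 1) are deposited afterwards
		 via the insv pattern.  */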
	      if (val != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  extend = GEN_INT (val >> 32);
#endif
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  /* We don't directly split DImode constants on 32-bit targets
	     because PLUS uses an 11-bit immediate and the insn sequence
	     generated is not as efficient as the one using HIGH/LO_SUM.  */
	  if (GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
	    {
	      /* Directly break constant into high and low parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT value = INTVAL (operand1);
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;
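	      /* Illustrative example (not part of the original source):
		 value = 0x12346789 has low = 0x2789 >= 0x2000, so high is
		 bumped to 0x12348000 and low becomes
		 0x12346789 - 0x12348000 = -0x1877, which fits the 14-bit
		 signed displacement: ldil L'0x12348000; ldo -6263(reg).  */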
	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  emit_move_insn (operands[0], operands[1]);

	  if (extend != NULL_RTX)
	    emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
				 extend));

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	register tree link;
	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
	  if (TREE_VALUE (link) != 0)
	    reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space?
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
   will be true.  */

int
read_only_operand (operand, mode)
     rtx operand;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
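
/* Illustrative example (not part of the original source): for
   imm = 0x00ff0000 the first loop finds lsb = 16 and leaves imm = 0xff.
   Bit 4 of the remaining field is set, so the bitstring width is
   measured (len = 8) and imm is sign extended as a 5-bit value to -1.
   The resulting operands op = { -1, 15, 8 } describe a zdepi-style
   deposit of eight ones at bits 23..16 (counting from the LSB) of the
   target register, i.e. exactly 0x00ff0000.  */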
1963 /* Compute position (in OP[1]) and width (in OP[2])
1964 useful for copying IMM to a register using the depdi,z
1965 instruction.  Store the immediate value to insert in OP[0]. */
1966 void
1967 compute_zdepdi_operands (imm, op)
1968 unsigned HOST_WIDE_INT imm;
1969 unsigned *op;
1971 HOST_WIDE_INT lsb, len;
1973 /* Find the least significant set bit in IMM. */
1974 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
1976 if ((imm & 1) != 0)
1977 break;
1978 imm >>= 1;
1981 /* Choose variants based on *sign* of the 5-bit field. */
1982 if ((imm & 0x10) == 0)
1983 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
1984 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
1985 else
1987 /* Find the width of the bitstring in IMM. */
1988 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
1990 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
1991 break;
1994 /* Sign extend IMM as a 5-bit value. */
1995 imm = (imm & 0xf) - 0x10;
1998 op[0] = imm;
1999 op[1] = 63 - lsb;
2000 op[2] = len;
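/* The 64-bit variant behaves the same way; e.g. (illustrative)
   IMM = (HOST_WIDE_INT) 0xf << 33 gives lsb = 33, len = 4 and
   op[1] = 63 - 33 = 30, i.e. a single "depdi,z 15,30,4".  */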
2003 /* Output assembler code to perform a doubleword move insn
2004 with operands OPERANDS. */
2006 const char *
2007 output_move_double (operands)
2008 rtx *operands;
2010 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2011 rtx latehalf[2];
2012 rtx addreg0 = 0, addreg1 = 0;
2014 /* First classify both operands. */
2016 if (REG_P (operands[0]))
2017 optype0 = REGOP;
2018 else if (offsettable_memref_p (operands[0]))
2019 optype0 = OFFSOP;
2020 else if (GET_CODE (operands[0]) == MEM)
2021 optype0 = MEMOP;
2022 else
2023 optype0 = RNDOP;
2025 if (REG_P (operands[1]))
2026 optype1 = REGOP;
2027 else if (CONSTANT_P (operands[1]))
2028 optype1 = CNSTOP;
2029 else if (offsettable_memref_p (operands[1]))
2030 optype1 = OFFSOP;
2031 else if (GET_CODE (operands[1]) == MEM)
2032 optype1 = MEMOP;
2033 else
2034 optype1 = RNDOP;
2036 /* Check for the cases that the operand constraints are not
2037 supposed to allow.  Abort if we get one, because generating
2038 code for these cases is painful. */
2040 if (optype0 != REGOP && optype1 != REGOP)
2041 abort ();
2043 /* Handle auto-decrementing and auto-incrementing loads and stores
2044 specially, since the structure of the function doesn't work
2045 for them without major modification.  Do it better when we teach
2046 this port about the general inc/dec addressing of the PA.
2047 (This was written by tege.  Chide him if it doesn't work.) */
2049 if (optype0 == MEMOP)
2051 /* We have to output the address syntax ourselves, since print_operand
2052 doesn't deal with the addresses we want to use. Fix this later. */
2054 rtx addr = XEXP (operands[0], 0);
2055 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2057 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2059 operands[0] = XEXP (addr, 0);
2060 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2061 abort ();
2063 if (!reg_overlap_mentioned_p (high_reg, addr))
2065 /* No overlap between high target register and address
2066 register. (We do this in a non-obvious way to
2067 save a register file writeback) */
2068 if (GET_CODE (addr) == POST_INC)
2069 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2070 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2072 else
2073 abort ();
2075 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2077 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2079 operands[0] = XEXP (addr, 0);
2080 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2081 abort ();
2083 if (!reg_overlap_mentioned_p (high_reg, addr))
2085 /* No overlap between high target register and address
2086 register. (We do this in a non-obvious way to
2087 save a register file writeback) */
2088 if (GET_CODE (addr) == PRE_INC)
2089 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2090 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2092 else
2093 abort ();
2096 if (optype1 == MEMOP)
2098 /* We have to output the address syntax ourselves, since print_operand
2099 doesn't deal with the addresses we want to use. Fix this later. */
2101 rtx addr = XEXP (operands[1], 0);
2102 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2104 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2106 operands[1] = XEXP (addr, 0);
2107 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2108 abort ();
2110 if (!reg_overlap_mentioned_p (high_reg, addr))
2112 /* No overlap between high target register and address
2113 register. (We do this in a non-obvious way to
2114 save a register file writeback) */
2115 if (GET_CODE (addr) == POST_INC)
2116 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2117 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2119 else
2121 /* This is an undefined situation. We should load into the
2122 address register *and* update that register. Probably
2123 we don't need to handle this at all. */
2124 if (GET_CODE (addr) == POST_INC)
2125 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2126 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2129 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2131 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2133 operands[1] = XEXP (addr, 0);
2134 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2135 abort ();
2137 if (!reg_overlap_mentioned_p (high_reg, addr))
2139 /* No overlap between high target register and address
2140 register. (We do this in a non-obvious way to
2141 save a register file writeback) */
2142 if (GET_CODE (addr) == PRE_INC)
2143 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2144 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2146 else
2148 /* This is an undefined situation. We should load into the
2149 address register *and* update that register. Probably
2150 we don't need to handle this at all. */
2151 if (GET_CODE (addr) == PRE_INC)
2152 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2153 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2156 else if (GET_CODE (addr) == PLUS
2157 && GET_CODE (XEXP (addr, 0)) == MULT)
2159 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2161 if (!reg_overlap_mentioned_p (high_reg, addr))
2163 rtx xoperands[4];
2165 xoperands[0] = high_reg;
2166 xoperands[1] = XEXP (addr, 1);
2167 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2168 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2169 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2170 xoperands);
2171 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2173 else
2175 rtx xoperands[4];
2177 xoperands[0] = high_reg;
2178 xoperands[1] = XEXP (addr, 1);
2179 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2180 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2181 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2182 xoperands);
2183 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2188 /* If an operand is an unoffsettable memory ref, find a register
2189 we can increment temporarily to make it refer to the second word. */
2191 if (optype0 == MEMOP)
2192 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2194 if (optype1 == MEMOP)
2195 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2197 /* Ok, we can do one word at a time.
2198 Normally we do the low-numbered word first, though the checks below
2200 may swap that order.  In either case, set up in LATEHALF the operands
2201 to use for the high-numbered word and in some cases alter the
2202 operands in OPERANDS to be suitable for the low-numbered word. */
2204 if (optype0 == REGOP)
2205 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2206 else if (optype0 == OFFSOP)
2207 latehalf[0] = adjust_address (operands[0], SImode, 4);
2208 else
2209 latehalf[0] = operands[0];
2211 if (optype1 == REGOP)
2212 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2213 else if (optype1 == OFFSOP)
2214 latehalf[1] = adjust_address (operands[1], SImode, 4);
2215 else if (optype1 == CNSTOP)
2216 split_double (operands[1], &operands[1], &latehalf[1]);
2217 else
2218 latehalf[1] = operands[1];
2220 /* If the first move would clobber the source of the second one,
2221 do them in the other order.
2223 This can happen in two cases:
2225 mem -> register where the first half of the destination register
2226 is the same register used in the memory's address. Reload
2227 can create such insns.
2229 mem in this case will be either register indirect or register
2230 indirect plus a valid offset.
2232 register -> register move where REGNO(dst) == REGNO(src + 1)
2233 someone (Tim/Tege?) claimed this can happen for parameter loads.
2235 Handle mem -> register case first. */
2236 if (optype0 == REGOP
2237 && (optype1 == MEMOP || optype1 == OFFSOP)
2238 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2239 operands[1], 0))
2241 /* Do the late half first. */
2242 if (addreg1)
2243 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2244 output_asm_insn (singlemove_string (latehalf), latehalf);
2246 /* Then clobber. */
2247 if (addreg1)
2248 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2249 return singlemove_string (operands);
2252 /* Now handle register -> register case. */
2253 if (optype0 == REGOP && optype1 == REGOP
2254 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2256 output_asm_insn (singlemove_string (latehalf), latehalf);
2257 return singlemove_string (operands);
2260 /* Normal case: do the two words, low-numbered first. */
2262 output_asm_insn (singlemove_string (operands), operands);
2264 /* Make any unoffsettable addresses point at high-numbered word. */
2265 if (addreg0)
2266 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2267 if (addreg1)
2268 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2270 /* Do that word. */
2271 output_asm_insn (singlemove_string (latehalf), latehalf);
2273 /* Undo the adds we just did. */
2274 if (addreg0)
2275 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2276 if (addreg1)
2277 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2279 return "";
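/* Clobber example (illustrative): copying the doubleword at 4(%r28)
   into %r28/%r29 would normally emit "ldw 4(%r28),%r28" first,
   destroying the address register before the second load.  The
   refers_to_regno_p check above catches this and emits the late half,
   "ldw 8(%r28),%r29", first.  */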
2282 const char *
2283 output_fp_move_double (operands)
2284 rtx *operands;
2286 if (FP_REG_P (operands[0]))
2288 if (FP_REG_P (operands[1])
2289 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2290 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2291 else
2292 output_asm_insn ("fldd%F1 %1,%0", operands);
2294 else if (FP_REG_P (operands[1]))
2296 output_asm_insn ("fstd%F0 %1,%0", operands);
2298 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2300 if (GET_CODE (operands[0]) == REG)
2302 rtx xoperands[2];
2303 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2304 xoperands[0] = operands[0];
2305 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2307 /* This is a pain.  You have to be prepared to deal with an
2308 arbitrary address here including pre/post increment/decrement,
2310 so avoid this in the MD. */
2311 else
2312 abort ();
2314 else abort ();
2315 return "";
2318 /* Return a REG that occurs in ADDR with coefficient 1.
2319 ADDR can be effectively incremented by incrementing REG. */
2321 static rtx
2322 find_addr_reg (addr)
2323 rtx addr;
2325 while (GET_CODE (addr) == PLUS)
2327 if (GET_CODE (XEXP (addr, 0)) == REG)
2328 addr = XEXP (addr, 0);
2329 else if (GET_CODE (XEXP (addr, 1)) == REG)
2330 addr = XEXP (addr, 1);
2331 else if (CONSTANT_P (XEXP (addr, 0)))
2332 addr = XEXP (addr, 1);
2333 else if (CONSTANT_P (XEXP (addr, 1)))
2334 addr = XEXP (addr, 0);
2335 else
2336 abort ();
2338 if (GET_CODE (addr) == REG)
2339 return addr;
2340 abort ();
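/* For example (illustrative), given ADDR = (plus (reg %r26)
   (const_int 40)) the loop selects the non-constant arm and returns
   %r26; bumping %r26 by 4 then makes ADDR address the second word.  */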
2343 /* Emit code to perform a block move.
2345 OPERANDS[0] is the destination pointer as a REG, clobbered.
2346 OPERANDS[1] is the source pointer as a REG, clobbered.
2347 OPERANDS[2] is a register for temporary storage.
2348 OPERANDS[3] is a register for temporary storage.
2349 OPERANDS[4] is the size as a CONST_INT.
2350 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2351 OPERANDS[6] is another temporary register. */
2353 const char *
2354 output_block_move (operands, size_is_constant)
2355 rtx *operands;
2356 int size_is_constant ATTRIBUTE_UNUSED;
2358 int align = INTVAL (operands[5]);
2359 unsigned long n_bytes = INTVAL (operands[4]);
2361 /* We can't move more than four bytes at a time because the PA
2362 has no integer move insns wider than a word.  (Could use fp mem ops?) */
2363 if (align > 4)
2364 align = 4;
2366 /* Note that we know each loop below will execute at least twice
2367 (else we would have open-coded the copy). */
2368 switch (align)
2370 case 4:
2371 /* Pre-adjust the loop counter. */
2372 operands[4] = GEN_INT (n_bytes - 8);
2373 output_asm_insn ("ldi %4,%2", operands);
2375 /* Copying loop. */
2376 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2377 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2378 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2379 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2380 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2382 /* Handle the residual. There could be up to 7 bytes of
2383 residual to copy! */
2384 if (n_bytes % 8 != 0)
2386 operands[4] = GEN_INT (n_bytes % 4);
2387 if (n_bytes % 8 >= 4)
2388 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2389 if (n_bytes % 4 != 0)
2390 output_asm_insn ("ldw 0(%1),%6", operands);
2391 if (n_bytes % 8 >= 4)
2392 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2393 if (n_bytes % 4 != 0)
2394 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2396 return "";
2398 case 2:
2399 /* Pre-adjust the loop counter. */
2400 operands[4] = GEN_INT (n_bytes - 4);
2401 output_asm_insn ("ldi %4,%2", operands);
2403 /* Copying loop. */
2404 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2405 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2406 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2407 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2408 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2410 /* Handle the residual. */
2411 if (n_bytes % 4 != 0)
2413 if (n_bytes % 4 >= 2)
2414 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2415 if (n_bytes % 2 != 0)
2416 output_asm_insn ("ldb 0(%1),%6", operands);
2417 if (n_bytes % 4 >= 2)
2418 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2419 if (n_bytes % 2 != 0)
2420 output_asm_insn ("stb %6,0(%0)", operands);
2422 return "";
2424 case 1:
2425 /* Pre-adjust the loop counter. */
2426 operands[4] = GEN_INT (n_bytes - 2);
2427 output_asm_insn ("ldi %4,%2", operands);
2429 /* Copying loop. */
2430 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2431 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2432 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2433 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2434 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2436 /* Handle the residual. */
2437 if (n_bytes % 2 != 0)
2439 output_asm_insn ("ldb 0(%1),%3", operands);
2440 output_asm_insn ("stb %3,0(%0)", operands);
2442 return "";
2444 default:
2445 abort ();
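/* Sketch of the code emitted for a word-aligned 20-byte copy
   (illustrative; the register names stand in for the operands):

	ldi 12,%cnt			; 20 - 8, pre-adjusted counter
   L:	{ldws|ldw},ma 4(%src),%t1
	{ldws|ldw},ma 4(%src),%t2
	{stws|stw},ma %t1,4(%dst)
	addib,>= -8,%cnt,L		; written as .-12 above
	{stws|stw},ma %t2,4(%dst)	; executes in the delay slot
	{ldws|ldw},ma 4(%src),%t1	; 20 % 8 = 4 residual bytes
	{stws|stw},ma %t1,4(%dst)  */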
2449 /* Count the number of insns necessary to handle this block move.
2451 Basic structure is the same as output_block_move, except that we
2452 count insns rather than emit them. */
2454 static int
2455 compute_movstrsi_length (insn)
2456 rtx insn;
2458 rtx pat = PATTERN (insn);
2459 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2460 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2461 unsigned int n_insns = 0;
2463 /* We can't move more than four bytes at a time because the PA
2464 has no integer move insns wider than a word.  (Could use fp mem ops?) */
2465 if (align > 4)
2466 align = 4;
2468 /* The basic copying loop. */
2469 n_insns = 6;
2471 /* Residuals. */
2472 if (n_bytes % (2 * align) != 0)
2474 if ((n_bytes % (2 * align)) >= align)
2475 n_insns += 2;
2477 if ((n_bytes % align) != 0)
2478 n_insns += 2;
2481 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2482 return n_insns * 4;
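/* Worked example (illustrative): a word-aligned 22-byte copy costs the
   6-insn loop, plus 2 insns for the residual word (22 % 8 >= 4) and
   2 more for the trailing halfword (22 % 4 != 0), i.e. 10 insns or
   40 bytes of code.  */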
2486 const char *
2487 output_and (operands)
2488 rtx *operands;
2490 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2492 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2493 int ls0, ls1, ms0, p, len;
2495 for (ls0 = 0; ls0 < 32; ls0++)
2496 if ((mask & (1 << ls0)) == 0)
2497 break;
2499 for (ls1 = ls0; ls1 < 32; ls1++)
2500 if ((mask & (1 << ls1)) != 0)
2501 break;
2503 for (ms0 = ls1; ms0 < 32; ms0++)
2504 if ((mask & (1 << ms0)) == 0)
2505 break;
2507 if (ms0 != 32)
2508 abort ();
2510 if (ls1 == 32)
2512 len = ls0;
2514 if (len == 0)
2515 abort ();
2517 operands[2] = GEN_INT (len);
2518 return "{extru|extrw,u} %1,31,%2,%0";
2520 else
2522 /* We could use this `depi' for the case above as well, but `depi'
2523 requires one more register file access than an `extru'. */
2525 p = 31 - ls0;
2526 len = ls1 - ls0;
2528 operands[2] = GEN_INT (p);
2529 operands[3] = GEN_INT (len);
2530 return "{depi|depwi} 0,%2,%3,%0";
2533 else
2534 return "and %1,%2,%0";
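/* Illustrative examples (not compiled): the mask 0xff runs off the end
   (ls1 == 32), so "{extru|extrw,u} %1,31,8,%0" keeps the low 8 bits;
   the mask 0xffff00ff has ls0 = 8 and ls1 = 16, so
   "{depi|depwi} 0,23,8,%0" clears bits 8..15 instead.  */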
2537 /* Return a string to perform a bitwise-and of operands[1] with operands[2],
2538 storing the result in operands[0]. */
2539 const char *
2540 output_64bit_and (operands)
2541 rtx *operands;
2543 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2545 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2546 int ls0, ls1, ms0, p, len;
2548 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2549 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2550 break;
2552 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2553 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2554 break;
2556 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2557 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2558 break;
2560 if (ms0 != HOST_BITS_PER_WIDE_INT)
2561 abort ();
2563 if (ls1 == HOST_BITS_PER_WIDE_INT)
2565 len = ls0;
2567 if (len == 0)
2568 abort ();
2570 operands[2] = GEN_INT (len);
2571 return "extrd,u %1,63,%2,%0";
2573 else
2575 /* We could use `depdi' for the case above as well, but `depdi'
2576 requires one more register file access than an `extrd,u'. */
2578 p = 63 - ls0;
2579 len = ls1 - ls0;
2581 operands[2] = GEN_INT (p);
2582 operands[3] = GEN_INT (len);
2583 return "depdi 0,%2,%3,%0";
2586 else
2587 return "and %1,%2,%0";
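/* Return a string to perform a bitwise inclusive-or of operands[1] with
   operands[2], storing the result in operands[0].  The constant must be
   zero or a single contiguous field of ones; anything else aborts.  */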
2590 const char *
2591 output_ior (operands)
2592 rtx *operands;
2594 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2595 int bs0, bs1, p, len;
2597 if (INTVAL (operands[2]) == 0)
2598 return "copy %1,%0";
2600 for (bs0 = 0; bs0 < 32; bs0++)
2601 if ((mask & (1 << bs0)) != 0)
2602 break;
2604 for (bs1 = bs0; bs1 < 32; bs1++)
2605 if ((mask & (1 << bs1)) == 0)
2606 break;
2608 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2609 abort ();
2611 p = 31 - bs0;
2612 len = bs1 - bs0;
2614 operands[2] = GEN_INT (p);
2615 operands[3] = GEN_INT (len);
2616 return "{depi|depwi} -1,%2,%3,%0";
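/* For example (illustrative), the mask 0x7e spans bits 1..6, so
   bs0 = 1, bs1 = 7, and we return "{depi|depwi} -1,30,6,%0", which
   sets exactly those six bits.  */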
2619 /* Return a string to perform a bitwise inclusive-or of operands[1] with
2620 operands[2], storing the result in operands[0]. */
2621 const char *
2622 output_64bit_ior (operands)
2623 rtx *operands;
2625 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2626 int bs0, bs1, p, len;
2628 if (INTVAL (operands[2]) == 0)
2629 return "copy %1,%0";
2631 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2632 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2633 break;
2635 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2636 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2637 break;
2639 if (bs1 != HOST_BITS_PER_WIDE_INT
2640 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2641 abort ();
2643 p = 63 - bs0;
2644 len = bs1 - bs0;
2646 operands[2] = GEN_INT (p);
2647 operands[3] = GEN_INT (len);
2648 return "depdi -1,%2,%3,%0";
2651 /* Target hook for assembling integer objects. This code handles
2652 aligned SI and DI integers specially, since function references must
2653 be preceded by P%. */
2655 static bool
2656 pa_assemble_integer (x, size, aligned_p)
2657 rtx x;
2658 unsigned int size;
2659 int aligned_p;
2661 if (size == UNITS_PER_WORD && aligned_p
2662 && function_label_operand (x, VOIDmode))
2664 fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2665 output_addr_const (asm_out_file, x);
2666 fputc ('\n', asm_out_file);
2667 return true;
2669 return default_assemble_integer (x, size, aligned_p);
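/* For instance (illustrative; `foo' is a hypothetical function symbol),
   a word-aligned reference to foo on the 32-bit port comes out as

	.word	P%foo

   where the P% prefix asks the linker for a procedure label rather
   than a raw code address.  */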
2672 /* Output an ascii string. */
2673 void
2674 output_ascii (file, p, size)
2675 FILE *file;
2676 const char *p;
2677 int size;
2679 int i;
2680 int chars_output;
2681 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2683 /* The HP assembler can only take strings of 256 characters at one
2684 time. This is a limitation on input line length, *not* the
2685 length of the string. Sigh. Even worse, it seems that the
2686 restriction is in number of input characters (see \xnn &
2687 \whatever). So we have to do this very carefully. */
2689 fputs ("\t.STRING \"", file);
2691 chars_output = 0;
2692 for (i = 0; i < size; i += 4)
2694 int co = 0;
2695 int io = 0;
2696 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2698 register unsigned int c = (unsigned char) p[i + io];
2700 if (c == '\"' || c == '\\')
2701 partial_output[co++] = '\\';
2702 if (c >= ' ' && c < 0177)
2703 partial_output[co++] = c;
2704 else
2706 unsigned int hexd;
2707 partial_output[co++] = '\\';
2708 partial_output[co++] = 'x';
2709 hexd = c / 16 - 0 + '0';
2710 if (hexd > '9')
2711 hexd -= '9' - 'a' + 1;
2712 partial_output[co++] = hexd;
2713 hexd = c % 16 - 0 + '0';
2714 if (hexd > '9')
2715 hexd -= '9' - 'a' + 1;
2716 partial_output[co++] = hexd;
2719 if (chars_output + co > 243)
2721 fputs ("\"\n\t.STRING \"", file);
2722 chars_output = 0;
2724 fwrite (partial_output, 1, (size_t) co, file);
2725 chars_output += co;
2726 co = 0;
2728 fputs ("\"\n", file);
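/* Example output (illustrative): for the 3-byte string "hi\n" this
   emits

	.STRING "hi\x0a"

   with the newline rewritten as a hex escape so the HP assembler
   never sees a hard-to-quote character.  */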
2731 /* Try to rewrite floating point comparisons & branches to avoid
2732 useless add,tr insns.
2734 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2735 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2736 first attempt to remove useless add,tr insns. It is zero
2737 for the second pass as reorg sometimes leaves bogus REG_DEAD
2738 notes lying around.
2740 When CHECK_NOTES is zero we can only eliminate add,tr insns
2741 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2742 instructions. */
2743 static void
2744 remove_useless_addtr_insns (insns, check_notes)
2745 rtx insns;
2746 int check_notes;
2748 rtx insn;
2749 static int pass = 0;
2751 /* This is fairly cheap, so always run it when optimizing. */
2752 if (optimize > 0)
2754 int fcmp_count = 0;
2755 int fbranch_count = 0;
2757 /* Walk all the insns in this function looking for fcmp & fbranch
2758 instructions. Keep track of how many of each we find. */
2759 insns = get_insns ();
2760 for (insn = insns; insn; insn = next_insn (insn))
2762 rtx tmp;
2764 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2765 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2766 continue;
2768 tmp = PATTERN (insn);
2770 /* It must be a set. */
2771 if (GET_CODE (tmp) != SET)
2772 continue;
2774 /* If the destination is CCFP, then we've found an fcmp insn. */
2775 tmp = SET_DEST (tmp);
2776 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2778 fcmp_count++;
2779 continue;
2782 tmp = PATTERN (insn);
2783 /* If this is an fbranch instruction, bump the fbranch counter. */
2784 if (GET_CODE (tmp) == SET
2785 && SET_DEST (tmp) == pc_rtx
2786 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2787 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2788 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2789 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2791 fbranch_count++;
2792 continue;
2797 /* Find all floating point compare + branch insns. If possible,
2798 reverse the comparison & the branch to avoid add,tr insns. */
2799 for (insn = insns; insn; insn = next_insn (insn))
2801 rtx tmp, next;
2803 /* Ignore anything that isn't an INSN. */
2804 if (GET_CODE (insn) != INSN)
2805 continue;
2807 tmp = PATTERN (insn);
2809 /* It must be a set. */
2810 if (GET_CODE (tmp) != SET)
2811 continue;
2813 /* The destination must be CCFP, which is register zero. */
2814 tmp = SET_DEST (tmp);
2815 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2816 continue;
2818 /* INSN should be a set of CCFP.
2820 See if the result of this insn is used in a reversed FP
2821 conditional branch. If so, reverse our condition and
2822 the branch. Doing so avoids useless add,tr insns. */
2823 next = next_insn (insn);
2824 while (next)
2826 /* Jumps, calls and labels stop our search. */
2827 if (GET_CODE (next) == JUMP_INSN
2828 || GET_CODE (next) == CALL_INSN
2829 || GET_CODE (next) == CODE_LABEL)
2830 break;
2832 /* As does another fcmp insn. */
2833 if (GET_CODE (next) == INSN
2834 && GET_CODE (PATTERN (next)) == SET
2835 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2836 && REGNO (SET_DEST (PATTERN (next))) == 0)
2837 break;
2839 next = next_insn (next);
2842 /* Is NEXT a branch? */
2843 if (next
2844 && GET_CODE (next) == JUMP_INSN)
2846 rtx pattern = PATTERN (next);
2848 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
2849 and CCFP dies, then reverse our conditional and the branch
2850 to avoid the add,tr. */
2851 if (GET_CODE (pattern) == SET
2852 && SET_DEST (pattern) == pc_rtx
2853 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2854 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2855 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2856 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2857 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2858 && (fcmp_count == fbranch_count
2859 || (check_notes
2860 && find_regno_note (next, REG_DEAD, 0))))
2862 /* Reverse the branch. */
2863 tmp = XEXP (SET_SRC (pattern), 1);
2864 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2865 XEXP (SET_SRC (pattern), 2) = tmp;
2866 INSN_CODE (next) = -1;
2868 /* Reverse our condition. */
2869 tmp = PATTERN (insn);
2870 PUT_CODE (XEXP (tmp, 1),
2871 (reverse_condition_maybe_unordered
2872 (GET_CODE (XEXP (tmp, 1)))));
2878 pass = !pass;
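/* Illustrative effect: when an fcmp's only consumer is a branch written
   with its arms reversed (label in the else arm, the form that costs an
   add,tr), the loop above flips the fcmp's condition code with
   reverse_condition_maybe_unordered and swaps the branch arms, so the
   branch tests the condition directly.  */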
2882 /* You may have trouble believing this, but this is the 32 bit HP-PA
2883 stack layout. Wow.
2885 Offset Contents
2887 Variable arguments (optional; any number may be allocated)
2889 SP-(4*(N+9)) arg word N
2891 SP-56 arg word 5
2892 SP-52 arg word 4
2894 Fixed arguments (must be allocated; may remain unused)
2896 SP-48 arg word 3
2897 SP-44 arg word 2
2898 SP-40 arg word 1
2899 SP-36 arg word 0
2901 Frame Marker
2903 SP-32 External Data Pointer (DP)
2904 SP-28 External sr4
2905 SP-24 External/stub RP (RP')
2906 SP-20 Current RP
2907 SP-16 Static Link
2908 SP-12 Clean up
2909 SP-8 Calling Stub RP (RP'')
2910 SP-4 Previous SP
2912 Top of Frame
2914 SP-0 Stack Pointer (points to next available address)
2918 /* This function saves registers as follows.  Registers marked with ' are
2919 this function's registers (as opposed to the previous function's).
2920 If a frame_pointer isn't needed, r3 is saved as a general register;
2921 the space for the frame pointer is still allocated, though, to keep
2922 things simple.
2925 Top of Frame
2927 SP (FP') Previous FP
2928 SP + 4 Alignment filler (sigh)
2929 SP + 8 Space for locals reserved here.
2933 SP + n All call-saved registers used.
2937 SP + o All call-saved fp registers used.
2941 SP + p (SP') points to next available address.
2945 /* Global variables set by output_function_prologue(). */
2946 /* Size of frame. Need to know this to emit return insns from
2947 leaf procedures. */
2948 static int actual_fsize;
2949 static int local_fsize, save_fregs;
2951 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2952 Handle case where DISP > 8k by using the add_high_const patterns.
2954 Note that in the DISP > 8k case, we will leave the high part of the address
2955 in %r1.  There is code in hppa_expand_{prologue,epilogue} that knows this. */
2957 static void
2958 store_reg (reg, disp, base)
2959 int reg, disp, base;
2961 rtx insn, dest, src, basereg;
2963 src = gen_rtx_REG (word_mode, reg);
2964 basereg = gen_rtx_REG (Pmode, base);
2965 if (VAL_14_BITS_P (disp))
2967 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
2968 insn = emit_move_insn (dest, src);
2970 else
2972 rtx delta = GEN_INT (disp);
2973 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
2974 rtx tmpreg = gen_rtx_REG (Pmode, 1);
2975 emit_move_insn (tmpreg, high);
2976 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
2977 insn = emit_move_insn (dest, src);
2978 if (DO_FRAME_NOTES)
2980 REG_NOTES (insn)
2981 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2982 gen_rtx_SET (VOIDmode,
2983 gen_rtx_MEM (word_mode,
2984 gen_rtx_PLUS (word_mode, basereg,
2985 delta)),
2986 src),
2987 REG_NOTES (insn));
2991 if (DO_FRAME_NOTES)
2992 RTX_FRAME_RELATED_P (insn) = 1;
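/* Sketch (illustrative register and displacement choices):
   store_reg (2, -20, STACK_POINTER_REGNUM) boils down to
   "stw %r2,-20(%r30)".  A displacement such as 0x3000 overflows the
   14-bit field, so the high part goes through %r1 first, roughly

	addil L'0x3000,%r30	; %r1 = %r30 + left part
	stw %r2,R'0x3000(%r1)  */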
2995 /* Emit RTL to store REG at the memory location specified by BASE and then
2996 add MOD to BASE. MOD must be <= 8k. */
2998 static void
2999 store_reg_modify (base, reg, mod)
3000 int base, reg, mod;
3002 rtx insn, basereg, srcreg, delta;
3004 if (! VAL_14_BITS_P (mod))
3005 abort ();
3007 basereg = gen_rtx_REG (Pmode, base);
3008 srcreg = gen_rtx_REG (word_mode, reg);
3009 delta = GEN_INT (mod);
3011 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3012 if (DO_FRAME_NOTES)
3014 RTX_FRAME_RELATED_P (insn) = 1;
3016 /* RTX_FRAME_RELATED_P must be set on each frame related set
3017 in a parallel with more than one element. Don't set
3018 RTX_FRAME_RELATED_P in the first set if reg is temporary
3019 register 1. The effect of this operation is recorded in
3020 the initial copy. */
3021 if (reg != 1)
3023 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3024 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3026 else
3028 /* The first element of a PARALLEL is always processed if it is
3029 a SET. Thus, we need an expression list for this case. */
3030 REG_NOTES (insn)
3031 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3032 gen_rtx_SET (VOIDmode, basereg,
3033 gen_rtx_PLUS (word_mode, basereg, delta)),
3034 REG_NOTES (insn));
3039 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3040 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3041 whether to add a frame note or not.
3043 In the DISP > 8k case, we leave the high part of the address in %r1.
3044 There is code in hppa_expand_{prologue,epilogue} that knows about this. */
3046 static void
3047 set_reg_plus_d (reg, base, disp, note)
3048 int reg, base, disp, note;
3050 rtx insn;
3052 if (VAL_14_BITS_P (disp))
3054 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3055 plus_constant (gen_rtx_REG (Pmode, base), disp));
3057 else
3059 rtx basereg = gen_rtx_REG (Pmode, base);
3060 rtx delta = GEN_INT (disp);
3062 emit_move_insn (gen_rtx_REG (Pmode, 1),
3063 gen_rtx_PLUS (Pmode, basereg,
3064 gen_rtx_HIGH (Pmode, delta)));
3065 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3066 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3067 delta));
3070 if (DO_FRAME_NOTES && note)
3071 RTX_FRAME_RELATED_P (insn) = 1;
3074 int
3075 compute_frame_size (size, fregs_live)
3076 int size;
3077 int *fregs_live;
3079 int i, fsize;
3081 /* Space for frame pointer + filler. If any frame is allocated
3082 we need to add this in because of STARTING_FRAME_OFFSET.
3084 Similar code also appears in hppa_expand_prologue. Change both
3085 of them at the same time. */
3086 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
3088 /* If the current function calls __builtin_eh_return, then we need
3089 to allocate stack space for registers that will hold data for
3090 the exception handler. */
3091 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3093 unsigned int i;
3095 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3096 continue;
3097 fsize += i * UNITS_PER_WORD;
3100 /* Account for space used by the callee general register saves. */
3101 for (i = 18; i >= 3; i--)
3102 if (regs_ever_live[i])
3103 fsize += UNITS_PER_WORD;
3105 /* Round the frame size up to a doubleword boundary. */
3106 fsize = (fsize + 7) & ~7;
3108 /* Account for space used by the callee floating point register saves. */
3109 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3110 if (regs_ever_live[i]
3111 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3113 if (fregs_live)
3114 *fregs_live = 1;
3116 /* We always save both halves of the FP register, so always
3117 increment the frame size by 8 bytes. */
3118 fsize += 8;
3121 /* The various ABIs include space for the outgoing parameters in the
3122 size of the current function's stack frame. */
3123 fsize += current_function_outgoing_args_size;
3125 /* Allocate space for the fixed frame marker. This space must be
3126 allocated for any function that makes calls or otherwise allocates
3127 stack space. */
3128 if (!current_function_is_leaf || fsize)
3129 fsize += TARGET_64BIT ? 16 : 32;
3131 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
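/* Worked example (illustrative, 32-bit assumptions: STARTING_FRAME_OFFSET
   of 8, a 32-byte frame marker and 64-byte final rounding): 40 bytes of
   locals give fsize = 48; two callee GR saves bring that to 56, already
   8-byte aligned; a non-leaf function adds the 32-byte marker for 88,
   which the final rounding pushes to 128.  */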
3134 /* Generate the assembly code for function entry. FILE is a stdio
3135 stream to output the code to. SIZE is an int: how many units of
3136 temporary storage to allocate.
3138 Refer to the array `regs_ever_live' to determine which registers to
3139 save; `regs_ever_live[I]' is nonzero if register number I is ever
3140 used in the function. This function is responsible for knowing
3141 which registers should not be saved even if used. */
3143 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3144 of memory. If any fpu reg is used in the function, we allocate
3145 such a block here, at the bottom of the frame, just in case it's needed.
3147 If this function is a leaf procedure, then we may choose not
3148 to do a "save" insn. The decision about whether or not
3149 to do this is made in regclass.c. */
3151 void
3152 pa_output_function_prologue (file, size)
3153 FILE *file;
3154 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3156 /* The function's label and associated .PROC must never be
3157 separated and must be output *after* any profiling declarations
3158 to avoid changing spaces/subspaces within a procedure. */
3159 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3160 fputs ("\t.PROC\n", file);
3162 /* hppa_expand_prologue does the dirty work now. We just need
3163 to output the assembler directives which denote the start
3164 of a function. */
3165 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3166 if (regs_ever_live[2])
3167 fputs (",CALLS,SAVE_RP", file);
3168 else
3169 fputs (",NO_CALLS", file);
3171 if (frame_pointer_needed)
3172 fputs (",SAVE_SP", file);
3174 /* Pass on information about the number of callee register saves
3175 performed in the prologue.
3177 The compiler is supposed to pass the highest register number
3178 saved; the assembler then has to adjust that number before
3179 entering it into the unwind descriptor (to account for any
3180 caller-saved registers with lower register numbers than the
3181 first callee-saved register). */
3182 if (gr_saved)
3183 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3185 if (fr_saved)
3186 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3188 fputs ("\n\t.ENTRY\n", file);
3190 /* If we're using GAS and SOM, and not using the portable runtime model,
3191 then we don't need to accumulate the total number of code bytes. */
3192 if ((TARGET_GAS && TARGET_SOM && ! TARGET_PORTABLE_RUNTIME)
3193 /* FIXME: we can't handle long calls for TARGET_64BIT. */
3194 || TARGET_64BIT)
3195 total_code_bytes = 0;
3196 else if (INSN_ADDRESSES_SET_P ())
3198 unsigned int old_total = total_code_bytes;
3200 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_nonnote_insn ()));
3201 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3203 /* Be prepared to handle overflows. */
3204 if (old_total > total_code_bytes)
3205 total_code_bytes = -1;
3207 else
3208 total_code_bytes = -1;
3210 remove_useless_addtr_insns (get_insns (), 0);
3213 void
3214 hppa_expand_prologue ()
3216 int size = get_frame_size ();
3217 int merge_sp_adjust_with_store = 0;
3218 int i, offset;
3219 rtx insn, tmpreg;
3221 gr_saved = 0;
3222 fr_saved = 0;
3223 save_fregs = 0;
3225 /* Allocate space for frame pointer + filler. If any frame is allocated
3226 we need to add this in because of STARTING_FRAME_OFFSET.
3228 Similar code also appears in compute_frame_size. Change both
3229 of them at the same time. */
3230 local_fsize = size + (size || frame_pointer_needed
3231 ? STARTING_FRAME_OFFSET : 0);
3233 actual_fsize = compute_frame_size (size, &save_fregs);
3235 /* Compute a few things we will use often. */
3236 tmpreg = gen_rtx_REG (word_mode, 1);
3238 /* Save RP first. The calling conventions manual states RP will
3239 always be stored into the caller's frame at sp - 20 or sp - 16
3240 depending on which ABI is in use. */
3241 if (regs_ever_live[2] || current_function_calls_eh_return)
3242 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3244 /* Allocate the local frame and set up the frame pointer if needed. */
3245 if (actual_fsize != 0)
3247 if (frame_pointer_needed)
3249 /* Copy the old frame pointer temporarily into %r1. Set up the
3250 new stack pointer, then store away the saved old frame pointer
3251 into the stack at sp and at the same time update the stack
3252 pointer by actual_fsize bytes.  There are two versions: the first
3253 handles small (<8k) frames, the second handles large (>=8k)
3254 frames. */
3255 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3256 if (DO_FRAME_NOTES)
3258 /* We need to record the frame pointer save here since the
3259 new frame pointer is set in the following insn. */
3260 RTX_FRAME_RELATED_P (insn) = 1;
3261 REG_NOTES (insn)
3262 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3263 gen_rtx_SET (VOIDmode,
3264 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3265 frame_pointer_rtx),
3266 REG_NOTES (insn));
3269 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3270 if (DO_FRAME_NOTES)
3271 RTX_FRAME_RELATED_P (insn) = 1;
3273 if (VAL_14_BITS_P (actual_fsize))
3274 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3275 else
3277 /* It is incorrect to store the saved frame pointer at *sp,
3278 then increment sp (writes beyond the current stack boundary).
3280 So instead use stwm to store at *sp and post-increment the
3281 stack pointer as an atomic operation. Then increment sp to
3282 finish allocating the new frame. */
3283 int adjust1 = 8192 - 64;
3284 int adjust2 = actual_fsize - adjust1;
3286 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3287 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3288 adjust2, 1);
3291 /* Prevent register spills from being scheduled before the
3292 stack pointer is raised. Necessary as we will be storing
3293 registers using the frame pointer as a base register, and
3294 we happen to set fp before raising sp. */
3295 emit_insn (gen_blockage ());
3297 /* No frame pointer needed. */
3298 else
3300 /* In some cases we can perform the first callee register save
3301 and allocate the stack frame at the same time.  If so, just
3302 make a note of it and defer allocating the frame until saving
3303 the callee registers. */
3304 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3305 merge_sp_adjust_with_store = 1;
3306 /* Cannot optimize.  Adjust the stack frame by actual_fsize
3307 bytes. */
3308 else
3309 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3310 actual_fsize, 1);
3314 /* Normal register save.
3316 Do not save the frame pointer in the frame_pointer_needed case. It
3317 was done earlier. */
3318 if (frame_pointer_needed)
3320 offset = local_fsize;
3322 /* Saving the EH return data registers in the frame is the simplest
3323 way to get the frame unwind information emitted. We put them
3324 just before the general registers. */
3325 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3327 unsigned int i, regno;
3329 for (i = 0; ; ++i)
3331 regno = EH_RETURN_DATA_REGNO (i);
3332 if (regno == INVALID_REGNUM)
3333 break;
3335 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3336 offset += UNITS_PER_WORD;
3340 for (i = 18; i >= 4; i--)
3341 if (regs_ever_live[i] && ! call_used_regs[i])
3343 store_reg (i, offset, FRAME_POINTER_REGNUM);
3344 offset += UNITS_PER_WORD;
3345 gr_saved++;
3347 /* Account for %r3 which is saved in a special place. */
3348 gr_saved++;
3350 /* No frame pointer needed. */
3351 else
3353 offset = local_fsize - actual_fsize;
3355 /* Saving the EH return data registers in the frame is the simplest
3356 way to get the frame unwind information emitted. */
3357 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3359 unsigned int i, regno;
3361 for (i = 0; ; ++i)
3363 regno = EH_RETURN_DATA_REGNO (i);
3364 if (regno == INVALID_REGNUM)
3365 break;
3367 /* If merge_sp_adjust_with_store is nonzero, then we can
3368 optimize the first save. */
3369 if (merge_sp_adjust_with_store)
3371 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3372 merge_sp_adjust_with_store = 0;
3374 else
3375 store_reg (regno, offset, STACK_POINTER_REGNUM);
3376 offset += UNITS_PER_WORD;
3380 for (i = 18; i >= 3; i--)
3381 if (regs_ever_live[i] && ! call_used_regs[i])
3383 /* If merge_sp_adjust_with_store is nonzero, then we can
3384 optimize the first GR save. */
3385 if (merge_sp_adjust_with_store)
3387 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3388 merge_sp_adjust_with_store = 0;
3390 else
3391 store_reg (i, offset, STACK_POINTER_REGNUM);
3392 offset += UNITS_PER_WORD;
3393 gr_saved++;
3396 /* If we wanted to merge the SP adjustment with a GR save, but we never
3397 did any GR saves, then just emit the adjustment here. */
3398 if (merge_sp_adjust_with_store)
3399 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3400 actual_fsize, 1);
3403 /* The hppa calling conventions say that %r19, the pic offset
3404 register, is saved at sp - 32 (in this function's frame)
3405 when generating PIC code. FIXME: What is the correct thing
3406 to do for functions which make no calls and allocate no
3407 frame? Do we need to allocate a frame, or can we just omit
3408 the save? For now we'll just omit the save. */
3409 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3410 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3412 /* Align pointer properly (doubleword boundary). */
3413 offset = (offset + 7) & ~7;
3415 /* Floating point register store. */
3416 if (save_fregs)
3418 rtx base;
3420 /* First get the frame or stack pointer to the start of the FP register
3421 save area. */
3422 if (frame_pointer_needed)
3424 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3425 base = frame_pointer_rtx;
3427 else
3429 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3430 base = stack_pointer_rtx;
3433 /* Now actually save the FP registers. */
3434 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3436 if (regs_ever_live[i]
3437 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3439 rtx addr, insn, reg;
3440 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3441 reg = gen_rtx_REG (DFmode, i);
3442 insn = emit_move_insn (addr, reg);
3443 if (DO_FRAME_NOTES)
3445 RTX_FRAME_RELATED_P (insn) = 1;
3446 if (TARGET_64BIT)
3448 rtx mem = gen_rtx_MEM (DFmode,
3449 plus_constant (base, offset));
3450 REG_NOTES (insn)
3451 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3452 gen_rtx_SET (VOIDmode, mem, reg),
3453 REG_NOTES (insn));
3455 else
3457 rtx meml = gen_rtx_MEM (SFmode,
3458 plus_constant (base, offset));
3459 rtx memr = gen_rtx_MEM (SFmode,
3460 plus_constant (base, offset + 4));
3461 rtx regl = gen_rtx_REG (SFmode, i);
3462 rtx regr = gen_rtx_REG (SFmode, i + 1);
3463 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3464 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3465 rtvec vec;
3467 RTX_FRAME_RELATED_P (setl) = 1;
3468 RTX_FRAME_RELATED_P (setr) = 1;
3469 vec = gen_rtvec (2, setl, setr);
3470 REG_NOTES (insn)
3471 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3472 gen_rtx_SEQUENCE (VOIDmode, vec),
3473 REG_NOTES (insn));
3476 offset += GET_MODE_SIZE (DFmode);
3477 fr_saved++;
3482 /* FIXME: expand_call and expand_millicode_call need to be fixed to
3483 prevent insns with frame notes being scheduled in the delay slot
3484 of calls. This causes problems because the dwarf2 output code
3485 processes the insn list serially. For now, limit the migration
3486 of prologue insns with a blockage. */
3487 if (DO_FRAME_NOTES)
3488 emit_insn (gen_blockage ());
3491 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3492 Handle case where DISP > 8k by using the add_high_const patterns. */
3494 static void
3495 load_reg (reg, disp, base)
3496 int reg, disp, base;
3498 rtx src, dest, basereg;
3500 dest = gen_rtx_REG (word_mode, reg);
3501 basereg = gen_rtx_REG (Pmode, base);
3502 if (VAL_14_BITS_P (disp))
3504 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3505 emit_move_insn (dest, src);
3507 else
3509 rtx delta = GEN_INT (disp);
3510 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3511 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3512 emit_move_insn (tmpreg, high);
3513 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3514 emit_move_insn (dest, src);
3518 /* This function generates the assembly code for function exit.
3519 Args are as for output_function_prologue ().
3521 The function epilogue should not depend on the current stack
3522 pointer! It should use the frame pointer only. This is mandatory
3523 because of alloca; we also take advantage of it to omit stack
3524 adjustments before returning. */
3526 static void
3527 pa_output_function_epilogue (file, size)
3528 FILE *file;
3529 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3531 rtx insn = get_last_insn ();
3533 /* hppa_expand_epilogue does the dirty work now. We just need
3534 to output the assembler directives which denote the end
3535 of a function.
3537 To make debuggers happy, emit a nop if the epilogue was completely
3538 eliminated due to a volatile call as the last insn in the
3539 current function. That way the return address (in %r2) will
3540 always point to a valid instruction in the current function. */
3542 /* Get the last real insn. */
3543 if (GET_CODE (insn) == NOTE)
3544 insn = prev_real_insn (insn);
3546 /* If it is a sequence, then look inside. */
3547 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3548 insn = XVECEXP (PATTERN (insn), 0, 0);
3550 /* If insn is a CALL_INSN, then it must be a call to a volatile
3551 function (otherwise there would be epilogue insns). */
3552 if (insn && GET_CODE (insn) == CALL_INSN)
3553 fputs ("\tnop\n", file);
3555 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3558 void
3559 hppa_expand_epilogue ()
3561 rtx tmpreg;
3562 int offset, i;
3563 int merge_sp_adjust_with_load = 0;
3564 int ret_off = 0;
3566 /* We will use this often. */
3567 tmpreg = gen_rtx_REG (word_mode, 1);
3569 /* Try to restore RP early to avoid load/use interlocks when
3570 RP gets used in the return (bv) instruction. This appears to still
3571 be necessary even when we schedule the prologue and epilogue. */
3572 if (regs_ever_live [2] || current_function_calls_eh_return)
3574 ret_off = TARGET_64BIT ? -16 : -20;
3575 if (frame_pointer_needed)
3577 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3578 ret_off = 0;
3580 else
3582 /* No frame pointer, and stack is smaller than 8k. */
3583 if (VAL_14_BITS_P (ret_off - actual_fsize))
3585 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3586 ret_off = 0;
3591 /* General register restores. */
3592 if (frame_pointer_needed)
3594 offset = local_fsize;
3596 /* If the current function calls __builtin_eh_return, then we need
3597 to restore the saved EH data registers. */
3598 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3600 unsigned int i, regno;
3602 for (i = 0; ; ++i)
3604 regno = EH_RETURN_DATA_REGNO (i);
3605 if (regno == INVALID_REGNUM)
3606 break;
3608 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3609 offset += UNITS_PER_WORD;
3613 for (i = 18; i >= 4; i--)
3614 if (regs_ever_live[i] && ! call_used_regs[i])
3616 load_reg (i, offset, FRAME_POINTER_REGNUM);
3617 offset += UNITS_PER_WORD;
3620 else
3622 offset = local_fsize - actual_fsize;
3624 /* If the current function calls __builtin_eh_return, then we need
3625 to restore the saved EH data registers. */
3626 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3628 unsigned int i, regno;
3630 for (i = 0; ; ++i)
3632 regno = EH_RETURN_DATA_REGNO (i);
3633 if (regno == INVALID_REGNUM)
3634 break;
3636 /* Only for the first load.
3637 merge_sp_adjust_with_load holds the register load
3638 with which we will merge the sp adjustment. */
3639 if (merge_sp_adjust_with_load == 0
3640 && local_fsize == 0
3641 && VAL_14_BITS_P (-actual_fsize))
3642 merge_sp_adjust_with_load = regno;
3643 else
3644 load_reg (regno, offset, STACK_POINTER_REGNUM);
3645 offset += UNITS_PER_WORD;
3649 for (i = 18; i >= 3; i--)
3651 if (regs_ever_live[i] && ! call_used_regs[i])
3653 /* Only for the first load.
3654 merge_sp_adjust_with_load holds the register load
3655 with which we will merge the sp adjustment. */
3656 if (merge_sp_adjust_with_load == 0
3657 && local_fsize == 0
3658 && VAL_14_BITS_P (-actual_fsize))
3659 merge_sp_adjust_with_load = i;
3660 else
3661 load_reg (i, offset, STACK_POINTER_REGNUM);
3662 offset += UNITS_PER_WORD;
3667 /* Align pointer properly (doubleword boundary). */
3668 offset = (offset + 7) & ~7;
3670 /* FP register restores. */
3671 if (save_fregs)
3673 /* Adjust the register to index off of. */
3674 if (frame_pointer_needed)
3675 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3676 else
3677 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3679 /* Actually do the restores now. */
3680 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3681 if (regs_ever_live[i]
3682 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3684 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3685 rtx dest = gen_rtx_REG (DFmode, i);
3686 emit_move_insn (dest, src);
3690 /* Emit a blockage insn here to keep these insns from being moved to
3691 an earlier spot in the epilogue, or into the main instruction stream.
3693 This is necessary as we must not cut the stack back before all the
3694 restores are finished. */
3695 emit_insn (gen_blockage ());
3697 /* Reset stack pointer (and possibly frame pointer). The stack
3698 pointer is initially set to fp + 64 to avoid a race condition. */
3699 if (frame_pointer_needed)
3701 rtx delta = GEN_INT (-64);
3703 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
3704 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3706 /* If we were deferring a callee register restore, do it now. */
3707 else if (merge_sp_adjust_with_load)
3709 rtx delta = GEN_INT (-actual_fsize);
3710 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3712 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3714 else if (actual_fsize != 0)
3715 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3716 - actual_fsize, 0);
3718 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3719 frame greater than 8k), do so now. */
3720 if (ret_off != 0)
3721 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3723 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3725 rtx sa = EH_RETURN_STACKADJ_RTX;
3727 emit_insn (gen_blockage ());
3728 emit_insn (TARGET_64BIT
3729 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3730 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
3734 rtx
3735 hppa_pic_save_rtx ()
3737 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3740 void
3741 hppa_profile_hook (label_no)
3742 int label_no;
3744 rtx begin_label_rtx, call_insn;
3745 char begin_label_name[16];
3747 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3748 label_no);
3749 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3751 if (TARGET_64BIT)
3752 emit_move_insn (arg_pointer_rtx,
3753 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3754 GEN_INT (64)));
3756 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3758 #ifndef NO_PROFILE_COUNTERS
3760 rtx count_label_rtx, addr, r24;
3761 char count_label_name[16];
3763 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3764 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
3766 addr = force_reg (Pmode, count_label_rtx);
3767 r24 = gen_rtx_REG (Pmode, 24);
3768 emit_move_insn (r24, addr);
3770 /* %r25 is set from within the output pattern. */
3771 call_insn =
3772 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3773 GEN_INT (TARGET_64BIT ? 24 : 12),
3774 begin_label_rtx));
3776 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3778 #else
3779 /* %r25 is set from within the output pattern. */
3780 call_insn =
3781 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3782 GEN_INT (TARGET_64BIT ? 16 : 8),
3783 begin_label_rtx));
3784 #endif
3786 /* Indicate the _mcount call cannot throw, nor will it execute a
3787 non-local goto. */
3788 REG_NOTES (call_insn)
3789 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3791 if (flag_pic)
3793 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3794 if (TARGET_64BIT)
3795 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3797 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3801 /* Fetch the return address for the frame COUNT steps up from
3802 the current frame, after the prologue. FRAMEADDR is the
3803 frame pointer of the COUNT frame.
3805 We want to ignore any export stub remnants here. To handle this,
3806 we examine the code at the return address, and if it is an export
3807 stub, we return a memory rtx for the stub return address stored
3808 at frame-24.
3810 The value returned is used in two different ways:
3812 1. To find a function's caller.
3814 2. To change the return address for a function.
3816 This function handles most instances of case 1; however, it will
3817 fail if there are two levels of stubs to execute on the return
3818 path. The only way I believe that can happen is if the return value
3819 needs a parameter relocation, which never happens for C code.
3821 This function handles most instances of case 2; however, it will
3822 fail if we did not originally have stub code on the return path
3823 but will need stub code on the new return path. This can happen if
3824 the caller & callee are both in the main program, but the new
3825 return location is in a shared library. */
3827 rtx
3828 return_addr_rtx (count, frameaddr)
3829 int count;
3830 rtx frameaddr;
3832 rtx label;
3833 rtx rp;
3834 rtx saved_rp;
3835 rtx ins;
3837 if (count != 0)
3838 return NULL_RTX;
3840 rp = get_hard_reg_initial_val (Pmode, 2);
3842 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3843 return rp;
3845 saved_rp = gen_reg_rtx (Pmode);
3846 emit_move_insn (saved_rp, rp);
3848 /* Get pointer to the instruction stream. We have to mask out the
3849 privilege level from the two low order bits of the return address
3850 pointer here so that ins will point to the start of the first
3851 instruction that would have been executed if we returned. */
3852 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3853 label = gen_label_rtx ();
3855 /* Check the instruction stream at the normal return address for the
3856 export stub:
3858 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3859 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3860 0x00011820 | stub+16: mtsp r1,sr0
3861 0xe0400002 | stub+20: be,n 0(sr0,rp)
3863 If it is an export stub, then our return address is really in
3864 -24[frameaddr]. */
3866 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3867 NULL_RTX, SImode, 1);
3868 emit_jump_insn (gen_bne (label));
3870 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3871 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3872 emit_jump_insn (gen_bne (label));
3874 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3875 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
3876 emit_jump_insn (gen_bne (label));
3878 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3879 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
3881 /* If there is no export stub then just use the value saved from
3882 the return pointer register. */
3884 emit_jump_insn (gen_bne (label));
3886 /* Here we know that our return address points to an export
3887 stub. We don't want to return the address of the export stub,
3888 but rather the return address of the export stub. That return
3889 address is stored at -24[frameaddr]. */
3891 emit_move_insn (saved_rp,
3892 gen_rtx_MEM (Pmode,
3893 memory_address (Pmode,
3894 plus_constant (frameaddr,
3895 -24))));
3897 emit_label (label);
3898 return saved_rp;
3901 /* This is only valid once reload has completed because it depends on
3902 knowing exactly how much (if any) frame there is and...
3904 It's only valid if there is no frame marker to de-allocate and...
3906 It's only valid if %r2 hasn't been saved into the caller's frame
3907 (we're not profiling and %r2 isn't live anywhere). */
3909 hppa_can_use_return_insn_p ()
3911 return (reload_completed
3912 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3913 && ! regs_ever_live[2]
3914 && ! frame_pointer_needed);
3917 void
3918 emit_bcond_fp (code, operand0)
3919 enum rtx_code code;
3920 rtx operand0;
3922 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3923 gen_rtx_IF_THEN_ELSE (VOIDmode,
3924 gen_rtx_fmt_ee (code,
3925 VOIDmode,
3926 gen_rtx_REG (CCFPmode, 0),
3927 const0_rtx),
3928 gen_rtx_LABEL_REF (VOIDmode, operand0),
3929 pc_rtx)));
3934 gen_cmp_fp (code, operand0, operand1)
3935 enum rtx_code code;
3936 rtx operand0, operand1;
3938 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3939 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3942 /* Adjust the cost of a scheduling dependency. Return the new cost of
3943 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
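/* A sketch of the effect (illustrative): if INSN is an fpload whose
   target register is a source of a preceding fpalu DEP_INSN (an anti
   dependency), the code below returns insn_default_latency (dep_insn)
   minus 1, letting the load issue one cycle before the arithmetic
   operation completes.  */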
3945 static int
3946 pa_adjust_cost (insn, link, dep_insn, cost)
3947 rtx insn;
3948 rtx link;
3949 rtx dep_insn;
3950 int cost;
3952 enum attr_type attr_type;
3954 /* Don't adjust costs for a pa8000 chip; also do not adjust any
3955 true dependencies as they are described with bypasses now. */
3956 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
3957 return cost;
3959 if (! recog_memoized (insn))
3960 return 0;
3962 attr_type = get_attr_type (insn);
3964 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3966 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3967 cycles later. */
3969 if (attr_type == TYPE_FPLOAD)
3971 rtx pat = PATTERN (insn);
3972 rtx dep_pat = PATTERN (dep_insn);
3973 if (GET_CODE (pat) == PARALLEL)
3975 /* This happens for the fldXs,mb patterns. */
3976 pat = XVECEXP (pat, 0, 0);
3978 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3979 /* If this happens, we have to extend this to schedule
3980 optimally. Return 0 for now. */
3981 return 0;
3983 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3985 if (! recog_memoized (dep_insn))
3986 return 0;
3987 switch (get_attr_type (dep_insn))
3989 case TYPE_FPALU:
3990 case TYPE_FPMULSGL:
3991 case TYPE_FPMULDBL:
3992 case TYPE_FPDIVSGL:
3993 case TYPE_FPDIVDBL:
3994 case TYPE_FPSQRTSGL:
3995 case TYPE_FPSQRTDBL:
3996 /* A fpload can't be issued until one cycle before a
3997 preceding arithmetic operation has finished if
3998 the target of the fpload is any of the sources
3999 (or destination) of the arithmetic operation. */
4000 return insn_default_latency (dep_insn) - 1;
4002 default:
4003 return 0;
4007 else if (attr_type == TYPE_FPALU)
4009 rtx pat = PATTERN (insn);
4010 rtx dep_pat = PATTERN (dep_insn);
4011 if (GET_CODE (pat) == PARALLEL)
4013 /* This happens for the fldXs,mb patterns. */
4014 pat = XVECEXP (pat, 0, 0);
4016 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4017 /* If this happens, we have to extend this to schedule
4018 optimally. Return 0 for now. */
4019 return 0;
4021 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4023 if (! recog_memoized (dep_insn))
4024 return 0;
4025 switch (get_attr_type (dep_insn))
4027 case TYPE_FPDIVSGL:
4028 case TYPE_FPDIVDBL:
4029 case TYPE_FPSQRTSGL:
4030 case TYPE_FPSQRTDBL:
4031 /* An ALU flop can't be issued until two cycles before a
4032 preceding divide or sqrt operation has finished if
4033 the target of the ALU flop is any of the sources
4034 (or destination) of the divide or sqrt operation. */
4035 return insn_default_latency (dep_insn) - 2;
4037 default:
4038 return 0;
4043 /* For other anti dependencies, the cost is 0. */
4044 return 0;
4046 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4048 /* Output dependency; DEP_INSN writes a register that INSN writes some
4049 cycles later. */
4050 if (attr_type == TYPE_FPLOAD)
4052 rtx pat = PATTERN (insn);
4053 rtx dep_pat = PATTERN (dep_insn);
4054 if (GET_CODE (pat) == PARALLEL)
4056 /* This happens for the fldXs,mb patterns. */
4057 pat = XVECEXP (pat, 0, 0);
4059 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4060 /* If this happens, we have to extend this to schedule
4061 optimally. Return 0 for now. */
4062 return 0;
4064 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4066 if (! recog_memoized (dep_insn))
4067 return 0;
4068 switch (get_attr_type (dep_insn))
4070 case TYPE_FPALU:
4071 case TYPE_FPMULSGL:
4072 case TYPE_FPMULDBL:
4073 case TYPE_FPDIVSGL:
4074 case TYPE_FPDIVDBL:
4075 case TYPE_FPSQRTSGL:
4076 case TYPE_FPSQRTDBL:
4077 /* A fpload can't be issued until one cycle before a
4078 preceding arithmetic operation has finished if
4079 the target of the fpload is the destination of the
4080 arithmetic operation.
4082 Exception: For PA7100LC, PA7200 and PA7300, the cost
4083 is 3 cycles, unless they bundle together. We also
4084 pay the penalty if the second insn is a fpload. */
4085 return insn_default_latency (dep_insn) - 1;
4087 default:
4088 return 0;
4092 else if (attr_type == TYPE_FPALU)
4094 rtx pat = PATTERN (insn);
4095 rtx dep_pat = PATTERN (dep_insn);
4096 if (GET_CODE (pat) == PARALLEL)
4098 /* This happens for the fldXs,mb patterns. */
4099 pat = XVECEXP (pat, 0, 0);
4101 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4102 /* If this happens, we have to extend this to schedule
4103 optimally. Return 0 for now. */
4104 return 0;
4106 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4108 if (! recog_memoized (dep_insn))
4109 return 0;
4110 switch (get_attr_type (dep_insn))
4112 case TYPE_FPDIVSGL:
4113 case TYPE_FPDIVDBL:
4114 case TYPE_FPSQRTSGL:
4115 case TYPE_FPSQRTDBL:
4116 /* An ALU flop can't be issued until two cycles before a
4117 preceding divide or sqrt operation has finished if
4118 the target of the ALU flop is also the target of
4119 the divide or sqrt operation. */
4120 return insn_default_latency (dep_insn) - 2;
4122 default:
4123 return 0;
4128 /* For other output dependencies, the cost is 0. */
4129 return 0;
4131 else
4132 abort ();
4135 /* Adjust scheduling priorities. We use this to try and keep addil
4136 and the next use of %r1 close together. */
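/* Motivating sketch (the assembly is illustrative): a symbolic store
   typically expands to a pair such as

       addil L'sym-$global$,%r27
       stw %r26,R'sym-$global$(%r1)

   The addil writes %r1, so lowering the priority of the LO_SUM user
   below discourages the scheduler from separating the two insns.  */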
4137 static int
4138 pa_adjust_priority (insn, priority)
4139 rtx insn;
4140 int priority;
4142 rtx set = single_set (insn);
4143 rtx src, dest;
4144 if (set)
4146 src = SET_SRC (set);
4147 dest = SET_DEST (set);
4148 if (GET_CODE (src) == LO_SUM
4149 && symbolic_operand (XEXP (src, 1), VOIDmode)
4150 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4151 priority >>= 3;
4153 else if (GET_CODE (src) == MEM
4154 && GET_CODE (XEXP (src, 0)) == LO_SUM
4155 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4156 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4157 priority >>= 1;
4159 else if (GET_CODE (dest) == MEM
4160 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4161 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4162 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4163 priority >>= 3;
4165 return priority;
4168 /* The 700 can only issue a single insn at a time.
4169 The 7XXX processors can issue two insns at a time.
4170 The 8000 can issue 4 insns at a time. */
4171 static int
4172 pa_issue_rate ()
4174 switch (pa_cpu)
4176 case PROCESSOR_700: return 1;
4177 case PROCESSOR_7100: return 2;
4178 case PROCESSOR_7100LC: return 2;
4179 case PROCESSOR_7200: return 2;
4180 case PROCESSOR_7300: return 2;
4181 case PROCESSOR_8000: return 4;
4183 default:
4184 abort ();
4190 /* Return any length adjustment needed by INSN which already has its length
4191 computed as LENGTH. Return zero if no adjustment is necessary.
4193 For the PA: function calls, millicode calls, and backwards short
4194 conditional branches with unfilled delay slots need an adjustment by +1
4195 (to account for the NOP which will be inserted into the instruction stream).
4197 Also compute the length of an inline block move here as it is too
4198 complicated to express as a length attribute in pa.md. */
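/* For example, a 4-byte backwards conditional branch whose delay slot
   stays unfilled really occupies 8 bytes once the NOP is emitted; the
   `return 4' cases below account for that extra instruction.  */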
4200 pa_adjust_insn_length (insn, length)
4201 rtx insn;
4202 int length;
4204 rtx pat = PATTERN (insn);
4206 /* Call insns which are *not* indirect and have unfilled delay slots. */
4207 if (GET_CODE (insn) == CALL_INSN)
4210 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4211 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
4212 return 4;
4213 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4214 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4215 == SYMBOL_REF)
4216 return 4;
4217 else
4218 return 0;
4220 /* Jumps inside switch tables which have unfilled delay slots
4221 also need adjustment. */
4222 else if (GET_CODE (insn) == JUMP_INSN
4223 && simplejump_p (insn)
4224 && GET_MODE (insn) == SImode)
4225 return 4;
4226 /* Millicode insn with an unfilled delay slot. */
4227 else if (GET_CODE (insn) == INSN
4228 && GET_CODE (pat) != SEQUENCE
4229 && GET_CODE (pat) != USE
4230 && GET_CODE (pat) != CLOBBER
4231 && get_attr_type (insn) == TYPE_MILLI)
4232 return 4;
4233 /* Block move pattern. */
4234 else if (GET_CODE (insn) == INSN
4235 && GET_CODE (pat) == PARALLEL
4236 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4237 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4238 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4239 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4240 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4241 return compute_movstrsi_length (insn) - 4;
4242 /* Conditional branch with an unfilled delay slot. */
4243 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4245 /* Adjust a short backwards conditional with an unfilled delay slot. */
4246 if (GET_CODE (pat) == SET
4247 && length == 4
4248 && ! forward_branch_p (insn))
4249 return 4;
4250 else if (GET_CODE (pat) == PARALLEL
4251 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4252 && length == 4)
4253 return 4;
4254 /* Adjust dbra insn with short backwards conditional branch with
4255 unfilled delay slot -- only for case where counter is in a
4256 general register. */
4257 else if (GET_CODE (pat) == PARALLEL
4258 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4259 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4260 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4261 && length == 4
4262 && ! forward_branch_p (insn))
4263 return 4;
4264 else
4265 return 0;
4267 return 0;
4270 /* Print operand X (an rtx) in assembler syntax to file FILE.
4271 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4272 For `%' followed by punctuation, CODE is the punctuation and X is null. */
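/* Illustrative uses (values are made up): with X a (reg:SI 26), "%r0"
   in a template prints "%r26", while with X == const0_rtx it prints
   "%r0".  With X an NE comparison, "%N0" prints "=", the negation of
   the "<>" that "%C0" would print.  */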
4274 void
4275 print_operand (file, x, code)
4276 FILE *file;
4277 rtx x;
4278 int code;
4280 switch (code)
4282 case '#':
4283 /* Output a 'nop' if there's nothing for the delay slot. */
4284 if (dbr_sequence_length () == 0)
4285 fputs ("\n\tnop", file);
4286 return;
4287 case '*':
4288 /* Output a nullification completer if there's nothing for the */
4289 /* delay slot or nullification is requested. */
4290 if (dbr_sequence_length () == 0 ||
4291 (final_sequence &&
4292 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4293 fputs (",n", file);
4294 return;
4295 case 'R':
4296 /* Print out the second register name of a register pair.
4297 I.e., R (6) => 7. */
4298 fputs (reg_names[REGNO (x) + 1], file);
4299 return;
4300 case 'r':
4301 /* A register or zero. */
4302 if (x == const0_rtx
4303 || (x == CONST0_RTX (DFmode))
4304 || (x == CONST0_RTX (SFmode)))
4306 fputs ("%r0", file);
4307 return;
4309 else
4310 break;
4311 case 'f':
4312 /* A register or zero (floating point). */
4313 if (x == const0_rtx
4314 || (x == CONST0_RTX (DFmode))
4315 || (x == CONST0_RTX (SFmode)))
4317 fputs ("%fr0", file);
4318 return;
4320 else
4321 break;
4322 case 'A':
4324 rtx xoperands[2];
4326 xoperands[0] = XEXP (XEXP (x, 0), 0);
4327 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4328 output_global_address (file, xoperands[1], 0);
4329 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4330 return;
4333 case 'C': /* Plain (C)ondition */
4334 case 'X':
4335 switch (GET_CODE (x))
4337 case EQ:
4338 fputs ("=", file); break;
4339 case NE:
4340 fputs ("<>", file); break;
4341 case GT:
4342 fputs (">", file); break;
4343 case GE:
4344 fputs (">=", file); break;
4345 case GEU:
4346 fputs (">>=", file); break;
4347 case GTU:
4348 fputs (">>", file); break;
4349 case LT:
4350 fputs ("<", file); break;
4351 case LE:
4352 fputs ("<=", file); break;
4353 case LEU:
4354 fputs ("<<=", file); break;
4355 case LTU:
4356 fputs ("<<", file); break;
4357 default:
4358 abort ();
4360 return;
4361 case 'N': /* Condition, (N)egated */
4362 switch (GET_CODE (x))
4364 case EQ:
4365 fputs ("<>", file); break;
4366 case NE:
4367 fputs ("=", file); break;
4368 case GT:
4369 fputs ("<=", file); break;
4370 case GE:
4371 fputs ("<", file); break;
4372 case GEU:
4373 fputs ("<<", file); break;
4374 case GTU:
4375 fputs ("<<=", file); break;
4376 case LT:
4377 fputs (">=", file); break;
4378 case LE:
4379 fputs (">", file); break;
4380 case LEU:
4381 fputs (">>", file); break;
4382 case LTU:
4383 fputs (">>=", file); break;
4384 default:
4385 abort ();
4387 return;
4388 /* For floating point comparisons. Note that the output
4389 predicates are the complement of the desired mode. */
4390 case 'Y':
4391 switch (GET_CODE (x))
4393 case EQ:
4394 fputs ("!=", file); break;
4395 case NE:
4396 fputs ("=", file); break;
4397 case GT:
4398 fputs ("!>", file); break;
4399 case GE:
4400 fputs ("!>=", file); break;
4401 case LT:
4402 fputs ("!<", file); break;
4403 case LE:
4404 fputs ("!<=", file); break;
4405 case LTGT:
4406 fputs ("!<>", file); break;
4407 case UNLE:
4408 fputs (">", file); break;
4409 case UNLT:
4410 fputs (">=", file); break;
4411 case UNGE:
4412 fputs ("<", file); break;
4413 case UNGT:
4414 fputs ("<=", file); break;
4415 case UNEQ:
4416 fputs ("<>", file); break;
4417 case UNORDERED:
4418 fputs ("<=>", file); break;
4419 case ORDERED:
4420 fputs ("!<=>", file); break;
4421 default:
4422 abort ();
4424 return;
4425 case 'S': /* Condition, operands are (S)wapped. */
4426 switch (GET_CODE (x))
4428 case EQ:
4429 fputs ("=", file); break;
4430 case NE:
4431 fputs ("<>", file); break;
4432 case GT:
4433 fputs ("<", file); break;
4434 case GE:
4435 fputs ("<=", file); break;
4436 case GEU:
4437 fputs ("<<=", file); break;
4438 case GTU:
4439 fputs ("<<", file); break;
4440 case LT:
4441 fputs (">", file); break;
4442 case LE:
4443 fputs (">=", file); break;
4444 case LEU:
4445 fputs (">>=", file); break;
4446 case LTU:
4447 fputs (">>", file); break;
4448 default:
4449 abort ();
4451 return;
4452 case 'B': /* Condition, (B)oth swapped and negate. */
4453 switch (GET_CODE (x))
4455 case EQ:
4456 fputs ("<>", file); break;
4457 case NE:
4458 fputs ("=", file); break;
4459 case GT:
4460 fputs (">=", file); break;
4461 case GE:
4462 fputs (">", file); break;
4463 case GEU:
4464 fputs (">>", file); break;
4465 case GTU:
4466 fputs (">>=", file); break;
4467 case LT:
4468 fputs ("<=", file); break;
4469 case LE:
4470 fputs ("<", file); break;
4471 case LEU:
4472 fputs ("<<", file); break;
4473 case LTU:
4474 fputs ("<<=", file); break;
4475 default:
4476 abort ();
4478 return;
4479 case 'k':
4480 if (GET_CODE (x) == CONST_INT)
4482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4483 return;
4485 abort ();
4486 case 'Q':
4487 if (GET_CODE (x) == CONST_INT)
4489 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4490 return;
4492 abort ();
4493 case 'L':
4494 if (GET_CODE (x) == CONST_INT)
4496 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4497 return;
4499 abort ();
4500 case 'O':
4501 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4503 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4504 return;
4506 abort ();
4507 case 'p':
4508 if (GET_CODE (x) == CONST_INT)
4510 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4511 return;
4513 abort ();
4514 case 'P':
4515 if (GET_CODE (x) == CONST_INT)
4517 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4518 return;
4520 abort ();
4521 case 'I':
4522 if (GET_CODE (x) == CONST_INT)
4523 fputs ("i", file);
4524 return;
4525 case 'M':
4526 case 'F':
4527 switch (GET_CODE (XEXP (x, 0)))
4529 case PRE_DEC:
4530 case PRE_INC:
4531 if (ASSEMBLER_DIALECT == 0)
4532 fputs ("s,mb", file);
4533 else
4534 fputs (",mb", file);
4535 break;
4536 case POST_DEC:
4537 case POST_INC:
4538 if (ASSEMBLER_DIALECT == 0)
4539 fputs ("s,ma", file);
4540 else
4541 fputs (",ma", file);
4542 break;
4543 case PLUS:
4544 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4545 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4547 if (ASSEMBLER_DIALECT == 0)
4548 fputs ("x,s", file);
4549 else
4550 fputs (",s", file);
4552 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4553 fputs ("s", file);
4554 break;
4555 default:
4556 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4557 fputs ("s", file);
4558 break;
4560 return;
4561 case 'G':
4562 output_global_address (file, x, 0);
4563 return;
4564 case 'H':
4565 output_global_address (file, x, 1);
4566 return;
4567 case 0: /* Don't do anything special */
4568 break;
4569 case 'Z':
4571 unsigned op[3];
4572 compute_zdepwi_operands (INTVAL (x), op);
4573 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4574 return;
4576 case 'z':
4578 unsigned op[3];
4579 compute_zdepdi_operands (INTVAL (x), op);
4580 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4581 return;
4583 case 'c':
4584 /* We can get here from a .vtable_inherit due to our
4585 CONSTANT_ADDRESS_P rejecting perfectly good constant
4586 addresses. */
4587 break;
4588 default:
4589 abort ();
4591 if (GET_CODE (x) == REG)
4593 fputs (reg_names [REGNO (x)], file);
4594 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4596 fputs ("R", file);
4597 return;
4599 if (FP_REG_P (x)
4600 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4601 && (REGNO (x) & 1) == 0)
4602 fputs ("L", file);
4604 else if (GET_CODE (x) == MEM)
4606 int size = GET_MODE_SIZE (GET_MODE (x));
4607 rtx base = NULL_RTX;
4608 switch (GET_CODE (XEXP (x, 0)))
4610 case PRE_DEC:
4611 case POST_DEC:
4612 base = XEXP (XEXP (x, 0), 0);
4613 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4614 break;
4615 case PRE_INC:
4616 case POST_INC:
4617 base = XEXP (XEXP (x, 0), 0);
4618 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4619 break;
4620 default:
4621 if (GET_CODE (XEXP (x, 0)) == PLUS
4622 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4623 fprintf (file, "%s(%s)",
4624 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4625 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4626 else if (GET_CODE (XEXP (x, 0)) == PLUS
4627 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4628 fprintf (file, "%s(%s)",
4629 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4630 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4631 else
4632 output_address (XEXP (x, 0));
4633 break;
4636 else
4637 output_addr_const (file, x);
4640 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4642 void
4643 output_global_address (file, x, round_constant)
4644 FILE *file;
4645 rtx x;
4646 int round_constant;
4649 /* Imagine (high (const (plus ...))). */
4650 if (GET_CODE (x) == HIGH)
4651 x = XEXP (x, 0);
4653 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4654 assemble_name (file, XSTR (x, 0));
4655 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4657 assemble_name (file, XSTR (x, 0));
4658 fputs ("-$global$", file);
4660 else if (GET_CODE (x) == CONST)
4662 const char *sep = "";
4663 int offset = 0; /* assembler wants -$global$ at end */
4664 rtx base = NULL_RTX;
4666 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4668 base = XEXP (XEXP (x, 0), 0);
4669 output_addr_const (file, base);
4671 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4672 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4673 else abort ();
4675 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4677 base = XEXP (XEXP (x, 0), 1);
4678 output_addr_const (file, base);
4680 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4681 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4682 else abort ();
4684 /* How bogus. The compiler is apparently responsible for
4685 rounding the constant if it uses an LR field selector.
4687 The linker and/or assembler seem a better place since
4688 they have to do this kind of thing already.
4690 If we fail to do this, HP's optimizing linker may eliminate
4691 an addil, but not update the ldw/stw/ldo instruction that
4692 uses the result of the addil. */
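      /* Example of the rounding (illustrative): an offset of 0x17ff
	 becomes (0x17ff + 0x1000) & ~0x1fff == 0x2000, the nearest
	 multiple of 0x2000.  */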
4693 if (round_constant)
4694 offset = ((offset + 0x1000) & ~0x1fff);
4696 if (GET_CODE (XEXP (x, 0)) == PLUS)
4698 if (offset < 0)
4700 offset = -offset;
4701 sep = "-";
4703 else
4704 sep = "+";
4706 else if (GET_CODE (XEXP (x, 0)) == MINUS
4707 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4708 sep = "-";
4709 else abort ();
4711 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4712 fputs ("-$global$", file);
4713 if (offset)
4714 fprintf (file, "%s%d", sep, offset);
4716 else
4717 output_addr_const (file, x);
4720 void
4721 output_deferred_plabels (file)
4722 FILE *file;
4724 size_t i;
4725 /* If we have deferred plabels, then we need to switch into the data
4726 section and align it to a 4 byte boundary before we output the
4727 deferred plabels. */
4728 if (n_deferred_plabels)
4730 data_section ();
4731 ASM_OUTPUT_ALIGN (file, 2);
4734 /* Now output the deferred plabels. */
4735 for (i = 0; i < n_deferred_plabels; i++)
4737 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4738 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4739 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4743 /* HP's millicode routines mean something special to the assembler.
4744 Keep track of which ones we have used. */
4746 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
4747 static void import_milli PARAMS ((enum millicodes));
4748 static char imported[(int) end1000];
4749 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
4750 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4751 #define MILLI_START 10
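/* MILLI_START indexes the "...." placeholder within import_string;
   for example, importing mulI overwrites those four characters to
   yield ".IMPORT $$mulI,MILLICODE".  */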
4753 static void
4754 import_milli (code)
4755 enum millicodes code;
4757 char str[sizeof (import_string)];
4759 if (!imported[(int) code])
4761 imported[(int) code] = 1;
4762 strcpy (str, import_string);
4763 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4764 output_asm_insn (str, 0);
4768 /* The register constraints have put the operands and return value in
4769 the proper registers. */
4771 const char *
4772 output_mul_insn (unsignedp, insn)
4773 int unsignedp ATTRIBUTE_UNUSED;
4774 rtx insn;
4776 import_milli (mulI);
4777 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4780 /* Emit the rtl for doing a division by a constant. */
4782 /* Do magic division millicodes exist for this value? */
4783 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4784 1, 1};
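/* So, for example, magic_milli[7] is 1 and a division by 7 can call
   the $$divI_7 millicode routine, while magic_milli[11] is 0 and a
   division by 11 must fall back to the generic $$divI.  */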
4786 /* We'll use an array to keep track of the magic millicodes and
4787 whether or not we've used them already. [n][0] is signed, [n][1] is
4788 unsigned. */
4790 static int div_milli[16][2];
4793 div_operand (op, mode)
4794 rtx op;
4795 enum machine_mode mode;
4797 return (mode == SImode
4798 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4799 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4800 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4804 emit_hpdiv_const (operands, unsignedp)
4805 rtx *operands;
4806 int unsignedp;
4808 if (GET_CODE (operands[2]) == CONST_INT
4809 && INTVAL (operands[2]) > 0
4810 && INTVAL (operands[2]) < 16
4811 && magic_milli[INTVAL (operands[2])])
4813 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
4815 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4816 emit
4817 (gen_rtx
4818 (PARALLEL, VOIDmode,
4819 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4820 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4821 SImode,
4822 gen_rtx_REG (SImode, 26),
4823 operands[2])),
4824 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4825 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4826 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4827 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4828 gen_rtx_CLOBBER (VOIDmode, ret))));
4829 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4830 return 1;
4832 return 0;
4835 const char *
4836 output_div_insn (operands, unsignedp, insn)
4837 rtx *operands;
4838 int unsignedp;
4839 rtx insn;
4841 int divisor;
4843 /* If the divisor is a constant, try to use one of the special
4844 opcodes. */
4845 if (GET_CODE (operands[0]) == CONST_INT)
4847 static char buf[100];
4848 divisor = INTVAL (operands[0]);
4849 if (!div_milli[divisor][unsignedp])
4851 div_milli[divisor][unsignedp] = 1;
4852 if (unsignedp)
4853 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4854 else
4855 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4857 if (unsignedp)
4859 sprintf (buf, "$$divU_");
4860 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4861 return output_millicode_call (insn,
4862 gen_rtx_SYMBOL_REF (SImode, buf));
4864 else
4866 sprintf (buf, "$$divI_");
4867 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4868 return output_millicode_call (insn,
4869 gen_rtx_SYMBOL_REF (SImode, buf));
4872 /* Divisor isn't a special constant. */
4873 else
4875 if (unsignedp)
4877 import_milli (divU);
4878 return output_millicode_call (insn,
4879 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4881 else
4883 import_milli (divI);
4884 return output_millicode_call (insn,
4885 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4890 /* Output a $$rem millicode to do mod. */
4892 const char *
4893 output_mod_insn (unsignedp, insn)
4894 int unsignedp;
4895 rtx insn;
4897 if (unsignedp)
4899 import_milli (remU);
4900 return output_millicode_call (insn,
4901 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4903 else
4905 import_milli (remI);
4906 return output_millicode_call (insn,
4907 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4911 void
4912 output_arg_descriptor (call_insn)
4913 rtx call_insn;
4915 const char *arg_regs[4];
4916 enum machine_mode arg_mode;
4917 rtx link;
4918 int i, output_flag = 0;
4919 int regno;
4921 /* We neither need nor want argument location descriptors for the
4922 64bit runtime environment or the ELF32 environment. */
4923 if (TARGET_64BIT || TARGET_ELF32)
4924 return;
4926 for (i = 0; i < 4; i++)
4927 arg_regs[i] = 0;
4929 /* Specify explicitly that no argument relocations should take place
4930 if using the portable runtime calling conventions. */
4931 if (TARGET_PORTABLE_RUNTIME)
4933 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4934 asm_out_file);
4935 return;
4938 if (GET_CODE (call_insn) != CALL_INSN)
4939 abort ();
4940 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4942 rtx use = XEXP (link, 0);
4944 if (! (GET_CODE (use) == USE
4945 && GET_CODE (XEXP (use, 0)) == REG
4946 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4947 continue;
4949 arg_mode = GET_MODE (XEXP (use, 0));
4950 regno = REGNO (XEXP (use, 0));
4951 if (regno >= 23 && regno <= 26)
4953 arg_regs[26 - regno] = "GR";
4954 if (arg_mode == DImode)
4955 arg_regs[25 - regno] = "GR";
4957 else if (regno >= 32 && regno <= 39)
4959 if (arg_mode == SFmode)
4960 arg_regs[(regno - 32) / 2] = "FR";
4961 else
4963 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4964 arg_regs[(regno - 34) / 2] = "FR";
4965 arg_regs[(regno - 34) / 2 + 1] = "FU";
4966 #else
4967 arg_regs[(regno - 34) / 2] = "FU";
4968 arg_regs[(regno - 34) / 2 + 1] = "FR";
4969 #endif
4973 fputs ("\t.CALL ", asm_out_file);
4974 for (i = 0; i < 4; i++)
4976 if (arg_regs[i])
4978 if (output_flag++)
4979 fputc (',', asm_out_file);
4980 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4983 fputc ('\n', asm_out_file);
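/* As an example of the descriptor built above (illustrative): a call
   passing two ints, live in %r26 and %r25, emits

	.CALL ARGW0=GR,ARGW1=GR
*/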
4986 /* Return the class of any secondary reload register that is needed to
4987 move IN into a register in class CLASS using mode MODE.
4989 Profiling has shown that this routine and its descendants account for
4990 a significant amount of compile time (~7%). So it has been
4991 optimized to reduce redundant computations and eliminate useless
4992 function calls.
4994 It might be worthwhile to try and make this a leaf function too. */
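/* Example (illustrative): reloading a symbolic address such as
   (symbol_ref "foo") into any class other than R1_REGS yields R1_REGS
   below, because the addil used to form the address clobbers %r1
   (unless the symbol is read-only and we are not generating PIC).  */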
4996 enum reg_class
4997 secondary_reload_class (class, mode, in)
4998 enum reg_class class;
4999 enum machine_mode mode;
5000 rtx in;
5002 int regno, is_symbolic;
5004 /* Trying to load a constant into a FP register during PIC code
5005 generation will require %r1 as a scratch register. */
5006 if (flag_pic
5007 && GET_MODE_CLASS (mode) == MODE_INT
5008 && FP_REG_CLASS_P (class)
5009 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5010 return R1_REGS;
5012 /* Profiling showed the PA port spends about 1.3% of its compilation
5013 time in true_regnum from calls inside secondary_reload_class. */
5015 if (GET_CODE (in) == REG)
5017 regno = REGNO (in);
5018 if (regno >= FIRST_PSEUDO_REGISTER)
5019 regno = true_regnum (in);
5021 else if (GET_CODE (in) == SUBREG)
5022 regno = true_regnum (in);
5023 else
5024 regno = -1;
5026 /* If we have something like (mem (mem (...))), we can safely assume the
5027 inner MEM will end up in a general register after reloading, so there's
5028 no need for a secondary reload. */
5029 if (GET_CODE (in) == MEM
5030 && GET_CODE (XEXP (in, 0)) == MEM)
5031 return NO_REGS;
5033 /* Handle out of range displacement for integer mode loads/stores of
5034 FP registers. */
5035 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5036 && GET_MODE_CLASS (mode) == MODE_INT
5037 && FP_REG_CLASS_P (class))
5038 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5039 return GENERAL_REGS;
5041 /* A SAR<->FP register copy requires a secondary register (GPR) as
5042 well as secondary memory. */
5043 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5044 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5045 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5046 return GENERAL_REGS;
5048 if (GET_CODE (in) == HIGH)
5049 in = XEXP (in, 0);
5051 /* Profiling has shown that GCC spends about 2.6% of its compilation
5052 time in symbolic_operand from calls inside secondary_reload_class.
5054 We use an inline copy and only compute its return value once to avoid
5055 useless work. */
5056 switch (GET_CODE (in))
5058 rtx tmp;
5060 case SYMBOL_REF:
5061 case LABEL_REF:
5062 is_symbolic = 1;
5063 break;
5064 case CONST:
5065 tmp = XEXP (in, 0);
5066 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5067 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5068 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5069 break;
5071 default:
5072 is_symbolic = 0;
5073 break;
5076 if (!flag_pic
5077 && is_symbolic
5078 && read_only_operand (in, VOIDmode))
5079 return NO_REGS;
5081 if (class != R1_REGS && is_symbolic)
5082 return R1_REGS;
5084 return NO_REGS;
5087 enum direction
5088 function_arg_padding (mode, type)
5089 enum machine_mode mode;
5090 tree type;
5092 int size;
5094 if (mode == BLKmode)
5096 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
5097 size = int_size_in_bytes (type) * BITS_PER_UNIT;
5098 else
5099 return upward; /* Don't know if this is right, but */
5100 /* same as old definition. */
5102 else
5103 size = GET_MODE_BITSIZE (mode);
5104 if (size < PARM_BOUNDARY)
5105 return downward;
5106 else if (size % PARM_BOUNDARY)
5107 return upward;
5108 else
5109 return none;
5113 /* Do what is necessary for `va_start'. We look at the current function
5114 to determine if stdargs or varargs is used and fill in an initial
5115 va_list. A pointer to this constructor is returned. */
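/* Sketch of the 32-bit layout this creates (illustrative): the four
   argument registers %r23..%r26 are flushed to the 16 bytes just below
   the internal arg pointer, so va_arg can then walk every argument as
   ordinary stack memory.  */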
5117 struct rtx_def *
5118 hppa_builtin_saveregs ()
5120 rtx offset, dest;
5121 tree fntype = TREE_TYPE (current_function_decl);
5122 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5123 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5124 != void_type_node)))
5125 ? UNITS_PER_WORD : 0);
5127 if (argadj)
5128 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5129 else
5130 offset = current_function_arg_offset_rtx;
5132 if (TARGET_64BIT)
5134 int i, off;
5136 /* Adjust for varargs/stdarg differences. */
5137 if (argadj)
5138 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5139 else
5140 offset = current_function_arg_offset_rtx;
5142 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5143 from the incoming arg pointer and growing to larger addresses. */
5144 for (i = 26, off = -64; i >= 19; i--, off += 8)
5145 emit_move_insn (gen_rtx_MEM (word_mode,
5146 plus_constant (arg_pointer_rtx, off)),
5147 gen_rtx_REG (word_mode, i));
5149 /* The incoming args pointer points just beyond the flushback area;
5150 normally this is not a serious concern. However, when we are doing
5151 varargs/stdargs we want to make the arg pointer point to the start
5152 of the incoming argument area. */
5153 emit_move_insn (virtual_incoming_args_rtx,
5154 plus_constant (arg_pointer_rtx, -64));
5156 /* Now return a pointer to the first anonymous argument. */
5157 return copy_to_reg (expand_binop (Pmode, add_optab,
5158 virtual_incoming_args_rtx,
5159 offset, 0, 0, OPTAB_LIB_WIDEN));
5162 /* Store general registers on the stack. */
5163 dest = gen_rtx_MEM (BLKmode,
5164 plus_constant (current_function_internal_arg_pointer,
5165 -16));
5166 set_mem_alias_set (dest, get_varargs_alias_set ());
5167 set_mem_align (dest, BITS_PER_WORD);
5168 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
5170 /* move_block_from_reg will emit code to store the argument registers
5171 individually as scalar stores.
5173 However, other insns may later load from the same addresses for
5174 a structure load (passing a struct to a varargs routine).
5176 The alias code assumes that such aliasing can never happen, so we
5177 have to keep memory referencing insns from moving up beyond the
5178 last argument register store. So we emit a blockage insn here. */
5179 emit_insn (gen_blockage ());
5181 return copy_to_reg (expand_binop (Pmode, add_optab,
5182 current_function_internal_arg_pointer,
5183 offset, 0, 0, OPTAB_LIB_WIDEN));
5186 void
5187 hppa_va_start (valist, nextarg)
5188 tree valist;
5189 rtx nextarg;
5191 nextarg = expand_builtin_saveregs ();
5192 std_expand_builtin_va_start (valist, nextarg);
5196 hppa_va_arg (valist, type)
5197 tree valist, type;
5199 HOST_WIDE_INT align, size, ofs;
5200 tree t, ptr, pptr;
5202 if (TARGET_64BIT)
5204 /* Every argument in PA64 is passed by value (including large structs).
5205 Arguments with size greater than 8 must be aligned 0 MOD 16. */
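      /* For example (illustrative): fetching a 24-byte struct first
	 rounds the va_list pointer up to a 16-byte boundary via the
	 PLUS/BIT_AND pair below before using the generic code.  */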
5207 size = int_size_in_bytes (type);
5208 if (size > UNITS_PER_WORD)
5210 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5211 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5212 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5213 build_int_2 (-2 * UNITS_PER_WORD, -1));
5214 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5215 TREE_SIDE_EFFECTS (t) = 1;
5216 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5218 return std_expand_builtin_va_arg (valist, type);
5221 /* Compute the rounded size of the type. */
5222 align = PARM_BOUNDARY / BITS_PER_UNIT;
5223 size = int_size_in_bytes (type);
5225 ptr = build_pointer_type (type);
5227 /* "Large" types are passed by reference. */
5228 if (size > 8)
5230 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5231 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5232 TREE_SIDE_EFFECTS (t) = 1;
5234 pptr = build_pointer_type (ptr);
5235 t = build1 (NOP_EXPR, pptr, t);
5236 TREE_SIDE_EFFECTS (t) = 1;
5238 t = build1 (INDIRECT_REF, ptr, t);
5239 TREE_SIDE_EFFECTS (t) = 1;
5241 else
5243 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5244 build_int_2 (-size, -1));
5246 /* Copied from va-pa.h, but we probably don't need to align
5247 to word size, since we generate and preserve that invariant. */
5248 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5249 build_int_2 ((size > 4 ? -8 : -4), -1));
5251 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5252 TREE_SIDE_EFFECTS (t) = 1;
5254 ofs = (8 - size) % 4;
5255 if (ofs)
5257 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
5258 TREE_SIDE_EFFECTS (t) = 1;
5261 t = build1 (NOP_EXPR, ptr, t);
5262 TREE_SIDE_EFFECTS (t) = 1;
5265 /* Calculate! */
5266 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
5271 /* This routine handles all the normal conditional branch sequences we
5272 might need to generate. It handles compare immediate vs compare
5273 register, nullification of delay slots, varying length branches,
5274 negated branches, and all combinations of the above. It returns the
5275 output appropriate to emit the branch corresponding to all given
5276 parameters. */
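/* For instance (illustrative): a short forward branch with a nullified
   delay slot falls into the length == 4 case below and returns
   "{com%I2b,|cmp%I2b,}%S3,n %2,%r1,%0", which might assemble to
   something like "comb,=,n %r25,%r26,L$0012" under the SOM dialect.  */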
5278 const char *
5279 output_cbranch (operands, nullify, length, negated, insn)
5280 rtx *operands;
5281 int nullify, length, negated;
5282 rtx insn;
5284 static char buf[100];
5285 int useskip = 0;
5287 /* A conditional branch to the following instruction (e.g., the delay slot) is
5288 asking for a disaster. This can happen when not optimizing.
5290 In such cases it is safe to emit nothing. */
5292 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5293 return "";
5295 /* If this is a long branch with its delay slot unfilled, set `nullify'
5296 as it can nullify the delay slot and save a nop. */
5297 if (length == 8 && dbr_sequence_length () == 0)
5298 nullify = 1;
5300 /* If this is a short forward conditional branch which did not get
5301 its delay slot filled, the delay slot can still be nullified. */
5302 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5303 nullify = forward_branch_p (insn);
5305 /* A forward branch over a single nullified insn can be done with a
5306 comclr instruction. This avoids a single cycle penalty due to a
5307 mis-predicted branch if we fall through (branch not taken). */
5308 if (length == 4
5309 && next_real_insn (insn) != 0
5310 && get_attr_length (next_real_insn (insn)) == 4
5311 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5312 && nullify)
5313 useskip = 1;
5315 switch (length)
5317 /* All short conditional branches except backwards with an unfilled
5318 delay slot. */
5319 case 4:
5320 if (useskip)
5321 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5322 else
5323 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5324 if (GET_MODE (operands[1]) == DImode)
5325 strcat (buf, "*");
5326 if (negated)
5327 strcat (buf, "%B3");
5328 else
5329 strcat (buf, "%S3");
5330 if (useskip)
5331 strcat (buf, " %2,%r1,%%r0");
5332 else if (nullify)
5333 strcat (buf, ",n %2,%r1,%0");
5334 else
5335 strcat (buf, " %2,%r1,%0");
5336 break;
5338 /* All long conditionals. Note a short backward branch with an
5339 unfilled delay slot is treated just like a long backward branch
5340 with an unfilled delay slot. */
5341 case 8:
5342 /* Handle weird backwards branch with a filled delay slot
5343 which is nullified. */
5344 if (dbr_sequence_length () != 0
5345 && ! forward_branch_p (insn)
5346 && nullify)
5348 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5349 if (GET_MODE (operands[1]) == DImode)
5350 strcat (buf, "*");
5351 if (negated)
5352 strcat (buf, "%S3");
5353 else
5354 strcat (buf, "%B3");
5355 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5357 /* Handle short backwards branch with an unfilled delay slot.
5358 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5359 taken and untaken branches. */
5360 else if (dbr_sequence_length () == 0
5361 && ! forward_branch_p (insn)
5362 && INSN_ADDRESSES_SET_P ()
5363 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5364 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5366 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5367 if (GET_MODE (operands[1]) == DImode)
5368 strcat (buf, "*");
5369 if (negated)
5370 strcat (buf, "%B3 %2,%r1,%0%#");
5371 else
5372 strcat (buf, "%S3 %2,%r1,%0%#");
5374 else
5376 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5377 if (GET_MODE (operands[1]) == DImode)
5378 strcat (buf, "*");
5379 if (negated)
5380 strcat (buf, "%S3");
5381 else
5382 strcat (buf, "%B3");
5383 if (nullify)
5384 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5385 else
5386 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5388 break;
5390 case 20:
5391 /* Very long branch. Right now we only handle these when not
5392 optimizing. See "jump" pattern in pa.md for details. */
5393 if (optimize)
5394 abort ();
5396 /* Create a reversed conditional branch which branches around
5397 the following insns. */
5398 if (negated)
5399 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5400 else
5401 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5402 if (GET_MODE (operands[1]) == DImode)
5404 if (negated)
5405 strcpy (buf,
5406 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5407 else
5408 strcpy (buf,
5409 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5411 output_asm_insn (buf, operands);
5413 /* Output an insn to save %r1. */
5414 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5416 /* Now output a very long branch to the original target. */
5417 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5419 /* Now restore the value of %r1 in the delay slot. We're not
5420 optimizing so we know nothing else can be in the delay slot. */
5421 return "ldw -16(%%r30),%%r1";
5423 case 28:
5424 /* Very long branch when generating PIC code. Right now we only
5425 handle these when not optimizing. See "jump" pattern in pa.md
5426 for details. */
5427 if (optimize)
5428 abort ();
5430 /* Create a reversed conditional branch which branches around
5431 the following insns. */
5432 if (negated)
5433 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5434 else
5435 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5436 if (GET_MODE (operands[1]) == DImode)
5438 if (negated)
5439 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5440 else
5441 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5443 output_asm_insn (buf, operands);
5445 /* Output an insn to save %r1. */
5446 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5448 /* Now output a very long PIC branch to the original target. */
5450 rtx xoperands[5];
5452 xoperands[0] = operands[0];
5453 xoperands[1] = operands[1];
5454 xoperands[2] = operands[2];
5455 xoperands[3] = operands[3];
5456 if (TARGET_SOM || ! TARGET_GAS)
5457 xoperands[4] = gen_label_rtx ();
5459 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5460 if (TARGET_SOM || ! TARGET_GAS)
5462 output_asm_insn ("addil L'%l0-%l4,%%r1", xoperands);
5463 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5464 CODE_LABEL_NUMBER (xoperands[4]));
5465 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1", xoperands);
5467 else
5469 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5470 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1",
5471 xoperands);
5473 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5476 /* Now restore the value of %r1 in the delay slot. We're not
5477 optimizing so we know nothing else can be in the delay slot. */
5478 return "ldw -16(%%r30),%%r1";
5480 default:
5481 abort ();
5483 return buf;
5486 /* This routine handles all the branch-on-bit conditional branch sequences we
5487 might need to generate. It handles nullification of delay slots,
5488 varying length branches, negated branches and all combinations of the
5489 above. It returns the appropriate output template to emit the branch. */
5491 const char *
5492 output_bb (operands, nullify, length, negated, insn, which)
5493 rtx *operands ATTRIBUTE_UNUSED;
5494 int nullify, length, negated;
5495 rtx insn;
5496 int which;
5498 static char buf[100];
5499 int useskip = 0;
5501 /* A conditional branch to the following instruction (e.g., the delay slot) is
5502 asking for a disaster. I do not think this can happen as this pattern
5503 is only used when optimizing; jump optimization should eliminate the
5504 jump. But be prepared just in case. */
5506 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5507 return "";
5509 /* If this is a long branch with its delay slot unfilled, set `nullify'
5510 as it can nullify the delay slot and save a nop. */
5511 if (length == 8 && dbr_sequence_length () == 0)
5512 nullify = 1;
5514 /* If this is a short forward conditional branch which did not get
5515 its delay slot filled, the delay slot can still be nullified. */
5516 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5517 nullify = forward_branch_p (insn);
5519 /* A forward branch over a single nullified insn can be done with an
5520 extrs instruction. This avoids a single cycle penalty due to a
5521 mis-predicted branch if we fall through (branch not taken). */
5523 if (length == 4
5524 && next_real_insn (insn) != 0
5525 && get_attr_length (next_real_insn (insn)) == 4
5526 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5527 && nullify)
5528 useskip = 1;
5530 switch (length)
5533 /* All short conditional branches except backwards with an unfilled
5534 delay slot. */
5535 case 4:
5536 if (useskip)
5537 strcpy (buf, "{extrs,|extrw,s,}");
5538 else
5539 strcpy (buf, "bb,");
5540 if (useskip && GET_MODE (operands[0]) == DImode)
5541 strcpy (buf, "extrd,s,*");
5542 else if (GET_MODE (operands[0]) == DImode)
5543 strcpy (buf, "bb,*");
5544 if ((which == 0 && negated)
5545 || (which == 1 && ! negated))
5546 strcat (buf, ">=");
5547 else
5548 strcat (buf, "<");
5549 if (useskip)
5550 strcat (buf, " %0,%1,1,%%r0");
5551 else if (nullify && negated)
5552 strcat (buf, ",n %0,%1,%3");
5553 else if (nullify && ! negated)
5554 strcat (buf, ",n %0,%1,%2");
5555 else if (! nullify && negated)
5556 strcat (buf, "%0,%1,%3");
5557 else if (! nullify && ! negated)
5558 strcat (buf, " %0,%1,%2");
5559 break;
5561 /* All long conditionals. Note a short backward branch with an
5562 unfilled delay slot is treated just like a long backward branch
5563 with an unfilled delay slot. */
5564 case 8:
5565 /* Handle weird backwards branch with a filled delay slot
5566 which is nullified. */
5567 if (dbr_sequence_length () != 0
5568 && ! forward_branch_p (insn)
5569 && nullify)
5571 strcpy (buf, "bb,");
5572 if (GET_MODE (operands[0]) == DImode)
5573 strcat (buf, "*");
5574 if ((which == 0 && negated)
5575 || (which == 1 && ! negated))
5576 strcat (buf, "<");
5577 else
5578 strcat (buf, ">=");
5579 if (negated)
5580 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5581 else
5582 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5584 /* Handle short backwards branch with an unfilled delay slot.
5585 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5586 taken and untaken branches. */
5587 else if (dbr_sequence_length () == 0
5588 && ! forward_branch_p (insn)
5589 && INSN_ADDRESSES_SET_P ()
5590 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5591 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5593 strcpy (buf, "bb,");
5594 if (GET_MODE (operands[0]) == DImode)
5595 strcat (buf, "*");
5596 if ((which == 0 && negated)
5597 || (which == 1 && ! negated))
5598 strcat (buf, ">=");
5599 else
5600 strcat (buf, "<");
5601 if (negated)
5602 strcat (buf, " %0,%1,%3%#");
5603 else
5604 strcat (buf, " %0,%1,%2%#");
5606 else
5608 strcpy (buf, "{extrs,|extrw,s,}");
5609 if (GET_MODE (operands[0]) == DImode)
5610 strcpy (buf, "extrd,s,*");
5611 if ((which == 0 && negated)
5612 || (which == 1 && ! negated))
5613 strcat (buf, "<");
5614 else
5615 strcat (buf, ">=");
5616 if (nullify && negated)
5617 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5618 else if (nullify && ! negated)
5619 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5620 else if (negated)
5621 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5622 else
5623 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5625 break;
5627 default:
5628 abort ();
5630 return buf;
5633 /* This routine handles all the branch-on-variable-bit conditional branch
5634 sequences we might need to generate. It handles nullification of delay
5635 slots, varying length branches, negated branches and all combinations
5636 of the above. It returns the appropriate output template to emit the
5637 branch. */
5639 const char *
5640 output_bvb (operands, nullify, length, negated, insn, which)
5641 rtx *operands ATTRIBUTE_UNUSED;
5642 int nullify, length, negated;
5643 rtx insn;
5644 int which;
5646 static char buf[100];
5647 int useskip = 0;
5649 /* A conditional branch to the following instruction (e.g., the delay slot) is
5650 asking for a disaster. I do not think this can happen as this pattern
5651 is only used when optimizing; jump optimization should eliminate the
5652 jump. But be prepared just in case. */
5654 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5655 return "";
5657 /* If this is a long branch with its delay slot unfilled, set `nullify'
5658 as it can nullify the delay slot and save a nop. */
5659 if (length == 8 && dbr_sequence_length () == 0)
5660 nullify = 1;
5662 /* If this is a short forward conditional branch which did not get
5663 its delay slot filled, the delay slot can still be nullified. */
5664 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5665 nullify = forward_branch_p (insn);
5667 /* A forward branch over a single nullified insn can be done with an
5668 extrs instruction. This avoids a single cycle penalty due to a
5669 mis-predicted branch if we fall through (branch not taken). */
5671 if (length == 4
5672 && next_real_insn (insn) != 0
5673 && get_attr_length (next_real_insn (insn)) == 4
5674 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5675 && nullify)
5676 useskip = 1;
5678 switch (length)
5681 /* All short conditional branches except backwards with an unfilled
5682 delay slot. */
5683 case 4:
5684 if (useskip)
5685 strcpy (buf, "{vextrs,|extrw,s,}");
5686 else
5687 strcpy (buf, "{bvb,|bb,}");
5688 if (useskip && GET_MODE (operands[0]) == DImode)
5689 strcpy (buf, "extrd,s,*}");
5690 else if (GET_MODE (operands[0]) == DImode)
5691 strcpy (buf, "bb,*");
5692 if ((which == 0 && negated)
5693 || (which == 1 && ! negated))
5694 strcat (buf, ">=");
5695 else
5696 strcat (buf, "<");
5697 if (useskip)
5698 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5699 else if (nullify && negated)
5700 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5701 else if (nullify && ! negated)
5702 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5703 else if (! nullify && negated)
5704 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5705 else if (! nullify && ! negated)
5706 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5707 break;
5709 /* All long conditionals. Note a short backward branch with an
5710 unfilled delay slot is treated just like a long backward branch
5711 with an unfilled delay slot. */
5712 case 8:
5713 /* Handle weird backwards branch with a filled delay slot
5714 which is nullified. */
5715 if (dbr_sequence_length () != 0
5716 && ! forward_branch_p (insn)
5717 && nullify)
5719 strcpy (buf, "{bvb,|bb,}");
5720 if (GET_MODE (operands[0]) == DImode)
5721 strcat (buf, "*");
5722 if ((which == 0 && negated)
5723 || (which == 1 && ! negated))
5724 strcat (buf, "<");
5725 else
5726 strcat (buf, ">=");
5727 if (negated)
5728 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5729 else
5730 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5732 /* Handle short backwards branch with an unfilled delay slot.
5733 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5734 taken and untaken branches. */
5735 else if (dbr_sequence_length () == 0
5736 && ! forward_branch_p (insn)
5737 && INSN_ADDRESSES_SET_P ()
5738 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5739 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5741 strcpy (buf, "{bvb,|bb,}");
5742 if (GET_MODE (operands[0]) == DImode)
5743 strcat (buf, "*");
5744 if ((which == 0 && negated)
5745 || (which == 1 && ! negated))
5746 strcat (buf, ">=");
5747 else
5748 strcat (buf, "<");
5749 if (negated)
5750 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5751 else
5752 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5754 else
5756 strcpy (buf, "{vextrs,|extrw,s,}");
5757 if (GET_MODE (operands[0]) == DImode)
5758 strcpy (buf, "extrd,s,*");
5759 if ((which == 0 && negated)
5760 || (which == 1 && ! negated))
5761 strcat (buf, "<");
5762 else
5763 strcat (buf, ">=");
5764 if (nullify && negated)
5765 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5766 else if (nullify && ! negated)
5767 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5768 else if (negated)
5769 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5770 else
5771 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5773 break;
5775 default:
5776 abort ();
5778 return buf;
5781 /* Return the output template for emitting a dbra type insn.
5783 Note it may perform some output operations on its own before
5784 returning the final output string. */
5785 const char *
5786 output_dbra (operands, insn, which_alternative)
5787 rtx *operands;
5788 rtx insn;
5789 int which_alternative;
5792 /* A conditional branch to the following instruction (e.g., the delay slot) is
5793 asking for a disaster. Be prepared! */
5795 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5797 if (which_alternative == 0)
5798 return "ldo %1(%0),%0";
5799 else if (which_alternative == 1)
5801 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
5802 output_asm_insn ("ldw -16(%%r30),%4", operands);
5803 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5804 return "{fldws|fldw} -16(%%r30),%0";
5806 else
5808 output_asm_insn ("ldw %0,%4", operands);
5809 return "ldo %1(%4),%4\n\tstw %4,%0";
5813 if (which_alternative == 0)
5815 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5816 int length = get_attr_length (insn);
5818 /* If this is a long branch with its delay slot unfilled, set `nullify'
5819 as it can nullify the delay slot and save a nop. */
5820 if (length == 8 && dbr_sequence_length () == 0)
5821 nullify = 1;
5823 /* If this is a short forward conditional branch which did not get
5824 its delay slot filled, the delay slot can still be nullified. */
5825 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5826 nullify = forward_branch_p (insn);
5828 /* Handle short versions first. */
5829 if (length == 4 && nullify)
5830 return "addib,%C2,n %1,%0,%3";
5831 else if (length == 4 && ! nullify)
5832 return "addib,%C2 %1,%0,%3";
5833 else if (length == 8)
5835 /* Handle weird backwards branch with a filled delay slot
5836 which is nullified. */
5837 if (dbr_sequence_length () != 0
5838 && ! forward_branch_p (insn)
5839 && nullify)
5840 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5841 /* Handle short backwards branch with an unfilled delay slot.
5842 Using an addb;nop rather than addi;bl saves 1 cycle for both
5843 taken and untaken branches. */
5844 else if (dbr_sequence_length () == 0
5845 && ! forward_branch_p (insn)
5846 && INSN_ADDRESSES_SET_P ()
5847 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5848 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5849 return "addib,%C2 %1,%0,%3%#";
5851 /* Handle normal cases. */
5852 if (nullify)
5853 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5854 else
5855 return "addi,%N2 %1,%0,%0\n\tb %3";
5857 else
5858 abort ();
5860 /* Deal with gross reload from FP register case. */
5861 else if (which_alternative == 1)
5863 /* Move loop counter from FP register to MEM then into a GR,
5864 increment the GR, store the GR into MEM, and finally reload
5865 the FP register from MEM from within the branch's delay slot. */
5866 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
5867 operands);
5868 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5869 if (get_attr_length (insn) == 24)
5870 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5871 else
5872 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5874 /* Deal with gross reload from memory case. */
5875 else
5877 /* Reload loop counter from memory, the store back to memory
5878 happens in the branch's delay slot. */
5879 output_asm_insn ("ldw %0,%4", operands);
5880 if (get_attr_length (insn) == 12)
5881 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5882 else
5883 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5887 /* Return the output template for emitting a movb type insn.
5889 Note it may perform some output operations on its own before
5890 returning the final output string. */
5891 const char *
5892 output_movb (operands, insn, which_alternative, reverse_comparison)
5893 rtx *operands;
5894 rtx insn;
5895 int which_alternative;
5896 int reverse_comparison;
5899 /* A conditional branch to the following instruction (e.g., the delay slot) is
5900 asking for a disaster. Be prepared! */
5902 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5904 if (which_alternative == 0)
5905 return "copy %1,%0";
5906 else if (which_alternative == 1)
5908 output_asm_insn ("stw %1,-16(%%r30)", operands);
5909 return "{fldws|fldw} -16(%%r30),%0";
5911 else if (which_alternative == 2)
5912 return "stw %1,%0";
5913 else
5914 return "mtsar %r1";
5917 /* Support the second variant. */
5918 if (reverse_comparison)
5919 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5921 if (which_alternative == 0)
5923 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5924 int length = get_attr_length (insn);
5926 /* If this is a long branch with its delay slot unfilled, set `nullify'
5927 as it can nullify the delay slot and save a nop. */
5928 if (length == 8 && dbr_sequence_length () == 0)
5929 nullify = 1;
5931 /* If this is a short forward conditional branch which did not get
5932 its delay slot filled, the delay slot can still be nullified. */
5933 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5934 nullify = forward_branch_p (insn);
5936 /* Handle short versions first. */
5937 if (length == 4 && nullify)
5938 return "movb,%C2,n %1,%0,%3";
5939 else if (length == 4 && ! nullify)
5940 return "movb,%C2 %1,%0,%3";
5941 else if (length == 8)
5943 /* Handle weird backwards branch with a filled delay slot
5944 which is nullified. */
5945 if (dbr_sequence_length () != 0
5946 && ! forward_branch_p (insn)
5947 && nullify)
5948 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5950 /* Handle short backwards branch with an unfilled delay slot.
5951 Using a movb;nop rather than or;bl saves 1 cycle for both
5952 taken and untaken branches. */
5953 else if (dbr_sequence_length () == 0
5954 && ! forward_branch_p (insn)
5955 && INSN_ADDRESSES_SET_P ()
5956 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5957 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5958 return "movb,%C2 %1,%0,%3%#";
5959 /* Handle normal cases. */
5960 if (nullify)
5961 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5962 else
5963 return "or,%N2 %1,%%r0,%0\n\tb %3";
5965 else
5966 abort ();
5968 /* Deal with gross reload from FP register case. */
5969 else if (which_alternative == 1)
5971 /* Move loop counter from FP register to MEM, then into a GR,
5972 increment the GR, store the GR into MEM, and finally reload
5973 the FP register from MEM from within the branch's delay slot. */
5974 output_asm_insn ("stw %1,-16(%%r30)", operands);
5975 if (get_attr_length (insn) == 12)
5976 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5977 else
5978 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5980 /* Deal with gross reload from memory case. */
5981 else if (which_alternative == 2)
5983 /* Reload loop counter from memory, the store back to memory
5984 happens in the branch's delay slot. */
5985 if (get_attr_length (insn) == 8)
5986 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5987 else
5988 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5990 /* Handle SAR as a destination. */
5991 else
5993 if (get_attr_length (insn) == 8)
5994 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
5995 else
5996 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6001 /* INSN is a millicode call. It may have an unconditional jump in its delay
6002 slot.
6004 CALL_DEST is the routine we are calling. */
6006 const char *
6007 output_millicode_call (insn, call_dest)
6008 rtx insn;
6009 rtx call_dest;
6011 int attr_length = get_attr_length (insn);
6012 int seq_length = dbr_sequence_length ();
6013 int distance;
6014 rtx xoperands[4];
6015 rtx seq_insn;
6017 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6019 /* Handle common case -- empty delay slot or no jump in the delay slot,
6020 and we're sure that the branch will reach the beginning of the $CODE$
6021 subspace. The within reach form of the $$sh_func_adrs call has
6022 a length of 28 and an attribute type of multi. This length is the
6023 same as the maximum length of an out of reach PIC call to $$div. */
6024 if ((seq_length == 0
6025 && (attr_length == 8
6026 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6027 || (seq_length != 0
6028 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6029 && attr_length == 4))
6031 xoperands[0] = call_dest;
6032 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
6033 return "";
6036 /* This call may not reach the beginning of the $CODE$ subspace. */
6037 if (attr_length > 8)
6039 int delay_insn_deleted = 0;
6041 /* We need to emit an inline long-call branch. */
6042 if (seq_length != 0
6043 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6045 /* A non-jump insn in the delay slot. By definition we can
6046 emit this insn before the call. */
6047 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6049 /* Now delete the delay insn. */
6050 PUT_CODE (NEXT_INSN (insn), NOTE);
6051 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6052 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6053 delay_insn_deleted = 1;
6056 /* PIC long millicode call sequence. */
6057 if (flag_pic)
6059 xoperands[0] = call_dest;
6060 if (TARGET_SOM || ! TARGET_GAS)
6061 xoperands[1] = gen_label_rtx ();
6063 /* Get our address + 8 into %r1. */
6064 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6066 if (TARGET_SOM || ! TARGET_GAS)
6068 /* Add %r1 to the offset of our target from the next insn. */
6069 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
6070 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6071 CODE_LABEL_NUMBER (xoperands[1]));
6072 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
6074 else
6076 output_asm_insn ("addil L%%%0-$PIC_pcrel$0+4,%%r1", xoperands);
6077 output_asm_insn ("ldo R%%%0-$PIC_pcrel$0+8(%%r1),%%r1",
6078 xoperands);
6081 /* Get the return address into %r31. */
6082 output_asm_insn ("blr 0,%3", xoperands);
6084 /* Branch to our target which is in %r1. */
6085 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
6087 /* Empty delay slot. Note this insn gets fetched twice and
6088 executed once. To be safe we use a nop. */
6089 output_asm_insn ("nop", xoperands);
6091 /* Pure portable runtime doesn't allow be/ble; we also don't have
6092 PIC support in the assembler/linker, so this sequence is needed. */
6093 else if (TARGET_PORTABLE_RUNTIME)
6095 xoperands[0] = call_dest;
6096 /* Get the address of our target into %r29. */
6097 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
6098 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
6100 /* Get our return address into %r31. */
6101 output_asm_insn ("blr %%r0,%3", xoperands);
6103 /* Jump to our target address in %r29. */
6104 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
6106 /* Empty delay slot. Note this insn gets fetched twice and
6107 executed once. To be safe we use a nop. */
6108 output_asm_insn ("nop", xoperands);
6110 /* If we're allowed to use be/ble instructions, then this is the
6111 best sequence to use for a long millicode call. */
6112 else
6114 xoperands[0] = call_dest;
6115 output_asm_insn ("ldil L%%%0,%3", xoperands);
6116 if (TARGET_PA_20)
6117 output_asm_insn ("be,l R%%%0(%%sr4,%3),%%sr0,%%r31", xoperands);
6118 else
6119 output_asm_insn ("ble R%%%0(%%sr4,%3)", xoperands);
6120 output_asm_insn ("nop", xoperands);
6123 /* If we had a jump in the call's delay slot, output it now. */
6124 if (seq_length != 0 && !delay_insn_deleted)
6126 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6127 output_asm_insn ("b,n %0", xoperands);
6129 /* Now delete the delay insn. */
6130 PUT_CODE (NEXT_INSN (insn), NOTE);
6131 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6132 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6134 return "";
6137 /* This call has an unconditional jump in its delay slot and the
6138 call is known to reach its target or the beginning of the current
6139 subspace. */
6141 /* Use the containing sequence insn's address. */
6142 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6144 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6145 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6147 /* If the branch was too far away, emit a normal call followed
6148 by a nop, followed by the unconditional branch.
6150 If the branch is close, then adjust %r2 from within the
6151 call's delay slot. */
6153 xoperands[0] = call_dest;
6154 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6155 if (! VAL_14_BITS_P (distance))
6156 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
6157 else
6159 xoperands[2] = gen_label_rtx ();
6160 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
6161 xoperands);
6162 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6163 CODE_LABEL_NUMBER (xoperands[2]));
6166 /* Delete the jump. */
6167 PUT_CODE (NEXT_INSN (insn), NOTE);
6168 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6169 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6170 return "";
6173 /* INSN is a function call. It may have an unconditional jump
6174 in its delay slot.
6176 CALL_DEST is the routine we are calling. */
6178 const char *
6179 output_call (insn, call_dest, sibcall)
6180 rtx insn;
6181 rtx call_dest;
6182 int sibcall;
6184 int attr_length = get_attr_length (insn);
6185 int seq_length = dbr_sequence_length ();
6186 int distance;
6187 rtx xoperands[4];
6188 rtx seq_insn;
6190 /* Handle common case -- empty delay slot or no jump in the delay slot,
6191 and we're sure that the branch will reach the beginning of the $CODE$
6192 subspace. */
6193 if ((seq_length == 0 && attr_length == 12)
6194 || (seq_length != 0
6195 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6196 && attr_length == 8))
6198 xoperands[0] = call_dest;
6199 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6200 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
6201 return "";
6204 /* This call may not reach the beginning of the $CODE$ subspace. */
6205 if (attr_length > 12)
6207 int delay_insn_deleted = 0;
6208 rtx xoperands[2];
6209 rtx link;
6211 /* We need to emit an inline long-call branch. Furthermore,
6212 because we're changing a named function call into an indirect
6213 function call well after the parameters have been set up, we
6214 need to make sure any FP args appear in both the integer
6215 and FP registers. Also, we need to move any delay slot insn
6216 out of the delay slot. And finally, we can't rely on the linker
6217 being able to fix the call to $$dyncall! -- Yuk! */
6218 if (seq_length != 0
6219 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6221 /* A non-jump insn in the delay slot. By definition we can
6222 emit this insn before the call (and in fact before argument
6223 relocating). */
6224 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6226 /* Now delete the delay insn. */
6227 PUT_CODE (NEXT_INSN (insn), NOTE);
6228 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6229 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6230 delay_insn_deleted = 1;
6233 /* Now copy any FP arguments into integer registers. */
6234 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6236 int arg_mode, regno;
6237 rtx use = XEXP (link, 0);
6238 if (! (GET_CODE (use) == USE
6239 && GET_CODE (XEXP (use, 0)) == REG
6240 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6241 continue;
6243 arg_mode = GET_MODE (XEXP (use, 0));
6244 regno = REGNO (XEXP (use, 0));
6245 /* Is it a floating point register? */
6246 if (regno >= 32 && regno <= 39)
6248 /* Copy from the FP register into an integer register
6249 (via memory). */
6250 if (arg_mode == SFmode)
6252 xoperands[0] = XEXP (use, 0);
6253 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6254 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
6255 xoperands);
6256 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6258 else
6260 xoperands[0] = XEXP (use, 0);
6261 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6262 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
6263 xoperands);
6264 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6265 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6270 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
6271 we don't have any direct calls in that case. */
6273 size_t i;
6274 const char *name = XSTR (call_dest, 0);
6276 /* See if we have already put this function on the list
6277 of deferred plabels. This list is generally small,
6278 so a linear search is not too ugly. If it proves too
6279 slow replace it with something faster. */
6280 for (i = 0; i < n_deferred_plabels; i++)
6281 if (strcmp (name, deferred_plabels[i].name) == 0)
6282 break;
6284 /* If the deferred plabel list is empty, or this entry was
6285 not found on the list, create a new entry on the list. */
6286 if (deferred_plabels == NULL || i == n_deferred_plabels)
6288 const char *real_name;
6290 if (deferred_plabels == 0)
6291 deferred_plabels = (struct deferred_plabel *)
6292 ggc_alloc (sizeof (struct deferred_plabel));
6293 else
6294 deferred_plabels = (struct deferred_plabel *)
6295 ggc_realloc (deferred_plabels,
6296 ((n_deferred_plabels + 1)
6297 * sizeof (struct deferred_plabel)));
6299 i = n_deferred_plabels++;
6300 deferred_plabels[i].internal_label = gen_label_rtx ();
6301 deferred_plabels[i].name = ggc_strdup (name);
6303 /* Gross. We have just implicitly taken the address of this
6304 function; mark it as such. */
6305 real_name = (*targetm.strip_name_encoding) (name);
6306 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
6309 /* We have to load the address of the function using a procedure
6310 label (plabel). Inline plabels can lose for PIC and other
6311 cases, so avoid them by creating a 32bit plabel in the data
6312 segment. */
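  /* Annotation (illustrative sketch, not part of the original sources):
     the deferred plabel created above is emitted later into the data
     section as, roughly,

         L$nnnn:   .word P%function

     (compare the similar word emitted by the thunk code below).  The
     sequences that follow load that word to obtain the function's
     address and then transfer through $$dyncall.  */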
6313 if (flag_pic)
6315 xoperands[0] = deferred_plabels[i].internal_label;
6316 if (TARGET_SOM || ! TARGET_GAS)
6317 xoperands[1] = gen_label_rtx ();
6319 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
6320 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
6321 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
6323 /* Get our address + 8 into %r1. */
6324 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6326 if (TARGET_SOM || ! TARGET_GAS)
6328 /* Add %r1 to the offset of dyncall from the next insn. */
6329 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
6330 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6331 CODE_LABEL_NUMBER (xoperands[1]));
6332 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
6334 else
6336 output_asm_insn ("addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1",
6337 xoperands);
6338 output_asm_insn ("ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
6339 xoperands);
6342 /* Get the return address into %r31. */
6343 output_asm_insn ("blr %%r0,%%r31", xoperands);
6345 /* Branch to our target which is in %r1. */
6346 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6348 if (sibcall)
6350 /* This call never returns, so we do not need to fix the
6351 return pointer. */
6352 output_asm_insn ("nop", xoperands);
6354 else
6356 /* Copy the return address into %r2 also. */
6357 output_asm_insn ("copy %%r31,%%r2", xoperands);
6360 else
6362 xoperands[0] = deferred_plabels[i].internal_label;
6364 /* Get the address of our target into %r22. */
6365 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
6366 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
6368 /* Get the high part of the address of $$dyncall into %r2, then
6369 add in the low part in the branch instruction. */
6370 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
6371 if (TARGET_PA_20)
6372 output_asm_insn ("be,l R%%$$dyncall(%%sr4,%%r2),%%sr0,%%r31",
6373 xoperands);
6374 else
6375 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
6377 if (sibcall)
6379 /* This call never returns, so we do not need to fix the
6380 return pointer. */
6381 output_asm_insn ("nop", xoperands);
6383 else
6385 /* Copy the return address into %r2 also. */
6386 output_asm_insn ("copy %%r31,%%r2", xoperands);
6391 /* If we had a jump in the call's delay slot, output it now. */
6392 if (seq_length != 0 && !delay_insn_deleted)
6394 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6395 output_asm_insn ("b,n %0", xoperands);
6397 /* Now delete the delay insn. */
6398 PUT_CODE (NEXT_INSN (insn), NOTE);
6399 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6400 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6402 return "";
6405 /* This call has an unconditional jump in its delay slot and the
6406 call is known to reach its target or the beginning of the current
6407 subspace. */
6409 /* Use the containing sequence insn's address. */
6410 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6412 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6413 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6415 /* If the branch is too far away, emit a normal call followed
6416 by a nop, followed by the unconditional branch. If the branch
6417 is close, then adjust %r2 in the call's delay slot. */
6419 xoperands[0] = call_dest;
6420 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6421 if (! VAL_14_BITS_P (distance))
6422 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
6423 else
6425 xoperands[3] = gen_label_rtx ();
6426 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
6427 xoperands);
6428 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6429 CODE_LABEL_NUMBER (xoperands[3]));
6432 /* Delete the jump. */
6433 PUT_CODE (NEXT_INSN (insn), NOTE);
6434 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6435 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6436 return "";
6439 /* In HPUX 8.0's shared library scheme, special relocations are needed
6440 for function labels if they might be passed to a function
6441 in a shared library (because shared libraries don't live in code
6442 space), and special magic is needed to construct their address. */
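/* Annotation (sketch, not part of the original sources): the encoding
   below simply prepends '@' to the symbol name, first dropping any
   leading '*' user-asm-name marker, e.g.

       foo   ->  @foo
       *foo  ->  @foo

   FUNCTION_NAME_P elsewhere tests for this marker, and
   pa_strip_name_encoding below removes it again.  */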
6444 void
6445 hppa_encode_label (sym)
6446 rtx sym;
6448 const char *str = XSTR (sym, 0);
6449 int len = strlen (str) + 1;
6450 char *newstr, *p;
6452 p = newstr = alloca (len + 1);
6453 if (str[0] == '*')
6455 str++;
6456 len--;
6458 *p++ = '@';
6459 strcpy (p, str);
6461 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
6464 static void
6465 pa_encode_section_info (decl, first)
6466 tree decl;
6467 int first;
6469 if (first && TEXT_SPACE_P (decl))
6471 rtx rtl;
6472 if (TREE_CODE (decl) == FUNCTION_DECL
6473 || TREE_CODE (decl) == VAR_DECL)
6474 rtl = DECL_RTL (decl);
6475 else
6476 rtl = TREE_CST_RTL (decl);
6477 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
6478 if (TREE_CODE (decl) == FUNCTION_DECL)
6479 hppa_encode_label (XEXP (DECL_RTL (decl), 0));
6483 /* This is sort of the inverse of pa_encode_section_info. */
6485 static const char *
6486 pa_strip_name_encoding (str)
6487 const char *str;
6489 return str + (*str == '*' || *str == '@');
6493 function_label_operand (op, mode)
6494 rtx op;
6495 enum machine_mode mode ATTRIBUTE_UNUSED;
6497 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6500 /* Returns 1 if OP is a function label involved in a simple addition
6501 with a constant. Used to keep certain patterns from matching
6502 during instruction combination. */
6504 is_function_label_plus_const (op)
6505 rtx op;
6507 /* Strip off any CONST. */
6508 if (GET_CODE (op) == CONST)
6509 op = XEXP (op, 0);
6511 return (GET_CODE (op) == PLUS
6512 && function_label_operand (XEXP (op, 0), Pmode)
6513 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6516 /* Output assembly code for a thunk to FUNCTION. */
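/* Annotation (sketch, not part of the original sources): in the
   simplest case below (non-PIC, delta fitting in 14 bits) the thunk
   is just a tail branch that adjusts the first argument -- the `this'
   pointer, which arrives in %r26 -- in the delay slot:

       b target
       ldo delta(%r26),%r26

   The PIC and large-delta cases are longer but follow the same idea.  */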
6518 void
6519 pa_asm_output_mi_thunk (file, thunk_fndecl, delta, function)
6520 FILE *file;
6521 tree thunk_fndecl;
6522 HOST_WIDE_INT delta;
6523 tree function;
6525 const char *target_name = XSTR (XEXP (DECL_RTL (function), 0), 0);
6526 static unsigned int current_thunk_number;
6527 char label[16];
6528 const char *lab;
6529 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
6530 lab = (*targetm.strip_name_encoding) (label);
6531 target_name = (*targetm.strip_name_encoding) (target_name);
6532 /* FIXME: total_code_bytes is not handled correctly in files with
6533 mi thunks. */
6534 pa_output_function_prologue (file, 0);
6535 if (VAL_14_BITS_P (delta))
6537 if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
6539 fprintf (file, "\taddil LT%%%s,%%r19\n", lab);
6540 fprintf (file, "\tldw RT%%%s(%%r1),%%r22\n", lab);
6541 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
6542 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
6543 fprintf (file, "\tdepi 0,31,2,%%r22\n");
6544 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
6545 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
6546 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
6547 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
6548 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6549 fprintf (file, "(%%r26),%%r26\n");
6551 else
6553 fprintf (file, "\tb %s\n\tldo ", target_name);
6554 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6555 fprintf (file, "(%%r26),%%r26\n");
6558 else
6560 if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
6562 fprintf (file, "\taddil L%%");
6563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6564 fprintf (file, ",%%r26\n\tldo R%%");
6565 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6566 fprintf (file, "(%%r1),%%r26\n");
6567 fprintf (file, "\taddil LT%%%s,%%r19\n", lab);
6568 fprintf (file, "\tldw RT%%%s(%%r1),%%r22\n", lab);
6569 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
6570 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
6571 fprintf (file, "\tdepi 0,31,2,%%r22\n");
6572 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
6573 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
6574 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
6575 fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
6577 else
6579 fprintf (file, "\taddil L%%");
6580 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6581 fprintf (file, ",%%r26\n\tb %s\n\tldo R%%", target_name);
6582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
6583 fprintf (file, "(%%r1),%%r26\n");
6587 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
6588 if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
6590 data_section ();
6591 fprintf (file, "\t.align 4\n");
6592 ASM_OUTPUT_INTERNAL_LABEL (file, "LTHN", current_thunk_number);
6593 fprintf (file, "\t.word P%%%s\n", target_name);
6594 function_section (thunk_fndecl);
6596 current_thunk_number++;
6599 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6600 use in fmpyadd instructions. */
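/* Annotation (sketch, not part of the original sources): operands 0-2
   are the multiply's destination and sources, operand 3 the add's
   inout destination, and operands 4-5 its sources.  When the checks
   below pass, the md file can emit a single PA1.1 instruction along
   the lines of

       fmpyadd,sgl %1,%2,%0,%5,%3

   performing the multiply and the add in one issue.  */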
6602 fmpyaddoperands (operands)
6603 rtx *operands;
6605 enum machine_mode mode = GET_MODE (operands[0]);
6607 /* Must be a floating point mode. */
6608 if (mode != SFmode && mode != DFmode)
6609 return 0;
6611 /* All modes must be the same. */
6612 if (! (mode == GET_MODE (operands[1])
6613 && mode == GET_MODE (operands[2])
6614 && mode == GET_MODE (operands[3])
6615 && mode == GET_MODE (operands[4])
6616 && mode == GET_MODE (operands[5])))
6617 return 0;
6619 /* All operands must be registers. */
6620 if (! (GET_CODE (operands[1]) == REG
6621 && GET_CODE (operands[2]) == REG
6622 && GET_CODE (operands[3]) == REG
6623 && GET_CODE (operands[4]) == REG
6624 && GET_CODE (operands[5]) == REG))
6625 return 0;
6627 /* Only 2 real operands to the addition. One of the input operands must
6628 be the same as the output operand. */
6629 if (! rtx_equal_p (operands[3], operands[4])
6630 && ! rtx_equal_p (operands[3], operands[5]))
6631 return 0;
6633 /* Inout operand of add cannot conflict with any operands from multiply. */
6634 if (rtx_equal_p (operands[3], operands[0])
6635 || rtx_equal_p (operands[3], operands[1])
6636 || rtx_equal_p (operands[3], operands[2]))
6637 return 0;
6639 /* The multiply cannot feed into the addition operands. */
6640 if (rtx_equal_p (operands[4], operands[0])
6641 || rtx_equal_p (operands[5], operands[0]))
6642 return 0;
6644 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6645 if (mode == SFmode
6646 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6647 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6648 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6649 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6650 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6651 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6652 return 0;
6654 /* Passed. Operands are suitable for fmpyadd. */
6655 return 1;
6658 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6659 use in fmpysub instructions. */
6661 fmpysuboperands (operands)
6662 rtx *operands;
6664 enum machine_mode mode = GET_MODE (operands[0]);
6666 /* Must be a floating point mode. */
6667 if (mode != SFmode && mode != DFmode)
6668 return 0;
6670 /* All modes must be the same. */
6671 if (! (mode == GET_MODE (operands[1])
6672 && mode == GET_MODE (operands[2])
6673 && mode == GET_MODE (operands[3])
6674 && mode == GET_MODE (operands[4])
6675 && mode == GET_MODE (operands[5])))
6676 return 0;
6678 /* All operands must be registers. */
6679 if (! (GET_CODE (operands[1]) == REG
6680 && GET_CODE (operands[2]) == REG
6681 && GET_CODE (operands[3]) == REG
6682 && GET_CODE (operands[4]) == REG
6683 && GET_CODE (operands[5]) == REG))
6684 return 0;
6686 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6687 operation, so operands[4] must be the same as operands[3]. */
6688 if (! rtx_equal_p (operands[3], operands[4]))
6689 return 0;
6691 /* The multiply cannot feed into the subtraction. */
6692 if (rtx_equal_p (operands[5], operands[0]))
6693 return 0;
6695 /* Inout operand of sub cannot conflict with any operands from multiply. */
6696 if (rtx_equal_p (operands[3], operands[0])
6697 || rtx_equal_p (operands[3], operands[1])
6698 || rtx_equal_p (operands[3], operands[2]))
6699 return 0;
6701 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6702 if (mode == SFmode
6703 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6704 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6705 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6706 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6707 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6708 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6709 return 0;
6711 /* Passed. Operands are suitable for fmpysub. */
6712 return 1;
6716 plus_xor_ior_operator (op, mode)
6717 rtx op;
6718 enum machine_mode mode ATTRIBUTE_UNUSED;
6720 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6721 || GET_CODE (op) == IOR);
6724 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
6725 constants for shadd instructions. */
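/* Annotation (sketch): these values correspond to the sh1add, sh2add
   and sh3add instructions, which scale their first source by 2, 4 or
   8 before adding, e.g.

       sh2add %r26,%r25,%r28    ; %r28 = 4 * %r26 + %r25  */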
6726 static int
6727 shadd_constant_p (val)
6728 int val;
6730 if (val == 2 || val == 4 || val == 8)
6731 return 1;
6732 else
6733 return 0;
6736 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6737 the valid constants for shadd instructions. */
6739 shadd_operand (op, mode)
6740 rtx op;
6741 enum machine_mode mode ATTRIBUTE_UNUSED;
6743 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6746 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6749 basereg_operand (op, mode)
6750 rtx op;
6751 enum machine_mode mode;
6753 /* cse will create some unscaled indexed addresses; however, it
6754 generally isn't a win on the PA, so avoid creating unscaled
6755 indexed addresses until after cse is finished. */
6756 if (!cse_not_expected)
6757 return 0;
6759 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6760 we don't have to worry about the braindamaged implicit space
6761 register selection from the basereg. */
6762 if (TARGET_NO_SPACE_REGS)
6763 return (GET_CODE (op) == REG);
6765 /* While it's always safe to index off the frame pointer, it's not
6766 always profitable, particularly when the frame pointer is being
6767 eliminated. */
6768 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6769 return 1;
6771 return (GET_CODE (op) == REG
6772 && REG_POINTER (op)
6773 && register_operand (op, mode));
6776 /* Return 1 if this operand is anything other than a hard register. */
6779 non_hard_reg_operand (op, mode)
6780 rtx op;
6781 enum machine_mode mode ATTRIBUTE_UNUSED;
6783 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6786 /* Return 1 if INSN branches forward. Should be using insn_addresses
6787 to avoid walking through all the insns... */
6788 static int
6789 forward_branch_p (insn)
6790 rtx insn;
6792 rtx label = JUMP_LABEL (insn);
6794 while (insn)
6796 if (insn == label)
6797 break;
6798 else
6799 insn = NEXT_INSN (insn);
6802 return (insn == label);
6805 /* Return 1 if OP is an equality comparison, else return 0. */
6807 eq_neq_comparison_operator (op, mode)
6808 rtx op;
6809 enum machine_mode mode ATTRIBUTE_UNUSED;
6811 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6814 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6816 movb_comparison_operator (op, mode)
6817 rtx op;
6818 enum machine_mode mode ATTRIBUTE_UNUSED;
6820 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6821 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6824 /* Return 1 if INSN is in the delay slot of a call instruction. */
6826 jump_in_call_delay (insn)
6827 rtx insn;
6830 if (GET_CODE (insn) != JUMP_INSN)
6831 return 0;
6833 if (PREV_INSN (insn)
6834 && PREV_INSN (PREV_INSN (insn))
6835 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6837 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6839 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6840 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6843 else
6844 return 0;
6847 /* Output an unconditional move and branch insn. */
6849 const char *
6850 output_parallel_movb (operands, length)
6851 rtx *operands;
6852 int length;
6854 /* These are the cases in which we win. */
6855 if (length == 4)
6856 return "mov%I1b,tr %1,%0,%2";
6858 /* None of these cases wins, but they don't lose either. */
6859 if (dbr_sequence_length () == 0)
6861 /* Nothing in the delay slot, fake it by putting the combined
6862 insn (the copy or add) in the delay slot of a bl. */
6863 if (GET_CODE (operands[1]) == CONST_INT)
6864 return "b %2\n\tldi %1,%0";
6865 else
6866 return "b %2\n\tcopy %1,%0";
6868 else
6870 /* Something in the delay slot, but we've got a long branch. */
6871 if (GET_CODE (operands[1]) == CONST_INT)
6872 return "ldi %1,%0\n\tb %2";
6873 else
6874 return "copy %1,%0\n\tb %2";
6878 /* Output an unconditional add and branch insn. */
6880 const char *
6881 output_parallel_addb (operands, length)
6882 rtx *operands;
6883 int length;
6885 /* To make life easy we want operand0 to be the shared input/output
6886 operand and operand1 to be the readonly operand. */
6887 if (operands[0] == operands[1])
6888 operands[1] = operands[2];
6890 /* These are the cases in which we win. */
6891 if (length == 4)
6892 return "add%I1b,tr %1,%0,%3";
6894 /* None of these cases wins, but they don't lose either. */
6895 if (dbr_sequence_length () == 0)
6897 /* Nothing in the delay slot, fake it by putting the combined
6898 insn (the copy or add) in the delay slot of a bl. */
6899 return "b %3\n\tadd%I1 %1,%0,%0";
6901 else
6903 /* Something in the delay slot, but we've got a long branch. */
6904 return "add%I1 %1,%0,%0\n\tb %3";
6908 /* Return nonzero if INSN (a jump insn) immediately follows a call
6909 to a named function. This is used to avoid filling the delay slot
6910 of the jump since it can usually be eliminated by modifying RP in
6911 the delay slot of the call. */
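/* Annotation (illustrative sketch, labels hypothetical): given

       bl foo,%r2
       nop
       b L$3

   output_call above can instead adjust the return pointer in the
   call's delay slot,

       bl foo,%r2
       ldo L$3-L$4(%r2),%r2
   L$4:

   making foo "return" directly to L$3, so it is better to leave the
   jump's delay slot unfilled here.  */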
6914 following_call (insn)
6915 rtx insn;
6917 if (! TARGET_JUMP_IN_DELAY)
6918 return 0;
6920 /* Find the previous real insn, skipping NOTEs. */
6921 insn = PREV_INSN (insn);
6922 while (insn && GET_CODE (insn) == NOTE)
6923 insn = PREV_INSN (insn);
6925 /* Check for CALL_INSNs and millicode calls. */
6926 if (insn
6927 && ((GET_CODE (insn) == CALL_INSN
6928 && get_attr_type (insn) != TYPE_DYNCALL)
6929 || (GET_CODE (insn) == INSN
6930 && GET_CODE (PATTERN (insn)) != SEQUENCE
6931 && GET_CODE (PATTERN (insn)) != USE
6932 && GET_CODE (PATTERN (insn)) != CLOBBER
6933 && get_attr_type (insn) == TYPE_MILLI)))
6934 return 1;
6936 return 0;
6939 /* We use this hook to perform a PA specific optimization which is difficult
6940 to do in earlier passes.
6942 We want the delay slots of branches within jump tables to be filled.
6943 None of the compiler passes at the moment even has the notion that a
6944 PA jump table doesn't contain addresses, but instead contains actual
6945 instructions!
6947 Because we actually jump into the table, the addresses of each entry
6948 must stay constant in relation to the beginning of the table (which
6949 itself must stay constant relative to the instruction to jump into
6950 it). I don't believe we can guarantee earlier passes of the compiler
6951 will adhere to those rules.
6953 So, late in the compilation process we find all the jump tables, and
6954 expand them into real code -- e.g., each entry in the jump table vector
6955 will get an appropriate label followed by a jump to the final target.
6957 Reorg and the final jump pass can then optimize these branches and
6958 fill their delay slots. We end up with smaller, more efficient code.
6960 The jump instructions within the table are special; we must be able
6961 to identify them during assembly output (if the jumps don't get filled
6962 we need to emit a nop rather than nullifying the delay slot). We
6963 identify jumps in switch tables by marking the SET with DImode.
6965 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6966 insns. This serves two purposes: first, it prevents jump.c from
6967 noticing that the last N entries in the table jump to the instruction
6968 immediately after the table and deleting the jumps. Second, those
6969 insns mark where we should emit .begin_brtab and .end_brtab directives
6970 when using GAS (allows for better link time optimizations). */
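/* Annotation (illustrative sketch, labels hypothetical): a table that
   other ports would emit as

       .word L$5
       .word L$6

   is exploded into real branch instructions, each preceded by its own
   label:

       L$100:  b L$5
               nop
       L$101:  b L$6
               nop

   Reorg can then try to fill each nop slot; entries whose slots stay
   unfilled keep the explicit nop so every entry remains the same size.  */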
6972 void
6973 pa_reorg (insns)
6974 rtx insns;
6976 rtx insn;
6978 remove_useless_addtr_insns (insns, 1);
6980 if (pa_cpu < PROCESSOR_8000)
6981 pa_combine_instructions (get_insns ());
6984 /* This is fairly cheap, so always run it if optimizing. */
6985 if (optimize > 0 && !TARGET_BIG_SWITCH)
6987 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6988 insns = get_insns ();
6989 for (insn = insns; insn; insn = NEXT_INSN (insn))
6991 rtx pattern, tmp, location;
6992 unsigned int length, i;
6994 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6995 if (GET_CODE (insn) != JUMP_INSN
6996 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6997 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6998 continue;
7000 /* Emit marker for the beginning of the branch table. */
7001 emit_insn_before (gen_begin_brtab (), insn);
7003 pattern = PATTERN (insn);
7004 location = PREV_INSN (insn);
7005 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
7007 for (i = 0; i < length; i++)
7009 /* Emit a label before each jump to keep jump.c from
7010 removing this code. */
7011 tmp = gen_label_rtx ();
7012 LABEL_NUSES (tmp) = 1;
7013 emit_label_after (tmp, location);
7014 location = NEXT_INSN (location);
7016 if (GET_CODE (pattern) == ADDR_VEC)
7018 /* Emit the jump itself. */
7019 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
7020 tmp = emit_jump_insn_after (tmp, location);
7021 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
7022 /* It is easy to rely on the branch table markers
7023 during assembly output to trigger the correct code
7024 for a switch table jump with an unfilled delay slot.
7026 However, that requires state and assumes that we look
7027 at insns in order.
7029 We can't make such assumptions when computing the length
7030 of instructions. Ugh. We could walk the insn chain to
7031 determine if this instruction is in a branch table, but
7032 that can get rather expensive, particularly during the
7033 branch shortening phase of the compiler.
7035 So instead we mark this jump as being special. This is
7036 far from ideal and knows that no code after this will
7037 muck around with the mode of the JUMP_INSN itself. */
7038 PUT_MODE (tmp, SImode);
7039 LABEL_NUSES (JUMP_LABEL (tmp))++;
7040 location = NEXT_INSN (location);
7042 else
7044 /* Emit the jump itself. */
7045 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
7046 tmp = emit_jump_insn_after (tmp, location);
7047 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
7048 /* It is easy to rely on the branch table markers
7049 during assembly output to trigger the correct code
7050 for a switch table jump with an unfilled delay slot.
7052 However, that requires state and assumes that we look
7053 at insns in order.
7055 We can't make such assumptions when computing the length
7056 of instructions. Ugh. We could walk the insn chain to
7057 determine if this instruction is in a branch table, but
7058 that can get rather expensive, particularly during the
7059 branch shortening phase of the compiler.
7061 So instead we mark this jump as being special. This is
7062 far from ideal and knows that no code after this will
7063 muck around with the mode of the JUMP_INSN itself. */
7064 PUT_MODE (tmp, SImode);
7065 LABEL_NUSES (JUMP_LABEL (tmp))++;
7066 location = NEXT_INSN (location);
7069 /* Emit a BARRIER after the jump. */
7070 emit_barrier_after (location);
7071 location = NEXT_INSN (location);
7074 /* Emit marker for the end of the branch table. */
7075 emit_insn_before (gen_end_brtab (), location);
7076 location = NEXT_INSN (location);
7077 emit_barrier_after (location);
7079 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
7080 delete_insn (insn);
7083 else
7085 /* Still need an end_brtab insn. */
7086 insns = get_insns ();
7087 for (insn = insns; insn; insn = NEXT_INSN (insn))
7089 /* Find an ADDR_VEC insn. */
7090 if (GET_CODE (insn) != JUMP_INSN
7091 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7092 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7093 continue;
7095 /* Now generate markers for the beginning and end of the
7096 branch table. */
7097 emit_insn_before (gen_begin_brtab (), insn);
7098 emit_insn_after (gen_end_brtab (), insn);
7103 /* The PA has a number of odd instructions which can perform multiple
7104 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
7105 it may be profitable to combine two instructions into one instruction
7106 with two outputs. It's not profitable on PA2.0 machines because the
7107 two outputs would take two slots in the reorder buffers.
7109 This routine finds instructions which can be combined and combines
7110 them. We only support some of the potential combinations, and we
7111 only try common ways to find suitable instructions.
7113 * addb can add two registers or a register and a small integer
7114 and jump to a nearby (+-8k) location. Normally the jump to the
7115 nearby location is conditional on the result of the add, but by
7116 using the "true" condition we can make the jump unconditional.
7117 Thus addb can perform two independent operations in one insn.
7119 * movb is similar to addb in that it can perform a reg->reg
7120 or small immediate->reg copy and jump to a nearby (+-8k) location.
7122 * fmpyadd and fmpysub can perform a FP multiply and either an
7123 FP add or FP sub if the operands of the multiply and add/sub are
7124 independent (there are other minor restrictions). Note both
7125 the fmpy and fadd/fsub can in theory move to better spots according
7126 to data dependencies, but for now we require the fmpy stay at a
7127 fixed location.
7129 * Many of the memory operations can perform pre & post updates
7130 of index registers. GCC's pre/post increment/decrement addressing
7131 is far too simple to take advantage of all the possibilities. This
7132 pass may not be suitable since those insns may not be independent.
7134 * comclr can compare two ints or an int and a register, nullify
7135 the following instruction and zero some other register. This
7136 is more difficult to use as it's harder to find an insn which
7137 will generate a comclr than finding something like an unconditional
7138 branch. (Conditional moves & long branches create comclr insns.)
7140 * Most arithmetic operations can conditionally skip the next
7141 instruction. They can be viewed as "perform this operation
7142 and conditionally jump to this nearby location" (where nearby
7143 is an insn away). These are difficult to use due to the
7144 branch length restrictions. */
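/* Annotation (illustrative sketch, labels hypothetical): for example,
   on PA1.1 an independent register copy floating near an
   unconditional branch,

       copy %r4,%r28
       b L$7

   can be rewritten as a single movb using the always-true condition:

       movb,tr %r4,%r28,L$7

   The PARALLEL built below is used to test whether such a pairing
   matches one of the combined patterns in the md file.  */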
7146 static void
7147 pa_combine_instructions (insns)
7148 rtx insns ATTRIBUTE_UNUSED;
7150 rtx anchor, new;
7152 /* This can get expensive since the basic algorithm is on the
7153 order of O(n^2) (or worse). Only do it for -O2 or higher
7154 levels of optimization. */
7155 if (optimize < 2)
7156 return;
7158 /* Walk down the list of insns looking for "anchor" insns which
7159 may be combined with "floating" insns. As the name implies,
7160 "anchor" instructions don't move, while "floating" insns may
7161 move around. */
7162 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7163 new = make_insn_raw (new);
7165 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7167 enum attr_pa_combine_type anchor_attr;
7168 enum attr_pa_combine_type floater_attr;
7170 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7171 Also ignore any special USE insns. */
7172 if ((GET_CODE (anchor) != INSN
7173 && GET_CODE (anchor) != JUMP_INSN
7174 && GET_CODE (anchor) != CALL_INSN)
7175 || GET_CODE (PATTERN (anchor)) == USE
7176 || GET_CODE (PATTERN (anchor)) == CLOBBER
7177 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
7178 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
7179 continue;
7181 anchor_attr = get_attr_pa_combine_type (anchor);
7182 /* See if anchor is an insn suitable for combination. */
7183 if (anchor_attr == PA_COMBINE_TYPE_FMPY
7184 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
7185 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7186 && ! forward_branch_p (anchor)))
7188 rtx floater;
7190 for (floater = PREV_INSN (anchor);
7191 floater;
7192 floater = PREV_INSN (floater))
7194 if (GET_CODE (floater) == NOTE
7195 || (GET_CODE (floater) == INSN
7196 && (GET_CODE (PATTERN (floater)) == USE
7197 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7198 continue;
7200 /* Anything except a regular INSN will stop our search. */
7201 if (GET_CODE (floater) != INSN
7202 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7203 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7205 floater = NULL_RTX;
7206 break;
7209 /* See if FLOATER is suitable for combination with the
7210 anchor. */
7211 floater_attr = get_attr_pa_combine_type (floater);
7212 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7213 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7214 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7215 && floater_attr == PA_COMBINE_TYPE_FMPY))
7217 /* If ANCHOR and FLOATER can be combined, then we're
7218 done with this pass. */
7219 if (pa_can_combine_p (new, anchor, floater, 0,
7220 SET_DEST (PATTERN (floater)),
7221 XEXP (SET_SRC (PATTERN (floater)), 0),
7222 XEXP (SET_SRC (PATTERN (floater)), 1)))
7223 break;
7226 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7227 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
7229 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
7231 if (pa_can_combine_p (new, anchor, floater, 0,
7232 SET_DEST (PATTERN (floater)),
7233 XEXP (SET_SRC (PATTERN (floater)), 0),
7234 XEXP (SET_SRC (PATTERN (floater)), 1)))
7235 break;
7237 else
7239 if (pa_can_combine_p (new, anchor, floater, 0,
7240 SET_DEST (PATTERN (floater)),
7241 SET_SRC (PATTERN (floater)),
7242 SET_SRC (PATTERN (floater))))
7243 break;
7248 /* If we didn't find anything on the backwards scan try forwards. */
7249 if (!floater
7250 && (anchor_attr == PA_COMBINE_TYPE_FMPY
7251 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
7253 for (floater = anchor; floater; floater = NEXT_INSN (floater))
7255 if (GET_CODE (floater) == NOTE
7256 || (GET_CODE (floater) == INSN
7257 && (GET_CODE (PATTERN (floater)) == USE
7258 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7260 continue;
7262 /* Anything except a regular INSN will stop our search. */
7263 if (GET_CODE (floater) != INSN
7264 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7265 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7267 floater = NULL_RTX;
7268 break;
7271 /* See if FLOATER is suitable for combination with the
7272 anchor. */
7273 floater_attr = get_attr_pa_combine_type (floater);
7274 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7275 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7276 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7277 && floater_attr == PA_COMBINE_TYPE_FMPY))
7279 /* If ANCHOR and FLOATER can be combined, then we're
7280 done with this pass. */
7281 if (pa_can_combine_p (new, anchor, floater, 1,
7282 SET_DEST (PATTERN (floater)),
7283 XEXP (SET_SRC (PATTERN (floater)),
7285 XEXP (SET_SRC (PATTERN (floater)),
7286 1)))
7287 break;
7292 /* FLOATER will be nonzero if we found a suitable floating
7293 insn for combination with ANCHOR. */
7294 if (floater
7295 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7296 || anchor_attr == PA_COMBINE_TYPE_FMPY))
7298 /* Emit the new instruction and delete the old anchor. */
7299 emit_insn_before (gen_rtx_PARALLEL
7300 (VOIDmode,
7301 gen_rtvec (2, PATTERN (anchor),
7302 PATTERN (floater))),
7303 anchor);
7305 PUT_CODE (anchor, NOTE);
7306 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7307 NOTE_SOURCE_FILE (anchor) = 0;
7309 /* Emit a special USE insn for FLOATER, then delete
7310 the floating insn. */
7311 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7312 delete_insn (floater);
7314 continue;
7316 else if (floater
7317 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
7319 rtx temp;
7320 /* Emit the new_jump instruction and delete the old anchor. */
7321 temp
7322 = emit_jump_insn_before (gen_rtx_PARALLEL
7323 (VOIDmode,
7324 gen_rtvec (2, PATTERN (anchor),
7325 PATTERN (floater))),
7326 anchor);
7328 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
7329 PUT_CODE (anchor, NOTE);
7330 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7331 NOTE_SOURCE_FILE (anchor) = 0;
7333 /* Emit a special USE insn for FLOATER, then delete
7334 the floating insn. */
7335 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7336 delete_insn (floater);
7337 continue;
7343 static int
7344 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
7345 rtx new, anchor, floater;
7346 int reversed;
7347 rtx dest, src1, src2;
7349 int insn_code_number;
7350 rtx start, end;
7352 /* Create a PARALLEL with the patterns of ANCHOR and
7353 FLOATER, try to recognize it, then test constraints
7354 for the resulting pattern.
7356 If the pattern doesn't match or the constraints
7357 aren't met keep searching for a suitable floater
7358 insn. */
7359 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7360 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
7361 INSN_CODE (new) = -1;
7362 insn_code_number = recog_memoized (new);
7363 if (insn_code_number < 0
7364 || (extract_insn (new), ! constrain_operands (1)))
7365 return 0;
7367 if (reversed)
7369 start = anchor;
7370 end = floater;
7372 else
7374 start = floater;
7375 end = anchor;
7378 /* There are up to three operands to consider: one
7379 output and two inputs.
7381 The output must not be used between FLOATER & ANCHOR
7382 exclusive. The inputs must not be set between
7383 FLOATER and ANCHOR exclusive. */
7385 if (reg_used_between_p (dest, start, end))
7386 return 0;
7388 if (reg_set_between_p (src1, start, end))
7389 return 0;
7391 if (reg_set_between_p (src2, start, end))
7392 return 0;
7394 /* If we get here, then everything is good. */
7395 return 1;
7398 /* Return nonzero if references for INSN are delayed.
7400 Millicode insns are actually function calls with some special
7401 constraints on arguments and register usage.
7403 Millicode calls always expect their arguments in the integer argument
7404 registers, and always return their result in %r29 (ret1). They
7405 are expected to clobber their arguments, %r1, %r29, and the return
7406 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
7408 This function tells reorg that the references to arguments and
7409 millicode calls do not appear to happen until after the millicode call.
7410 This allows reorg to put insns which set the argument registers into the
7411 delay slot of the millicode call -- thus they act more like traditional
7412 CALL_INSNs.
7414 Note we cannot consider side effects of the insn to be delayed because
7415 the branch and link insn will clobber the return pointer. If we happened
7416 to use the return pointer in the delay slot of the call, then we lose.
7418 get_attr_type will try to recognize the given insn, so make sure to
7419 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
7420 in particular. */
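/* Annotation (sketch, register and millicode names per the comment
   above): reorg may turn

       ldi 10,%r26          ; set up millicode argument
       bl $$mulI,%r31       ; millicode call

   into

       bl $$mulI,%r31
       ldi 10,%r26          ; argument set up in the delay slot

   precisely because this hook reports the argument references as
   delayed until after the call.  */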
7422 insn_refs_are_delayed (insn)
7423 rtx insn;
7425 return ((GET_CODE (insn) == INSN
7426 && GET_CODE (PATTERN (insn)) != SEQUENCE
7427 && GET_CODE (PATTERN (insn)) != USE
7428 && GET_CODE (PATTERN (insn)) != CLOBBER
7429 && get_attr_type (insn) == TYPE_MILLI));
7432 /* Return the location of a parameter that is passed in a register or NULL
7433 if the parameter has any component that is passed in memory.
7435 This is new code and will be pushed into the net sources after
7436 further testing.
7438 ??? We might want to restructure this so that it looks more like other
7439 ports. */
7441 function_arg (cum, mode, type, named, incoming)
7442 CUMULATIVE_ARGS *cum;
7443 enum machine_mode mode;
7444 tree type;
7445 int named ATTRIBUTE_UNUSED;
7446 int incoming;
7448 int max_arg_words = (TARGET_64BIT ? 8 : 4);
7449 int arg_size = FUNCTION_ARG_SIZE (mode, type);
7450 int alignment = 0;
7451 int fpr_reg_base;
7452 int gpr_reg_base;
7453 rtx retval;
7455 if (! TARGET_64BIT)
7457 /* If this arg would be passed partially or totally on the stack, then
7458 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
7459 handle arguments which are split between regs and stack slots if
7460 the ABI mandates split arguments. */
7461 if (cum->words + arg_size > max_arg_words
7462 || mode == VOIDmode)
7463 return NULL_RTX;
7465 else
7467 if (arg_size > 1)
7468 alignment = cum->words & 1;
7469 if (cum->words + alignment >= max_arg_words
7470 || mode == VOIDmode)
7471 return NULL_RTX;
7474 /* The 32bit ABIs and the 64bit ABIs are rather different,
7475 particularly in their handling of FP registers. We might
7476 be able to cleverly share code between them, but I'm not
7477 going to bother in the hope that splitting them up results
7478 in code that is more easily understood. */
7480 if (TARGET_64BIT)
7482 /* Advance the base registers to their current locations.
7484 Remember, gprs grow towards smaller register numbers while
7485 fprs grow to higher register numbers. Also remember that
7486 although FP regs are 32-bit addressable, we pretend that
7487 the registers are 64 bits wide. */
7488 gpr_reg_base = 26 - cum->words;
7489 fpr_reg_base = 32 + cum->words;
7491 /* Arguments wider than one word need special treatment. */
7492 if (arg_size > 1)
7494 /* Double-extended precision (80-bit), quad-precision (128-bit)
7495 and aggregates including complex numbers are aligned on
7496 128-bit boundaries. The first eight 64-bit argument slots
7497 are associated one-to-one with general registers r26
7498 through r19, and also with floating-point registers fr4
7499 through fr11. Arguments larger than one word are always
7500 passed in general registers. */
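  /* Annotation (worked sketch): e.g. a 16-byte aggregate passed as the
     first argument has arg_size == 2 and cum->words == 0, so the loop
     below builds a PARALLEL of (reg:DI 26) at byte offset 0 and
     (reg:DI 25) at byte offset 8.  */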
7502 rtx loc[8];
7503 int i, offset = 0, ub = arg_size;
7505 /* Align the base register. */
7506 gpr_reg_base -= alignment;
7508 ub = MIN (ub, max_arg_words - cum->words - alignment);
7509 for (i = 0; i < ub; i++)
7511 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7512 gen_rtx_REG (DImode, gpr_reg_base),
7513 GEN_INT (offset));
7514 gpr_reg_base -= 1;
7515 offset += 8;
7518 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
7521 else
7523 /* If the argument is larger than a word, then we know precisely
7524 which registers we must use. */
7525 if (arg_size > 1)
7527 if (cum->words)
7529 gpr_reg_base = 23;
7530 fpr_reg_base = 38;
7532 else
7534 gpr_reg_base = 25;
7535 fpr_reg_base = 34;
7538 else
7540 /* We have a single word (32 bits). A simple computation
7541 will get us the register #s we need. */
7542 gpr_reg_base = 26 - cum->words;
7543 fpr_reg_base = 32 + 2 * cum->words;
7547 /* Determine if the argument needs to be passed in both general and
7548 floating point registers. */
7549 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
7550 /* If we are doing soft-float with portable runtime, then there
7551 is no need to worry about FP regs. */
7552 && ! TARGET_SOFT_FLOAT
7553 /* The parameter must be some kind of float, else we can just
7554 pass it in integer registers. */
7555 && FLOAT_MODE_P (mode)
7556 /* The target function must not have a prototype. */
7557 && cum->nargs_prototype <= 0
7558 /* libcalls do not need to pass items in both FP and general
7559 registers. */
7560 && type != NULL_TREE
7561 /* All this hair applies to outgoing args only. */
7562 && ! incoming)
7563 /* Also pass outgoing floating arguments in both registers in indirect
7564 calls with the 32 bit ABI and the HP assembler since there is no
7565 way to specify the argument locations in static functions. */
7566 || (! TARGET_64BIT
7567 && ! TARGET_GAS
7568 && ! incoming
7569 && cum->indirect
7570 && FLOAT_MODE_P (mode)))
7572 retval
7573 = gen_rtx_PARALLEL
7574 (mode,
7575 gen_rtvec (2,
7576 gen_rtx_EXPR_LIST (VOIDmode,
7577 gen_rtx_REG (mode, fpr_reg_base),
7578 const0_rtx),
7579 gen_rtx_EXPR_LIST (VOIDmode,
7580 gen_rtx_REG (mode, gpr_reg_base),
7581 const0_rtx)));
7583 else
7585 /* See if we should pass this parameter in a general register. */
7586 if (TARGET_SOFT_FLOAT
7587 /* Indirect calls in the normal 32bit ABI require all arguments
7588 to be passed in general registers. */
7589 || (!TARGET_PORTABLE_RUNTIME
7590 && !TARGET_64BIT
7591 && !TARGET_ELF32
7592 && cum->indirect)
7593 /* If the parameter is not a floating point parameter, then
7594 it belongs in GPRs. */
7595 || !FLOAT_MODE_P (mode))
7596 retval = gen_rtx_REG (mode, gpr_reg_base);
7597 else
7598 retval = gen_rtx_REG (mode, fpr_reg_base);
7600 return retval;
7604 /* If this arg would be passed totally in registers or totally on the stack,
7605 then this routine should return zero. It is currently called only for
7606 the 64-bit target. */
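/* Annotation (worked sketch): with max_arg_words == 8, a two-word
   argument arriving at cum->words == 7 gets offset == 1, so it goes
   entirely on the stack (return 0); at cum->words == 6 it fits fully
   in registers (also 0).  Only a genuinely split argument, e.g. a
   four-word argument at cum->words == 6, returns a nonzero count:
   8 - 6 - 0 == 2 registers.  */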
7608 function_arg_partial_nregs (cum, mode, type, named)
7609 CUMULATIVE_ARGS *cum;
7610 enum machine_mode mode;
7611 tree type;
7612 int named ATTRIBUTE_UNUSED;
7614 unsigned int max_arg_words = 8;
7615 unsigned int offset = 0;
7617 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7618 offset = 1;
7620 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7621 /* Arg fits fully into registers. */
7622 return 0;
7623 else if (cum->words + offset >= max_arg_words)
7624 /* Arg fully on the stack. */
7625 return 0;
7626 else
7627 /* Arg is split. */
7628 return max_arg_words - cum->words - offset;
7632 /* Return 1 if this is a comparison operator. This allows the use of
7633 MATCH_OPERATOR to recognize all the branch insns. */
7636 cmpib_comparison_operator (op, mode)
7637 register rtx op;
7638 enum machine_mode mode;
7640 return ((mode == VOIDmode || GET_MODE (op) == mode)
7641 && (GET_CODE (op) == EQ
7642 || GET_CODE (op) == NE
7643 || GET_CODE (op) == GT
7644 || GET_CODE (op) == GTU
7645 || GET_CODE (op) == GE
7646 || GET_CODE (op) == LT
7647 || GET_CODE (op) == LE
7648 || GET_CODE (op) == LEU));
7651 /* On hpux10, the linker will give an error if we have a reference
7652 in the read-only data section to a symbol defined in a shared
7653 library. Therefore, expressions that might require a reloc
7654 cannot be placed in the read-only data section. */
7656 static void
7657 pa_select_section (exp, reloc, align)
7658 tree exp;
7659 int reloc;
7660 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
7662 if (TREE_CODE (exp) == VAR_DECL
7663 && TREE_READONLY (exp)
7664 && !TREE_THIS_VOLATILE (exp)
7665 && DECL_INITIAL (exp)
7666 && (DECL_INITIAL (exp) == error_mark_node
7667 || TREE_CONSTANT (DECL_INITIAL (exp)))
7668 && !reloc)
7669 readonly_data_section ();
7670 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
7671 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
7672 && !reloc)
7673 readonly_data_section ();
7674 else
7675 data_section ();
7678 static void
7679 pa_globalize_label (stream, name)
7680 FILE *stream;
7681 const char *name;
7683 /* We only handle DATA objects here; functions are globalized in
7684 ASM_DECLARE_FUNCTION_NAME. */
7685 if (! FUNCTION_NAME_P (name))
7687 fputs ("\t.EXPORT ", stream);
7688 assemble_name (stream, name);
7689 fputs (",DATA\n", stream);
7692 #include "gt-pa.h"