FSF GCC merge 02/23/03
[official-gcc.git] / gcc / config / pa / pa.c
blobea6df059104a06d94d4ed0453038949235d7a93d
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "output.h"
37 #include "except.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "libfuncs.h"
41 #include "reload.h"
42 #include "c-tree.h"
43 #include "integrate.h"
44 #include "function.h"
45 #include "obstack.h"
46 #include "toplev.h"
47 #include "ggc.h"
48 #include "recog.h"
49 #include "predict.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
54 static int hppa_use_dfa_pipeline_interface PARAMS ((void));
56 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
57 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
59 static int
60 hppa_use_dfa_pipeline_interface ()
62 return 1;
65 /* Return nonzero if there is a bypass for the output of
66 OUT_INSN and the fp store IN_INSN. */
67 int
68 hppa_fpstore_bypass_p (out_insn, in_insn)
69 rtx out_insn, in_insn;
71 enum machine_mode store_mode;
72 enum machine_mode other_mode;
73 rtx set;
75 if (recog_memoized (in_insn) < 0
76 || get_attr_type (in_insn) != TYPE_FPSTORE
77 || recog_memoized (out_insn) < 0)
78 return 0;
80 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
82 set = single_set (out_insn);
83 if (!set)
84 return 0;
86 other_mode = GET_MODE (SET_SRC (set));
88 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
92 #ifndef DO_FRAME_NOTES
93 #ifdef INCOMING_RETURN_ADDR_RTX
94 #define DO_FRAME_NOTES 1
95 #else
96 #define DO_FRAME_NOTES 0
97 #endif
98 #endif
100 static int hppa_address_cost PARAMS ((rtx));
101 static bool hppa_rtx_costs PARAMS ((rtx, int, int, int *));
102 static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
103 static void pa_combine_instructions PARAMS ((rtx));
104 static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
105 static int forward_branch_p PARAMS ((rtx));
106 static int shadd_constant_p PARAMS ((int));
107 static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
108 static int compute_movstrsi_length PARAMS ((rtx));
109 static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
110 static void remove_useless_addtr_insns PARAMS ((rtx, int));
111 static void store_reg PARAMS ((int, int, int));
112 static void store_reg_modify PARAMS ((int, int, int));
113 static void load_reg PARAMS ((int, int, int));
114 static void set_reg_plus_d PARAMS ((int, int, int, int));
115 static void pa_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
116 static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
117 static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
118 static int pa_adjust_priority PARAMS ((rtx, int));
119 static int pa_issue_rate PARAMS ((void));
120 static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
121 ATTRIBUTE_UNUSED;
122 static void pa_encode_section_info PARAMS ((tree, int));
123 static const char *pa_strip_name_encoding PARAMS ((const char *));
124 static bool pa_function_ok_for_sibcall PARAMS ((tree, tree));
125 static void pa_globalize_label PARAMS ((FILE *, const char *))
126 ATTRIBUTE_UNUSED;
127 static void pa_asm_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree));
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor PARAMS ((rtx, int));
131 static void pa_asm_out_destructor PARAMS ((rtx, int));
132 #endif
133 static void copy_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
134 static int length_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
135 static struct deferred_plabel *get_plabel PARAMS ((const char *))
136 ATTRIBUTE_UNUSED;
138 /* Save the operands last given to a compare for use when we
139 generate a scc or bcc insn. */
140 rtx hppa_compare_op0, hppa_compare_op1;
141 enum cmp_type hppa_branch_type;
143 /* Which cpu we are scheduling for. */
144 enum processor_type pa_cpu;
146 /* String to hold which cpu we are scheduling for. */
147 const char *pa_cpu_string;
149 /* Which architecture we are generating code for. */
150 enum architecture_type pa_arch;
152 /* String to hold which architecture we are generating code for. */
153 const char *pa_arch_string;
155 /* Counts for the number of callee-saved general and floating point
156 registers which were saved by the current function's prologue. */
157 static int gr_saved, fr_saved;
159 static rtx find_addr_reg PARAMS ((rtx));
161 /* Keep track of the number of bytes we have output in the CODE subspaces
162 during this compilation so we'll know when to emit inline long-calls. */
163 unsigned long total_code_bytes;
165 /* Variables to handle plabels that we discover are necessary at assembly
166 output time. They are output after the current function. */
167 struct deferred_plabel GTY(())
169 rtx internal_label;
170 const char *name;
172 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
173 deferred_plabels;
174 static size_t n_deferred_plabels = 0;
176 /* Initialize the GCC target structure. */
178 #undef TARGET_ASM_ALIGNED_HI_OP
179 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
180 #undef TARGET_ASM_ALIGNED_SI_OP
181 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
182 #undef TARGET_ASM_ALIGNED_DI_OP
183 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
184 #undef TARGET_ASM_UNALIGNED_HI_OP
185 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
186 #undef TARGET_ASM_UNALIGNED_SI_OP
187 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
188 #undef TARGET_ASM_UNALIGNED_DI_OP
189 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
190 #undef TARGET_ASM_INTEGER
191 #define TARGET_ASM_INTEGER pa_assemble_integer
193 #undef TARGET_ASM_FUNCTION_PROLOGUE
194 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
195 #undef TARGET_ASM_FUNCTION_EPILOGUE
196 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
198 #undef TARGET_SCHED_ADJUST_COST
199 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
200 #undef TARGET_SCHED_ADJUST_PRIORITY
201 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
202 #undef TARGET_SCHED_ISSUE_RATE
203 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
205 #undef TARGET_ENCODE_SECTION_INFO
206 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
207 #undef TARGET_STRIP_NAME_ENCODING
208 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
210 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
211 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
213 #undef TARGET_ASM_OUTPUT_MI_THUNK
214 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
215 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
216 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
218 #if !defined(USE_COLLECT2)
219 #undef TARGET_ASM_CONSTRUCTOR
220 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
221 #undef TARGET_ASM_DESTRUCTOR
222 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
223 #endif
225 #undef TARGET_RTX_COSTS
226 #define TARGET_RTX_COSTS hppa_rtx_costs
227 #undef TARGET_ADDRESS_COST
228 #define TARGET_ADDRESS_COST hppa_address_cost
230 struct gcc_target targetm = TARGET_INITIALIZER;
232 void
233 override_options ()
235 if (pa_cpu_string == NULL)
236 pa_cpu_string = TARGET_SCHED_DEFAULT;
238 if (! strcmp (pa_cpu_string, "8000"))
240 pa_cpu_string = "8000";
241 pa_cpu = PROCESSOR_8000;
243 else if (! strcmp (pa_cpu_string, "7100"))
245 pa_cpu_string = "7100";
246 pa_cpu = PROCESSOR_7100;
248 else if (! strcmp (pa_cpu_string, "700"))
250 pa_cpu_string = "700";
251 pa_cpu = PROCESSOR_700;
253 else if (! strcmp (pa_cpu_string, "7100LC"))
255 pa_cpu_string = "7100LC";
256 pa_cpu = PROCESSOR_7100LC;
258 else if (! strcmp (pa_cpu_string, "7200"))
260 pa_cpu_string = "7200";
261 pa_cpu = PROCESSOR_7200;
263 else if (! strcmp (pa_cpu_string, "7300"))
265 pa_cpu_string = "7300";
266 pa_cpu = PROCESSOR_7300;
268 else
270 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
273 /* Set the instruction set architecture. */
274 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
276 pa_arch_string = "1.0";
277 pa_arch = ARCHITECTURE_10;
278 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
280 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
282 pa_arch_string = "1.1";
283 pa_arch = ARCHITECTURE_11;
284 target_flags &= ~MASK_PA_20;
285 target_flags |= MASK_PA_11;
287 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
289 pa_arch_string = "2.0";
290 pa_arch = ARCHITECTURE_20;
291 target_flags |= MASK_PA_11 | MASK_PA_20;
293 else if (pa_arch_string)
295 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
298 /* Unconditional branches in the delay slot are not compatible with dwarf2
299 call frame information. There is no benefit in using this optimization
300 on PA8000 and later processors. */
301 if (pa_cpu >= PROCESSOR_8000
302 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
303 || flag_unwind_tables)
304 target_flags &= ~MASK_JUMP_IN_DELAY;
306 if (flag_pic && TARGET_PORTABLE_RUNTIME)
308 warning ("PIC code generation is not supported in the portable runtime model\n");
311 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
313 warning ("PIC code generation is not compatible with fast indirect calls\n");
316 if (! TARGET_GAS && write_symbols != NO_DEBUG)
318 warning ("-g is only supported when using GAS on this processor,");
319 warning ("-g option disabled");
320 write_symbols = NO_DEBUG;
323 /* We only support the "big PIC" model now. And we always generate PIC
324 code when in 64bit mode. */
325 if (flag_pic == 1 || TARGET_64BIT)
326 flag_pic = 2;
328 /* We can't guarantee that .dword is available for 32-bit targets. */
329 if (UNITS_PER_WORD == 4)
330 targetm.asm_out.aligned_op.di = NULL;
332 /* The unaligned ops are only available when using GAS. */
333 if (!TARGET_GAS)
335 targetm.asm_out.unaligned_op.hi = NULL;
336 targetm.asm_out.unaligned_op.si = NULL;
337 targetm.asm_out.unaligned_op.di = NULL;
341 /* Return nonzero only if OP is a register of mode MODE,
342 or CONST0_RTX. */
344 reg_or_0_operand (op, mode)
345 rtx op;
346 enum machine_mode mode;
348 return (op == CONST0_RTX (mode) || register_operand (op, mode));
351 /* Return nonzero if OP is suitable for use in a call to a named
352 function.
354 For 2.5 try to eliminate either call_operand_address or
355 function_label_operand, they perform very similar functions. */
357 call_operand_address (op, mode)
358 rtx op;
359 enum machine_mode mode ATTRIBUTE_UNUSED;
361 return (GET_MODE (op) == word_mode
362 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
365 /* Return 1 if X contains a symbolic expression. We know these
366 expressions will have one of a few well defined forms, so
367 we need only check those forms. */
369 symbolic_expression_p (x)
370 register rtx x;
373 /* Strip off any HIGH. */
374 if (GET_CODE (x) == HIGH)
375 x = XEXP (x, 0);
377 return (symbolic_operand (x, VOIDmode));
381 symbolic_operand (op, mode)
382 register rtx op;
383 enum machine_mode mode ATTRIBUTE_UNUSED;
385 switch (GET_CODE (op))
387 case SYMBOL_REF:
388 case LABEL_REF:
389 return 1;
390 case CONST:
391 op = XEXP (op, 0);
392 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
393 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
394 && GET_CODE (XEXP (op, 1)) == CONST_INT);
395 default:
396 return 0;
400 /* Return truth value of statement that OP is a symbolic memory
401 operand of mode MODE. */
404 symbolic_memory_operand (op, mode)
405 rtx op;
406 enum machine_mode mode ATTRIBUTE_UNUSED;
408 if (GET_CODE (op) == SUBREG)
409 op = SUBREG_REG (op);
410 if (GET_CODE (op) != MEM)
411 return 0;
412 op = XEXP (op, 0);
413 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
414 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
417 /* Return 1 if the operand is either a register or a memory operand that is
418 not symbolic. */
421 reg_or_nonsymb_mem_operand (op, mode)
422 register rtx op;
423 enum machine_mode mode;
425 if (register_operand (op, mode))
426 return 1;
428 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
429 return 1;
431 return 0;
434 /* Return 1 if the operand is either a register, zero, or a memory operand
435 that is not symbolic. */
438 reg_or_0_or_nonsymb_mem_operand (op, mode)
439 register rtx op;
440 enum machine_mode mode;
442 if (register_operand (op, mode))
443 return 1;
445 if (op == CONST0_RTX (mode))
446 return 1;
448 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
449 return 1;
451 return 0;
454 /* Return 1 if the operand is a register operand or a non-symbolic memory
455 operand after reload. This predicate is used for branch patterns that
456 internally handle register reloading. We need to accept non-symbolic
457 memory operands after reload to ensure that the pattern is still valid
458 if reload didn't find a hard register for the operand. */
461 reg_before_reload_operand (op, mode)
462 register rtx op;
463 enum machine_mode mode;
465 /* Don't accept a SUBREG since it will need a reload. */
466 if (GET_CODE (op) == SUBREG)
467 return 0;
469 if (register_operand (op, mode))
470 return 1;
472 if (reload_completed
473 && memory_operand (op, mode)
474 && ! symbolic_memory_operand (op, mode))
475 return 1;
477 return 0;
480 /* Accept any constant that can be moved in one instruction into a
481 general register. */
483 cint_ok_for_move (intval)
484 HOST_WIDE_INT intval;
486 /* OK if ldo, ldil, or zdepi, can be used. */
487 return (CONST_OK_FOR_LETTER_P (intval, 'J')
488 || CONST_OK_FOR_LETTER_P (intval, 'N')
489 || CONST_OK_FOR_LETTER_P (intval, 'K'));
492 /* Accept anything that can be moved in one instruction into a general
493 register. */
495 move_operand (op, mode)
496 rtx op;
497 enum machine_mode mode;
499 if (register_operand (op, mode))
500 return 1;
502 if (GET_CODE (op) == CONSTANT_P_RTX)
503 return 1;
505 if (GET_CODE (op) == CONST_INT)
506 return cint_ok_for_move (INTVAL (op));
508 if (GET_CODE (op) == SUBREG)
509 op = SUBREG_REG (op);
510 if (GET_CODE (op) != MEM)
511 return 0;
513 op = XEXP (op, 0);
515 /* We consider a LO_SUM DLT reference a move_operand now since it has
516 been merged into the normal movsi/movdi patterns. */
517 if (GET_CODE (op) == LO_SUM
518 && GET_CODE (XEXP (op, 0)) == REG
519 && REG_OK_FOR_BASE_P (XEXP (op, 0))
520 && GET_CODE (XEXP (op, 1)) == UNSPEC
521 && GET_MODE (op) == Pmode)
522 return 1;
524 /* Since move_operand is only used for source operands, we can always
525 allow scaled indexing! */
526 if (! TARGET_DISABLE_INDEXING
527 && GET_CODE (op) == PLUS
528 && ((GET_CODE (XEXP (op, 0)) == MULT
529 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
530 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
531 && INTVAL (XEXP (XEXP (op, 0), 1))
532 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
533 && GET_CODE (XEXP (op, 1)) == REG)
534 || (GET_CODE (XEXP (op, 1)) == MULT
535 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
536 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
537 && INTVAL (XEXP (XEXP (op, 1), 1))
538 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
539 && GET_CODE (XEXP (op, 0)) == REG)))
540 return 1;
542 return memory_address_p (mode, op);
545 /* Accept REG and any CONST_INT that can be moved in one instruction into a
546 general register. */
548 reg_or_cint_move_operand (op, mode)
549 rtx op;
550 enum machine_mode mode;
552 if (register_operand (op, mode))
553 return 1;
555 if (GET_CODE (op) == CONST_INT)
556 return cint_ok_for_move (INTVAL (op));
558 return 0;
562 pic_label_operand (op, mode)
563 rtx op;
564 enum machine_mode mode ATTRIBUTE_UNUSED;
566 if (!flag_pic)
567 return 0;
569 switch (GET_CODE (op))
571 case LABEL_REF:
572 return 1;
573 case CONST:
574 op = XEXP (op, 0);
575 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
576 && GET_CODE (XEXP (op, 1)) == CONST_INT);
577 default:
578 return 0;
583 fp_reg_operand (op, mode)
584 rtx op;
585 enum machine_mode mode ATTRIBUTE_UNUSED;
587 return reg_renumber && FP_REG_P (op);
592 /* Return truth value of whether OP can be used as an operand in a
593 three operand arithmetic insn that accepts registers of mode MODE
594 or 14-bit signed integers. */
596 arith_operand (op, mode)
597 rtx op;
598 enum machine_mode mode;
600 return (register_operand (op, mode)
601 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
604 /* Return truth value of whether OP can be used as an operand in a
605 three operand arithmetic insn that accepts registers of mode MODE
606 or 11-bit signed integers. */
608 arith11_operand (op, mode)
609 rtx op;
610 enum machine_mode mode;
612 return (register_operand (op, mode)
613 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
616 /* Return truth value of whether OP can be used as an operand in a
617 adddi3 insn. */
619 adddi3_operand (op, mode)
620 rtx op;
621 enum machine_mode mode;
623 return (register_operand (op, mode)
624 || (GET_CODE (op) == CONST_INT
625 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
628 /* A constant integer suitable for use in a PRE_MODIFY memory
629 reference. */
631 pre_cint_operand (op, mode)
632 rtx op;
633 enum machine_mode mode ATTRIBUTE_UNUSED;
635 return (GET_CODE (op) == CONST_INT
636 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
639 /* A constant integer suitable for use in a POST_MODIFY memory
640 reference. */
642 post_cint_operand (op, mode)
643 rtx op;
644 enum machine_mode mode ATTRIBUTE_UNUSED;
646 return (GET_CODE (op) == CONST_INT
647 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
651 arith_double_operand (op, mode)
652 rtx op;
653 enum machine_mode mode;
655 return (register_operand (op, mode)
656 || (GET_CODE (op) == CONST_DOUBLE
657 && GET_MODE (op) == mode
658 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
659 && ((CONST_DOUBLE_HIGH (op) >= 0)
660 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
663 /* Return truth value of whether OP is an integer which fits the
664 range constraining immediate operands in three-address insns, or
665 is an integer register. */
668 ireg_or_int5_operand (op, mode)
669 rtx op;
670 enum machine_mode mode ATTRIBUTE_UNUSED;
672 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
673 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
676 /* Return nonzero if OP is an integer register, else return zero. */
678 ireg_operand (op, mode)
679 rtx op;
680 enum machine_mode mode ATTRIBUTE_UNUSED;
682 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
685 /* Return truth value of whether OP is an integer which fits the
686 range constraining immediate operands in three-address insns. */
689 int5_operand (op, mode)
690 rtx op;
691 enum machine_mode mode ATTRIBUTE_UNUSED;
693 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
697 uint5_operand (op, mode)
698 rtx op;
699 enum machine_mode mode ATTRIBUTE_UNUSED;
701 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
705 int11_operand (op, mode)
706 rtx op;
707 enum machine_mode mode ATTRIBUTE_UNUSED;
709 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
713 uint32_operand (op, mode)
714 rtx op;
715 enum machine_mode mode ATTRIBUTE_UNUSED;
717 #if HOST_BITS_PER_WIDE_INT > 32
718 /* All allowed constants will fit a CONST_INT. */
719 return (GET_CODE (op) == CONST_INT
720 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
721 #else
722 return (GET_CODE (op) == CONST_INT
723 || (GET_CODE (op) == CONST_DOUBLE
724 && CONST_DOUBLE_HIGH (op) == 0));
725 #endif
729 arith5_operand (op, mode)
730 rtx op;
731 enum machine_mode mode;
733 return register_operand (op, mode) || int5_operand (op, mode);
736 /* True iff zdepi can be used to generate this CONST_INT.
737 zdepi first sign extends a 5 bit signed number to a given field
738 length, then places this field anywhere in a zero. */
740 zdepi_cint_p (x)
741 unsigned HOST_WIDE_INT x;
743 unsigned HOST_WIDE_INT lsb_mask, t;
745 /* This might not be obvious, but it's at least fast.
746 This function is critical; we don't have the time loops would take. */
747 lsb_mask = x & -x;
748 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
749 /* Return true iff t is a power of two. */
750 return ((t & (t - 1)) == 0);
753 /* True iff depi or extru can be used to compute (reg & mask).
754 Accept bit pattern like these:
755 0....01....1
756 1....10....0
757 1..10..01..1 */
759 and_mask_p (mask)
760 unsigned HOST_WIDE_INT mask;
762 mask = ~mask;
763 mask += mask & -mask;
764 return (mask & (mask - 1)) == 0;
767 /* True iff depi or extru can be used to compute (reg & OP). */
769 and_operand (op, mode)
770 rtx op;
771 enum machine_mode mode;
773 return (register_operand (op, mode)
774 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
777 /* True iff depi can be used to compute (reg | MASK). */
779 ior_mask_p (mask)
780 unsigned HOST_WIDE_INT mask;
782 mask += mask & -mask;
783 return (mask & (mask - 1)) == 0;
786 /* True iff depi can be used to compute (reg | OP). */
788 ior_operand (op, mode)
789 rtx op;
790 enum machine_mode mode ATTRIBUTE_UNUSED;
792 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
796 lhs_lshift_operand (op, mode)
797 rtx op;
798 enum machine_mode mode;
800 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
803 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
804 Such values can be the left hand side x in (x << r), using the zvdepi
805 instruction. */
807 lhs_lshift_cint_operand (op, mode)
808 rtx op;
809 enum machine_mode mode ATTRIBUTE_UNUSED;
811 unsigned HOST_WIDE_INT x;
812 if (GET_CODE (op) != CONST_INT)
813 return 0;
814 x = INTVAL (op) >> 4;
815 return (x & (x + 1)) == 0;
819 arith32_operand (op, mode)
820 rtx op;
821 enum machine_mode mode;
823 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
827 pc_or_label_operand (op, mode)
828 rtx op;
829 enum machine_mode mode ATTRIBUTE_UNUSED;
831 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
834 /* Legitimize PIC addresses. If the address is already
835 position-independent, we return ORIG. Newly generated
836 position-independent addresses go to REG. If we need more
837 than one register, we lose. */
840 legitimize_pic_address (orig, mode, reg)
841 rtx orig, reg;
842 enum machine_mode mode;
844 rtx pic_ref = orig;
846 /* Labels need special handling. */
847 if (pic_label_operand (orig, mode))
849 /* We do not want to go through the movXX expanders here since that
850 would create recursion.
852 Nor do we really want to call a generator for a named pattern
853 since that requires multiple patterns if we want to support
854 multiple word sizes.
856 So instead we just emit the raw set, which avoids the movXX
857 expanders completely. */
858 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
859 current_function_uses_pic_offset_table = 1;
860 return reg;
862 if (GET_CODE (orig) == SYMBOL_REF)
864 if (reg == 0)
865 abort ();
867 emit_move_insn (reg,
868 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
869 gen_rtx_HIGH (word_mode, orig)));
870 pic_ref
871 = gen_rtx_MEM (Pmode,
872 gen_rtx_LO_SUM (Pmode, reg,
873 gen_rtx_UNSPEC (Pmode,
874 gen_rtvec (1, orig),
875 0)));
877 current_function_uses_pic_offset_table = 1;
878 RTX_UNCHANGING_P (pic_ref) = 1;
879 emit_move_insn (reg, pic_ref);
880 return reg;
882 else if (GET_CODE (orig) == CONST)
884 rtx base;
886 if (GET_CODE (XEXP (orig, 0)) == PLUS
887 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
888 return orig;
890 if (reg == 0)
891 abort ();
893 if (GET_CODE (XEXP (orig, 0)) == PLUS)
895 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
896 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
897 base == reg ? 0 : reg);
899 else abort ();
900 if (GET_CODE (orig) == CONST_INT)
902 if (INT_14_BITS (orig))
903 return plus_constant (base, INTVAL (orig));
904 orig = force_reg (Pmode, orig);
906 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
907 /* Likewise, should we set special REG_NOTEs here? */
909 return pic_ref;
912 /* Try machine-dependent ways of modifying an illegitimate address
913 to be legitimate. If we find one, return the new, valid address.
914 This macro is used in only one place: `memory_address' in explow.c.
916 OLDX is the address as it was before break_out_memory_refs was called.
917 In some cases it is useful to look at this to decide what needs to be done.
919 MODE and WIN are passed so that this macro can use
920 GO_IF_LEGITIMATE_ADDRESS.
922 It is always safe for this macro to do nothing. It exists to recognize
923 opportunities to optimize the output.
925 For the PA, transform:
927 memory(X + <large int>)
929 into:
931 if (<large int> & mask) >= 16
932 Y = (<large int> & ~mask) + mask + 1 Round up.
933 else
934 Y = (<large int> & ~mask) Round down.
935 Z = X + Y
936 memory (Z + (<large int> - Y));
938 This is for CSE to find several similar references, and only use one Z.
940 X can either be a SYMBOL_REF or REG, but because combine can not
941 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
942 D will not fit in 14 bits.
944 MODE_FLOAT references allow displacements which fit in 5 bits, so use
945 0x1f as the mask.
947 MODE_INT references allow displacements which fit in 14 bits, so use
948 0x3fff as the mask.
950 This relies on the fact that most mode MODE_FLOAT references will use FP
951 registers and most mode MODE_INT references will use integer registers.
952 (In the rare case of an FP register used in an integer MODE, we depend
953 on secondary reloads to clean things up.)
956 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
957 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
958 addressing modes to be used).
960 Put X and Z into registers. Then put the entire expression into
961 a register. */
964 hppa_legitimize_address (x, oldx, mode)
965 rtx x, oldx ATTRIBUTE_UNUSED;
966 enum machine_mode mode;
968 rtx orig = x;
970 if (flag_pic)
971 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
973 /* Strip off CONST. */
974 if (GET_CODE (x) == CONST)
975 x = XEXP (x, 0);
977 /* Special case. Get the SYMBOL_REF into a register and use indexing.
978 That should always be safe. */
979 if (GET_CODE (x) == PLUS
980 && GET_CODE (XEXP (x, 0)) == REG
981 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
983 rtx reg = force_reg (Pmode, XEXP (x, 1));
984 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
987 /* Note we must reject symbols which represent function addresses
988 since the assembler/linker can't handle arithmetic on plabels. */
989 if (GET_CODE (x) == PLUS
990 && GET_CODE (XEXP (x, 1)) == CONST_INT
991 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
992 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
993 || GET_CODE (XEXP (x, 0)) == REG))
995 rtx int_part, ptr_reg;
996 int newoffset;
997 int offset = INTVAL (XEXP (x, 1));
998 int mask;
1000 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1001 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1003 /* Choose which way to round the offset. Round up if we
1004 are >= halfway to the next boundary. */
1005 if ((offset & mask) >= ((mask + 1) / 2))
1006 newoffset = (offset & ~ mask) + mask + 1;
1007 else
1008 newoffset = (offset & ~ mask);
1010 /* If the newoffset will not fit in 14 bits (ldo), then
1011 handling this would take 4 or 5 instructions (2 to load
1012 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1013 add the new offset and the SYMBOL_REF.) Combine can
1014 not handle 4->2 or 5->2 combinations, so do not create
1015 them. */
1016 if (! VAL_14_BITS_P (newoffset)
1017 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1019 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1020 rtx tmp_reg
1021 = force_reg (Pmode,
1022 gen_rtx_HIGH (Pmode, const_part));
1023 ptr_reg
1024 = force_reg (Pmode,
1025 gen_rtx_LO_SUM (Pmode,
1026 tmp_reg, const_part));
1028 else
1030 if (! VAL_14_BITS_P (newoffset))
1031 int_part = force_reg (Pmode, GEN_INT (newoffset));
1032 else
1033 int_part = GEN_INT (newoffset);
1035 ptr_reg = force_reg (Pmode,
1036 gen_rtx_PLUS (Pmode,
1037 force_reg (Pmode, XEXP (x, 0)),
1038 int_part));
1040 return plus_constant (ptr_reg, offset - newoffset);
1043 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1045 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1046 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1047 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1048 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1049 || GET_CODE (XEXP (x, 1)) == SUBREG)
1050 && GET_CODE (XEXP (x, 1)) != CONST)
1052 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1053 rtx reg1, reg2;
1055 reg1 = XEXP (x, 1);
1056 if (GET_CODE (reg1) != REG)
1057 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1059 reg2 = XEXP (XEXP (x, 0), 0);
1060 if (GET_CODE (reg2) != REG)
1061 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1063 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1064 gen_rtx_MULT (Pmode,
1065 reg2,
1066 GEN_INT (val)),
1067 reg1));
1070 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1072 Only do so for floating point modes since this is more speculative
1073 and we lose if it's an integer store. */
1074 if (GET_CODE (x) == PLUS
1075 && GET_CODE (XEXP (x, 0)) == PLUS
1076 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1077 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1078 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1079 && (mode == SFmode || mode == DFmode))
1082 /* First, try and figure out what to use as a base register. */
1083 rtx reg1, reg2, base, idx, orig_base;
1085 reg1 = XEXP (XEXP (x, 0), 1);
1086 reg2 = XEXP (x, 1);
1087 base = NULL_RTX;
1088 idx = NULL_RTX;
1090 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1091 then emit_move_sequence will turn on REG_POINTER so we'll know
1092 it's a base register below. */
1093 if (GET_CODE (reg1) != REG)
1094 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1096 if (GET_CODE (reg2) != REG)
1097 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1099 /* Figure out what the base and index are. */
1101 if (GET_CODE (reg1) == REG
1102 && REG_POINTER (reg1))
1104 base = reg1;
1105 orig_base = XEXP (XEXP (x, 0), 1);
1106 idx = gen_rtx_PLUS (Pmode,
1107 gen_rtx_MULT (Pmode,
1108 XEXP (XEXP (XEXP (x, 0), 0), 0),
1109 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1110 XEXP (x, 1));
1112 else if (GET_CODE (reg2) == REG
1113 && REG_POINTER (reg2))
1115 base = reg2;
1116 orig_base = XEXP (x, 1);
1117 idx = XEXP (x, 0);
1120 if (base == 0)
1121 return orig;
1123 /* If the index adds a large constant, try to scale the
1124 constant so that it can be loaded with only one insn. */
1125 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1126 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1127 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1128 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1130 /* Divide the CONST_INT by the scale factor, then add it to A. */
1131 int val = INTVAL (XEXP (idx, 1));
1133 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1134 reg1 = XEXP (XEXP (idx, 0), 0);
1135 if (GET_CODE (reg1) != REG)
1136 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1138 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1140 /* We can now generate a simple scaled indexed address. */
1141 return
1142 force_reg
1143 (Pmode, gen_rtx_PLUS (Pmode,
1144 gen_rtx_MULT (Pmode, reg1,
1145 XEXP (XEXP (idx, 0), 1)),
1146 base));
1149 /* If B + C is still a valid base register, then add them. */
1150 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1151 && INTVAL (XEXP (idx, 1)) <= 4096
1152 && INTVAL (XEXP (idx, 1)) >= -4096)
1154 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1155 rtx reg1, reg2;
1157 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1159 reg2 = XEXP (XEXP (idx, 0), 0);
1160 if (GET_CODE (reg2) != CONST_INT)
1161 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1163 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1164 gen_rtx_MULT (Pmode,
1165 reg2,
1166 GEN_INT (val)),
1167 reg1));
1170 /* Get the index into a register, then add the base + index and
1171 return a register holding the result. */
1173 /* First get A into a register. */
1174 reg1 = XEXP (XEXP (idx, 0), 0);
1175 if (GET_CODE (reg1) != REG)
1176 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1178 /* And get B into a register. */
1179 reg2 = XEXP (idx, 1);
1180 if (GET_CODE (reg2) != REG)
1181 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1183 reg1 = force_reg (Pmode,
1184 gen_rtx_PLUS (Pmode,
1185 gen_rtx_MULT (Pmode, reg1,
1186 XEXP (XEXP (idx, 0), 1)),
1187 reg2));
1189 /* Add the result to our base register and return. */
1190 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1194 /* Uh-oh. We might have an address for x[n-100000]. This needs
1195 special handling to avoid creating an indexed memory address
1196 with x-100000 as the base.
1198 If the constant part is small enough, then it's still safe because
1199 there is a guard page at the beginning and end of the data segment.
1201 Scaled references are common enough that we want to try and rearrange the
1202 terms so that we can use indexing for these addresses too. Only
1203 do the optimization for floatint point modes. */
1205 if (GET_CODE (x) == PLUS
1206 && symbolic_expression_p (XEXP (x, 1)))
1208 /* Ugly. We modify things here so that the address offset specified
1209 by the index expression is computed first, then added to x to form
1210 the entire address. */
1212 rtx regx1, regx2, regy1, regy2, y;
1214 /* Strip off any CONST. */
1215 y = XEXP (x, 1);
1216 if (GET_CODE (y) == CONST)
1217 y = XEXP (y, 0);
1219 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1221 /* See if this looks like
1222 (plus (mult (reg) (shadd_const))
1223 (const (plus (symbol_ref) (const_int))))
1225 Where const_int is small. In that case the const
1226 expression is a valid pointer for indexing.
1228 If const_int is big, but can be divided evenly by shadd_const
1229 and added to (reg). This allows more scaled indexed addresses. */
1230 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1231 && GET_CODE (XEXP (x, 0)) == MULT
1232 && GET_CODE (XEXP (y, 1)) == CONST_INT
1233 && INTVAL (XEXP (y, 1)) >= -4096
1234 && INTVAL (XEXP (y, 1)) <= 4095
1235 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1236 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1238 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1239 rtx reg1, reg2;
1241 reg1 = XEXP (x, 1);
1242 if (GET_CODE (reg1) != REG)
1243 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1245 reg2 = XEXP (XEXP (x, 0), 0);
1246 if (GET_CODE (reg2) != REG)
1247 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1249 return force_reg (Pmode,
1250 gen_rtx_PLUS (Pmode,
1251 gen_rtx_MULT (Pmode,
1252 reg2,
1253 GEN_INT (val)),
1254 reg1));
1256 else if ((mode == DFmode || mode == SFmode)
1257 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1258 && GET_CODE (XEXP (x, 0)) == MULT
1259 && GET_CODE (XEXP (y, 1)) == CONST_INT
1260 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1261 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1262 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1264 regx1
1265 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1266 / INTVAL (XEXP (XEXP (x, 0), 1))));
1267 regx2 = XEXP (XEXP (x, 0), 0);
1268 if (GET_CODE (regx2) != REG)
1269 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1270 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1271 regx2, regx1));
1272 return
1273 force_reg (Pmode,
1274 gen_rtx_PLUS (Pmode,
1275 gen_rtx_MULT (Pmode, regx2,
1276 XEXP (XEXP (x, 0), 1)),
1277 force_reg (Pmode, XEXP (y, 0))));
1279 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1280 && INTVAL (XEXP (y, 1)) >= -4096
1281 && INTVAL (XEXP (y, 1)) <= 4095)
1283 /* This is safe because of the guard page at the
1284 beginning and end of the data space. Just
1285 return the original address. */
1286 return orig;
1288 else
1290 /* Doesn't look like one we can optimize. */
1291 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1292 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1293 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1294 regx1 = force_reg (Pmode,
1295 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1296 regx1, regy2));
1297 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1302 return orig;
1305 /* For the HPPA, REG and REG+CONST is cost 0
1306 and addresses involving symbolic constants are cost 2.
1308 PIC addresses are very expensive.
1310 It is no coincidence that this has the same structure
1311 as GO_IF_LEGITIMATE_ADDRESS. */
1313 static int
1314 hppa_address_cost (X)
1315 rtx X;
1317 switch (GET_CODE (X))
1319 case REG:
1320 case PLUS:
1321 case LO_SUM:
1322 return 1;
1323 case HIGH:
1324 return 2;
1325 default:
1326 return 4;
1330 /* Compute a (partial) cost for rtx X. Return true if the complete
1331 cost has been computed, and false if subexpressions should be
1332 scanned. In either case, *TOTAL contains the cost result. */
1334 static bool
1335 hppa_rtx_costs (x, code, outer_code, total)
1336 rtx x;
1337 int code, outer_code;
1338 int *total;
1340 switch (code)
1342 case CONST_INT:
1343 if (INTVAL (x) == 0)
1344 *total = 0;
1345 else if (INT_14_BITS (x))
1346 *total = 1;
1347 else
1348 *total = 2;
1349 return true;
1351 case HIGH:
1352 *total = 2;
1353 return true;
1355 case CONST:
1356 case LABEL_REF:
1357 case SYMBOL_REF:
1358 *total = 4;
1359 return true;
1361 case CONST_DOUBLE:
1362 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1363 && outer_code != SET)
1364 *total = 0;
1365 else
1366 *total = 8;
1367 return true;
1369 case MULT:
1370 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1371 *total = COSTS_N_INSNS (3);
1372 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1373 *total = COSTS_N_INSNS (8);
1374 else
1375 *total = COSTS_N_INSNS (20);
1376 return true;
1378 case DIV:
1379 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1381 *total = COSTS_N_INSNS (14);
1382 return true;
1384 /* FALLTHRU */
1386 case UDIV:
1387 case MOD:
1388 case UMOD:
1389 *total = COSTS_N_INSNS (60);
1390 return true;
1392 case PLUS: /* this includes shNadd insns */
1393 case MINUS:
1394 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1395 *total = COSTS_N_INSNS (3);
1396 else
1397 *total = COSTS_N_INSNS (1);
1398 return true;
1400 case ASHIFT:
1401 case ASHIFTRT:
1402 case LSHIFTRT:
1403 *total = COSTS_N_INSNS (1);
1404 return true;
1406 default:
1407 return false;
1411 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1412 new rtx with the correct mode. */
1413 static inline rtx
1414 force_mode (mode, orig)
1415 enum machine_mode mode;
1416 rtx orig;
1418 if (mode == GET_MODE (orig))
1419 return orig;
1421 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1422 abort ();
1424 return gen_rtx_REG (mode, REGNO (orig));
1427 /* Emit insns to move operands[1] into operands[0].
1429 Return 1 if we have written out everything that needs to be done to
1430 do the move. Otherwise, return 0 and the caller will emit the move
1431 normally.
1433 Note SCRATCH_REG may not be in the proper mode depending on how it
1434 will be used. This routine is resposible for creating a new copy
1435 of SCRATCH_REG in the proper mode. */
1438 emit_move_sequence (operands, mode, scratch_reg)
1439 rtx *operands;
1440 enum machine_mode mode;
1441 rtx scratch_reg;
1443 register rtx operand0 = operands[0];
1444 register rtx operand1 = operands[1];
1445 register rtx tem;
1447 if (scratch_reg
1448 && reload_in_progress && GET_CODE (operand0) == REG
1449 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1450 operand0 = reg_equiv_mem[REGNO (operand0)];
1451 else if (scratch_reg
1452 && reload_in_progress && GET_CODE (operand0) == SUBREG
1453 && GET_CODE (SUBREG_REG (operand0)) == REG
1454 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1456 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1457 the code which tracks sets/uses for delete_output_reload. */
1458 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1459 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1460 SUBREG_BYTE (operand0));
1461 operand0 = alter_subreg (&temp);
1464 if (scratch_reg
1465 && reload_in_progress && GET_CODE (operand1) == REG
1466 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1467 operand1 = reg_equiv_mem[REGNO (operand1)];
1468 else if (scratch_reg
1469 && reload_in_progress && GET_CODE (operand1) == SUBREG
1470 && GET_CODE (SUBREG_REG (operand1)) == REG
1471 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1473 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1474 the code which tracks sets/uses for delete_output_reload. */
1475 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1476 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1477 SUBREG_BYTE (operand1));
1478 operand1 = alter_subreg (&temp);
1481 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1482 && ((tem = find_replacement (&XEXP (operand0, 0)))
1483 != XEXP (operand0, 0)))
1484 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1485 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1486 && ((tem = find_replacement (&XEXP (operand1, 0)))
1487 != XEXP (operand1, 0)))
1488 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1490 /* Handle secondary reloads for loads/stores of FP registers from
1491 REG+D addresses where D does not fit in 5 bits, including
1492 (subreg (mem (addr))) cases. */
1493 if (fp_reg_operand (operand0, mode)
1494 && ((GET_CODE (operand1) == MEM
1495 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1496 || ((GET_CODE (operand1) == SUBREG
1497 && GET_CODE (XEXP (operand1, 0)) == MEM
1498 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1499 && scratch_reg)
1501 if (GET_CODE (operand1) == SUBREG)
1502 operand1 = XEXP (operand1, 0);
1504 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1505 it in WORD_MODE regardless of what mode it was originally given
1506 to us. */
1507 scratch_reg = force_mode (word_mode, scratch_reg);
1509 /* D might not fit in 14 bits either; for such cases load D into
1510 scratch reg. */
1511 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1513 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1514 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1515 Pmode,
1516 XEXP (XEXP (operand1, 0), 0),
1517 scratch_reg));
1519 else
1520 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1521 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1522 gen_rtx_MEM (mode, scratch_reg)));
1523 return 1;
1525 else if (fp_reg_operand (operand1, mode)
1526 && ((GET_CODE (operand0) == MEM
1527 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1528 || ((GET_CODE (operand0) == SUBREG)
1529 && GET_CODE (XEXP (operand0, 0)) == MEM
1530 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1531 && scratch_reg)
1533 if (GET_CODE (operand0) == SUBREG)
1534 operand0 = XEXP (operand0, 0);
1536 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1537 it in WORD_MODE regardless of what mode it was originally given
1538 to us. */
1539 scratch_reg = force_mode (word_mode, scratch_reg);
1541 /* D might not fit in 14 bits either; for such cases load D into
1542 scratch reg. */
1543 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1545 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1546 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1547 0)),
1548 Pmode,
1549 XEXP (XEXP (operand0, 0),
1551 scratch_reg));
1553 else
1554 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1555 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1556 operand1));
1557 return 1;
1559 /* Handle secondary reloads for loads of FP registers from constant
1560 expressions by forcing the constant into memory.
1562 use scratch_reg to hold the address of the memory location.
1564 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1565 NO_REGS when presented with a const_int and a register class
1566 containing only FP registers. Doing so unfortunately creates
1567 more problems than it solves. Fix this for 2.5. */
1568 else if (fp_reg_operand (operand0, mode)
1569 && CONSTANT_P (operand1)
1570 && scratch_reg)
1572 rtx xoperands[2];
1574 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1575 it in WORD_MODE regardless of what mode it was originally given
1576 to us. */
1577 scratch_reg = force_mode (word_mode, scratch_reg);
1579 /* Force the constant into memory and put the address of the
1580 memory location into scratch_reg. */
1581 xoperands[0] = scratch_reg;
1582 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1583 emit_move_sequence (xoperands, Pmode, 0);
1585 /* Now load the destination register. */
1586 emit_insn (gen_rtx_SET (mode, operand0,
1587 gen_rtx_MEM (mode, scratch_reg)));
1588 return 1;
1590 /* Handle secondary reloads for SAR. These occur when trying to load
1591 the SAR from memory, FP register, or with a constant. */
1592 else if (GET_CODE (operand0) == REG
1593 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1594 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1595 && (GET_CODE (operand1) == MEM
1596 || GET_CODE (operand1) == CONST_INT
1597 || (GET_CODE (operand1) == REG
1598 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1599 && scratch_reg)
1601 /* D might not fit in 14 bits either; for such cases load D into
1602 scratch reg. */
1603 if (GET_CODE (operand1) == MEM
1604 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1606 /* We are reloading the address into the scratch register, so we
1607 want to make sure the scratch register is a full register. */
1608 scratch_reg = force_mode (word_mode, scratch_reg);
1610 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1611 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1612 0)),
1613 Pmode,
1614 XEXP (XEXP (operand1, 0),
1616 scratch_reg));
1618 /* Now we are going to load the scratch register from memory,
1619 we want to load it in the same width as the original MEM,
1620 which must be the same as the width of the ultimate destination,
1621 OPERAND0. */
1622 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1624 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1625 scratch_reg));
1627 else
1629 /* We want to load the scratch register using the same mode as
1630 the ultimate destination. */
1631 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1633 emit_move_insn (scratch_reg, operand1);
1636 /* And emit the insn to set the ultimate destination. We know that
1637 the scratch register has the same mode as the destination at this
1638 point. */
1639 emit_move_insn (operand0, scratch_reg);
1640 return 1;
1642 /* Handle most common case: storing into a register. */
1643 else if (register_operand (operand0, mode))
1645 if (register_operand (operand1, mode)
1646 || (GET_CODE (operand1) == CONST_INT
1647 && cint_ok_for_move (INTVAL (operand1)))
1648 || (operand1 == CONST0_RTX (mode))
1649 || (GET_CODE (operand1) == HIGH
1650 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1651 /* Only `general_operands' can come here, so MEM is ok. */
1652 || GET_CODE (operand1) == MEM)
1654 /* Run this case quickly. */
1655 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1656 return 1;
1659 else if (GET_CODE (operand0) == MEM)
1661 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1662 && !(reload_in_progress || reload_completed))
1664 rtx temp = gen_reg_rtx (DFmode);
1666 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1667 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1668 return 1;
1670 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1672 /* Run this case quickly. */
1673 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1674 return 1;
1676 if (! (reload_in_progress || reload_completed))
1678 operands[0] = validize_mem (operand0);
1679 operands[1] = operand1 = force_reg (mode, operand1);
1683 /* Simplify the source if we need to.
1684 Note we do have to handle function labels here, even though we do
1685 not consider them legitimate constants. Loop optimizations can
1686 call the emit_move_xxx with one as a source. */
1687 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1688 || function_label_operand (operand1, mode)
1689 || (GET_CODE (operand1) == HIGH
1690 && symbolic_operand (XEXP (operand1, 0), mode)))
1692 int ishighonly = 0;
1694 if (GET_CODE (operand1) == HIGH)
1696 ishighonly = 1;
1697 operand1 = XEXP (operand1, 0);
1699 if (symbolic_operand (operand1, mode))
1701 /* Argh. The assembler and linker can't handle arithmetic
1702 involving plabels.
1704 So we force the plabel into memory, load operand0 from
1705 the memory location, then add in the constant part. */
1706 if ((GET_CODE (operand1) == CONST
1707 && GET_CODE (XEXP (operand1, 0)) == PLUS
1708 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1709 || function_label_operand (operand1, mode))
1711 rtx temp, const_part;
1713 /* Figure out what (if any) scratch register to use. */
1714 if (reload_in_progress || reload_completed)
1716 scratch_reg = scratch_reg ? scratch_reg : operand0;
1717 /* SCRATCH_REG will hold an address and maybe the actual
1718 data. We want it in WORD_MODE regardless of what mode it
1719 was originally given to us. */
1720 scratch_reg = force_mode (word_mode, scratch_reg);
1722 else if (flag_pic)
1723 scratch_reg = gen_reg_rtx (Pmode);
1725 if (GET_CODE (operand1) == CONST)
1727 /* Save away the constant part of the expression. */
1728 const_part = XEXP (XEXP (operand1, 0), 1);
1729 if (GET_CODE (const_part) != CONST_INT)
1730 abort ();
1732 /* Force the function label into memory. */
1733 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1735 else
1737 /* No constant part. */
1738 const_part = NULL_RTX;
1740 /* Force the function label into memory. */
1741 temp = force_const_mem (mode, operand1);
1745 /* Get the address of the memory location. PIC-ify it if
1746 necessary. */
1747 temp = XEXP (temp, 0);
1748 if (flag_pic)
1749 temp = legitimize_pic_address (temp, mode, scratch_reg);
1751 /* Put the address of the memory location into our destination
1752 register. */
1753 operands[1] = temp;
1754 emit_move_sequence (operands, mode, scratch_reg);
1756 /* Now load from the memory location into our destination
1757 register. */
1758 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1759 emit_move_sequence (operands, mode, scratch_reg);
1761 /* And add back in the constant part. */
1762 if (const_part != NULL_RTX)
1763 expand_inc (operand0, const_part);
1765 return 1;
1768 if (flag_pic)
1770 rtx temp;
1772 if (reload_in_progress || reload_completed)
1774 temp = scratch_reg ? scratch_reg : operand0;
1775 /* TEMP will hold an address and maybe the actual
1776 data. We want it in WORD_MODE regardless of what mode it
1777 was originally given to us. */
1778 temp = force_mode (word_mode, temp);
1780 else
1781 temp = gen_reg_rtx (Pmode);
1783 /* (const (plus (symbol) (const_int))) must be forced to
1784 memory during/after reload if the const_int will not fit
1785 in 14 bits. */
1786 if (GET_CODE (operand1) == CONST
1787 && GET_CODE (XEXP (operand1, 0)) == PLUS
1788 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1789 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1790 && (reload_completed || reload_in_progress)
1791 && flag_pic)
1793 operands[1] = force_const_mem (mode, operand1);
1794 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1795 mode, temp);
1796 emit_move_sequence (operands, mode, temp);
1798 else
1800 operands[1] = legitimize_pic_address (operand1, mode, temp);
1801 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1804 /* On the HPPA, references to data space are supposed to use dp,
1805 register 27, but showing it in the RTL inhibits various cse
1806 and loop optimizations. */
1807 else
1809 rtx temp, set;
1811 if (reload_in_progress || reload_completed)
1813 temp = scratch_reg ? scratch_reg : operand0;
1814 /* TEMP will hold an address and maybe the actual
1815 data. We want it in WORD_MODE regardless of what mode it
1816 was originally given to us. */
1817 temp = force_mode (word_mode, temp);
1819 else
1820 temp = gen_reg_rtx (mode);
1822 /* Loading a SYMBOL_REF into a register makes that register
1823 safe to be used as the base in an indexed address.
1825 Don't mark hard registers though. That loses. */
1826 if (GET_CODE (operand0) == REG
1827 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1828 REG_POINTER (operand0) = 1;
1829 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1830 REG_POINTER (temp) = 1;
1831 if (ishighonly)
1832 set = gen_rtx_SET (mode, operand0, temp);
1833 else
1834 set = gen_rtx_SET (VOIDmode,
1835 operand0,
1836 gen_rtx_LO_SUM (mode, temp, operand1));
1838 emit_insn (gen_rtx_SET (VOIDmode,
1839 temp,
1840 gen_rtx_HIGH (mode, operand1)));
1841 emit_insn (set);
1844 return 1;
1846 else if (GET_CODE (operand1) != CONST_INT
1847 || ! cint_ok_for_move (INTVAL (operand1)))
1849 rtx extend = NULL_RTX;
1850 rtx temp;
1852 if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1853 && HOST_BITS_PER_WIDE_INT > 32
1854 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1856 HOST_WIDE_INT val = INTVAL (operand1);
1857 HOST_WIDE_INT nval;
1859 /* Extract the low order 32 bits of the value and sign extend.
1860 If the new value is the same as the original value, we can
1861 can use the original value as-is. If the new value is
1862 different, we use it and insert the most-significant 32-bits
1863 of the original value into the final result. */
1864 nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1865 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1866 if (val != nval)
1868 #if HOST_BITS_PER_WIDE_INT > 32
1869 extend = GEN_INT (val >> 32);
1870 #endif
1871 operand1 = GEN_INT (nval);
1875 if (reload_in_progress || reload_completed)
1876 temp = operand0;
1877 else
1878 temp = gen_reg_rtx (mode);
1880 /* We don't directly split DImode constants on 32-bit targets
1881 because PLUS uses an 11-bit immediate and the insn sequence
1882 generated is not as efficient as the one using HIGH/LO_SUM. */
1883 if (GET_CODE (operand1) == CONST_INT
1884 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
1886 /* Directly break constant into high and low parts. This
1887 provides better optimization opportunities because various
1888 passes recognize constants split with PLUS but not LO_SUM.
1889 We use a 14-bit signed low part except when the addition
1890 of 0x4000 to the high part might change the sign of the
1891 high part. */
1892 HOST_WIDE_INT value = INTVAL (operand1);
1893 HOST_WIDE_INT low = value & 0x3fff;
1894 HOST_WIDE_INT high = value & ~ 0x3fff;
1896 if (low >= 0x2000)
1898 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1899 high += 0x2000;
1900 else
1901 high += 0x4000;
1904 low = value - high;
1906 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1907 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1909 else
1911 emit_insn (gen_rtx_SET (VOIDmode, temp,
1912 gen_rtx_HIGH (mode, operand1)));
1913 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1916 emit_move_insn (operands[0], operands[1]);
1918 if (extend != NULL_RTX)
1919 emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1920 extend));
1922 return 1;
1925 /* Now have insn-emit do whatever it normally does. */
1926 return 0;
1929 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1930 it will need a link/runtime reloc). */
1933 reloc_needed (exp)
1934 tree exp;
1936 int reloc = 0;
1938 switch (TREE_CODE (exp))
1940 case ADDR_EXPR:
1941 return 1;
1943 case PLUS_EXPR:
1944 case MINUS_EXPR:
1945 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1946 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1947 break;
1949 case NOP_EXPR:
1950 case CONVERT_EXPR:
1951 case NON_LVALUE_EXPR:
1952 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1953 break;
1955 case CONSTRUCTOR:
1957 register tree link;
1958 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1959 if (TREE_VALUE (link) != 0)
1960 reloc |= reloc_needed (TREE_VALUE (link));
1962 break;
1964 case ERROR_MARK:
1965 break;
1967 default:
1968 break;
1970 return reloc;
1973 /* Does operand (which is a symbolic_operand) live in text space?
1974 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
1975 will be true. */
1978 read_only_operand (operand, mode)
1979 rtx operand;
1980 enum machine_mode mode ATTRIBUTE_UNUSED;
1982 if (GET_CODE (operand) == CONST)
1983 operand = XEXP (XEXP (operand, 0), 0);
1984 if (flag_pic)
1986 if (GET_CODE (operand) == SYMBOL_REF)
1987 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1989 else
1991 if (GET_CODE (operand) == SYMBOL_REF)
1992 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1994 return 1;
1998 /* Return the best assembler insn template
1999 for moving operands[1] into operands[0] as a fullword. */
2000 const char *
2001 singlemove_string (operands)
2002 rtx *operands;
2004 HOST_WIDE_INT intval;
2006 if (GET_CODE (operands[0]) == MEM)
2007 return "stw %r1,%0";
2008 if (GET_CODE (operands[1]) == MEM)
2009 return "ldw %1,%0";
2010 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2012 long i;
2013 REAL_VALUE_TYPE d;
2015 if (GET_MODE (operands[1]) != SFmode)
2016 abort ();
2018 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2019 bit pattern. */
2020 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2021 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2023 operands[1] = GEN_INT (i);
2024 /* Fall through to CONST_INT case. */
2026 if (GET_CODE (operands[1]) == CONST_INT)
2028 intval = INTVAL (operands[1]);
2030 if (VAL_14_BITS_P (intval))
2031 return "ldi %1,%0";
2032 else if ((intval & 0x7ff) == 0)
2033 return "ldil L'%1,%0";
2034 else if (zdepi_cint_p (intval))
2035 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2036 else
2037 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2039 return "copy %1,%0";
2043 /* Compute position (in OP[1]) and width (in OP[2])
2044 useful for copying IMM to a register using the zdepi
2045 instructions. Store the immediate value to insert in OP[0]. */
2046 static void
2047 compute_zdepwi_operands (imm, op)
2048 unsigned HOST_WIDE_INT imm;
2049 unsigned *op;
2051 int lsb, len;
2053 /* Find the least significant set bit in IMM. */
2054 for (lsb = 0; lsb < 32; lsb++)
2056 if ((imm & 1) != 0)
2057 break;
2058 imm >>= 1;
2061 /* Choose variants based on *sign* of the 5-bit field. */
2062 if ((imm & 0x10) == 0)
2063 len = (lsb <= 28) ? 4 : 32 - lsb;
2064 else
2066 /* Find the width of the bitstring in IMM. */
2067 for (len = 5; len < 32; len++)
2069 if ((imm & (1 << len)) == 0)
2070 break;
2073 /* Sign extend IMM as a 5-bit value. */
2074 imm = (imm & 0xf) - 0x10;
2077 op[0] = imm;
2078 op[1] = 31 - lsb;
2079 op[2] = len;
2082 /* Compute position (in OP[1]) and width (in OP[2])
2083 useful for copying IMM to a register using the depdi,z
2084 instructions. Store the immediate value to insert in OP[0]. */
2085 void
2086 compute_zdepdi_operands (imm, op)
2087 unsigned HOST_WIDE_INT imm;
2088 unsigned *op;
2090 HOST_WIDE_INT lsb, len;
2092 /* Find the least significant set bit in IMM. */
2093 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2095 if ((imm & 1) != 0)
2096 break;
2097 imm >>= 1;
2100 /* Choose variants based on *sign* of the 5-bit field. */
2101 if ((imm & 0x10) == 0)
2102 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2103 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2104 else
2106 /* Find the width of the bitstring in IMM. */
2107 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2109 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2110 break;
2113 /* Sign extend IMM as a 5-bit value. */
2114 imm = (imm & 0xf) - 0x10;
2117 op[0] = imm;
2118 op[1] = 63 - lsb;
2119 op[2] = len;
2122 /* Output assembler code to perform a doubleword move insn
2123 with operands OPERANDS. */
2125 const char *
2126 output_move_double (operands)
2127 rtx *operands;
2129 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2130 rtx latehalf[2];
2131 rtx addreg0 = 0, addreg1 = 0;
2133 /* First classify both operands. */
2135 if (REG_P (operands[0]))
2136 optype0 = REGOP;
2137 else if (offsettable_memref_p (operands[0]))
2138 optype0 = OFFSOP;
2139 else if (GET_CODE (operands[0]) == MEM)
2140 optype0 = MEMOP;
2141 else
2142 optype0 = RNDOP;
2144 if (REG_P (operands[1]))
2145 optype1 = REGOP;
2146 else if (CONSTANT_P (operands[1]))
2147 optype1 = CNSTOP;
2148 else if (offsettable_memref_p (operands[1]))
2149 optype1 = OFFSOP;
2150 else if (GET_CODE (operands[1]) == MEM)
2151 optype1 = MEMOP;
2152 else
2153 optype1 = RNDOP;
2155 /* Check for the cases that the operand constraints are not
2156 supposed to allow to happen. Abort if we get one,
2157 because generating code for these cases is painful. */
2159 if (optype0 != REGOP && optype1 != REGOP)
2160 abort ();
2162 /* Handle auto decrementing and incrementing loads and stores
2163 specifically, since the structure of the function doesn't work
2164 for them without major modification. Do it better when we learn
2165 this port about the general inc/dec addressing of PA.
2166 (This was written by tege. Chide him if it doesn't work.) */
2168 if (optype0 == MEMOP)
2170 /* We have to output the address syntax ourselves, since print_operand
2171 doesn't deal with the addresses we want to use. Fix this later. */
2173 rtx addr = XEXP (operands[0], 0);
2174 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2176 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2178 operands[0] = XEXP (addr, 0);
2179 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2180 abort ();
2182 if (!reg_overlap_mentioned_p (high_reg, addr))
2184 /* No overlap between high target register and address
2185 register. (We do this in a non-obvious way to
2186 save a register file writeback) */
2187 if (GET_CODE (addr) == POST_INC)
2188 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2189 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2191 else
2192 abort ();
2194 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2196 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2198 operands[0] = XEXP (addr, 0);
2199 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2200 abort ();
2202 if (!reg_overlap_mentioned_p (high_reg, addr))
2204 /* No overlap between high target register and address
2205 register. (We do this in a non-obvious way to
2206 save a register file writeback) */
2207 if (GET_CODE (addr) == PRE_INC)
2208 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2209 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2211 else
2212 abort ();
2215 if (optype1 == MEMOP)
2217 /* We have to output the address syntax ourselves, since print_operand
2218 doesn't deal with the addresses we want to use. Fix this later. */
2220 rtx addr = XEXP (operands[1], 0);
2221 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2223 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2225 operands[1] = XEXP (addr, 0);
2226 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2227 abort ();
2229 if (!reg_overlap_mentioned_p (high_reg, addr))
2231 /* No overlap between high target register and address
2232 register. (We do this in a non-obvious way to
2233 save a register file writeback) */
2234 if (GET_CODE (addr) == POST_INC)
2235 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2236 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2238 else
2240 /* This is an undefined situation. We should load into the
2241 address register *and* update that register. Probably
2242 we don't need to handle this at all. */
2243 if (GET_CODE (addr) == POST_INC)
2244 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2245 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2248 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2250 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2252 operands[1] = XEXP (addr, 0);
2253 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2254 abort ();
2256 if (!reg_overlap_mentioned_p (high_reg, addr))
2258 /* No overlap between high target register and address
2259 register. (We do this in a non-obvious way to
2260 save a register file writeback) */
2261 if (GET_CODE (addr) == PRE_INC)
2262 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2263 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2265 else
2267 /* This is an undefined situation. We should load into the
2268 address register *and* update that register. Probably
2269 we don't need to handle this at all. */
2270 if (GET_CODE (addr) == PRE_INC)
2271 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2272 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2275 else if (GET_CODE (addr) == PLUS
2276 && GET_CODE (XEXP (addr, 0)) == MULT)
2278 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2280 if (!reg_overlap_mentioned_p (high_reg, addr))
2282 rtx xoperands[3];
2284 xoperands[0] = high_reg;
2285 xoperands[1] = XEXP (addr, 1);
2286 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2287 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2288 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2289 xoperands);
2290 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2292 else
2294 rtx xoperands[3];
2296 xoperands[0] = high_reg;
2297 xoperands[1] = XEXP (addr, 1);
2298 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2299 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2300 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2301 xoperands);
2302 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2307 /* If an operand is an unoffsettable memory ref, find a register
2308 we can increment temporarily to make it refer to the second word. */
2310 if (optype0 == MEMOP)
2311 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2313 if (optype1 == MEMOP)
2314 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2316 /* Ok, we can do one word at a time.
2317 Normally we do the low-numbered word first.
2319 In either case, set up in LATEHALF the operands to use
2320 for the high-numbered word and in some cases alter the
2321 operands in OPERANDS to be suitable for the low-numbered word. */
2323 if (optype0 == REGOP)
2324 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2325 else if (optype0 == OFFSOP)
2326 latehalf[0] = adjust_address (operands[0], SImode, 4);
2327 else
2328 latehalf[0] = operands[0];
2330 if (optype1 == REGOP)
2331 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2332 else if (optype1 == OFFSOP)
2333 latehalf[1] = adjust_address (operands[1], SImode, 4);
2334 else if (optype1 == CNSTOP)
2335 split_double (operands[1], &operands[1], &latehalf[1]);
2336 else
2337 latehalf[1] = operands[1];
2339 /* If the first move would clobber the source of the second one,
2340 do them in the other order.
2342 This can happen in two cases:
2344 mem -> register where the first half of the destination register
2345 is the same register used in the memory's address. Reload
2346 can create such insns.
2348 mem in this case will be either register indirect or register
2349 indirect plus a valid offset.
2351 register -> register move where REGNO(dst) == REGNO(src + 1)
2352 someone (Tim/Tege?) claimed this can happen for parameter loads.
2354 Handle mem -> register case first. */
2355 if (optype0 == REGOP
2356 && (optype1 == MEMOP || optype1 == OFFSOP)
2357 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2358 operands[1], 0))
2360 /* Do the late half first. */
2361 if (addreg1)
2362 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2363 output_asm_insn (singlemove_string (latehalf), latehalf);
2365 /* Then clobber. */
2366 if (addreg1)
2367 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2368 return singlemove_string (operands);
2371 /* Now handle register -> register case. */
2372 if (optype0 == REGOP && optype1 == REGOP
2373 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2375 output_asm_insn (singlemove_string (latehalf), latehalf);
2376 return singlemove_string (operands);
2379 /* Normal case: do the two words, low-numbered first. */
2381 output_asm_insn (singlemove_string (operands), operands);
2383 /* Make any unoffsettable addresses point at high-numbered word. */
2384 if (addreg0)
2385 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2386 if (addreg1)
2387 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2389 /* Do that word. */
2390 output_asm_insn (singlemove_string (latehalf), latehalf);
2392 /* Undo the adds we just did. */
2393 if (addreg0)
2394 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2395 if (addreg1)
2396 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2398 return "";
2401 const char *
2402 output_fp_move_double (operands)
2403 rtx *operands;
2405 if (FP_REG_P (operands[0]))
2407 if (FP_REG_P (operands[1])
2408 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2409 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2410 else
2411 output_asm_insn ("fldd%F1 %1,%0", operands);
2413 else if (FP_REG_P (operands[1]))
2415 output_asm_insn ("fstd%F0 %1,%0", operands);
2417 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2419 if (GET_CODE (operands[0]) == REG)
2421 rtx xoperands[2];
2422 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2423 xoperands[0] = operands[0];
2424 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2426 /* This is a pain. You have to be prepared to deal with an
2427 arbitrary address here including pre/post increment/decrement.
2429 so avoid this in the MD. */
2430 else
2431 abort ();
2433 else abort ();
2434 return "";
2437 /* Return a REG that occurs in ADDR with coefficient 1.
2438 ADDR can be effectively incremented by incrementing REG. */
2440 static rtx
2441 find_addr_reg (addr)
2442 rtx addr;
2444 while (GET_CODE (addr) == PLUS)
2446 if (GET_CODE (XEXP (addr, 0)) == REG)
2447 addr = XEXP (addr, 0);
2448 else if (GET_CODE (XEXP (addr, 1)) == REG)
2449 addr = XEXP (addr, 1);
2450 else if (CONSTANT_P (XEXP (addr, 0)))
2451 addr = XEXP (addr, 1);
2452 else if (CONSTANT_P (XEXP (addr, 1)))
2453 addr = XEXP (addr, 0);
2454 else
2455 abort ();
2457 if (GET_CODE (addr) == REG)
2458 return addr;
2459 abort ();
2462 /* Emit code to perform a block move.
2464 OPERANDS[0] is the destination pointer as a REG, clobbered.
2465 OPERANDS[1] is the source pointer as a REG, clobbered.
2466 OPERANDS[2] is a register for temporary storage.
2467 OPERANDS[4] is the size as a CONST_INT
2468 OPERANDS[3] is a register for temporary storage.
2469 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2470 OPERANDS[6] is another temporary register. */
2472 const char *
2473 output_block_move (operands, size_is_constant)
2474 rtx *operands;
2475 int size_is_constant ATTRIBUTE_UNUSED;
2477 int align = INTVAL (operands[5]);
2478 unsigned long n_bytes = INTVAL (operands[4]);
2480 /* We can't move more than four bytes at a time because the PA
2481 has no longer integer move insns. (Could use fp mem ops?) */
2482 if (align > 4)
2483 align = 4;
2485 /* Note that we know each loop below will execute at least twice
2486 (else we would have open-coded the copy). */
2487 switch (align)
2489 case 4:
2490 /* Pre-adjust the loop counter. */
2491 operands[4] = GEN_INT (n_bytes - 8);
2492 output_asm_insn ("ldi %4,%2", operands);
2494 /* Copying loop. */
2495 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2496 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2497 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2498 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2499 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2501 /* Handle the residual. There could be up to 7 bytes of
2502 residual to copy! */
2503 if (n_bytes % 8 != 0)
2505 operands[4] = GEN_INT (n_bytes % 4);
2506 if (n_bytes % 8 >= 4)
2507 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2508 if (n_bytes % 4 != 0)
2509 output_asm_insn ("ldw 0(%1),%6", operands);
2510 if (n_bytes % 8 >= 4)
2511 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2512 if (n_bytes % 4 != 0)
2513 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2515 return "";
2517 case 2:
2518 /* Pre-adjust the loop counter. */
2519 operands[4] = GEN_INT (n_bytes - 4);
2520 output_asm_insn ("ldi %4,%2", operands);
2522 /* Copying loop. */
2523 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2524 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2525 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2526 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2527 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2529 /* Handle the residual. */
2530 if (n_bytes % 4 != 0)
2532 if (n_bytes % 4 >= 2)
2533 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2534 if (n_bytes % 2 != 0)
2535 output_asm_insn ("ldb 0(%1),%6", operands);
2536 if (n_bytes % 4 >= 2)
2537 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2538 if (n_bytes % 2 != 0)
2539 output_asm_insn ("stb %6,0(%0)", operands);
2541 return "";
2543 case 1:
2544 /* Pre-adjust the loop counter. */
2545 operands[4] = GEN_INT (n_bytes - 2);
2546 output_asm_insn ("ldi %4,%2", operands);
2548 /* Copying loop. */
2549 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2550 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2551 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2552 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2553 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2555 /* Handle the residual. */
2556 if (n_bytes % 2 != 0)
2558 output_asm_insn ("ldb 0(%1),%3", operands);
2559 output_asm_insn ("stb %3,0(%0)", operands);
2561 return "";
2563 default:
2564 abort ();
2568 /* Count the number of insns necessary to handle this block move.
2570 Basic structure is the same as emit_block_move, except that we
2571 count insns rather than emit them. */
2573 static int
2574 compute_movstrsi_length (insn)
2575 rtx insn;
2577 rtx pat = PATTERN (insn);
2578 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2579 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2580 unsigned int n_insns = 0;
2582 /* We can't move more than four bytes at a time because the PA
2583 has no longer integer move insns. (Could use fp mem ops?) */
2584 if (align > 4)
2585 align = 4;
2587 /* The basic copying loop. */
2588 n_insns = 6;
2590 /* Residuals. */
2591 if (n_bytes % (2 * align) != 0)
2593 if ((n_bytes % (2 * align)) >= align)
2594 n_insns += 2;
2596 if ((n_bytes % align) != 0)
2597 n_insns += 2;
2600 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2601 return n_insns * 4;
2605 const char *
2606 output_and (operands)
2607 rtx *operands;
2609 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2611 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2612 int ls0, ls1, ms0, p, len;
2614 for (ls0 = 0; ls0 < 32; ls0++)
2615 if ((mask & (1 << ls0)) == 0)
2616 break;
2618 for (ls1 = ls0; ls1 < 32; ls1++)
2619 if ((mask & (1 << ls1)) != 0)
2620 break;
2622 for (ms0 = ls1; ms0 < 32; ms0++)
2623 if ((mask & (1 << ms0)) == 0)
2624 break;
2626 if (ms0 != 32)
2627 abort ();
2629 if (ls1 == 32)
2631 len = ls0;
2633 if (len == 0)
2634 abort ();
2636 operands[2] = GEN_INT (len);
2637 return "{extru|extrw,u} %1,31,%2,%0";
2639 else
2641 /* We could use this `depi' for the case above as well, but `depi'
2642 requires one more register file access than an `extru'. */
2644 p = 31 - ls0;
2645 len = ls1 - ls0;
2647 operands[2] = GEN_INT (p);
2648 operands[3] = GEN_INT (len);
2649 return "{depi|depwi} 0,%2,%3,%0";
2652 else
2653 return "and %1,%2,%0";
2656 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2657 storing the result in operands[0]. */
2658 const char *
2659 output_64bit_and (operands)
2660 rtx *operands;
2662 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2664 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2665 int ls0, ls1, ms0, p, len;
2667 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2668 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2669 break;
2671 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2672 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2673 break;
2675 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2676 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2677 break;
2679 if (ms0 != HOST_BITS_PER_WIDE_INT)
2680 abort ();
2682 if (ls1 == HOST_BITS_PER_WIDE_INT)
2684 len = ls0;
2686 if (len == 0)
2687 abort ();
2689 operands[2] = GEN_INT (len);
2690 return "extrd,u %1,63,%2,%0";
2692 else
2694 /* We could use this `depi' for the case above as well, but `depi'
2695 requires one more register file access than an `extru'. */
2697 p = 63 - ls0;
2698 len = ls1 - ls0;
2700 operands[2] = GEN_INT (p);
2701 operands[3] = GEN_INT (len);
2702 return "depdi 0,%2,%3,%0";
2705 else
2706 return "and %1,%2,%0";
2709 const char *
2710 output_ior (operands)
2711 rtx *operands;
2713 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2714 int bs0, bs1, p, len;
2716 if (INTVAL (operands[2]) == 0)
2717 return "copy %1,%0";
2719 for (bs0 = 0; bs0 < 32; bs0++)
2720 if ((mask & (1 << bs0)) != 0)
2721 break;
2723 for (bs1 = bs0; bs1 < 32; bs1++)
2724 if ((mask & (1 << bs1)) == 0)
2725 break;
2727 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2728 abort ();
2730 p = 31 - bs0;
2731 len = bs1 - bs0;
2733 operands[2] = GEN_INT (p);
2734 operands[3] = GEN_INT (len);
2735 return "{depi|depwi} -1,%2,%3,%0";
2738 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2739 storing the result in operands[0]. */
2740 const char *
2741 output_64bit_ior (operands)
2742 rtx *operands;
2744 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2745 int bs0, bs1, p, len;
2747 if (INTVAL (operands[2]) == 0)
2748 return "copy %1,%0";
2750 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2751 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2752 break;
2754 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2755 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2756 break;
2758 if (bs1 != HOST_BITS_PER_WIDE_INT
2759 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2760 abort ();
2762 p = 63 - bs0;
2763 len = bs1 - bs0;
2765 operands[2] = GEN_INT (p);
2766 operands[3] = GEN_INT (len);
2767 return "depdi -1,%2,%3,%0";
2770 /* Target hook for assembling integer objects. This code handles
2771 aligned SI and DI integers specially, since function references must
2772 be preceded by P%. */
2774 static bool
2775 pa_assemble_integer (x, size, aligned_p)
2776 rtx x;
2777 unsigned int size;
2778 int aligned_p;
2780 if (size == UNITS_PER_WORD && aligned_p
2781 && function_label_operand (x, VOIDmode))
2783 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2784 output_addr_const (asm_out_file, x);
2785 fputc ('\n', asm_out_file);
2786 return true;
2788 return default_assemble_integer (x, size, aligned_p);
2791 /* Output an ascii string. */
2792 void
2793 output_ascii (file, p, size)
2794 FILE *file;
2795 const char *p;
2796 int size;
2798 int i;
2799 int chars_output;
2800 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2802 /* The HP assembler can only take strings of 256 characters at one
2803 time. This is a limitation on input line length, *not* the
2804 length of the string. Sigh. Even worse, it seems that the
2805 restriction is in number of input characters (see \xnn &
2806 \whatever). So we have to do this very carefully. */
2808 fputs ("\t.STRING \"", file);
2810 chars_output = 0;
2811 for (i = 0; i < size; i += 4)
2813 int co = 0;
2814 int io = 0;
2815 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2817 register unsigned int c = (unsigned char) p[i + io];
2819 if (c == '\"' || c == '\\')
2820 partial_output[co++] = '\\';
2821 if (c >= ' ' && c < 0177)
2822 partial_output[co++] = c;
2823 else
2825 unsigned int hexd;
2826 partial_output[co++] = '\\';
2827 partial_output[co++] = 'x';
2828 hexd = c / 16 - 0 + '0';
2829 if (hexd > '9')
2830 hexd -= '9' - 'a' + 1;
2831 partial_output[co++] = hexd;
2832 hexd = c % 16 - 0 + '0';
2833 if (hexd > '9')
2834 hexd -= '9' - 'a' + 1;
2835 partial_output[co++] = hexd;
2838 if (chars_output + co > 243)
2840 fputs ("\"\n\t.STRING \"", file);
2841 chars_output = 0;
2843 fwrite (partial_output, 1, (size_t) co, file);
2844 chars_output += co;
2845 co = 0;
2847 fputs ("\"\n", file);
2850 /* Try to rewrite floating point comparisons & branches to avoid
2851 useless add,tr insns.
2853 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2854 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2855 first attempt to remove useless add,tr insns. It is zero
2856 for the second pass as reorg sometimes leaves bogus REG_DEAD
2857 notes lying around.
2859 When CHECK_NOTES is zero we can only eliminate add,tr insns
2860 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2861 instructions. */
2862 static void
2863 remove_useless_addtr_insns (insns, check_notes)
2864 rtx insns;
2865 int check_notes;
2867 rtx insn;
2868 static int pass = 0;
2870 /* This is fairly cheap, so always run it when optimizing. */
2871 if (optimize > 0)
2873 int fcmp_count = 0;
2874 int fbranch_count = 0;
2876 /* Walk all the insns in this function looking for fcmp & fbranch
2877 instructions. Keep track of how many of each we find. */
2878 insns = get_insns ();
2879 for (insn = insns; insn; insn = next_insn (insn))
2881 rtx tmp;
2883 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2884 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2885 continue;
2887 tmp = PATTERN (insn);
2889 /* It must be a set. */
2890 if (GET_CODE (tmp) != SET)
2891 continue;
2893 /* If the destination is CCFP, then we've found an fcmp insn. */
2894 tmp = SET_DEST (tmp);
2895 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2897 fcmp_count++;
2898 continue;
2901 tmp = PATTERN (insn);
2902 /* If this is an fbranch instruction, bump the fbranch counter. */
2903 if (GET_CODE (tmp) == SET
2904 && SET_DEST (tmp) == pc_rtx
2905 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2906 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2907 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2908 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2910 fbranch_count++;
2911 continue;
2916 /* Find all floating point compare + branch insns. If possible,
2917 reverse the comparison & the branch to avoid add,tr insns. */
2918 for (insn = insns; insn; insn = next_insn (insn))
2920 rtx tmp, next;
2922 /* Ignore anything that isn't an INSN. */
2923 if (GET_CODE (insn) != INSN)
2924 continue;
2926 tmp = PATTERN (insn);
2928 /* It must be a set. */
2929 if (GET_CODE (tmp) != SET)
2930 continue;
2932 /* The destination must be CCFP, which is register zero. */
2933 tmp = SET_DEST (tmp);
2934 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2935 continue;
2937 /* INSN should be a set of CCFP.
2939 See if the result of this insn is used in a reversed FP
2940 conditional branch. If so, reverse our condition and
2941 the branch. Doing so avoids useless add,tr insns. */
2942 next = next_insn (insn);
2943 while (next)
2945 /* Jumps, calls and labels stop our search. */
2946 if (GET_CODE (next) == JUMP_INSN
2947 || GET_CODE (next) == CALL_INSN
2948 || GET_CODE (next) == CODE_LABEL)
2949 break;
2951 /* As does another fcmp insn. */
2952 if (GET_CODE (next) == INSN
2953 && GET_CODE (PATTERN (next)) == SET
2954 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2955 && REGNO (SET_DEST (PATTERN (next))) == 0)
2956 break;
2958 next = next_insn (next);
2961 /* Is NEXT_INSN a branch? */
2962 if (next
2963 && GET_CODE (next) == JUMP_INSN)
2965 rtx pattern = PATTERN (next);
2967 /* If it a reversed fp conditional branch (eg uses add,tr)
2968 and CCFP dies, then reverse our conditional and the branch
2969 to avoid the add,tr. */
2970 if (GET_CODE (pattern) == SET
2971 && SET_DEST (pattern) == pc_rtx
2972 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2973 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2974 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2975 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2976 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2977 && (fcmp_count == fbranch_count
2978 || (check_notes
2979 && find_regno_note (next, REG_DEAD, 0))))
2981 /* Reverse the branch. */
2982 tmp = XEXP (SET_SRC (pattern), 1);
2983 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2984 XEXP (SET_SRC (pattern), 2) = tmp;
2985 INSN_CODE (next) = -1;
2987 /* Reverse our condition. */
2988 tmp = PATTERN (insn);
2989 PUT_CODE (XEXP (tmp, 1),
2990 (reverse_condition_maybe_unordered
2991 (GET_CODE (XEXP (tmp, 1)))));
2997 pass = !pass;
3001 /* You may have trouble believing this, but this is the 32 bit HP-PA
3002 stack layout. Wow.
3004 Offset Contents
3006 Variable arguments (optional; any number may be allocated)
3008 SP-(4*(N+9)) arg word N
3010 SP-56 arg word 5
3011 SP-52 arg word 4
3013 Fixed arguments (must be allocated; may remain unused)
3015 SP-48 arg word 3
3016 SP-44 arg word 2
3017 SP-40 arg word 1
3018 SP-36 arg word 0
3020 Frame Marker
3022 SP-32 External Data Pointer (DP)
3023 SP-28 External sr4
3024 SP-24 External/stub RP (RP')
3025 SP-20 Current RP
3026 SP-16 Static Link
3027 SP-12 Clean up
3028 SP-8 Calling Stub RP (RP'')
3029 SP-4 Previous SP
3031 Top of Frame
3033 SP-0 Stack Pointer (points to next available address)
3037 /* This function saves registers as follows. Registers marked with ' are
3038 this function's registers (as opposed to the previous function's).
3039 If a frame_pointer isn't needed, r4 is saved as a general register;
3040 the space for the frame pointer is still allocated, though, to keep
3041 things simple.
3044 Top of Frame
3046 SP (FP') Previous FP
3047 SP + 4 Alignment filler (sigh)
3048 SP + 8 Space for locals reserved here.
3052 SP + n All call saved register used.
3056 SP + o All call saved fp registers used.
3060 SP + p (SP') points to next available address.
3064 /* Global variables set by output_function_prologue(). */
3065 /* Size of frame. Need to know this to emit return insns from
3066 leaf procedures. */
3067 static int actual_fsize;
3068 static int local_fsize, save_fregs;
3070 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3071 Handle case where DISP > 8k by using the add_high_const patterns.
3073 Note in DISP > 8k case, we will leave the high part of the address
3074 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3076 static void
3077 store_reg (reg, disp, base)
3078 int reg, disp, base;
3080 rtx insn, dest, src, basereg;
3082 src = gen_rtx_REG (word_mode, reg);
3083 basereg = gen_rtx_REG (Pmode, base);
3084 if (VAL_14_BITS_P (disp))
3086 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3087 insn = emit_move_insn (dest, src);
3089 else
3091 rtx delta = GEN_INT (disp);
3092 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3093 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3094 emit_move_insn (tmpreg, high);
3095 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3096 insn = emit_move_insn (dest, src);
3097 if (DO_FRAME_NOTES)
3099 REG_NOTES (insn)
3100 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3101 gen_rtx_SET (VOIDmode,
3102 gen_rtx_MEM (word_mode,
3103 gen_rtx_PLUS (word_mode, basereg,
3104 delta)),
3105 src),
3106 REG_NOTES (insn));
3110 if (DO_FRAME_NOTES)
3111 RTX_FRAME_RELATED_P (insn) = 1;
3114 /* Emit RTL to store REG at the memory location specified by BASE and then
3115 add MOD to BASE. MOD must be <= 8k. */
3117 static void
3118 store_reg_modify (base, reg, mod)
3119 int base, reg, mod;
3121 rtx insn, basereg, srcreg, delta;
3123 if (! VAL_14_BITS_P (mod))
3124 abort ();
3126 basereg = gen_rtx_REG (Pmode, base);
3127 srcreg = gen_rtx_REG (word_mode, reg);
3128 delta = GEN_INT (mod);
3130 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3131 if (DO_FRAME_NOTES)
3133 RTX_FRAME_RELATED_P (insn) = 1;
3135 /* RTX_FRAME_RELATED_P must be set on each frame related set
3136 in a parallel with more than one element. Don't set
3137 RTX_FRAME_RELATED_P in the first set if reg is temporary
3138 register 1. The effect of this operation is recorded in
3139 the initial copy. */
3140 if (reg != 1)
3142 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3143 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3145 else
3147 /* The first element of a PARALLEL is always processed if it is
3148 a SET. Thus, we need an expression list for this case. */
3149 REG_NOTES (insn)
3150 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3151 gen_rtx_SET (VOIDmode, basereg,
3152 gen_rtx_PLUS (word_mode, basereg, delta)),
3153 REG_NOTES (insn));
3158 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3159 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3160 whether to add a frame note or not.
3162 In the DISP > 8k case, we leave the high part of the address in %r1.
3163 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3165 static void
3166 set_reg_plus_d (reg, base, disp, note)
3167 int reg, base, disp, note;
3169 rtx insn;
3171 if (VAL_14_BITS_P (disp))
3173 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3174 plus_constant (gen_rtx_REG (Pmode, base), disp));
3176 else
3178 rtx basereg = gen_rtx_REG (Pmode, base);
3179 rtx delta = GEN_INT (disp);
3181 emit_move_insn (gen_rtx_REG (Pmode, 1),
3182 gen_rtx_PLUS (Pmode, basereg,
3183 gen_rtx_HIGH (Pmode, delta)));
3184 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3185 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3186 delta));
3189 if (DO_FRAME_NOTES && note)
3190 RTX_FRAME_RELATED_P (insn) = 1;
3194 compute_frame_size (size, fregs_live)
3195 int size;
3196 int *fregs_live;
3198 int i, fsize;
3200 /* Space for frame pointer + filler. If any frame is allocated
3201 we need to add this in because of STARTING_FRAME_OFFSET.
3203 Similar code also appears in hppa_expand_prologue. Change both
3204 of them at the same time. */
3205 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
3207 /* If the current function calls __builtin_eh_return, then we need
3208 to allocate stack space for registers that will hold data for
3209 the exception handler. */
3210 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3212 unsigned int i;
3214 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3215 continue;
3216 fsize += i * UNITS_PER_WORD;
3219 /* Account for space used by the callee general register saves. */
3220 for (i = 18; i >= 3; i--)
3221 if (regs_ever_live[i])
3222 fsize += UNITS_PER_WORD;
3224 /* Round the stack. */
3225 fsize = (fsize + 7) & ~7;
3227 /* Account for space used by the callee floating point register saves. */
3228 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3229 if (regs_ever_live[i]
3230 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3232 if (fregs_live)
3233 *fregs_live = 1;
3235 /* We always save both halves of the FP register, so always
3236 increment the frame size by 8 bytes. */
3237 fsize += 8;
3240 /* The various ABIs include space for the outgoing parameters in the
3241 size of the current function's stack frame. */
3242 fsize += current_function_outgoing_args_size;
3244 /* Allocate space for the fixed frame marker. This space must be
3245 allocated for any function that makes calls or otherwise allocates
3246 stack space. */
3247 if (!current_function_is_leaf || fsize)
3248 fsize += TARGET_64BIT ? 16 : 32;
3250 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
3253 /* Generate the assembly code for function entry. FILE is a stdio
3254 stream to output the code to. SIZE is an int: how many units of
3255 temporary storage to allocate.
3257 Refer to the array `regs_ever_live' to determine which registers to
3258 save; `regs_ever_live[I]' is nonzero if register number I is ever
3259 used in the function. This function is responsible for knowing
3260 which registers should not be saved even if used. */
3262 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3263 of memory. If any fpu reg is used in the function, we allocate
3264 such a block here, at the bottom of the frame, just in case it's needed.
3266 If this function is a leaf procedure, then we may choose not
3267 to do a "save" insn. The decision about whether or not
3268 to do this is made in regclass.c. */
3270 static void
3271 pa_output_function_prologue (file, size)
3272 FILE *file;
3273 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3275 /* The function's label and associated .PROC must never be
3276 separated and must be output *after* any profiling declarations
3277 to avoid changing spaces/subspaces within a procedure. */
3278 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3279 fputs ("\t.PROC\n", file);
3281 /* hppa_expand_prologue does the dirty work now. We just need
3282 to output the assembler directives which denote the start
3283 of a function. */
3284 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3285 if (regs_ever_live[2])
3286 fputs (",CALLS,SAVE_RP", file);
3287 else
3288 fputs (",NO_CALLS", file);
3290 if (frame_pointer_needed)
3291 fputs (",SAVE_SP", file);
3293 /* Pass on information about the number of callee register saves
3294 performed in the prologue.
3296 The compiler is supposed to pass the highest register number
3297 saved, the assembler then has to adjust that number before
3298 entering it into the unwind descriptor (to account for any
3299 caller saved registers with lower register numbers than the
3300 first callee saved register). */
3301 if (gr_saved)
3302 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3304 if (fr_saved)
3305 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3307 fputs ("\n\t.ENTRY\n", file);
3309 remove_useless_addtr_insns (get_insns (), 0);
3312 void
3313 hppa_expand_prologue ()
3315 int size = get_frame_size ();
3316 int merge_sp_adjust_with_store = 0;
3317 int i, offset;
3318 rtx insn, tmpreg;
3320 gr_saved = 0;
3321 fr_saved = 0;
3322 save_fregs = 0;
3324 /* Allocate space for frame pointer + filler. If any frame is allocated
3325 we need to add this in because of STARTING_FRAME_OFFSET.
3327 Similar code also appears in compute_frame_size. Change both
3328 of them at the same time. */
3329 local_fsize = size + (size || frame_pointer_needed
3330 ? STARTING_FRAME_OFFSET : 0);
3332 actual_fsize = compute_frame_size (size, &save_fregs);
3334 /* Compute a few things we will use often. */
3335 tmpreg = gen_rtx_REG (word_mode, 1);
3337 /* Save RP first. The calling conventions manual states RP will
3338 always be stored into the caller's frame at sp - 20 or sp - 16
3339 depending on which ABI is in use. */
3340 if (regs_ever_live[2] || current_function_calls_eh_return)
3341 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3343 /* Allocate the local frame and set up the frame pointer if needed. */
3344 if (actual_fsize != 0)
3346 if (frame_pointer_needed)
3348 /* Copy the old frame pointer temporarily into %r1. Set up the
3349 new stack pointer, then store away the saved old frame pointer
3350 into the stack at sp and at the same time update the stack
3351 pointer by actual_fsize bytes. Two versions, first
3352 handles small (<8k) frames. The second handles large (>=8k)
3353 frames. */
3354 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3355 if (DO_FRAME_NOTES)
3357 /* We need to record the frame pointer save here since the
3358 new frame pointer is set in the following insn. */
3359 RTX_FRAME_RELATED_P (insn) = 1;
3360 REG_NOTES (insn)
3361 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3362 gen_rtx_SET (VOIDmode,
3363 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3364 frame_pointer_rtx),
3365 REG_NOTES (insn));
3368 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3369 if (DO_FRAME_NOTES)
3370 RTX_FRAME_RELATED_P (insn) = 1;
3372 if (VAL_14_BITS_P (actual_fsize))
3373 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3374 else
3376 /* It is incorrect to store the saved frame pointer at *sp,
3377 then increment sp (writes beyond the current stack boundary).
3379 So instead use stwm to store at *sp and post-increment the
3380 stack pointer as an atomic operation. Then increment sp to
3381 finish allocating the new frame. */
3382 int adjust1 = 8192 - 64;
3383 int adjust2 = actual_fsize - adjust1;
3385 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3386 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3387 adjust2, 1);
3390 /* Prevent register spills from being scheduled before the
3391 stack pointer is raised. Necessary as we will be storing
3392 registers using the frame pointer as a base register, and
3393 we happen to set fp before raising sp. */
3394 emit_insn (gen_blockage ());
3396 /* no frame pointer needed. */
3397 else
3399 /* In some cases we can perform the first callee register save
3400 and allocating the stack frame at the same time. If so, just
3401 make a note of it and defer allocating the frame until saving
3402 the callee registers. */
3403 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3404 merge_sp_adjust_with_store = 1;
3405 /* Can not optimize. Adjust the stack frame by actual_fsize
3406 bytes. */
3407 else
3408 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3409 actual_fsize, 1);
3413 /* Normal register save.
3415 Do not save the frame pointer in the frame_pointer_needed case. It
3416 was done earlier. */
3417 if (frame_pointer_needed)
3419 offset = local_fsize;
3421 /* Saving the EH return data registers in the frame is the simplest
3422 way to get the frame unwind information emitted. We put them
3423 just before the general registers. */
3424 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3426 unsigned int i, regno;
3428 for (i = 0; ; ++i)
3430 regno = EH_RETURN_DATA_REGNO (i);
3431 if (regno == INVALID_REGNUM)
3432 break;
3434 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3435 offset += UNITS_PER_WORD;
3439 for (i = 18; i >= 4; i--)
3440 if (regs_ever_live[i] && ! call_used_regs[i])
3442 store_reg (i, offset, FRAME_POINTER_REGNUM);
3443 offset += UNITS_PER_WORD;
3444 gr_saved++;
3446 /* Account for %r3 which is saved in a special place. */
3447 gr_saved++;
3449 /* No frame pointer needed. */
3450 else
3452 offset = local_fsize - actual_fsize;
3454 /* Saving the EH return data registers in the frame is the simplest
3455 way to get the frame unwind information emitted. */
3456 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3458 unsigned int i, regno;
3460 for (i = 0; ; ++i)
3462 regno = EH_RETURN_DATA_REGNO (i);
3463 if (regno == INVALID_REGNUM)
3464 break;
3466 /* If merge_sp_adjust_with_store is nonzero, then we can
3467 optimize the first save. */
3468 if (merge_sp_adjust_with_store)
3470 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3471 merge_sp_adjust_with_store = 0;
3473 else
3474 store_reg (regno, offset, STACK_POINTER_REGNUM);
3475 offset += UNITS_PER_WORD;
3479 for (i = 18; i >= 3; i--)
3480 if (regs_ever_live[i] && ! call_used_regs[i])
3482 /* If merge_sp_adjust_with_store is nonzero, then we can
3483 optimize the first GR save. */
3484 if (merge_sp_adjust_with_store)
3486 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3487 merge_sp_adjust_with_store = 0;
3489 else
3490 store_reg (i, offset, STACK_POINTER_REGNUM);
3491 offset += UNITS_PER_WORD;
3492 gr_saved++;
3495 /* If we wanted to merge the SP adjustment with a GR save, but we never
3496 did any GR saves, then just emit the adjustment here. */
3497 if (merge_sp_adjust_with_store)
3498 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3499 actual_fsize, 1);
3502 /* The hppa calling conventions say that %r19, the pic offset
3503 register, is saved at sp - 32 (in this function's frame)
3504 when generating PIC code. FIXME: What is the correct thing
3505 to do for functions which make no calls and allocate no
3506 frame? Do we need to allocate a frame, or can we just omit
3507 the save? For now we'll just omit the save. */
3508 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3509 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3511 /* Align pointer properly (doubleword boundary). */
3512 offset = (offset + 7) & ~7;
3514 /* Floating point register store. */
3515 if (save_fregs)
3517 rtx base;
3519 /* First get the frame or stack pointer to the start of the FP register
3520 save area. */
3521 if (frame_pointer_needed)
3523 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3524 base = frame_pointer_rtx;
3526 else
3528 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3529 base = stack_pointer_rtx;
3532 /* Now actually save the FP registers. */
3533 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3535 if (regs_ever_live[i]
3536 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3538 rtx addr, insn, reg;
3539 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3540 reg = gen_rtx_REG (DFmode, i);
3541 insn = emit_move_insn (addr, reg);
3542 if (DO_FRAME_NOTES)
3544 RTX_FRAME_RELATED_P (insn) = 1;
3545 if (TARGET_64BIT)
3547 rtx mem = gen_rtx_MEM (DFmode,
3548 plus_constant (base, offset));
3549 REG_NOTES (insn)
3550 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3551 gen_rtx_SET (VOIDmode, mem, reg),
3552 REG_NOTES (insn));
3554 else
3556 rtx meml = gen_rtx_MEM (SFmode,
3557 plus_constant (base, offset));
3558 rtx memr = gen_rtx_MEM (SFmode,
3559 plus_constant (base, offset + 4));
3560 rtx regl = gen_rtx_REG (SFmode, i);
3561 rtx regr = gen_rtx_REG (SFmode, i + 1);
3562 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3563 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3564 rtvec vec;
3566 RTX_FRAME_RELATED_P (setl) = 1;
3567 RTX_FRAME_RELATED_P (setr) = 1;
3568 vec = gen_rtvec (2, setl, setr);
3569 REG_NOTES (insn)
3570 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3571 gen_rtx_SEQUENCE (VOIDmode, vec),
3572 REG_NOTES (insn));
3575 offset += GET_MODE_SIZE (DFmode);
3576 fr_saved++;
3581 /* FIXME: expand_call and expand_millicode_call need to be fixed to
3582 prevent insns with frame notes being scheduled in the delay slot
3583 of calls. This causes problems because the dwarf2 output code
3584 processes the insn list serially. For now, limit the migration
3585 of prologue insns with a blockage. */
3586 if (DO_FRAME_NOTES)
3587 emit_insn (gen_blockage ());
3590 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3591 Handle case where DISP > 8k by using the add_high_const patterns. */
3593 static void
3594 load_reg (reg, disp, base)
3595 int reg, disp, base;
3597 rtx src, dest, basereg;
3599 dest = gen_rtx_REG (word_mode, reg);
3600 basereg = gen_rtx_REG (Pmode, base);
3601 if (VAL_14_BITS_P (disp))
3603 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3604 emit_move_insn (dest, src);
3606 else
3608 rtx delta = GEN_INT (disp);
3609 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3610 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3611 emit_move_insn (tmpreg, high);
3612 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3613 emit_move_insn (dest, src);
3617 /* This function generates the assembly code for function exit.
3618 Args are as for output_function_prologue ().
3620 The function epilogue should not depend on the current stack
3621 pointer! It should use the frame pointer only. This is mandatory
3622 because of alloca; we also take advantage of it to omit stack
3623 adjustments before returning. */
3625 static void
3626 pa_output_function_epilogue (file, size)
3627 FILE *file;
3628 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3630 int last_address = 0;
3631 rtx insn = get_last_insn ();
3633 /* hppa_expand_epilogue does the dirty work now. We just need
3634 to output the assembler directives which denote the end
3635 of a function.
3637 To make debuggers happy, emit a nop if the epilogue was completely
3638 eliminated due to a volatile call as the last insn in the
3639 current function. That way the return address (in %r2) will
3640 always point to a valid instruction in the current function. */
3642 /* Get the last real insn. */
3643 if (GET_CODE (insn) == NOTE)
3644 insn = prev_real_insn (insn);
3646 /* If it is a sequence, then look inside. */
3647 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3648 insn = XVECEXP (PATTERN (insn), 0, 0);
3650 /* If insn is a CALL_INSN, then it must be a call to a volatile
3651 function (otherwise there would be epilogue insns). */
3652 if (insn && GET_CODE (insn) == CALL_INSN)
3654 fputs ("\tnop\n", file);
3655 last_address += 4;
3658 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3660 /* Finally, update the total number of code bytes output so far. */
3661 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3662 && !flag_function_sections)
3664 if (INSN_ADDRESSES_SET_P ())
3666 unsigned long old_total = total_code_bytes;
3668 insn = get_last_nonnote_insn ();
3669 last_address += INSN_ADDRESSES (INSN_UID (insn));
3670 if (INSN_P (insn))
3671 last_address += insn_default_length (insn);
3673 total_code_bytes += last_address;
3674 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3676 /* Be prepared to handle overflows. */
3677 if (old_total > total_code_bytes)
3678 total_code_bytes = -1;
3680 else
3681 total_code_bytes = -1;
3685 void
3686 hppa_expand_epilogue ()
3688 rtx tmpreg;
3689 int offset, i;
3690 int merge_sp_adjust_with_load = 0;
3691 int ret_off = 0;
3693 /* We will use this often. */
3694 tmpreg = gen_rtx_REG (word_mode, 1);
3696 /* Try to restore RP early to avoid load/use interlocks when
3697 RP gets used in the return (bv) instruction. This appears to still
3698 be necessary even when we schedule the prologue and epilogue. */
3699 if (regs_ever_live [2] || current_function_calls_eh_return)
3701 ret_off = TARGET_64BIT ? -16 : -20;
3702 if (frame_pointer_needed)
3704 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3705 ret_off = 0;
3707 else
3709 /* No frame pointer, and stack is smaller than 8k. */
3710 if (VAL_14_BITS_P (ret_off - actual_fsize))
3712 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3713 ret_off = 0;
3718 /* General register restores. */
3719 if (frame_pointer_needed)
3721 offset = local_fsize;
3723 /* If the current function calls __builtin_eh_return, then we need
3724 to restore the saved EH data registers. */
3725 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3727 unsigned int i, regno;
3729 for (i = 0; ; ++i)
3731 regno = EH_RETURN_DATA_REGNO (i);
3732 if (regno == INVALID_REGNUM)
3733 break;
3735 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3736 offset += UNITS_PER_WORD;
3740 for (i = 18; i >= 4; i--)
3741 if (regs_ever_live[i] && ! call_used_regs[i])
3743 load_reg (i, offset, FRAME_POINTER_REGNUM);
3744 offset += UNITS_PER_WORD;
3747 else
3749 offset = local_fsize - actual_fsize;
3751 /* If the current function calls __builtin_eh_return, then we need
3752 to restore the saved EH data registers. */
3753 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3755 unsigned int i, regno;
3757 for (i = 0; ; ++i)
3759 regno = EH_RETURN_DATA_REGNO (i);
3760 if (regno == INVALID_REGNUM)
3761 break;
3763 /* Only for the first load.
3764 merge_sp_adjust_with_load holds the register load
3765 with which we will merge the sp adjustment. */
3766 if (merge_sp_adjust_with_load == 0
3767 && local_fsize == 0
3768 && VAL_14_BITS_P (-actual_fsize))
3769 merge_sp_adjust_with_load = regno;
3770 else
3771 load_reg (regno, offset, STACK_POINTER_REGNUM);
3772 offset += UNITS_PER_WORD;
3776 for (i = 18; i >= 3; i--)
3778 if (regs_ever_live[i] && ! call_used_regs[i])
3780 /* Only for the first load.
3781 merge_sp_adjust_with_load holds the register load
3782 with which we will merge the sp adjustment. */
3783 if (merge_sp_adjust_with_load == 0
3784 && local_fsize == 0
3785 && VAL_14_BITS_P (-actual_fsize))
3786 merge_sp_adjust_with_load = i;
3787 else
3788 load_reg (i, offset, STACK_POINTER_REGNUM);
3789 offset += UNITS_PER_WORD;
3794 /* Align pointer properly (doubleword boundary). */
3795 offset = (offset + 7) & ~7;
3797 /* FP register restores. */
3798 if (save_fregs)
3800 /* Adjust the register to index off of. */
3801 if (frame_pointer_needed)
3802 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3803 else
3804 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3806 /* Actually do the restores now. */
3807 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3808 if (regs_ever_live[i]
3809 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3811 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3812 rtx dest = gen_rtx_REG (DFmode, i);
3813 emit_move_insn (dest, src);
3817 /* Emit a blockage insn here to keep these insns from being moved to
3818 an earlier spot in the epilogue, or into the main instruction stream.
3820 This is necessary as we must not cut the stack back before all the
3821 restores are finished. */
3822 emit_insn (gen_blockage ());
3824 /* Reset stack pointer (and possibly frame pointer). The stack
3825 pointer is initially set to fp + 64 to avoid a race condition. */
3826 if (frame_pointer_needed)
3828 rtx delta = GEN_INT (-64);
3830 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
3831 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3833 /* If we were deferring a callee register restore, do it now. */
3834 else if (merge_sp_adjust_with_load)
3836 rtx delta = GEN_INT (-actual_fsize);
3837 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3839 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3841 else if (actual_fsize != 0)
3842 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3843 - actual_fsize, 0);
3845 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3846 frame greater than 8k), do so now. */
3847 if (ret_off != 0)
3848 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3850 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3852 rtx sa = EH_RETURN_STACKADJ_RTX;
3854 emit_insn (gen_blockage ());
3855 emit_insn (TARGET_64BIT
3856 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3857 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
3862 hppa_pic_save_rtx ()
3864 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3867 void
3868 hppa_profile_hook (label_no)
3869 int label_no;
3871 rtx begin_label_rtx, call_insn;
3872 char begin_label_name[16];
3874 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3875 label_no);
3876 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3878 if (TARGET_64BIT)
3879 emit_move_insn (arg_pointer_rtx,
3880 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3881 GEN_INT (64)));
3883 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3885 #ifndef NO_PROFILE_COUNTERS
3887 rtx count_label_rtx, addr, r24;
3888 char count_label_name[16];
3890 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3891 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
3893 addr = force_reg (Pmode, count_label_rtx);
3894 r24 = gen_rtx_REG (Pmode, 24);
3895 emit_move_insn (r24, addr);
3897 /* %r25 is set from within the output pattern. */
3898 call_insn =
3899 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3900 GEN_INT (TARGET_64BIT ? 24 : 12),
3901 begin_label_rtx));
3903 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3905 #else
3906 /* %r25 is set from within the output pattern. */
3907 call_insn =
3908 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3909 GEN_INT (TARGET_64BIT ? 16 : 8),
3910 begin_label_rtx));
3911 #endif
3913 /* Indicate the _mcount call cannot throw, nor will it execute a
3914 non-local goto. */
3915 REG_NOTES (call_insn)
3916 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3918 if (flag_pic)
3920 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3921 if (TARGET_64BIT)
3922 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3924 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3928 /* Fetch the return address for the frame COUNT steps up from
3929 the current frame, after the prologue. FRAMEADDR is the
3930 frame pointer of the COUNT frame.
3932 We want to ignore any export stub remnants here. To handle this,
3933 we examine the code at the return address, and if it is an export
3934 stub, we return a memory rtx for the stub return address stored
3935 at frame-24.
3937 The value returned is used in two different ways:
3939 1. To find a function's caller.
3941 2. To change the return address for a function.
3943 This function handles most instances of case 1; however, it will
3944 fail if there are two levels of stubs to execute on the return
3945 path. The only way I believe that can happen is if the return value
3946 needs a parameter relocation, which never happens for C code.
3948 This function handles most instances of case 2; however, it will
3949 fail if we did not originally have stub code on the return path
3950 but will need stub code on the new return path. This can happen if
3951 the caller & callee are both in the main program, but the new
3952 return location is in a shared library. */
3955 return_addr_rtx (count, frameaddr)
3956 int count;
3957 rtx frameaddr;
3959 rtx label;
3960 rtx rp;
3961 rtx saved_rp;
3962 rtx ins;
3964 if (count != 0)
3965 return NULL_RTX;
3967 rp = get_hard_reg_initial_val (Pmode, 2);
3969 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3970 return rp;
3972 saved_rp = gen_reg_rtx (Pmode);
3973 emit_move_insn (saved_rp, rp);
3975 /* Get pointer to the instruction stream. We have to mask out the
3976 privilege level from the two low order bits of the return address
3977 pointer here so that ins will point to the start of the first
3978 instruction that would have been executed if we returned. */
3979 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3980 label = gen_label_rtx ();
3982 /* Check the instruction stream at the normal return address for the
3983 export stub:
3985 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3986 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3987 0x00011820 | stub+16: mtsp r1,sr0
3988 0xe0400002 | stub+20: be,n 0(sr0,rp)
3990 If it is an export stub, than our return address is really in
3991 -24[frameaddr]. */
3993 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3994 NULL_RTX, SImode, 1);
3995 emit_jump_insn (gen_bne (label));
3997 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3998 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3999 emit_jump_insn (gen_bne (label));
4001 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4002 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4003 emit_jump_insn (gen_bne (label));
4005 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4006 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4008 /* If there is no export stub then just use the value saved from
4009 the return pointer register. */
4011 emit_jump_insn (gen_bne (label));
4013 /* Here we know that our return address points to an export
4014 stub. We don't want to return the address of the export stub,
4015 but rather the return address of the export stub. That return
4016 address is stored at -24[frameaddr]. */
4018 emit_move_insn (saved_rp,
4019 gen_rtx_MEM (Pmode,
4020 memory_address (Pmode,
4021 plus_constant (frameaddr,
4022 -24))));
4024 emit_label (label);
4025 return saved_rp;
4028 /* This is only valid once reload has completed because it depends on
4029 knowing exactly how much (if any) frame there is and...
4031 It's only valid if there is no frame marker to de-allocate and...
4033 It's only valid if %r2 hasn't been saved into the caller's frame
4034 (we're not profiling and %r2 isn't live anywhere). */
4036 hppa_can_use_return_insn_p ()
4038 return (reload_completed
4039 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4040 && ! regs_ever_live[2]
4041 && ! frame_pointer_needed);
4044 void
4045 emit_bcond_fp (code, operand0)
4046 enum rtx_code code;
4047 rtx operand0;
4049 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4050 gen_rtx_IF_THEN_ELSE (VOIDmode,
4051 gen_rtx_fmt_ee (code,
4052 VOIDmode,
4053 gen_rtx_REG (CCFPmode, 0),
4054 const0_rtx),
4055 gen_rtx_LABEL_REF (VOIDmode, operand0),
4056 pc_rtx)));
4061 gen_cmp_fp (code, operand0, operand1)
4062 enum rtx_code code;
4063 rtx operand0, operand1;
4065 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4066 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4069 /* Adjust the cost of a scheduling dependency. Return the new cost of
4070 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4072 static int
4073 pa_adjust_cost (insn, link, dep_insn, cost)
4074 rtx insn;
4075 rtx link;
4076 rtx dep_insn;
4077 int cost;
4079 enum attr_type attr_type;
4081 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4082 true dependencies as they are described with bypasses now. */
4083 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4084 return cost;
4086 if (! recog_memoized (insn))
4087 return 0;
4089 attr_type = get_attr_type (insn);
4091 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4093 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4094 cycles later. */
4096 if (attr_type == TYPE_FPLOAD)
4098 rtx pat = PATTERN (insn);
4099 rtx dep_pat = PATTERN (dep_insn);
4100 if (GET_CODE (pat) == PARALLEL)
4102 /* This happens for the fldXs,mb patterns. */
4103 pat = XVECEXP (pat, 0, 0);
4105 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4106 /* If this happens, we have to extend this to schedule
4107 optimally. Return 0 for now. */
4108 return 0;
4110 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4112 if (! recog_memoized (dep_insn))
4113 return 0;
4114 switch (get_attr_type (dep_insn))
4116 case TYPE_FPALU:
4117 case TYPE_FPMULSGL:
4118 case TYPE_FPMULDBL:
4119 case TYPE_FPDIVSGL:
4120 case TYPE_FPDIVDBL:
4121 case TYPE_FPSQRTSGL:
4122 case TYPE_FPSQRTDBL:
4123 /* A fpload can't be issued until one cycle before a
4124 preceding arithmetic operation has finished if
4125 the target of the fpload is any of the sources
4126 (or destination) of the arithmetic operation. */
4127 return insn_default_latency (dep_insn) - 1;
4129 default:
4130 return 0;
4134 else if (attr_type == TYPE_FPALU)
4136 rtx pat = PATTERN (insn);
4137 rtx dep_pat = PATTERN (dep_insn);
4138 if (GET_CODE (pat) == PARALLEL)
4140 /* This happens for the fldXs,mb patterns. */
4141 pat = XVECEXP (pat, 0, 0);
4143 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4144 /* If this happens, we have to extend this to schedule
4145 optimally. Return 0 for now. */
4146 return 0;
4148 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4150 if (! recog_memoized (dep_insn))
4151 return 0;
4152 switch (get_attr_type (dep_insn))
4154 case TYPE_FPDIVSGL:
4155 case TYPE_FPDIVDBL:
4156 case TYPE_FPSQRTSGL:
4157 case TYPE_FPSQRTDBL:
4158 /* An ALU flop can't be issued until two cycles before a
4159 preceding divide or sqrt operation has finished if
4160 the target of the ALU flop is any of the sources
4161 (or destination) of the divide or sqrt operation. */
4162 return insn_default_latency (dep_insn) - 2;
4164 default:
4165 return 0;
4170 /* For other anti dependencies, the cost is 0. */
4171 return 0;
4173 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4175 /* Output dependency; DEP_INSN writes a register that INSN writes some
4176 cycles later. */
4177 if (attr_type == TYPE_FPLOAD)
4179 rtx pat = PATTERN (insn);
4180 rtx dep_pat = PATTERN (dep_insn);
4181 if (GET_CODE (pat) == PARALLEL)
4183 /* This happens for the fldXs,mb patterns. */
4184 pat = XVECEXP (pat, 0, 0);
4186 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4187 /* If this happens, we have to extend this to schedule
4188 optimally. Return 0 for now. */
4189 return 0;
4191 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4193 if (! recog_memoized (dep_insn))
4194 return 0;
4195 switch (get_attr_type (dep_insn))
4197 case TYPE_FPALU:
4198 case TYPE_FPMULSGL:
4199 case TYPE_FPMULDBL:
4200 case TYPE_FPDIVSGL:
4201 case TYPE_FPDIVDBL:
4202 case TYPE_FPSQRTSGL:
4203 case TYPE_FPSQRTDBL:
4204 /* A fpload can't be issued until one cycle before a
4205 preceding arithmetic operation has finished if
4206 the target of the fpload is the destination of the
4207 arithmetic operation.
4209 Exception: For PA7100LC, PA7200 and PA7300, the cost
4210 is 3 cycles, unless they bundle together. We also
4211 pay the penalty if the second insn is a fpload. */
4212 return insn_default_latency (dep_insn) - 1;
4214 default:
4215 return 0;
4219 else if (attr_type == TYPE_FPALU)
4221 rtx pat = PATTERN (insn);
4222 rtx dep_pat = PATTERN (dep_insn);
4223 if (GET_CODE (pat) == PARALLEL)
4225 /* This happens for the fldXs,mb patterns. */
4226 pat = XVECEXP (pat, 0, 0);
4228 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4229 /* If this happens, we have to extend this to schedule
4230 optimally. Return 0 for now. */
4231 return 0;
4233 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4235 if (! recog_memoized (dep_insn))
4236 return 0;
4237 switch (get_attr_type (dep_insn))
4239 case TYPE_FPDIVSGL:
4240 case TYPE_FPDIVDBL:
4241 case TYPE_FPSQRTSGL:
4242 case TYPE_FPSQRTDBL:
4243 /* An ALU flop can't be issued until two cycles before a
4244 preceding divide or sqrt operation has finished if
4245 the target of the ALU flop is also the target of
4246 the divide or sqrt operation. */
4247 return insn_default_latency (dep_insn) - 2;
4249 default:
4250 return 0;
4255 /* For other output dependencies, the cost is 0. */
4256 return 0;
4258 else
4259 abort ();
4262 /* Adjust scheduling priorities. We use this to try and keep addil
4263 and the next use of %r1 close together. */
4264 static int
4265 pa_adjust_priority (insn, priority)
4266 rtx insn;
4267 int priority;
4269 rtx set = single_set (insn);
4270 rtx src, dest;
4271 if (set)
4273 src = SET_SRC (set);
4274 dest = SET_DEST (set);
4275 if (GET_CODE (src) == LO_SUM
4276 && symbolic_operand (XEXP (src, 1), VOIDmode)
4277 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4278 priority >>= 3;
4280 else if (GET_CODE (src) == MEM
4281 && GET_CODE (XEXP (src, 0)) == LO_SUM
4282 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4283 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4284 priority >>= 1;
4286 else if (GET_CODE (dest) == MEM
4287 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4288 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4289 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4290 priority >>= 3;
4292 return priority;
4295 /* The 700 can only issue a single insn at a time.
4296 The 7XXX processors can issue two insns at a time.
4297 The 8000 can issue 4 insns at a time. */
4298 static int
4299 pa_issue_rate ()
4301 switch (pa_cpu)
4303 case PROCESSOR_700: return 1;
4304 case PROCESSOR_7100: return 2;
4305 case PROCESSOR_7100LC: return 2;
4306 case PROCESSOR_7200: return 2;
4307 case PROCESSOR_7300: return 2;
4308 case PROCESSOR_8000: return 4;
4310 default:
4311 abort ();
4317 /* Return any length adjustment needed by INSN which already has its length
4318 computed as LENGTH. Return zero if no adjustment is necessary.
4320 For the PA: function calls, millicode calls, and backwards short
4321 conditional branches with unfilled delay slots need an adjustment by +1
4322 (to account for the NOP which will be inserted into the instruction stream).
4324 Also compute the length of an inline block move here as it is too
4325 complicated to express as a length attribute in pa.md. */
4327 pa_adjust_insn_length (insn, length)
4328 rtx insn;
4329 int length;
4331 rtx pat = PATTERN (insn);
4333 /* Call insns which are *not* indirect and have unfilled delay slots. */
4334 if (GET_CODE (insn) == CALL_INSN)
4337 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4338 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
4339 return 4;
4340 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4341 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4342 == SYMBOL_REF)
4343 return 4;
4344 else
4345 return 0;
4347 /* Jumps inside switch tables which have unfilled delay slots
4348 also need adjustment. */
4349 else if (GET_CODE (insn) == JUMP_INSN
4350 && simplejump_p (insn)
4351 && GET_MODE (insn) == SImode)
4352 return 4;
4353 /* Millicode insn with an unfilled delay slot. */
4354 else if (GET_CODE (insn) == INSN
4355 && GET_CODE (pat) != SEQUENCE
4356 && GET_CODE (pat) != USE
4357 && GET_CODE (pat) != CLOBBER
4358 && get_attr_type (insn) == TYPE_MILLI)
4359 return 4;
4360 /* Block move pattern. */
4361 else if (GET_CODE (insn) == INSN
4362 && GET_CODE (pat) == PARALLEL
4363 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4364 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4365 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4366 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4367 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4368 return compute_movstrsi_length (insn) - 4;
4369 /* Conditional branch with an unfilled delay slot. */
4370 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4372 /* Adjust a short backwards conditional with an unfilled delay slot. */
4373 if (GET_CODE (pat) == SET
4374 && length == 4
4375 && ! forward_branch_p (insn))
4376 return 4;
4377 else if (GET_CODE (pat) == PARALLEL
4378 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4379 && length == 4)
4380 return 4;
4381 /* Adjust dbra insn with short backwards conditional branch with
4382 unfilled delay slot -- only for case where counter is in a
4383 general register register. */
4384 else if (GET_CODE (pat) == PARALLEL
4385 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4386 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4387 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4388 && length == 4
4389 && ! forward_branch_p (insn))
4390 return 4;
4391 else
4392 return 0;
4394 return 0;
4397 /* Print operand X (an rtx) in assembler syntax to file FILE.
4398 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4399 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4401 void
4402 print_operand (file, x, code)
4403 FILE *file;
4404 rtx x;
4405 int code;
4407 switch (code)
4409 case '#':
4410 /* Output a 'nop' if there's nothing for the delay slot. */
4411 if (dbr_sequence_length () == 0)
4412 fputs ("\n\tnop", file);
4413 return;
4414 case '*':
4415 /* Output a nullification completer if there's nothing for the */
4416 /* delay slot or nullification is requested. */
4417 if (dbr_sequence_length () == 0 ||
4418 (final_sequence &&
4419 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4420 fputs (",n", file);
4421 return;
4422 case 'R':
4423 /* Print out the second register name of a register pair.
4424 I.e., R (6) => 7. */
4425 fputs (reg_names[REGNO (x) + 1], file);
4426 return;
4427 case 'r':
4428 /* A register or zero. */
4429 if (x == const0_rtx
4430 || (x == CONST0_RTX (DFmode))
4431 || (x == CONST0_RTX (SFmode)))
4433 fputs ("%r0", file);
4434 return;
4436 else
4437 break;
4438 case 'f':
4439 /* A register or zero (floating point). */
4440 if (x == const0_rtx
4441 || (x == CONST0_RTX (DFmode))
4442 || (x == CONST0_RTX (SFmode)))
4444 fputs ("%fr0", file);
4445 return;
4447 else
4448 break;
4449 case 'A':
4451 rtx xoperands[2];
4453 xoperands[0] = XEXP (XEXP (x, 0), 0);
4454 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4455 output_global_address (file, xoperands[1], 0);
4456 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4457 return;
4460 case 'C': /* Plain (C)ondition */
4461 case 'X':
4462 switch (GET_CODE (x))
4464 case EQ:
4465 fputs ("=", file); break;
4466 case NE:
4467 fputs ("<>", file); break;
4468 case GT:
4469 fputs (">", file); break;
4470 case GE:
4471 fputs (">=", file); break;
4472 case GEU:
4473 fputs (">>=", file); break;
4474 case GTU:
4475 fputs (">>", file); break;
4476 case LT:
4477 fputs ("<", file); break;
4478 case LE:
4479 fputs ("<=", file); break;
4480 case LEU:
4481 fputs ("<<=", file); break;
4482 case LTU:
4483 fputs ("<<", file); break;
4484 default:
4485 abort ();
4487 return;
4488 case 'N': /* Condition, (N)egated */
4489 switch (GET_CODE (x))
4491 case EQ:
4492 fputs ("<>", file); break;
4493 case NE:
4494 fputs ("=", file); break;
4495 case GT:
4496 fputs ("<=", file); break;
4497 case GE:
4498 fputs ("<", file); break;
4499 case GEU:
4500 fputs ("<<", file); break;
4501 case GTU:
4502 fputs ("<<=", file); break;
4503 case LT:
4504 fputs (">=", file); break;
4505 case LE:
4506 fputs (">", file); break;
4507 case LEU:
4508 fputs (">>", file); break;
4509 case LTU:
4510 fputs (">>=", file); break;
4511 default:
4512 abort ();
4514 return;
4515 /* For floating point comparisons. Note that the output
4516 predicates are the complement of the desired mode. */
4517 case 'Y':
4518 switch (GET_CODE (x))
4520 case EQ:
4521 fputs ("!=", file); break;
4522 case NE:
4523 fputs ("=", file); break;
4524 case GT:
4525 fputs ("!>", file); break;
4526 case GE:
4527 fputs ("!>=", file); break;
4528 case LT:
4529 fputs ("!<", file); break;
4530 case LE:
4531 fputs ("!<=", file); break;
4532 case LTGT:
4533 fputs ("!<>", file); break;
4534 case UNLE:
4535 fputs (">", file); break;
4536 case UNLT:
4537 fputs (">=", file); break;
4538 case UNGE:
4539 fputs ("<", file); break;
4540 case UNGT:
4541 fputs ("<=", file); break;
4542 case UNEQ:
4543 fputs ("<>", file); break;
4544 case UNORDERED:
4545 fputs ("<=>", file); break;
4546 case ORDERED:
4547 fputs ("!<=>", file); break;
4548 default:
4549 abort ();
4551 return;
4552 case 'S': /* Condition, operands are (S)wapped. */
4553 switch (GET_CODE (x))
4555 case EQ:
4556 fputs ("=", file); break;
4557 case NE:
4558 fputs ("<>", file); break;
4559 case GT:
4560 fputs ("<", file); break;
4561 case GE:
4562 fputs ("<=", file); break;
4563 case GEU:
4564 fputs ("<<=", file); break;
4565 case GTU:
4566 fputs ("<<", file); break;
4567 case LT:
4568 fputs (">", file); break;
4569 case LE:
4570 fputs (">=", file); break;
4571 case LEU:
4572 fputs (">>=", file); break;
4573 case LTU:
4574 fputs (">>", file); break;
4575 default:
4576 abort ();
4578 return;
4579 case 'B': /* Condition, (B)oth swapped and negate. */
4580 switch (GET_CODE (x))
4582 case EQ:
4583 fputs ("<>", file); break;
4584 case NE:
4585 fputs ("=", file); break;
4586 case GT:
4587 fputs (">=", file); break;
4588 case GE:
4589 fputs (">", file); break;
4590 case GEU:
4591 fputs (">>", file); break;
4592 case GTU:
4593 fputs (">>=", file); break;
4594 case LT:
4595 fputs ("<=", file); break;
4596 case LE:
4597 fputs ("<", file); break;
4598 case LEU:
4599 fputs ("<<", file); break;
4600 case LTU:
4601 fputs ("<<=", file); break;
4602 default:
4603 abort ();
4605 return;
4606 case 'k':
4607 if (GET_CODE (x) == CONST_INT)
4609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4610 return;
4612 abort ();
4613 case 'Q':
4614 if (GET_CODE (x) == CONST_INT)
4616 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4617 return;
4619 abort ();
4620 case 'L':
4621 if (GET_CODE (x) == CONST_INT)
4623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4624 return;
4626 abort ();
4627 case 'O':
4628 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4630 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4631 return;
4633 abort ();
4634 case 'p':
4635 if (GET_CODE (x) == CONST_INT)
4637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4638 return;
4640 abort ();
4641 case 'P':
4642 if (GET_CODE (x) == CONST_INT)
4644 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4645 return;
4647 abort ();
4648 case 'I':
4649 if (GET_CODE (x) == CONST_INT)
4650 fputs ("i", file);
4651 return;
4652 case 'M':
4653 case 'F':
4654 switch (GET_CODE (XEXP (x, 0)))
4656 case PRE_DEC:
4657 case PRE_INC:
4658 if (ASSEMBLER_DIALECT == 0)
4659 fputs ("s,mb", file);
4660 else
4661 fputs (",mb", file);
4662 break;
4663 case POST_DEC:
4664 case POST_INC:
4665 if (ASSEMBLER_DIALECT == 0)
4666 fputs ("s,ma", file);
4667 else
4668 fputs (",ma", file);
4669 break;
4670 case PLUS:
4671 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4672 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4674 if (ASSEMBLER_DIALECT == 0)
4675 fputs ("x,s", file);
4676 else
4677 fputs (",s", file);
4679 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4680 fputs ("s", file);
4681 break;
4682 default:
4683 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4684 fputs ("s", file);
4685 break;
4687 return;
4688 case 'G':
4689 output_global_address (file, x, 0);
4690 return;
4691 case 'H':
4692 output_global_address (file, x, 1);
4693 return;
4694 case 0: /* Don't do anything special */
4695 break;
4696 case 'Z':
4698 unsigned op[3];
4699 compute_zdepwi_operands (INTVAL (x), op);
4700 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4701 return;
4703 case 'z':
4705 unsigned op[3];
4706 compute_zdepdi_operands (INTVAL (x), op);
4707 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4708 return;
4710 case 'c':
4711 /* We can get here from a .vtable_inherit due to our
4712 CONSTANT_ADDRESS_P rejecting perfectly good constant
4713 addresses. */
4714 break;
4715 default:
4716 abort ();
4718 if (GET_CODE (x) == REG)
4720 fputs (reg_names [REGNO (x)], file);
4721 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4723 fputs ("R", file);
4724 return;
4726 if (FP_REG_P (x)
4727 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4728 && (REGNO (x) & 1) == 0)
4729 fputs ("L", file);
4731 else if (GET_CODE (x) == MEM)
4733 int size = GET_MODE_SIZE (GET_MODE (x));
4734 rtx base = NULL_RTX;
4735 switch (GET_CODE (XEXP (x, 0)))
4737 case PRE_DEC:
4738 case POST_DEC:
4739 base = XEXP (XEXP (x, 0), 0);
4740 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4741 break;
4742 case PRE_INC:
4743 case POST_INC:
4744 base = XEXP (XEXP (x, 0), 0);
4745 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4746 break;
4747 default:
4748 if (GET_CODE (XEXP (x, 0)) == PLUS
4749 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4750 fprintf (file, "%s(%s)",
4751 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4752 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4753 else if (GET_CODE (XEXP (x, 0)) == PLUS
4754 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4755 fprintf (file, "%s(%s)",
4756 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4757 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4758 else
4759 output_address (XEXP (x, 0));
4760 break;
4763 else
4764 output_addr_const (file, x);
4767 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4769 void
4770 output_global_address (file, x, round_constant)
4771 FILE *file;
4772 rtx x;
4773 int round_constant;
4776 /* Imagine (high (const (plus ...))). */
4777 if (GET_CODE (x) == HIGH)
4778 x = XEXP (x, 0);
4780 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4781 assemble_name (file, XSTR (x, 0));
4782 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4784 assemble_name (file, XSTR (x, 0));
4785 fputs ("-$global$", file);
4787 else if (GET_CODE (x) == CONST)
4789 const char *sep = "";
4790 int offset = 0; /* assembler wants -$global$ at end */
4791 rtx base = NULL_RTX;
4793 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4795 base = XEXP (XEXP (x, 0), 0);
4796 output_addr_const (file, base);
4798 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4799 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4800 else abort ();
4802 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4804 base = XEXP (XEXP (x, 0), 1);
4805 output_addr_const (file, base);
4807 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4808 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4809 else abort ();
4811 /* How bogus. The compiler is apparently responsible for
4812 rounding the constant if it uses an LR field selector.
4814 The linker and/or assembler seem a better place since
4815 they have to do this kind of thing already.
4817 If we fail to do this, HP's optimizing linker may eliminate
4818 an addil, but not update the ldw/stw/ldo instruction that
4819 uses the result of the addil. */
4820 if (round_constant)
4821 offset = ((offset + 0x1000) & ~0x1fff);
4823 if (GET_CODE (XEXP (x, 0)) == PLUS)
4825 if (offset < 0)
4827 offset = -offset;
4828 sep = "-";
4830 else
4831 sep = "+";
4833 else if (GET_CODE (XEXP (x, 0)) == MINUS
4834 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4835 sep = "-";
4836 else abort ();
4838 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4839 fputs ("-$global$", file);
4840 if (offset)
4841 fprintf (file, "%s%d", sep, offset);
4843 else
4844 output_addr_const (file, x);
4847 static struct deferred_plabel *
4848 get_plabel (fname)
4849 const char *fname;
4851 size_t i;
4853 /* See if we have already put this function on the list of deferred
4854 plabels. This list is generally small, so a liner search is not
4855 too ugly. If it proves too slow replace it with something faster. */
4856 for (i = 0; i < n_deferred_plabels; i++)
4857 if (strcmp (fname, deferred_plabels[i].name) == 0)
4858 break;
4860 /* If the deferred plabel list is empty, or this entry was not found
4861 on the list, create a new entry on the list. */
4862 if (deferred_plabels == NULL || i == n_deferred_plabels)
4864 const char *real_name;
4866 if (deferred_plabels == 0)
4867 deferred_plabels = (struct deferred_plabel *)
4868 ggc_alloc (sizeof (struct deferred_plabel));
4869 else
4870 deferred_plabels = (struct deferred_plabel *)
4871 ggc_realloc (deferred_plabels,
4872 ((n_deferred_plabels + 1)
4873 * sizeof (struct deferred_plabel)));
4875 i = n_deferred_plabels++;
4876 deferred_plabels[i].internal_label = gen_label_rtx ();
4877 deferred_plabels[i].name = ggc_strdup (fname);
4879 /* Gross. We have just implicitly taken the address of this function,
4880 mark it as such. */
4881 real_name = (*targetm.strip_name_encoding) (fname);
4882 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
4885 return &deferred_plabels[i];
4888 void
4889 output_deferred_plabels (file)
4890 FILE *file;
4892 size_t i;
4893 /* If we have deferred plabels, then we need to switch into the data
4894 section and align it to a 4 byte boundary before we output the
4895 deferred plabels. */
4896 if (n_deferred_plabels)
4898 data_section ();
4899 ASM_OUTPUT_ALIGN (file, TARGET_64BIT ? 3 : 2);
4902 /* Now output the deferred plabels. */
4903 for (i = 0; i < n_deferred_plabels; i++)
4905 (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4906 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4907 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4911 /* HP's millicode routines mean something special to the assembler.
4912 Keep track of which ones we have used. */
4914 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
4915 static void import_milli PARAMS ((enum millicodes));
4916 static char imported[(int) end1000];
4917 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
4918 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4919 #define MILLI_START 10
4921 static void
4922 import_milli (code)
4923 enum millicodes code;
4925 char str[sizeof (import_string)];
4927 if (!imported[(int) code])
4929 imported[(int) code] = 1;
4930 strcpy (str, import_string);
4931 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4932 output_asm_insn (str, 0);
4936 /* The register constraints have put the operands and return value in
4937 the proper registers. */
4939 const char *
4940 output_mul_insn (unsignedp, insn)
4941 int unsignedp ATTRIBUTE_UNUSED;
4942 rtx insn;
4944 import_milli (mulI);
4945 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4948 /* Emit the rtl for doing a division by a constant. */
4950 /* Do magic division millicodes exist for this value? */
4951 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4952 1, 1};
4954 /* We'll use an array to keep track of the magic millicodes and
4955 whether or not we've used them already. [n][0] is signed, [n][1] is
4956 unsigned. */
4958 static int div_milli[16][2];
4961 div_operand (op, mode)
4962 rtx op;
4963 enum machine_mode mode;
4965 return (mode == SImode
4966 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4967 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4968 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4972 emit_hpdiv_const (operands, unsignedp)
4973 rtx *operands;
4974 int unsignedp;
4976 if (GET_CODE (operands[2]) == CONST_INT
4977 && INTVAL (operands[2]) > 0
4978 && INTVAL (operands[2]) < 16
4979 && magic_milli[INTVAL (operands[2])])
4981 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
4983 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4984 emit
4985 (gen_rtx
4986 (PARALLEL, VOIDmode,
4987 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4988 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4989 SImode,
4990 gen_rtx_REG (SImode, 26),
4991 operands[2])),
4992 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4993 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4994 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4995 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4996 gen_rtx_CLOBBER (VOIDmode, ret))));
4997 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4998 return 1;
5000 return 0;
5003 const char *
5004 output_div_insn (operands, unsignedp, insn)
5005 rtx *operands;
5006 int unsignedp;
5007 rtx insn;
5009 int divisor;
5011 /* If the divisor is a constant, try to use one of the special
5012 opcodes .*/
5013 if (GET_CODE (operands[0]) == CONST_INT)
5015 static char buf[100];
5016 divisor = INTVAL (operands[0]);
5017 if (!div_milli[divisor][unsignedp])
5019 div_milli[divisor][unsignedp] = 1;
5020 if (unsignedp)
5021 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5022 else
5023 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5025 if (unsignedp)
5027 sprintf (buf, "$$divU_");
5028 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
5029 return output_millicode_call (insn,
5030 gen_rtx_SYMBOL_REF (SImode, buf));
5032 else
5034 sprintf (buf, "$$divI_");
5035 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
5036 return output_millicode_call (insn,
5037 gen_rtx_SYMBOL_REF (SImode, buf));
5040 /* Divisor isn't a special constant. */
5041 else
5043 if (unsignedp)
5045 import_milli (divU);
5046 return output_millicode_call (insn,
5047 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5049 else
5051 import_milli (divI);
5052 return output_millicode_call (insn,
5053 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5058 /* Output a $$rem millicode to do mod. */
5060 const char *
5061 output_mod_insn (unsignedp, insn)
5062 int unsignedp;
5063 rtx insn;
5065 if (unsignedp)
5067 import_milli (remU);
5068 return output_millicode_call (insn,
5069 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5071 else
5073 import_milli (remI);
5074 return output_millicode_call (insn,
5075 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5079 void
5080 output_arg_descriptor (call_insn)
5081 rtx call_insn;
5083 const char *arg_regs[4];
5084 enum machine_mode arg_mode;
5085 rtx link;
5086 int i, output_flag = 0;
5087 int regno;
5089 /* We neither need nor want argument location descriptors for the
5090 64bit runtime environment or the ELF32 environment. */
5091 if (TARGET_64BIT || TARGET_ELF32)
5092 return;
5094 for (i = 0; i < 4; i++)
5095 arg_regs[i] = 0;
5097 /* Specify explicitly that no argument relocations should take place
5098 if using the portable runtime calling conventions. */
5099 if (TARGET_PORTABLE_RUNTIME)
5101 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5102 asm_out_file);
5103 return;
5106 if (GET_CODE (call_insn) != CALL_INSN)
5107 abort ();
5108 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5110 rtx use = XEXP (link, 0);
5112 if (! (GET_CODE (use) == USE
5113 && GET_CODE (XEXP (use, 0)) == REG
5114 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5115 continue;
5117 arg_mode = GET_MODE (XEXP (use, 0));
5118 regno = REGNO (XEXP (use, 0));
5119 if (regno >= 23 && regno <= 26)
5121 arg_regs[26 - regno] = "GR";
5122 if (arg_mode == DImode)
5123 arg_regs[25 - regno] = "GR";
5125 else if (regno >= 32 && regno <= 39)
5127 if (arg_mode == SFmode)
5128 arg_regs[(regno - 32) / 2] = "FR";
5129 else
5131 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5132 arg_regs[(regno - 34) / 2] = "FR";
5133 arg_regs[(regno - 34) / 2 + 1] = "FU";
5134 #else
5135 arg_regs[(regno - 34) / 2] = "FU";
5136 arg_regs[(regno - 34) / 2 + 1] = "FR";
5137 #endif
5141 fputs ("\t.CALL ", asm_out_file);
5142 for (i = 0; i < 4; i++)
5144 if (arg_regs[i])
5146 if (output_flag++)
5147 fputc (',', asm_out_file);
5148 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5151 fputc ('\n', asm_out_file);
5154 /* Return the class of any secondary reload register that is needed to
5155 move IN into a register in class CLASS using mode MODE.
5157 Profiling has showed this routine and its descendants account for
5158 a significant amount of compile time (~7%). So it has been
5159 optimized to reduce redundant computations and eliminate useless
5160 function calls.
5162 It might be worthwhile to try and make this a leaf function too. */
5164 enum reg_class
5165 secondary_reload_class (class, mode, in)
5166 enum reg_class class;
5167 enum machine_mode mode;
5168 rtx in;
5170 int regno, is_symbolic;
5172 /* Trying to load a constant into a FP register during PIC code
5173 generation will require %r1 as a scratch register. */
5174 if (flag_pic
5175 && GET_MODE_CLASS (mode) == MODE_INT
5176 && FP_REG_CLASS_P (class)
5177 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5178 return R1_REGS;
5180 /* Profiling showed the PA port spends about 1.3% of its compilation
5181 time in true_regnum from calls inside secondary_reload_class. */
5183 if (GET_CODE (in) == REG)
5185 regno = REGNO (in);
5186 if (regno >= FIRST_PSEUDO_REGISTER)
5187 regno = true_regnum (in);
5189 else if (GET_CODE (in) == SUBREG)
5190 regno = true_regnum (in);
5191 else
5192 regno = -1;
5194 /* If we have something like (mem (mem (...)), we can safely assume the
5195 inner MEM will end up in a general register after reloading, so there's
5196 no need for a secondary reload. */
5197 if (GET_CODE (in) == MEM
5198 && GET_CODE (XEXP (in, 0)) == MEM)
5199 return NO_REGS;
5201 /* Handle out of range displacement for integer mode loads/stores of
5202 FP registers. */
5203 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5204 && GET_MODE_CLASS (mode) == MODE_INT
5205 && FP_REG_CLASS_P (class))
5206 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5207 return GENERAL_REGS;
5209 /* A SAR<->FP register copy requires a secondary register (GPR) as
5210 well as secondary memory. */
5211 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5212 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5213 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5214 return GENERAL_REGS;
5216 if (GET_CODE (in) == HIGH)
5217 in = XEXP (in, 0);
5219 /* Profiling has showed GCC spends about 2.6% of its compilation
5220 time in symbolic_operand from calls inside secondary_reload_class.
5222 We use an inline copy and only compute its return value once to avoid
5223 useless work. */
5224 switch (GET_CODE (in))
5226 rtx tmp;
5228 case SYMBOL_REF:
5229 case LABEL_REF:
5230 is_symbolic = 1;
5231 break;
5232 case CONST:
5233 tmp = XEXP (in, 0);
5234 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5235 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5236 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5237 break;
5239 default:
5240 is_symbolic = 0;
5241 break;
5244 if (!flag_pic
5245 && is_symbolic
5246 && read_only_operand (in, VOIDmode))
5247 return NO_REGS;
5249 if (class != R1_REGS && is_symbolic)
5250 return R1_REGS;
5252 return NO_REGS;
5255 enum direction
5256 function_arg_padding (mode, type)
5257 enum machine_mode mode;
5258 tree type;
5260 if (mode == BLKmode
5261 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5263 /* Return none if justification is not required. */
5264 if (type
5265 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5266 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5267 return none;
5269 /* The directions set here are ignored when a BLKmode argument larger
5270 than a word is placed in a register. Different code is used for
5271 the stack and registers. This makes it difficult to have a
5272 consistent data representation for both the stack and registers.
5273 For both runtimes, the justification and padding for arguments on
5274 the stack and in registers should be identical. */
5275 if (TARGET_64BIT)
5276 /* The 64-bit runtime specifies left justification for aggregates. */
5277 return upward;
5278 else
5279 /* The 32-bit runtime architecture specifies right justification.
5280 When the argument is passed on the stack, the argument is padded
5281 with garbage on the left. The HP compiler pads with zeros. */
5282 return downward;
5285 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5286 return downward;
5287 else
5288 return none;
5292 /* Do what is necessary for `va_start'. We look at the current function
5293 to determine if stdargs or varargs is used and fill in an initial
5294 va_list. A pointer to this constructor is returned. */
5296 struct rtx_def *
5297 hppa_builtin_saveregs ()
5299 rtx offset, dest;
5300 tree fntype = TREE_TYPE (current_function_decl);
5301 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5302 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5303 != void_type_node)))
5304 ? UNITS_PER_WORD : 0);
5306 if (argadj)
5307 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5308 else
5309 offset = current_function_arg_offset_rtx;
5311 if (TARGET_64BIT)
5313 int i, off;
5315 /* Adjust for varargs/stdarg differences. */
5316 if (argadj)
5317 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5318 else
5319 offset = current_function_arg_offset_rtx;
5321 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5322 from the incoming arg pointer and growing to larger addresses. */
5323 for (i = 26, off = -64; i >= 19; i--, off += 8)
5324 emit_move_insn (gen_rtx_MEM (word_mode,
5325 plus_constant (arg_pointer_rtx, off)),
5326 gen_rtx_REG (word_mode, i));
5328 /* The incoming args pointer points just beyond the flushback area;
5329 normally this is not a serious concern. However, when we are doing
5330 varargs/stdargs we want to make the arg pointer point to the start
5331 of the incoming argument area. */
5332 emit_move_insn (virtual_incoming_args_rtx,
5333 plus_constant (arg_pointer_rtx, -64));
5335 /* Now return a pointer to the first anonymous argument. */
5336 return copy_to_reg (expand_binop (Pmode, add_optab,
5337 virtual_incoming_args_rtx,
5338 offset, 0, 0, OPTAB_LIB_WIDEN));
5341 /* Store general registers on the stack. */
5342 dest = gen_rtx_MEM (BLKmode,
5343 plus_constant (current_function_internal_arg_pointer,
5344 -16));
5345 set_mem_alias_set (dest, get_varargs_alias_set ());
5346 set_mem_align (dest, BITS_PER_WORD);
5347 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
5349 /* move_block_from_reg will emit code to store the argument registers
5350 individually as scalar stores.
5352 However, other insns may later load from the same addresses for
5353 a structure load (passing a struct to a varargs routine).
5355 The alias code assumes that such aliasing can never happen, so we
5356 have to keep memory referencing insns from moving up beyond the
5357 last argument register store. So we emit a blockage insn here. */
5358 emit_insn (gen_blockage ());
5360 return copy_to_reg (expand_binop (Pmode, add_optab,
5361 current_function_internal_arg_pointer,
5362 offset, 0, 0, OPTAB_LIB_WIDEN));
5365 void
5366 hppa_va_start (valist, nextarg)
5367 tree valist;
5368 rtx nextarg;
5370 nextarg = expand_builtin_saveregs ();
5371 std_expand_builtin_va_start (valist, nextarg);
5375 hppa_va_arg (valist, type)
5376 tree valist, type;
5378 HOST_WIDE_INT size = int_size_in_bytes (type);
5379 HOST_WIDE_INT ofs;
5380 tree t, ptr, pptr;
5382 if (TARGET_64BIT)
5384 /* Every argument in PA64 is supposed to be passed by value
5385 (including large structs). However, as a GCC extension, we
5386 pass zero and variable sized arguments by reference. Empty
5387 structures are a GCC extension not supported by the HP
5388 compilers. Thus, passing them by reference isn't likely
5389 to conflict with the ABI. For variable sized arguments,
5390 GCC doesn't have the infrastructure to allocate these to
5391 registers. */
5393 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5395 if (size > UNITS_PER_WORD)
5397 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5398 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5399 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5400 build_int_2 (-2 * UNITS_PER_WORD, -1));
5401 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5402 TREE_SIDE_EFFECTS (t) = 1;
5403 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5406 if (size > 0)
5407 return std_expand_builtin_va_arg (valist, type);
5408 else
5410 ptr = build_pointer_type (type);
5412 /* Args grow upward. */
5413 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5414 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5415 TREE_SIDE_EFFECTS (t) = 1;
5417 pptr = build_pointer_type (ptr);
5418 t = build1 (NOP_EXPR, pptr, t);
5419 TREE_SIDE_EFFECTS (t) = 1;
5421 t = build1 (INDIRECT_REF, ptr, t);
5422 TREE_SIDE_EFFECTS (t) = 1;
5425 else /* !TARGET_64BIT */
5427 ptr = build_pointer_type (type);
5429 /* "Large" and variable sized types are passed by reference. */
5430 if (size > 8 || size <= 0)
5432 /* Args grow downward. */
5433 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5434 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5435 TREE_SIDE_EFFECTS (t) = 1;
5437 pptr = build_pointer_type (ptr);
5438 t = build1 (NOP_EXPR, pptr, t);
5439 TREE_SIDE_EFFECTS (t) = 1;
5441 t = build1 (INDIRECT_REF, ptr, t);
5442 TREE_SIDE_EFFECTS (t) = 1;
5444 else
5446 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5447 build_int_2 (-size, -1));
5449 /* Copied from va-pa.h, but we probably don't need to align to
5450 word size, since we generate and preserve that invariant. */
5451 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5452 build_int_2 ((size > 4 ? -8 : -4), -1));
5454 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5455 TREE_SIDE_EFFECTS (t) = 1;
5457 ofs = (8 - size) % 4;
5458 if (ofs)
5460 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
5461 build_int_2 (ofs, 0));
5462 TREE_SIDE_EFFECTS (t) = 1;
5465 t = build1 (NOP_EXPR, ptr, t);
5466 TREE_SIDE_EFFECTS (t) = 1;
5470 /* Calculate! */
5471 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5476 /* This routine handles all the normal conditional branch sequences we
5477 might need to generate. It handles compare immediate vs compare
5478 register, nullification of delay slots, varying length branches,
5479 negated branches, and all combinations of the above. It returns the
5480 output appropriate to emit the branch corresponding to all given
5481 parameters. */
5483 const char *
5484 output_cbranch (operands, nullify, length, negated, insn)
5485 rtx *operands;
5486 int nullify, length, negated;
5487 rtx insn;
5489 static char buf[100];
5490 int useskip = 0;
5492 /* A conditional branch to the following instruction (eg the delay slot) is
5493 asking for a disaster. This can happen when not optimizing.
5495 In such cases it is safe to emit nothing. */
5497 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5498 return "";
5500 /* If this is a long branch with its delay slot unfilled, set `nullify'
5501 as it can nullify the delay slot and save a nop. */
5502 if (length == 8 && dbr_sequence_length () == 0)
5503 nullify = 1;
5505 /* If this is a short forward conditional branch which did not get
5506 its delay slot filled, the delay slot can still be nullified. */
5507 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5508 nullify = forward_branch_p (insn);
5510 /* A forward branch over a single nullified insn can be done with a
5511 comclr instruction. This avoids a single cycle penalty due to
5512 mis-predicted branch if we fall through (branch not taken). */
5513 if (length == 4
5514 && next_real_insn (insn) != 0
5515 && get_attr_length (next_real_insn (insn)) == 4
5516 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5517 && nullify)
5518 useskip = 1;
5520 switch (length)
5522 /* All short conditional branches except backwards with an unfilled
5523 delay slot. */
5524 case 4:
5525 if (useskip)
5526 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5527 else
5528 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5529 if (GET_MODE (operands[1]) == DImode)
5530 strcat (buf, "*");
5531 if (negated)
5532 strcat (buf, "%B3");
5533 else
5534 strcat (buf, "%S3");
5535 if (useskip)
5536 strcat (buf, " %2,%r1,%%r0");
5537 else if (nullify)
5538 strcat (buf, ",n %2,%r1,%0");
5539 else
5540 strcat (buf, " %2,%r1,%0");
5541 break;
5543 /* All long conditionals. Note a short backward branch with an
5544 unfilled delay slot is treated just like a long backward branch
5545 with an unfilled delay slot. */
5546 case 8:
5547 /* Handle weird backwards branch with a filled delay slot
5548 with is nullified. */
5549 if (dbr_sequence_length () != 0
5550 && ! forward_branch_p (insn)
5551 && nullify)
5553 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5554 if (GET_MODE (operands[1]) == DImode)
5555 strcat (buf, "*");
5556 if (negated)
5557 strcat (buf, "%S3");
5558 else
5559 strcat (buf, "%B3");
5560 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5562 /* Handle short backwards branch with an unfilled delay slot.
5563 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5564 taken and untaken branches. */
5565 else if (dbr_sequence_length () == 0
5566 && ! forward_branch_p (insn)
5567 && INSN_ADDRESSES_SET_P ()
5568 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5569 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5571 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5572 if (GET_MODE (operands[1]) == DImode)
5573 strcat (buf, "*");
5574 if (negated)
5575 strcat (buf, "%B3 %2,%r1,%0%#");
5576 else
5577 strcat (buf, "%S3 %2,%r1,%0%#");
5579 else
5581 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5582 if (GET_MODE (operands[1]) == DImode)
5583 strcat (buf, "*");
5584 if (negated)
5585 strcat (buf, "%S3");
5586 else
5587 strcat (buf, "%B3");
5588 if (nullify)
5589 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5590 else
5591 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5593 break;
5595 case 20:
5596 /* Very long branch. Right now we only handle these when not
5597 optimizing. See "jump" pattern in pa.md for details. */
5598 if (optimize)
5599 abort ();
5601 /* Create a reversed conditional branch which branches around
5602 the following insns. */
5603 if (negated)
5604 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5605 else
5606 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5607 if (GET_MODE (operands[1]) == DImode)
5609 if (negated)
5610 strcpy (buf,
5611 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5612 else
5613 strcpy (buf,
5614 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5616 output_asm_insn (buf, operands);
5618 /* Output an insn to save %r1. */
5619 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5621 /* Now output a very long branch to the original target. */
5622 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5624 /* Now restore the value of %r1 in the delay slot. We're not
5625 optimizing so we know nothing else can be in the delay slot. */
5626 return "ldw -16(%%r30),%%r1";
5628 case 28:
5629 /* Very long branch when generating PIC code. Right now we only
5630 handle these when not optimizing. See "jump" pattern in pa.md
5631 for details. */
5632 if (optimize)
5633 abort ();
5635 /* Create a reversed conditional branch which branches around
5636 the following insns. */
5637 if (negated)
5638 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5639 else
5640 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5641 if (GET_MODE (operands[1]) == DImode)
5643 if (negated)
5644 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5645 else
5646 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5648 output_asm_insn (buf, operands);
5650 /* Output an insn to save %r1. */
5651 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5653 /* Now output a very long PIC branch to the original target. */
5655 rtx xoperands[5];
5657 xoperands[0] = operands[0];
5658 xoperands[1] = operands[1];
5659 xoperands[2] = operands[2];
5660 xoperands[3] = operands[3];
5662 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5663 if (TARGET_SOM || !TARGET_GAS)
5665 xoperands[4] = gen_label_rtx ();
5666 output_asm_insn ("addil L'%l0-%l4,%%r1", xoperands);
5667 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5668 CODE_LABEL_NUMBER (xoperands[4]));
5669 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1", xoperands);
5671 else
5673 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5674 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1",
5675 xoperands);
5677 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5680 /* Now restore the value of %r1 in the delay slot. We're not
5681 optimizing so we know nothing else can be in the delay slot. */
5682 return "ldw -16(%%r30),%%r1";
5684 default:
5685 abort ();
5687 return buf;
5690 /* This routine handles all the branch-on-bit conditional branch sequences we
5691 might need to generate. It handles nullification of delay slots,
5692 varying length branches, negated branches and all combinations of the
5693 above. it returns the appropriate output template to emit the branch. */
5695 const char *
5696 output_bb (operands, nullify, length, negated, insn, which)
5697 rtx *operands ATTRIBUTE_UNUSED;
5698 int nullify, length, negated;
5699 rtx insn;
5700 int which;
5702 static char buf[100];
5703 int useskip = 0;
5705 /* A conditional branch to the following instruction (eg the delay slot) is
5706 asking for a disaster. I do not think this can happen as this pattern
5707 is only used when optimizing; jump optimization should eliminate the
5708 jump. But be prepared just in case. */
5710 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5711 return "";
5713 /* If this is a long branch with its delay slot unfilled, set `nullify'
5714 as it can nullify the delay slot and save a nop. */
5715 if (length == 8 && dbr_sequence_length () == 0)
5716 nullify = 1;
5718 /* If this is a short forward conditional branch which did not get
5719 its delay slot filled, the delay slot can still be nullified. */
5720 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5721 nullify = forward_branch_p (insn);
5723 /* A forward branch over a single nullified insn can be done with a
5724 extrs instruction. This avoids a single cycle penalty due to
5725 mis-predicted branch if we fall through (branch not taken). */
5727 if (length == 4
5728 && next_real_insn (insn) != 0
5729 && get_attr_length (next_real_insn (insn)) == 4
5730 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5731 && nullify)
5732 useskip = 1;
5734 switch (length)
5737 /* All short conditional branches except backwards with an unfilled
5738 delay slot. */
5739 case 4:
5740 if (useskip)
5741 strcpy (buf, "{extrs,|extrw,s,}");
5742 else
5743 strcpy (buf, "bb,");
5744 if (useskip && GET_MODE (operands[0]) == DImode)
5745 strcpy (buf, "extrd,s,*");
5746 else if (GET_MODE (operands[0]) == DImode)
5747 strcpy (buf, "bb,*");
5748 if ((which == 0 && negated)
5749 || (which == 1 && ! negated))
5750 strcat (buf, ">=");
5751 else
5752 strcat (buf, "<");
5753 if (useskip)
5754 strcat (buf, " %0,%1,1,%%r0");
5755 else if (nullify && negated)
5756 strcat (buf, ",n %0,%1,%3");
5757 else if (nullify && ! negated)
5758 strcat (buf, ",n %0,%1,%2");
5759 else if (! nullify && negated)
5760 strcat (buf, "%0,%1,%3");
5761 else if (! nullify && ! negated)
5762 strcat (buf, " %0,%1,%2");
5763 break;
5765 /* All long conditionals. Note a short backward branch with an
5766 unfilled delay slot is treated just like a long backward branch
5767 with an unfilled delay slot. */
5768 case 8:
5769 /* Handle weird backwards branch with a filled delay slot
5770 with is nullified. */
5771 if (dbr_sequence_length () != 0
5772 && ! forward_branch_p (insn)
5773 && nullify)
5775 strcpy (buf, "bb,");
5776 if (GET_MODE (operands[0]) == DImode)
5777 strcat (buf, "*");
5778 if ((which == 0 && negated)
5779 || (which == 1 && ! negated))
5780 strcat (buf, "<");
5781 else
5782 strcat (buf, ">=");
5783 if (negated)
5784 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5785 else
5786 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5788 /* Handle short backwards branch with an unfilled delay slot.
5789 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5790 taken and untaken branches. */
5791 else if (dbr_sequence_length () == 0
5792 && ! forward_branch_p (insn)
5793 && INSN_ADDRESSES_SET_P ()
5794 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5795 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5797 strcpy (buf, "bb,");
5798 if (GET_MODE (operands[0]) == DImode)
5799 strcat (buf, "*");
5800 if ((which == 0 && negated)
5801 || (which == 1 && ! negated))
5802 strcat (buf, ">=");
5803 else
5804 strcat (buf, "<");
5805 if (negated)
5806 strcat (buf, " %0,%1,%3%#");
5807 else
5808 strcat (buf, " %0,%1,%2%#");
5810 else
5812 strcpy (buf, "{extrs,|extrw,s,}");
5813 if (GET_MODE (operands[0]) == DImode)
5814 strcpy (buf, "extrd,s,*");
5815 if ((which == 0 && negated)
5816 || (which == 1 && ! negated))
5817 strcat (buf, "<");
5818 else
5819 strcat (buf, ">=");
5820 if (nullify && negated)
5821 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5822 else if (nullify && ! negated)
5823 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5824 else if (negated)
5825 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5826 else
5827 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5829 break;
5831 default:
5832 abort ();
5834 return buf;
5837 /* This routine handles all the branch-on-variable-bit conditional branch
5838 sequences we might need to generate. It handles nullification of delay
5839 slots, varying length branches, negated branches and all combinations
5840 of the above. it returns the appropriate output template to emit the
5841 branch. */
5843 const char *
5844 output_bvb (operands, nullify, length, negated, insn, which)
5845 rtx *operands ATTRIBUTE_UNUSED;
5846 int nullify, length, negated;
5847 rtx insn;
5848 int which;
5850 static char buf[100];
5851 int useskip = 0;
5853 /* A conditional branch to the following instruction (eg the delay slot) is
5854 asking for a disaster. I do not think this can happen as this pattern
5855 is only used when optimizing; jump optimization should eliminate the
5856 jump. But be prepared just in case. */
5858 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5859 return "";
5861 /* If this is a long branch with its delay slot unfilled, set `nullify'
5862 as it can nullify the delay slot and save a nop. */
5863 if (length == 8 && dbr_sequence_length () == 0)
5864 nullify = 1;
5866 /* If this is a short forward conditional branch which did not get
5867 its delay slot filled, the delay slot can still be nullified. */
5868 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5869 nullify = forward_branch_p (insn);
5871 /* A forward branch over a single nullified insn can be done with a
5872 extrs instruction. This avoids a single cycle penalty due to
5873 mis-predicted branch if we fall through (branch not taken). */
5875 if (length == 4
5876 && next_real_insn (insn) != 0
5877 && get_attr_length (next_real_insn (insn)) == 4
5878 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5879 && nullify)
5880 useskip = 1;
5882 switch (length)
5885 /* All short conditional branches except backwards with an unfilled
5886 delay slot. */
5887 case 4:
5888 if (useskip)
5889 strcpy (buf, "{vextrs,|extrw,s,}");
5890 else
5891 strcpy (buf, "{bvb,|bb,}");
5892 if (useskip && GET_MODE (operands[0]) == DImode)
5893 strcpy (buf, "extrd,s,*}");
5894 else if (GET_MODE (operands[0]) == DImode)
5895 strcpy (buf, "bb,*");
5896 if ((which == 0 && negated)
5897 || (which == 1 && ! negated))
5898 strcat (buf, ">=");
5899 else
5900 strcat (buf, "<");
5901 if (useskip)
5902 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5903 else if (nullify && negated)
5904 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5905 else if (nullify && ! negated)
5906 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5907 else if (! nullify && negated)
5908 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5909 else if (! nullify && ! negated)
5910 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5911 break;
5913 /* All long conditionals. Note a short backward branch with an
5914 unfilled delay slot is treated just like a long backward branch
5915 with an unfilled delay slot. */
5916 case 8:
5917 /* Handle weird backwards branch with a filled delay slot
5918 with is nullified. */
5919 if (dbr_sequence_length () != 0
5920 && ! forward_branch_p (insn)
5921 && nullify)
5923 strcpy (buf, "{bvb,|bb,}");
5924 if (GET_MODE (operands[0]) == DImode)
5925 strcat (buf, "*");
5926 if ((which == 0 && negated)
5927 || (which == 1 && ! negated))
5928 strcat (buf, "<");
5929 else
5930 strcat (buf, ">=");
5931 if (negated)
5932 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5933 else
5934 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5936 /* Handle short backwards branch with an unfilled delay slot.
5937 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5938 taken and untaken branches. */
5939 else if (dbr_sequence_length () == 0
5940 && ! forward_branch_p (insn)
5941 && INSN_ADDRESSES_SET_P ()
5942 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5943 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5945 strcpy (buf, "{bvb,|bb,}");
5946 if (GET_MODE (operands[0]) == DImode)
5947 strcat (buf, "*");
5948 if ((which == 0 && negated)
5949 || (which == 1 && ! negated))
5950 strcat (buf, ">=");
5951 else
5952 strcat (buf, "<");
5953 if (negated)
5954 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5955 else
5956 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5958 else
5960 strcpy (buf, "{vextrs,|extrw,s,}");
5961 if (GET_MODE (operands[0]) == DImode)
5962 strcpy (buf, "extrd,s,*");
5963 if ((which == 0 && negated)
5964 || (which == 1 && ! negated))
5965 strcat (buf, "<");
5966 else
5967 strcat (buf, ">=");
5968 if (nullify && negated)
5969 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5970 else if (nullify && ! negated)
5971 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5972 else if (negated)
5973 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5974 else
5975 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5977 break;
5979 default:
5980 abort ();
5982 return buf;
5985 /* Return the output template for emitting a dbra type insn.
5987 Note it may perform some output operations on its own before
5988 returning the final output string. */
5989 const char *
5990 output_dbra (operands, insn, which_alternative)
5991 rtx *operands;
5992 rtx insn;
5993 int which_alternative;
5996 /* A conditional branch to the following instruction (eg the delay slot) is
5997 asking for a disaster. Be prepared! */
5999 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
6001 if (which_alternative == 0)
6002 return "ldo %1(%0),%0";
6003 else if (which_alternative == 1)
6005 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6006 output_asm_insn ("ldw -16(%%r30),%4", operands);
6007 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6008 return "{fldws|fldw} -16(%%r30),%0";
6010 else
6012 output_asm_insn ("ldw %0,%4", operands);
6013 return "ldo %1(%4),%4\n\tstw %4,%0";
6017 if (which_alternative == 0)
6019 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6020 int length = get_attr_length (insn);
6022 /* If this is a long branch with its delay slot unfilled, set `nullify'
6023 as it can nullify the delay slot and save a nop. */
6024 if (length == 8 && dbr_sequence_length () == 0)
6025 nullify = 1;
6027 /* If this is a short forward conditional branch which did not get
6028 its delay slot filled, the delay slot can still be nullified. */
6029 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6030 nullify = forward_branch_p (insn);
6032 /* Handle short versions first. */
6033 if (length == 4 && nullify)
6034 return "addib,%C2,n %1,%0,%3";
6035 else if (length == 4 && ! nullify)
6036 return "addib,%C2 %1,%0,%3";
6037 else if (length == 8)
6039 /* Handle weird backwards branch with a fulled delay slot
6040 which is nullified. */
6041 if (dbr_sequence_length () != 0
6042 && ! forward_branch_p (insn)
6043 && nullify)
6044 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6045 /* Handle short backwards branch with an unfilled delay slot.
6046 Using a addb;nop rather than addi;bl saves 1 cycle for both
6047 taken and untaken branches. */
6048 else if (dbr_sequence_length () == 0
6049 && ! forward_branch_p (insn)
6050 && INSN_ADDRESSES_SET_P ()
6051 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6052 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6053 return "addib,%C2 %1,%0,%3%#";
6055 /* Handle normal cases. */
6056 if (nullify)
6057 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6058 else
6059 return "addi,%N2 %1,%0,%0\n\tb %3";
6061 else
6062 abort ();
6064 /* Deal with gross reload from FP register case. */
6065 else if (which_alternative == 1)
6067 /* Move loop counter from FP register to MEM then into a GR,
6068 increment the GR, store the GR into MEM, and finally reload
6069 the FP register from MEM from within the branch's delay slot. */
6070 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6071 operands);
6072 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6073 if (get_attr_length (insn) == 24)
6074 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6075 else
6076 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6078 /* Deal with gross reload from memory case. */
6079 else
6081 /* Reload loop counter from memory, the store back to memory
6082 happens in the branch's delay slot. */
6083 output_asm_insn ("ldw %0,%4", operands);
6084 if (get_attr_length (insn) == 12)
6085 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6086 else
6087 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6091 /* Return the output template for emitting a dbra type insn.
6093 Note it may perform some output operations on its own before
6094 returning the final output string. */
6095 const char *
6096 output_movb (operands, insn, which_alternative, reverse_comparison)
6097 rtx *operands;
6098 rtx insn;
6099 int which_alternative;
6100 int reverse_comparison;
6103 /* A conditional branch to the following instruction (eg the delay slot) is
6104 asking for a disaster. Be prepared! */
6106 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
6108 if (which_alternative == 0)
6109 return "copy %1,%0";
6110 else if (which_alternative == 1)
6112 output_asm_insn ("stw %1,-16(%%r30)", operands);
6113 return "{fldws|fldw} -16(%%r30),%0";
6115 else if (which_alternative == 2)
6116 return "stw %1,%0";
6117 else
6118 return "mtsar %r1";
6121 /* Support the second variant. */
6122 if (reverse_comparison)
6123 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6125 if (which_alternative == 0)
6127 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6128 int length = get_attr_length (insn);
6130 /* If this is a long branch with its delay slot unfilled, set `nullify'
6131 as it can nullify the delay slot and save a nop. */
6132 if (length == 8 && dbr_sequence_length () == 0)
6133 nullify = 1;
6135 /* If this is a short forward conditional branch which did not get
6136 its delay slot filled, the delay slot can still be nullified. */
6137 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6138 nullify = forward_branch_p (insn);
6140 /* Handle short versions first. */
6141 if (length == 4 && nullify)
6142 return "movb,%C2,n %1,%0,%3";
6143 else if (length == 4 && ! nullify)
6144 return "movb,%C2 %1,%0,%3";
6145 else if (length == 8)
6147 /* Handle weird backwards branch with a filled delay slot
6148 which is nullified. */
6149 if (dbr_sequence_length () != 0
6150 && ! forward_branch_p (insn)
6151 && nullify)
6152 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6154 /* Handle short backwards branch with an unfilled delay slot.
6155 Using a movb;nop rather than or;bl saves 1 cycle for both
6156 taken and untaken branches. */
6157 else if (dbr_sequence_length () == 0
6158 && ! forward_branch_p (insn)
6159 && INSN_ADDRESSES_SET_P ()
6160 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6161 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6162 return "movb,%C2 %1,%0,%3%#";
6163 /* Handle normal cases. */
6164 if (nullify)
6165 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6166 else
6167 return "or,%N2 %1,%%r0,%0\n\tb %3";
6169 else
6170 abort ();
6172 /* Deal with gross reload from FP register case. */
6173 else if (which_alternative == 1)
6175 /* Move loop counter from FP register to MEM then into a GR,
6176 increment the GR, store the GR into MEM, and finally reload
6177 the FP register from MEM from within the branch's delay slot. */
6178 output_asm_insn ("stw %1,-16(%%r30)", operands);
6179 if (get_attr_length (insn) == 12)
6180 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6181 else
6182 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6184 /* Deal with gross reload from memory case. */
6185 else if (which_alternative == 2)
6187 /* Reload loop counter from memory, the store back to memory
6188 happens in the branch's delay slot. */
6189 if (get_attr_length (insn) == 8)
6190 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6191 else
6192 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6194 /* Handle SAR as a destination. */
6195 else
6197 if (get_attr_length (insn) == 8)
6198 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6199 else
6200 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6204 /* Copy any FP arguments in INSN into integer registers. */
6205 static void
6206 copy_fp_args (insn)
6207 rtx insn;
6209 rtx link;
6210 rtx xoperands[2];
6212 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6214 int arg_mode, regno;
6215 rtx use = XEXP (link, 0);
6217 if (! (GET_CODE (use) == USE
6218 && GET_CODE (XEXP (use, 0)) == REG
6219 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6220 continue;
6222 arg_mode = GET_MODE (XEXP (use, 0));
6223 regno = REGNO (XEXP (use, 0));
6225 /* Is it a floating point register? */
6226 if (regno >= 32 && regno <= 39)
6228 /* Copy the FP register into an integer register via memory. */
6229 if (arg_mode == SFmode)
6231 xoperands[0] = XEXP (use, 0);
6232 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6233 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6234 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6236 else
6238 xoperands[0] = XEXP (use, 0);
6239 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6240 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6241 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6242 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6248 /* Compute length of the FP argument copy sequence for INSN. */
6249 static int
6250 length_fp_args (insn)
6251 rtx insn;
6253 int length = 0;
6254 rtx link;
6256 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6258 int arg_mode, regno;
6259 rtx use = XEXP (link, 0);
6261 if (! (GET_CODE (use) == USE
6262 && GET_CODE (XEXP (use, 0)) == REG
6263 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6264 continue;
6266 arg_mode = GET_MODE (XEXP (use, 0));
6267 regno = REGNO (XEXP (use, 0));
6269 /* Is it a floating point register? */
6270 if (regno >= 32 && regno <= 39)
6272 if (arg_mode == SFmode)
6273 length += 8;
6274 else
6275 length += 12;
6279 return length;
6282 /* Return the attribute length for the millicode call instruction INSN.
6283 The length must match the code generated by output_millicode_call.
6284 We include the delay slot in the returned length as it is better to
6285 over estimate the length than to under estimate it. */
6288 attr_length_millicode_call (insn)
6289 rtx insn;
6291 unsigned long distance = -1;
6293 if (INSN_ADDRESSES_SET_P ())
6295 distance = (total_code_bytes + insn_current_reference_address (insn));
6296 if (distance < total_code_bytes)
6297 distance = -1;
6300 if (TARGET_64BIT)
6302 if (!TARGET_LONG_CALLS && distance < 7600000)
6303 return 8;
6305 return 20;
6307 else if (TARGET_PORTABLE_RUNTIME)
6308 return 24;
6309 else
6311 if (!TARGET_LONG_CALLS && distance < 240000)
6312 return 8;
6314 if (TARGET_LONG_ABS_CALL && !flag_pic)
6315 return 12;
6317 return 24;
6321 /* INSN is a function call. It may have an unconditional jump
6322 in its delay slot.
6324 CALL_DEST is the routine we are calling. */
6326 const char *
6327 output_millicode_call (insn, call_dest)
6328 rtx insn;
6329 rtx call_dest;
6331 int attr_length = get_attr_length (insn);
6332 int seq_length = dbr_sequence_length ();
6333 int distance;
6334 rtx seq_insn;
6335 rtx xoperands[3];
6337 xoperands[0] = call_dest;
6338 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6340 /* Handle the common case where we are sure that the branch will
6341 reach the beginning of the $CODE$ subspace. The within reach
6342 form of the $$sh_func_adrs call has a length of 28. Because
6343 it has an attribute type of multi, it never has a nonzero
6344 sequence length. The length of the $$sh_func_adrs is the same
6345 as certain out of reach PIC calls to other routines. */
6346 if (!TARGET_LONG_CALLS
6347 && ((seq_length == 0
6348 && (attr_length == 12
6349 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6350 || (seq_length != 0 && attr_length == 8)))
6352 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6354 else
6356 if (TARGET_64BIT)
6358 /* It might seem that one insn could be saved by accessing
6359 the millicode function using the linkage table. However,
6360 this doesn't work in shared libraries and other dynamically
6361 loaded objects. Using a pc-relative sequence also avoids
6362 problems related to the implicit use of the gp register. */
6363 output_asm_insn ("b,l .+8,%%r1", xoperands);
6365 if (TARGET_GAS)
6367 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6368 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6370 else
6372 xoperands[1] = gen_label_rtx ();
6373 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6374 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6375 CODE_LABEL_NUMBER (xoperands[1]));
6376 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6379 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6381 else if (TARGET_PORTABLE_RUNTIME)
6383 /* Pure portable runtime doesn't allow be/ble; we also don't
6384 have PIC support in the assembler/linker, so this sequence
6385 is needed. */
6387 /* Get the address of our target into %r1. */
6388 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6389 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6391 /* Get our return address into %r31. */
6392 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6393 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6395 /* Jump to our target address in %r1. */
6396 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6398 else if (!flag_pic)
6400 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6401 if (TARGET_PA_20)
6402 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6403 else
6404 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6406 else
6408 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6409 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6411 if (TARGET_SOM || !TARGET_GAS)
6413 /* The HP assembler can generate relocations for the
6414 difference of two symbols. GAS can do this for a
6415 millicode symbol but not an arbitrary external
6416 symbol when generating SOM output. */
6417 xoperands[1] = gen_label_rtx ();
6418 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6419 CODE_LABEL_NUMBER (xoperands[1]));
6420 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6421 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6423 else
6425 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6426 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6427 xoperands);
6430 /* Jump to our target address in %r1. */
6431 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6435 if (seq_length == 0)
6436 output_asm_insn ("nop", xoperands);
6438 /* We are done if there isn't a jump in the delay slot. */
6439 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6440 return "";
6442 /* This call has an unconditional jump in its delay slot. */
6443 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6445 /* See if the return address can be adjusted. Use the containing
6446 sequence insn's address. */
6447 if (INSN_ADDRESSES_SET_P ())
6449 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6450 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6451 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6453 if (VAL_14_BITS_P (distance))
6455 xoperands[1] = gen_label_rtx ();
6456 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6457 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6458 CODE_LABEL_NUMBER (xoperands[1]));
6460 else
6461 /* ??? This branch may not reach its target. */
6462 output_asm_insn ("nop\n\tb,n %0", xoperands);
6464 else
6465 /* ??? This branch may not reach its target. */
6466 output_asm_insn ("nop\n\tb,n %0", xoperands);
6468 /* Delete the jump. */
6469 PUT_CODE (NEXT_INSN (insn), NOTE);
6470 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6471 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6473 return "";
6476 /* Return the attribute length of the call instruction INSN. The SIBCALL
6477 flag indicates whether INSN is a regular call or a sibling call. The
6478 length must match the code generated by output_call. We include the delay
6479 slot in the returned length as it is better to over estimate the length
6480 than to under estimate it. */
6483 attr_length_call (insn, sibcall)
6484 rtx insn;
6485 int sibcall;
6487 unsigned long distance = -1;
6489 if (INSN_ADDRESSES_SET_P ())
6491 distance = (total_code_bytes + insn_current_reference_address (insn));
6492 if (distance < total_code_bytes)
6493 distance = -1;
6496 if (TARGET_64BIT)
6498 if (!TARGET_LONG_CALLS
6499 && ((!sibcall && distance < 7600000) || distance < 240000))
6500 return 8;
6502 return (sibcall ? 28 : 24);
6504 else
6506 if (!TARGET_LONG_CALLS
6507 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
6508 || distance < 240000))
6509 return 8;
6511 if (TARGET_LONG_ABS_CALL && !flag_pic)
6512 return 12;
6514 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6515 || (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
6517 if (TARGET_PA_20)
6518 return 20;
6520 return 28;
6522 else
6524 int length = 0;
6526 if (TARGET_SOM)
6527 length += length_fp_args (insn);
6529 if (flag_pic)
6530 length += 4;
6532 if (TARGET_PA_20)
6533 return (length + 32);
6535 if (!TARGET_NO_SPACE_REGS)
6536 length += 8;
6538 if (!sibcall)
6539 length += 8;
6541 return (length + 32);
6546 /* INSN is a function call. It may have an unconditional jump
6547 in its delay slot.
6549 CALL_DEST is the routine we are calling. */
6551 const char *
6552 output_call (insn, call_dest, sibcall)
6553 rtx insn;
6554 rtx call_dest;
6555 int sibcall;
6557 int delay_insn_deleted = 0;
6558 int delay_slot_filled = 0;
6559 int seq_length = dbr_sequence_length ();
6560 rtx xoperands[2];
6562 xoperands[0] = call_dest;
6564 /* Handle the common case where we're sure that the branch will reach
6565 the beginning of the $CODE$ subspace. */
6566 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
6568 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6569 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
6571 else
6573 if (TARGET_64BIT)
6575 /* ??? As far as I can tell, the HP linker doesn't support the
6576 long pc-relative sequence described in the 64-bit runtime
6577 architecture. So, we use a slightly longer indirect call. */
6578 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6580 xoperands[0] = p->internal_label;
6581 xoperands[1] = gen_label_rtx ();
6583 /* If this isn't a sibcall, we put the load of %r27 into the
6584 delay slot. We can't do this in a sibcall as we don't
6585 have a second call-clobbered scratch register available. */
6586 if (seq_length != 0
6587 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6588 && !sibcall)
6590 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6591 optimize, 0, 0);
6593 /* Now delete the delay insn. */
6594 PUT_CODE (NEXT_INSN (insn), NOTE);
6595 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6596 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6597 delay_insn_deleted = 1;
6600 output_asm_insn ("addil LT'%0,%%r27", xoperands);
6601 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
6602 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
6604 if (sibcall)
6606 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6607 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
6608 output_asm_insn ("bve (%%r1)", xoperands);
6610 else
6612 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
6613 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
6614 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6615 delay_slot_filled = 1;
6618 else
6620 int indirect_call = 0;
6622 /* Emit a long call. There are several different sequences
6623 of increasing length and complexity. In most cases,
6624 they don't allow an instruction in the delay slot. */
6625 if (!(TARGET_LONG_ABS_CALL && !flag_pic)
6626 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6627 && !(TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
6628 indirect_call = 1;
6630 if (seq_length != 0
6631 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6632 && !sibcall
6633 && (!TARGET_PA_20 || indirect_call))
6635 /* A non-jump insn in the delay slot. By definition we can
6636 emit this insn before the call (and in fact before argument
6637 relocating. */
6638 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6640 /* Now delete the delay insn. */
6641 PUT_CODE (NEXT_INSN (insn), NOTE);
6642 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6643 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6644 delay_insn_deleted = 1;
6647 if (TARGET_LONG_ABS_CALL && !flag_pic)
6649 /* This is the best sequence for making long calls in
6650 non-pic code. Unfortunately, GNU ld doesn't provide
6651 the stub needed for external calls, and GAS's support
6652 for this with the SOM linker is buggy. */
6653 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6654 if (sibcall)
6655 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
6656 else
6658 if (TARGET_PA_20)
6659 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
6660 xoperands);
6661 else
6662 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6664 output_asm_insn ("copy %%r31,%%r2", xoperands);
6665 delay_slot_filled = 1;
6668 else
6670 if (TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6672 /* The HP assembler and linker can handle relocations
6673 for the difference of two symbols. GAS and the HP
6674 linker can't do this when one of the symbols is
6675 external. */
6676 xoperands[1] = gen_label_rtx ();
6677 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6678 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6679 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6680 CODE_LABEL_NUMBER (xoperands[1]));
6681 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6683 else if (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL)
6685 /* GAS currently can't generate the relocations that
6686 are needed for the SOM linker under HP-UX using this
6687 sequence. The GNU linker doesn't generate the stubs
6688 that are needed for external calls on TARGET_ELF32
6689 with this sequence. For now, we have to use a
6690 longer plabel sequence when using GAS. */
6691 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6692 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
6693 xoperands);
6694 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
6695 xoperands);
6697 else
6699 /* Emit a long plabel-based call sequence. This is
6700 essentially an inline implementation of $$dyncall.
6701 We don't actually try to call $$dyncall as this is
6702 as difficult as calling the function itself. */
6703 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6705 xoperands[0] = p->internal_label;
6706 xoperands[1] = gen_label_rtx ();
6708 /* Since the call is indirect, FP arguments in registers
6709 need to be copied to the general registers. Then, the
6710 argument relocation stub will copy them back. */
6711 if (TARGET_SOM)
6712 copy_fp_args (insn);
6714 if (flag_pic)
6716 output_asm_insn ("addil LT'%0,%%r19", xoperands);
6717 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
6718 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
6720 else
6722 output_asm_insn ("addil LR'%0-$global$,%%r27",
6723 xoperands);
6724 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
6725 xoperands);
6728 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
6729 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
6730 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
6731 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6733 if (!sibcall && !TARGET_PA_20)
6735 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
6736 if (TARGET_NO_SPACE_REGS)
6737 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
6738 else
6739 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
6743 if (TARGET_PA_20)
6745 if (sibcall)
6746 output_asm_insn ("bve (%%r1)", xoperands);
6747 else
6749 if (indirect_call)
6751 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6752 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
6753 delay_slot_filled = 1;
6755 else
6756 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6759 else
6761 if (!TARGET_NO_SPACE_REGS)
6762 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
6763 xoperands);
6765 if (sibcall)
6767 if (TARGET_NO_SPACE_REGS)
6768 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
6769 else
6770 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
6772 else
6774 if (TARGET_NO_SPACE_REGS)
6775 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
6776 else
6777 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
6779 if (indirect_call)
6780 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
6781 else
6782 output_asm_insn ("copy %%r31,%%r2", xoperands);
6783 delay_slot_filled = 1;
6790 if (seq_length == 0 || (delay_insn_deleted && !delay_slot_filled))
6791 output_asm_insn ("nop", xoperands);
6793 /* We are done if there isn't a jump in the delay slot. */
6794 if (seq_length == 0
6795 || delay_insn_deleted
6796 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6797 return "";
6799 /* A sibcall should never have a branch in the delay slot. */
6800 if (sibcall)
6801 abort ();
6803 /* This call has an unconditional jump in its delay slot. */
6804 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6806 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
6808 /* See if the return address can be adjusted. Use the containing
6809 sequence insn's address. */
6810 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6811 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6812 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6814 if (VAL_14_BITS_P (distance))
6816 xoperands[1] = gen_label_rtx ();
6817 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
6818 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6819 CODE_LABEL_NUMBER (xoperands[1]));
6821 else
6822 /* ??? This branch may not reach its target. */
6823 output_asm_insn ("nop\n\tb,n %0", xoperands);
6825 else
6826 /* ??? This branch may not reach its target. */
6827 output_asm_insn ("b,n %0", xoperands);
6829 /* Delete the jump. */
6830 PUT_CODE (NEXT_INSN (insn), NOTE);
6831 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6832 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6834 return "";
6837 /* Return the attribute length of the indirect call instruction INSN.
6838 The length must match the code generated by output_indirect call.
6839 The returned length includes the delay slot. Currently, the delay
6840 slot of an indirect call sequence is not exposed and it is used by
6841 the sequence itself. */
6844 attr_length_indirect_call (insn)
6845 rtx insn;
6847 unsigned long distance = -1;
6849 if (INSN_ADDRESSES_SET_P ())
6851 distance = (total_code_bytes + insn_current_reference_address (insn));
6852 if (distance < total_code_bytes)
6853 distance = -1;
6856 if (TARGET_64BIT)
6857 return 12;
6859 if (TARGET_FAST_INDIRECT_CALLS
6860 || (!TARGET_PORTABLE_RUNTIME
6861 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
6862 return 8;
6864 if (flag_pic)
6865 return 24;
6867 if (TARGET_PORTABLE_RUNTIME)
6868 return 20;
6870 /* Out of reach, can use ble. */
6871 return 12;
6874 const char *
6875 output_indirect_call (insn, call_dest)
6876 rtx insn;
6877 rtx call_dest;
6879 rtx xoperands[1];
6881 if (TARGET_64BIT)
6883 xoperands[0] = call_dest;
6884 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
6885 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
6886 return "";
6889 /* First the special case for kernels, level 0 systems, etc. */
6890 if (TARGET_FAST_INDIRECT_CALLS)
6891 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
6893 /* Now the normal case -- we can reach $$dyncall directly or
6894 we're sure that we can get there via a long-branch stub.
6896 No need to check target flags as the length uniquely identifies
6897 the remaining cases. */
6898 if (attr_length_indirect_call (insn) == 8)
6899 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
6901 /* Long millicode call, but we are not generating PIC or portable runtime
6902 code. */
6903 if (attr_length_indirect_call (insn) == 12)
6904 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
6906 /* Long millicode call for portable runtime. */
6907 if (attr_length_indirect_call (insn) == 20)
6908 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
6910 /* We need a long PIC call to $$dyncall. */
6911 xoperands[0] = NULL_RTX;
6912 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6913 if (TARGET_SOM || !TARGET_GAS)
6915 xoperands[0] = gen_label_rtx ();
6916 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
6917 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6918 CODE_LABEL_NUMBER (xoperands[0]));
6919 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
6921 else
6923 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
6924 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
6925 xoperands);
6927 output_asm_insn ("blr %%r0,%%r2", xoperands);
6928 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
6929 return "";
6932 /* Return the total length of the save and restore instructions needed for
6933 the data linkage table pointer (i.e., the PIC register) across the call
6934 instruction INSN. No-return calls do not require a save and restore.
6935 In addition, we may be able to avoid the save and restore for calls
6936 within the same translation unit. */
6939 attr_length_save_restore_dltp (insn)
6940 rtx insn;
6942 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
6943 return 0;
6945 return 8;
6948 /* In HPUX 8.0's shared library scheme, special relocations are needed
6949 for function labels if they might be passed to a function
6950 in a shared library (because shared libraries don't live in code
6951 space), and special magic is needed to construct their address. */
6953 void
6954 hppa_encode_label (sym)
6955 rtx sym;
6957 const char *str = XSTR (sym, 0);
6958 int len = strlen (str) + 1;
6959 char *newstr, *p;
6961 p = newstr = alloca (len + 1);
6962 *p++ = '@';
6963 strcpy (p, str);
6965 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
6968 static void
6969 pa_encode_section_info (decl, first)
6970 tree decl;
6971 int first;
6973 if (first && TEXT_SPACE_P (decl))
6975 rtx rtl;
6976 if (TREE_CODE (decl) == FUNCTION_DECL
6977 || TREE_CODE (decl) == VAR_DECL)
6978 rtl = DECL_RTL (decl);
6979 else
6980 rtl = TREE_CST_RTL (decl);
6981 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
6982 if (TREE_CODE (decl) == FUNCTION_DECL)
6983 hppa_encode_label (XEXP (DECL_RTL (decl), 0));
6987 /* This is sort of inverse to pa_encode_section_info. */
6989 static const char *
6990 pa_strip_name_encoding (str)
6991 const char *str;
6993 str += (*str == '@');
6994 str += (*str == '*');
6995 return str;
6999 function_label_operand (op, mode)
7000 rtx op;
7001 enum machine_mode mode ATTRIBUTE_UNUSED;
7003 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7006 /* Returns 1 if OP is a function label involved in a simple addition
7007 with a constant. Used to keep certain patterns from matching
7008 during instruction combination. */
7010 is_function_label_plus_const (op)
7011 rtx op;
7013 /* Strip off any CONST. */
7014 if (GET_CODE (op) == CONST)
7015 op = XEXP (op, 0);
7017 return (GET_CODE (op) == PLUS
7018 && function_label_operand (XEXP (op, 0), Pmode)
7019 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7022 /* Output assembly code for a thunk to FUNCTION. */
7024 static void
7025 pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
7026 FILE *file;
7027 tree thunk_fndecl;
7028 HOST_WIDE_INT delta;
7029 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED;
7030 tree function;
7032 const char *target_name = XSTR (XEXP (DECL_RTL (function), 0), 0);
7033 static unsigned int current_thunk_number;
7034 char label[16];
7035 const char *lab;
7036 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7037 lab = (*targetm.strip_name_encoding) (label);
7038 target_name = (*targetm.strip_name_encoding) (target_name);
7039 /* FIXME: total_code_bytes is not handled correctly in files with
7040 mi thunks. */
7041 pa_output_function_prologue (file, 0);
7042 if (VAL_14_BITS_P (delta))
7044 if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
7046 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7047 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7048 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7049 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7050 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7051 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7052 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7053 if (TARGET_NO_SPACE_REGS)
7054 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7055 else
7057 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
7058 fprintf (file, "\tmtsp %%r1,%%sr0\n");
7059 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7061 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7062 fprintf (file, "(%%r26),%%r26\n");
7064 else
7066 fprintf (file, "\tb %s\n\tldo ", target_name);
7067 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7068 fprintf (file, "(%%r26),%%r26\n");
7071 else
7073 if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
7075 fprintf (file, "\taddil L'");
7076 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7077 fprintf (file, ",%%r26\n\tldo R'");
7078 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7079 fprintf (file, "(%%r1),%%r26\n");
7080 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7081 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7082 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7083 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7084 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7085 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7086 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7087 if (TARGET_NO_SPACE_REGS)
7088 fprintf (file, "\tbe 0(%%sr4,%%r22)");
7089 else
7091 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
7092 fprintf (file, "\tmtsp %%r1,%%sr0\n");
7093 fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
7096 else
7098 fprintf (file, "\taddil L'");
7099 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7100 fprintf (file, ",%%r26\n\tb %s\n\tldo R'", target_name);
7101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
7102 fprintf (file, "(%%r1),%%r26\n");
7106 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7107 if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
7109 data_section ();
7110 fprintf (file, "\t.align 4\n");
7111 (*targetm.asm_out.internal_label) (file, "LTHN", current_thunk_number);
7112 fprintf (file, "\t.word P'%s\n", target_name);
7113 function_section (thunk_fndecl);
7115 current_thunk_number++;
7118 /* Only direct calls to static functions are allowed to be sibling (tail)
7119 call optimized.
7121 This restriction is necessary because some linker generated stubs will
7122 store return pointers into rp' in some cases which might clobber a
7123 live value already in rp'.
7125 In a sibcall the current function and the target function share stack
7126 space. Thus if the path to the current function and the path to the
7127 target function save a value in rp', they save the value into the
7128 same stack slot, which has undesirable consequences.
7130 Because of the deferred binding nature of shared libraries any function
7131 with external scope could be in a different load module and thus require
7132 rp' to be saved when calling that function. So sibcall optimizations
7133 can only be safe for static function.
7135 Note that GCC never needs return value relocations, so we don't have to
7136 worry about static calls with return value relocations (which require
7137 saving rp').
7139 It is safe to perform a sibcall optimization when the target function
7140 will never return. */
7141 static bool
7142 pa_function_ok_for_sibcall (decl, exp)
7143 tree decl;
7144 tree exp ATTRIBUTE_UNUSED;
7146 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7147 single subspace mode and the call is not indirect. As far as I know,
7148 there is no operating system support for the multiple subspace mode.
7149 It might be possible to support indirect calls if we didn't use
7150 $$dyncall (see the indirect sequence generated in output_call). */
7151 if (TARGET_ELF32)
7152 return (decl != NULL_TREE);
7154 /* Sibcalls are not ok because the arg pointer register is not a fixed
7155 register. This prevents the sibcall optimization from occuring. In
7156 addition, there are problems with stub placement using GNU ld. This
7157 is because a normal sibcall branch uses a 17-bit relocation while
7158 a regular call branch uses a 22-bit relocation. As a result, more
7159 care needs to be taken in the placement of long-branch stubs. */
7160 if (TARGET_64BIT)
7161 return false;
7163 return (decl
7164 && !TARGET_PORTABLE_RUNTIME
7165 && !TREE_PUBLIC (decl));
7168 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7169 use in fmpyadd instructions. */
7171 fmpyaddoperands (operands)
7172 rtx *operands;
7174 enum machine_mode mode = GET_MODE (operands[0]);
7176 /* Must be a floating point mode. */
7177 if (mode != SFmode && mode != DFmode)
7178 return 0;
7180 /* All modes must be the same. */
7181 if (! (mode == GET_MODE (operands[1])
7182 && mode == GET_MODE (operands[2])
7183 && mode == GET_MODE (operands[3])
7184 && mode == GET_MODE (operands[4])
7185 && mode == GET_MODE (operands[5])))
7186 return 0;
7188 /* All operands must be registers. */
7189 if (! (GET_CODE (operands[1]) == REG
7190 && GET_CODE (operands[2]) == REG
7191 && GET_CODE (operands[3]) == REG
7192 && GET_CODE (operands[4]) == REG
7193 && GET_CODE (operands[5]) == REG))
7194 return 0;
7196 /* Only 2 real operands to the addition. One of the input operands must
7197 be the same as the output operand. */
7198 if (! rtx_equal_p (operands[3], operands[4])
7199 && ! rtx_equal_p (operands[3], operands[5]))
7200 return 0;
7202 /* Inout operand of add can not conflict with any operands from multiply. */
7203 if (rtx_equal_p (operands[3], operands[0])
7204 || rtx_equal_p (operands[3], operands[1])
7205 || rtx_equal_p (operands[3], operands[2]))
7206 return 0;
7208 /* multiply can not feed into addition operands. */
7209 if (rtx_equal_p (operands[4], operands[0])
7210 || rtx_equal_p (operands[5], operands[0]))
7211 return 0;
7213 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7214 if (mode == SFmode
7215 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7216 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7217 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7218 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7219 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7220 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7221 return 0;
7223 /* Passed. Operands are suitable for fmpyadd. */
7224 return 1;
7227 #if !defined(USE_COLLECT2)
7228 static void
7229 pa_asm_out_constructor (symbol, priority)
7230 rtx symbol;
7231 int priority;
7233 if (!function_label_operand (symbol, VOIDmode))
7234 hppa_encode_label (symbol);
7236 #ifdef CTORS_SECTION_ASM_OP
7237 default_ctor_section_asm_out_constructor (symbol, priority);
7238 #else
7239 # ifdef TARGET_ASM_NAMED_SECTION
7240 default_named_section_asm_out_constructor (symbol, priority);
7241 # else
7242 default_stabs_asm_out_constructor (symbol, priority);
7243 # endif
7244 #endif
7247 static void
7248 pa_asm_out_destructor (symbol, priority)
7249 rtx symbol;
7250 int priority;
7252 if (!function_label_operand (symbol, VOIDmode))
7253 hppa_encode_label (symbol);
7255 #ifdef DTORS_SECTION_ASM_OP
7256 default_dtor_section_asm_out_destructor (symbol, priority);
7257 #else
7258 # ifdef TARGET_ASM_NAMED_SECTION
7259 default_named_section_asm_out_destructor (symbol, priority);
7260 # else
7261 default_stabs_asm_out_destructor (symbol, priority);
7262 # endif
7263 #endif
7265 #endif
7267 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7268 use in fmpysub instructions. */
7270 fmpysuboperands (operands)
7271 rtx *operands;
7273 enum machine_mode mode = GET_MODE (operands[0]);
7275 /* Must be a floating point mode. */
7276 if (mode != SFmode && mode != DFmode)
7277 return 0;
7279 /* All modes must be the same. */
7280 if (! (mode == GET_MODE (operands[1])
7281 && mode == GET_MODE (operands[2])
7282 && mode == GET_MODE (operands[3])
7283 && mode == GET_MODE (operands[4])
7284 && mode == GET_MODE (operands[5])))
7285 return 0;
7287 /* All operands must be registers. */
7288 if (! (GET_CODE (operands[1]) == REG
7289 && GET_CODE (operands[2]) == REG
7290 && GET_CODE (operands[3]) == REG
7291 && GET_CODE (operands[4]) == REG
7292 && GET_CODE (operands[5]) == REG))
7293 return 0;
7295 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
7296 operation, so operands[4] must be the same as operand[3]. */
7297 if (! rtx_equal_p (operands[3], operands[4]))
7298 return 0;
7300 /* multiply can not feed into subtraction. */
7301 if (rtx_equal_p (operands[5], operands[0]))
7302 return 0;
7304 /* Inout operand of sub can not conflict with any operands from multiply. */
7305 if (rtx_equal_p (operands[3], operands[0])
7306 || rtx_equal_p (operands[3], operands[1])
7307 || rtx_equal_p (operands[3], operands[2]))
7308 return 0;
7310 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7311 if (mode == SFmode
7312 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7313 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7314 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7315 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7316 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7317 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7318 return 0;
7320 /* Passed. Operands are suitable for fmpysub. */
7321 return 1;
7325 plus_xor_ior_operator (op, mode)
7326 rtx op;
7327 enum machine_mode mode ATTRIBUTE_UNUSED;
7329 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
7330 || GET_CODE (op) == IOR);
7333 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
7334 constants for shadd instructions. */
7335 static int
7336 shadd_constant_p (val)
7337 int val;
7339 if (val == 2 || val == 4 || val == 8)
7340 return 1;
7341 else
7342 return 0;
7345 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
7346 the valid constant for shadd instructions. */
7348 shadd_operand (op, mode)
7349 rtx op;
7350 enum machine_mode mode ATTRIBUTE_UNUSED;
7352 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
7355 /* Return 1 if OP is valid as a base register in a reg + reg address. */
7358 basereg_operand (op, mode)
7359 rtx op;
7360 enum machine_mode mode;
7362 /* cse will create some unscaled indexed addresses, however; it
7363 generally isn't a win on the PA, so avoid creating unscaled
7364 indexed addresses until after cse is finished. */
7365 if (!cse_not_expected)
7366 return 0;
7368 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7369 we don't have to worry about the braindamaged implicit space
7370 register selection from the basereg. */
7371 if (TARGET_NO_SPACE_REGS)
7372 return (GET_CODE (op) == REG);
7374 /* While it's always safe to index off the frame pointer, it's not
7375 always profitable, particularly when the frame pointer is being
7376 eliminated. */
7377 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
7378 return 1;
7380 return (GET_CODE (op) == REG
7381 && REG_POINTER (op)
7382 && register_operand (op, mode));
7385 /* Return 1 if this operand is anything other than a hard register. */
7388 non_hard_reg_operand (op, mode)
7389 rtx op;
7390 enum machine_mode mode ATTRIBUTE_UNUSED;
7392 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7395 /* Return 1 if INSN branches forward. Should be using insn_addresses
7396 to avoid walking through all the insns... */
7397 static int
7398 forward_branch_p (insn)
7399 rtx insn;
7401 rtx label = JUMP_LABEL (insn);
7403 while (insn)
7405 if (insn == label)
7406 break;
7407 else
7408 insn = NEXT_INSN (insn);
7411 return (insn == label);
7414 /* Return 1 if OP is an equality comparison, else return 0. */
7416 eq_neq_comparison_operator (op, mode)
7417 rtx op;
7418 enum machine_mode mode ATTRIBUTE_UNUSED;
7420 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7423 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
7425 movb_comparison_operator (op, mode)
7426 rtx op;
7427 enum machine_mode mode ATTRIBUTE_UNUSED;
7429 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7430 || GET_CODE (op) == LT || GET_CODE (op) == GE);
7433 /* Return 1 if INSN is in the delay slot of a call instruction. */
7435 jump_in_call_delay (insn)
7436 rtx insn;
7439 if (GET_CODE (insn) != JUMP_INSN)
7440 return 0;
7442 if (PREV_INSN (insn)
7443 && PREV_INSN (PREV_INSN (insn))
7444 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
7446 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
7448 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
7449 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
7452 else
7453 return 0;
7456 /* Output an unconditional move and branch insn. */
7458 const char *
7459 output_parallel_movb (operands, length)
7460 rtx *operands;
7461 int length;
7463 /* These are the cases in which we win. */
7464 if (length == 4)
7465 return "mov%I1b,tr %1,%0,%2";
7467 /* None of these cases wins, but they don't lose either. */
7468 if (dbr_sequence_length () == 0)
7470 /* Nothing in the delay slot, fake it by putting the combined
7471 insn (the copy or add) in the delay slot of a bl. */
7472 if (GET_CODE (operands[1]) == CONST_INT)
7473 return "b %2\n\tldi %1,%0";
7474 else
7475 return "b %2\n\tcopy %1,%0";
7477 else
7479 /* Something in the delay slot, but we've got a long branch. */
7480 if (GET_CODE (operands[1]) == CONST_INT)
7481 return "ldi %1,%0\n\tb %2";
7482 else
7483 return "copy %1,%0\n\tb %2";
7487 /* Output an unconditional add and branch insn. */
7489 const char *
7490 output_parallel_addb (operands, length)
7491 rtx *operands;
7492 int length;
7494 /* To make life easy we want operand0 to be the shared input/output
7495 operand and operand1 to be the readonly operand. */
7496 if (operands[0] == operands[1])
7497 operands[1] = operands[2];
7499 /* These are the cases in which we win. */
7500 if (length == 4)
7501 return "add%I1b,tr %1,%0,%3";
7503 /* None of these cases win, but they don't lose either. */
7504 if (dbr_sequence_length () == 0)
7506 /* Nothing in the delay slot, fake it by putting the combined
7507 insn (the copy or add) in the delay slot of a bl. */
7508 return "b %3\n\tadd%I1 %1,%0,%0";
7510 else
7512 /* Something in the delay slot, but we've got a long branch. */
7513 return "add%I1 %1,%0,%0\n\tb %3";
7517 /* Return nonzero if INSN (a jump insn) immediately follows a call
7518 to a named function. This is used to avoid filling the delay slot
7519 of the jump since it can usually be eliminated by modifying RP in
7520 the delay slot of the call. */
7523 following_call (insn)
7524 rtx insn;
7526 if (! TARGET_JUMP_IN_DELAY)
7527 return 0;
7529 /* Find the previous real insn, skipping NOTEs. */
7530 insn = PREV_INSN (insn);
7531 while (insn && GET_CODE (insn) == NOTE)
7532 insn = PREV_INSN (insn);
7534 /* Check for CALL_INSNs and millicode calls. */
7535 if (insn
7536 && ((GET_CODE (insn) == CALL_INSN
7537 && get_attr_type (insn) != TYPE_DYNCALL)
7538 || (GET_CODE (insn) == INSN
7539 && GET_CODE (PATTERN (insn)) != SEQUENCE
7540 && GET_CODE (PATTERN (insn)) != USE
7541 && GET_CODE (PATTERN (insn)) != CLOBBER
7542 && get_attr_type (insn) == TYPE_MILLI)))
7543 return 1;
7545 return 0;
7548 /* We use this hook to perform a PA specific optimization which is difficult
7549 to do in earlier passes.
7551 We want the delay slots of branches within jump tables to be filled.
7552 None of the compiler passes at the moment even has the notion that a
7553 PA jump table doesn't contain addresses, but instead contains actual
7554 instructions!
7556 Because we actually jump into the table, the addresses of each entry
7557 must stay constant in relation to the beginning of the table (which
7558 itself must stay constant relative to the instruction to jump into
7559 it). I don't believe we can guarantee earlier passes of the compiler
7560 will adhere to those rules.
7562 So, late in the compilation process we find all the jump tables, and
7563 expand them into real code -- eg each entry in the jump table vector
7564 will get an appropriate label followed by a jump to the final target.
7566 Reorg and the final jump pass can then optimize these branches and
7567 fill their delay slots. We end up with smaller, more efficient code.
7569 The jump instructions within the table are special; we must be able
7570 to identify them during assembly output (if the jumps don't get filled
7571 we need to emit a nop rather than nullifying the delay slot)). We
7572 identify jumps in switch tables by marking the SET with DImode.
7574 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
7575 insns. This serves two purposes, first it prevents jump.c from
7576 noticing that the last N entries in the table jump to the instruction
7577 immediately after the table and deleting the jumps. Second, those
7578 insns mark where we should emit .begin_brtab and .end_brtab directives
7579 when using GAS (allows for better link time optimizations). */
7581 void
7582 pa_reorg (insns)
7583 rtx insns;
7585 rtx insn;
7587 remove_useless_addtr_insns (insns, 1);
7589 if (pa_cpu < PROCESSOR_8000)
7590 pa_combine_instructions (get_insns ());
7593 /* This is fairly cheap, so always run it if optimizing. */
7594 if (optimize > 0 && !TARGET_BIG_SWITCH)
7596 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
7597 insns = get_insns ();
7598 for (insn = insns; insn; insn = NEXT_INSN (insn))
7600 rtx pattern, tmp, location;
7601 unsigned int length, i;
7603 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
7604 if (GET_CODE (insn) != JUMP_INSN
7605 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7606 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7607 continue;
7609 /* Emit marker for the beginning of the branch table. */
7610 emit_insn_before (gen_begin_brtab (), insn);
7612 pattern = PATTERN (insn);
7613 location = PREV_INSN (insn);
7614 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
7616 for (i = 0; i < length; i++)
7618 /* Emit a label before each jump to keep jump.c from
7619 removing this code. */
7620 tmp = gen_label_rtx ();
7621 LABEL_NUSES (tmp) = 1;
7622 emit_label_after (tmp, location);
7623 location = NEXT_INSN (location);
7625 if (GET_CODE (pattern) == ADDR_VEC)
7627 /* Emit the jump itself. */
7628 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
7629 tmp = emit_jump_insn_after (tmp, location);
7630 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
7631 /* It is easy to rely on the branch table markers
7632 during assembly output to trigger the correct code
7633 for a switch table jump with an unfilled delay slot,
7635 However, that requires state and assumes that we look
7636 at insns in order.
7638 We can't make such assumptions when computing the length
7639 of instructions. Ugh. We could walk the insn chain to
7640 determine if this instruction is in a branch table, but
7641 that can get rather expensive, particularly during the
7642 branch shortening phase of the compiler.
7644 So instead we mark this jump as being special. This is
7645 far from ideal and knows that no code after this will
7646 muck around with the mode of the JUMP_INSN itself. */
7647 PUT_MODE (tmp, SImode);
7648 LABEL_NUSES (JUMP_LABEL (tmp))++;
7649 location = NEXT_INSN (location);
7651 else
7653 /* Emit the jump itself. */
7654 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
7655 tmp = emit_jump_insn_after (tmp, location);
7656 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
7657 /* It is easy to rely on the branch table markers
7658 during assembly output to trigger the correct code
7659 for a switch table jump with an unfilled delay slot,
7661 However, that requires state and assumes that we look
7662 at insns in order.
7664 We can't make such assumptions when computing the length
7665 of instructions. Ugh. We could walk the insn chain to
7666 determine if this instruction is in a branch table, but
7667 that can get rather expensive, particularly during the
7668 branch shortening phase of the compiler.
7670 So instead we mark this jump as being special. This is
7671 far from ideal and knows that no code after this will
7672 muck around with the mode of the JUMP_INSN itself. */
7673 PUT_MODE (tmp, SImode);
7674 LABEL_NUSES (JUMP_LABEL (tmp))++;
7675 location = NEXT_INSN (location);
7678 /* Emit a BARRIER after the jump. */
7679 emit_barrier_after (location);
7680 location = NEXT_INSN (location);
7683 /* Emit marker for the end of the branch table. */
7684 emit_insn_before (gen_end_brtab (), location);
7685 location = NEXT_INSN (location);
7686 emit_barrier_after (location);
7688 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
7689 delete_insn (insn);
7692 else
7694 /* Sill need an end_brtab insn. */
7695 insns = get_insns ();
7696 for (insn = insns; insn; insn = NEXT_INSN (insn))
7698 /* Find an ADDR_VEC insn. */
7699 if (GET_CODE (insn) != JUMP_INSN
7700 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7701 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7702 continue;
7704 /* Now generate markers for the beginning and end of the
7705 branch table. */
7706 emit_insn_before (gen_begin_brtab (), insn);
7707 emit_insn_after (gen_end_brtab (), insn);
7712 /* The PA has a number of odd instructions which can perform multiple
7713 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
7714 it may be profitable to combine two instructions into one instruction
7715 with two outputs. It's not profitable PA2.0 machines because the
7716 two outputs would take two slots in the reorder buffers.
7718 This routine finds instructions which can be combined and combines
7719 them. We only support some of the potential combinations, and we
7720 only try common ways to find suitable instructions.
7722 * addb can add two registers or a register and a small integer
7723 and jump to a nearby (+-8k) location. Normally the jump to the
7724 nearby location is conditional on the result of the add, but by
7725 using the "true" condition we can make the jump unconditional.
7726 Thus addb can perform two independent operations in one insn.
7728 * movb is similar to addb in that it can perform a reg->reg
7729 or small immediate->reg copy and jump to a nearby (+-8k location).
7731 * fmpyadd and fmpysub can perform a FP multiply and either an
7732 FP add or FP sub if the operands of the multiply and add/sub are
7733 independent (there are other minor restrictions). Note both
7734 the fmpy and fadd/fsub can in theory move to better spots according
7735 to data dependencies, but for now we require the fmpy stay at a
7736 fixed location.
7738 * Many of the memory operations can perform pre & post updates
7739 of index registers. GCC's pre/post increment/decrement addressing
7740 is far too simple to take advantage of all the possibilities. This
7741 pass may not be suitable since those insns may not be independent.
7743 * comclr can compare two ints or an int and a register, nullify
7744 the following instruction and zero some other register. This
7745 is more difficult to use as it's harder to find an insn which
7746 will generate a comclr than finding something like an unconditional
7747 branch. (conditional moves & long branches create comclr insns).
7749 * Most arithmetic operations can conditionally skip the next
7750 instruction. They can be viewed as "perform this operation
7751 and conditionally jump to this nearby location" (where nearby
7752 is an insns away). These are difficult to use due to the
7753 branch length restrictions. */
7755 static void
7756 pa_combine_instructions (insns)
7757 rtx insns ATTRIBUTE_UNUSED;
7759 rtx anchor, new;
7761 /* This can get expensive since the basic algorithm is on the
7762 order of O(n^2) (or worse). Only do it for -O2 or higher
7763 levels of optimization. */
7764 if (optimize < 2)
7765 return;
7767 /* Walk down the list of insns looking for "anchor" insns which
7768 may be combined with "floating" insns. As the name implies,
7769 "anchor" instructions don't move, while "floating" insns may
7770 move around. */
7771 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7772 new = make_insn_raw (new);
7774 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7776 enum attr_pa_combine_type anchor_attr;
7777 enum attr_pa_combine_type floater_attr;
7779 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7780 Also ignore any special USE insns. */
7781 if ((GET_CODE (anchor) != INSN
7782 && GET_CODE (anchor) != JUMP_INSN
7783 && GET_CODE (anchor) != CALL_INSN)
7784 || GET_CODE (PATTERN (anchor)) == USE
7785 || GET_CODE (PATTERN (anchor)) == CLOBBER
7786 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
7787 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
7788 continue;
7790 anchor_attr = get_attr_pa_combine_type (anchor);
7791 /* See if anchor is an insn suitable for combination. */
7792 if (anchor_attr == PA_COMBINE_TYPE_FMPY
7793 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
7794 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7795 && ! forward_branch_p (anchor)))
7797 rtx floater;
7799 for (floater = PREV_INSN (anchor);
7800 floater;
7801 floater = PREV_INSN (floater))
7803 if (GET_CODE (floater) == NOTE
7804 || (GET_CODE (floater) == INSN
7805 && (GET_CODE (PATTERN (floater)) == USE
7806 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7807 continue;
7809 /* Anything except a regular INSN will stop our search. */
7810 if (GET_CODE (floater) != INSN
7811 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7812 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7814 floater = NULL_RTX;
7815 break;
7818 /* See if FLOATER is suitable for combination with the
7819 anchor. */
7820 floater_attr = get_attr_pa_combine_type (floater);
7821 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7822 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7823 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7824 && floater_attr == PA_COMBINE_TYPE_FMPY))
7826 /* If ANCHOR and FLOATER can be combined, then we're
7827 done with this pass. */
7828 if (pa_can_combine_p (new, anchor, floater, 0,
7829 SET_DEST (PATTERN (floater)),
7830 XEXP (SET_SRC (PATTERN (floater)), 0),
7831 XEXP (SET_SRC (PATTERN (floater)), 1)))
7832 break;
7835 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7836 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
7838 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
7840 if (pa_can_combine_p (new, anchor, floater, 0,
7841 SET_DEST (PATTERN (floater)),
7842 XEXP (SET_SRC (PATTERN (floater)), 0),
7843 XEXP (SET_SRC (PATTERN (floater)), 1)))
7844 break;
7846 else
7848 if (pa_can_combine_p (new, anchor, floater, 0,
7849 SET_DEST (PATTERN (floater)),
7850 SET_SRC (PATTERN (floater)),
7851 SET_SRC (PATTERN (floater))))
7852 break;
7857 /* If we didn't find anything on the backwards scan try forwards. */
7858 if (!floater
7859 && (anchor_attr == PA_COMBINE_TYPE_FMPY
7860 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
7862 for (floater = anchor; floater; floater = NEXT_INSN (floater))
7864 if (GET_CODE (floater) == NOTE
7865 || (GET_CODE (floater) == INSN
7866 && (GET_CODE (PATTERN (floater)) == USE
7867 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7869 continue;
7871 /* Anything except a regular INSN will stop our search. */
7872 if (GET_CODE (floater) != INSN
7873 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7874 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7876 floater = NULL_RTX;
7877 break;
7880 /* See if FLOATER is suitable for combination with the
7881 anchor. */
7882 floater_attr = get_attr_pa_combine_type (floater);
7883 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7884 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7885 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7886 && floater_attr == PA_COMBINE_TYPE_FMPY))
7888 /* If ANCHOR and FLOATER can be combined, then we're
7889 done with this pass. */
7890 if (pa_can_combine_p (new, anchor, floater, 1,
7891 SET_DEST (PATTERN (floater)),
7892 XEXP (SET_SRC (PATTERN (floater)),
7894 XEXP (SET_SRC (PATTERN (floater)),
7895 1)))
7896 break;
7901 /* FLOATER will be nonzero if we found a suitable floating
7902 insn for combination with ANCHOR. */
7903 if (floater
7904 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7905 || anchor_attr == PA_COMBINE_TYPE_FMPY))
7907 /* Emit the new instruction and delete the old anchor. */
7908 emit_insn_before (gen_rtx_PARALLEL
7909 (VOIDmode,
7910 gen_rtvec (2, PATTERN (anchor),
7911 PATTERN (floater))),
7912 anchor);
7914 PUT_CODE (anchor, NOTE);
7915 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7916 NOTE_SOURCE_FILE (anchor) = 0;
7918 /* Emit a special USE insn for FLOATER, then delete
7919 the floating insn. */
7920 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7921 delete_insn (floater);
7923 continue;
7925 else if (floater
7926 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
7928 rtx temp;
7929 /* Emit the new_jump instruction and delete the old anchor. */
7930 temp
7931 = emit_jump_insn_before (gen_rtx_PARALLEL
7932 (VOIDmode,
7933 gen_rtvec (2, PATTERN (anchor),
7934 PATTERN (floater))),
7935 anchor);
7937 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
7938 PUT_CODE (anchor, NOTE);
7939 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7940 NOTE_SOURCE_FILE (anchor) = 0;
7942 /* Emit a special USE insn for FLOATER, then delete
7943 the floating insn. */
7944 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7945 delete_insn (floater);
7946 continue;
7952 static int
7953 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
7954 rtx new, anchor, floater;
7955 int reversed;
7956 rtx dest, src1, src2;
7958 int insn_code_number;
7959 rtx start, end;
7961 /* Create a PARALLEL with the patterns of ANCHOR and
7962 FLOATER, try to recognize it, then test constraints
7963 for the resulting pattern.
7965 If the pattern doesn't match or the constraints
7966 aren't met keep searching for a suitable floater
7967 insn. */
7968 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7969 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
7970 INSN_CODE (new) = -1;
7971 insn_code_number = recog_memoized (new);
7972 if (insn_code_number < 0
7973 || (extract_insn (new), ! constrain_operands (1)))
7974 return 0;
7976 if (reversed)
7978 start = anchor;
7979 end = floater;
7981 else
7983 start = floater;
7984 end = anchor;
7987 /* There's up to three operands to consider. One
7988 output and two inputs.
7990 The output must not be used between FLOATER & ANCHOR
7991 exclusive. The inputs must not be set between
7992 FLOATER and ANCHOR exclusive. */
7994 if (reg_used_between_p (dest, start, end))
7995 return 0;
7997 if (reg_set_between_p (src1, start, end))
7998 return 0;
8000 if (reg_set_between_p (src2, start, end))
8001 return 0;
8003 /* If we get here, then everything is good. */
8004 return 1;
8007 /* Return nonzero if references for INSN are delayed.
8009 Millicode insns are actually function calls with some special
8010 constraints on arguments and register usage.
8012 Millicode calls always expect their arguments in the integer argument
8013 registers, and always return their result in %r29 (ret1). They
8014 are expected to clobber their arguments, %r1, %r29, and the return
8015 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8017 This function tells reorg that the references to arguments and
8018 millicode calls do not appear to happen until after the millicode call.
8019 This allows reorg to put insns which set the argument registers into the
8020 delay slot of the millicode call -- thus they act more like traditional
8021 CALL_INSNs.
8023 Note we can not consider side effects of the insn to be delayed because
8024 the branch and link insn will clobber the return pointer. If we happened
8025 to use the return pointer in the delay slot of the call, then we lose.
8027 get_attr_type will try to recognize the given insn, so make sure to
8028 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8029 in particular. */
8031 insn_refs_are_delayed (insn)
8032 rtx insn;
8034 return ((GET_CODE (insn) == INSN
8035 && GET_CODE (PATTERN (insn)) != SEQUENCE
8036 && GET_CODE (PATTERN (insn)) != USE
8037 && GET_CODE (PATTERN (insn)) != CLOBBER
8038 && get_attr_type (insn) == TYPE_MILLI));
8041 /* On the HP-PA the value is found in register(s) 28(-29), unless
8042 the mode is SF or DF. Then the value is returned in fr4 (32).
8044 This must perform the same promotions as PROMOTE_MODE, else
8045 PROMOTE_FUNCTION_RETURN will not work correctly.
8047 Small structures must be returned in a PARALLEL on PA64 in order
8048 to match the HP Compiler ABI. */
8051 function_value (valtype, func)
8052 tree valtype;
8053 tree func ATTRIBUTE_UNUSED;
8055 enum machine_mode valmode;
8057 /* Aggregates with a size less than or equal to 128 bits are returned
8058 in GR 28(-29). They are left justified. The pad bits are undefined.
8059 Larger aggregates are returned in memory. */
8060 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8062 rtx loc[2];
8063 int i, offset = 0;
8064 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8066 for (i = 0; i < ub; i++)
8068 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8069 gen_rtx_REG (DImode, 28 + i),
8070 GEN_INT (offset));
8071 offset += 8;
8074 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8077 if ((INTEGRAL_TYPE_P (valtype)
8078 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8079 || POINTER_TYPE_P (valtype))
8080 valmode = word_mode;
8081 else
8082 valmode = TYPE_MODE (valtype);
8084 if (TREE_CODE (valtype) == REAL_TYPE
8085 && TYPE_MODE (valtype) != TFmode
8086 && !TARGET_SOFT_FLOAT)
8087 return gen_rtx_REG (valmode, 32);
8089 return gen_rtx_REG (valmode, 28);
8092 /* Return the location of a parameter that is passed in a register or NULL
8093 if the parameter has any component that is passed in memory.
8095 This is new code and will be pushed to into the net sources after
8096 further testing.
8098 ??? We might want to restructure this so that it looks more like other
8099 ports. */
8101 function_arg (cum, mode, type, named, incoming)
8102 CUMULATIVE_ARGS *cum;
8103 enum machine_mode mode;
8104 tree type;
8105 int named ATTRIBUTE_UNUSED;
8106 int incoming;
8108 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8109 int alignment = 0;
8110 int arg_size;
8111 int fpr_reg_base;
8112 int gpr_reg_base;
8113 rtx retval;
8115 if (mode == VOIDmode)
8116 return NULL_RTX;
8118 arg_size = FUNCTION_ARG_SIZE (mode, type);
8120 /* If this arg would be passed partially or totally on the stack, then
8121 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8122 handle arguments which are split between regs and stack slots if
8123 the ABI mandates split arguments. */
8124 if (! TARGET_64BIT)
8126 /* The 32-bit ABI does not split arguments. */
8127 if (cum->words + arg_size > max_arg_words)
8128 return NULL_RTX;
8130 else
8132 if (arg_size > 1)
8133 alignment = cum->words & 1;
8134 if (cum->words + alignment >= max_arg_words)
8135 return NULL_RTX;
8138 /* The 32bit ABIs and the 64bit ABIs are rather different,
8139 particularly in their handling of FP registers. We might
8140 be able to cleverly share code between them, but I'm not
8141 going to bother in the hope that splitting them up results
8142 in code that is more easily understood. */
8144 if (TARGET_64BIT)
8146 /* Advance the base registers to their current locations.
8148 Remember, gprs grow towards smaller register numbers while
8149 fprs grow to higher register numbers. Also remember that
8150 although FP regs are 32-bit addressable, we pretend that
8151 the registers are 64-bits wide. */
8152 gpr_reg_base = 26 - cum->words;
8153 fpr_reg_base = 32 + cum->words;
8155 /* Arguments wider than one word and small aggregates need special
8156 treatment. */
8157 if (arg_size > 1
8158 || mode == BLKmode
8159 || (type && AGGREGATE_TYPE_P (type)))
8161 /* Double-extended precision (80-bit), quad-precision (128-bit)
8162 and aggregates including complex numbers are aligned on
8163 128-bit boundaries. The first eight 64-bit argument slots
8164 are associated one-to-one, with general registers r26
8165 through r19, and also with floating-point registers fr4
8166 through fr11. Arguments larger than one word are always
8167 passed in general registers.
8169 Using a PARALLEL with a word mode register results in left
8170 justified data on a big-endian target. */
8172 rtx loc[8];
8173 int i, offset = 0, ub = arg_size;
8175 /* Align the base register. */
8176 gpr_reg_base -= alignment;
8178 ub = MIN (ub, max_arg_words - cum->words - alignment);
8179 for (i = 0; i < ub; i++)
8181 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8182 gen_rtx_REG (DImode, gpr_reg_base),
8183 GEN_INT (offset));
8184 gpr_reg_base -= 1;
8185 offset += 8;
8188 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8191 else
8193 /* If the argument is larger than a word, then we know precisely
8194 which registers we must use. */
8195 if (arg_size > 1)
8197 if (cum->words)
8199 gpr_reg_base = 23;
8200 fpr_reg_base = 38;
8202 else
8204 gpr_reg_base = 25;
8205 fpr_reg_base = 34;
8208 /* Structures 5 to 8 bytes in size are passed in the general
8209 registers in the same manner as other non floating-point
8210 objects. The data is right-justified and zero-extended
8211 to 64 bits.
8213 This is magic. Normally, using a PARALLEL results in left
8214 justified data on a big-endian target. However, using a
8215 single double-word register provides the required right
8216 justication for 5 to 8 byte structures. This has nothing
8217 to do with the direction of padding specified for the argument.
8218 It has to do with how the data is widened and shifted into
8219 and from the register.
8221 Aside from adding load_multiple and store_multiple patterns,
8222 this is the only way that I have found to obtain right
8223 justification of BLKmode data when it has a size greater
8224 than one word. Splitting the operation into two SImode loads
8225 or returning a DImode REG results in left justified data. */
8226 if (mode == BLKmode)
8228 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8229 gen_rtx_REG (DImode, gpr_reg_base),
8230 const0_rtx);
8231 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8234 else
8236 /* We have a single word (32 bits). A simple computation
8237 will get us the register #s we need. */
8238 gpr_reg_base = 26 - cum->words;
8239 fpr_reg_base = 32 + 2 * cum->words;
8243 /* Determine if the argument needs to be passed in both general and
8244 floating point registers. */
8245 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8246 /* If we are doing soft-float with portable runtime, then there
8247 is no need to worry about FP regs. */
8248 && ! TARGET_SOFT_FLOAT
8249 /* The parameter must be some kind of float, else we can just
8250 pass it in integer registers. */
8251 && FLOAT_MODE_P (mode)
8252 /* The target function must not have a prototype. */
8253 && cum->nargs_prototype <= 0
8254 /* libcalls do not need to pass items in both FP and general
8255 registers. */
8256 && type != NULL_TREE
8257 /* All this hair applies to outgoing args only. */
8258 && ! incoming)
8259 /* Also pass outgoing floating arguments in both registers in indirect
8260 calls with the 32 bit ABI and the HP assembler since there is no
8261 way to the specify argument locations in static functions. */
8262 || (! TARGET_64BIT
8263 && ! TARGET_GAS
8264 && ! incoming
8265 && cum->indirect
8266 && FLOAT_MODE_P (mode)))
8268 retval
8269 = gen_rtx_PARALLEL
8270 (mode,
8271 gen_rtvec (2,
8272 gen_rtx_EXPR_LIST (VOIDmode,
8273 gen_rtx_REG (mode, fpr_reg_base),
8274 const0_rtx),
8275 gen_rtx_EXPR_LIST (VOIDmode,
8276 gen_rtx_REG (mode, gpr_reg_base),
8277 const0_rtx)));
8279 else
8281 /* See if we should pass this parameter in a general register. */
8282 if (TARGET_SOFT_FLOAT
8283 /* Indirect calls in the normal 32bit ABI require all arguments
8284 to be passed in general registers. */
8285 || (!TARGET_PORTABLE_RUNTIME
8286 && !TARGET_64BIT
8287 && !TARGET_ELF32
8288 && cum->indirect)
8289 /* If the parameter is not a floating point parameter, then
8290 it belongs in GPRs. */
8291 || !FLOAT_MODE_P (mode))
8292 retval = gen_rtx_REG (mode, gpr_reg_base);
8293 else
8294 retval = gen_rtx_REG (mode, fpr_reg_base);
8296 return retval;
8300 /* If this arg would be passed totally in registers or totally on the stack,
8301 then this routine should return zero. It is currently called only for
8302 the 64-bit target. */
8304 function_arg_partial_nregs (cum, mode, type, named)
8305 CUMULATIVE_ARGS *cum;
8306 enum machine_mode mode;
8307 tree type;
8308 int named ATTRIBUTE_UNUSED;
8310 unsigned int max_arg_words = 8;
8311 unsigned int offset = 0;
8313 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
8314 offset = 1;
8316 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
8317 /* Arg fits fully into registers. */
8318 return 0;
8319 else if (cum->words + offset >= max_arg_words)
8320 /* Arg fully on the stack. */
8321 return 0;
8322 else
8323 /* Arg is split. */
8324 return max_arg_words - cum->words - offset;
8328 /* Return 1 if this is a comparison operator. This allows the use of
8329 MATCH_OPERATOR to recognize all the branch insns. */
8332 cmpib_comparison_operator (op, mode)
8333 register rtx op;
8334 enum machine_mode mode;
8336 return ((mode == VOIDmode || GET_MODE (op) == mode)
8337 && (GET_CODE (op) == EQ
8338 || GET_CODE (op) == NE
8339 || GET_CODE (op) == GT
8340 || GET_CODE (op) == GTU
8341 || GET_CODE (op) == GE
8342 || GET_CODE (op) == LT
8343 || GET_CODE (op) == LE
8344 || GET_CODE (op) == LEU));
8347 /* On hpux10, the linker will give an error if we have a reference
8348 in the read-only data section to a symbol defined in a shared
8349 library. Therefore, expressions that might require a reloc can
8350 not be placed in the read-only data section. */
8352 static void
8353 pa_select_section (exp, reloc, align)
8354 tree exp;
8355 int reloc;
8356 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
8358 if (TREE_CODE (exp) == VAR_DECL
8359 && TREE_READONLY (exp)
8360 && !TREE_THIS_VOLATILE (exp)
8361 && DECL_INITIAL (exp)
8362 && (DECL_INITIAL (exp) == error_mark_node
8363 || TREE_CONSTANT (DECL_INITIAL (exp)))
8364 && !reloc)
8365 readonly_data_section ();
8366 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
8367 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
8368 && !reloc)
8369 readonly_data_section ();
8370 else
8371 data_section ();
8374 static void
8375 pa_globalize_label (stream, name)
8376 FILE *stream;
8377 const char *name;
8379 /* We only handle DATA objects here, functions are globalized in
8380 ASM_DECLARE_FUNCTION_NAME. */
8381 if (! FUNCTION_NAME_P (name))
8383 fputs ("\t.EXPORT ", stream);
8384 assemble_name (stream, name);
8385 fputs (",DATA\n", stream);
8388 #include "gt-pa.h"