/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
static int hppa_use_dfa_pipeline_interface (void);

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface

static int
hppa_use_dfa_pipeline_interface (void)
{
  return 1;
}
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void copy_reg_pointer (rtx, rtx);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static int shadd_constant_p (int);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movstr_length (rtx);
static int compute_clrstr_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static struct deferred_plabel *get_plabel (const char *)
     ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
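/* Added background note: a "plabel" (procedure label) is the descriptor
   the PA runtime uses to represent the address of a function for
   indirect calls; the entries recorded here are written out at the end
   of the file by output_deferred_plabels.  */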
struct deferred_plabel GTY(())
{
  rtx internal_label;
  const char *name;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END output_deferred_plabels

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
void
override_options (void)
{
  if (pa_cpu_string == NULL)
    pa_cpu_string = TARGET_SCHED_DEFAULT;

  if (! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else if (! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (! strcmp (pa_cpu_string, "7300"))
    {
      pa_cpu_string = "7300";
      pa_cpu = PROCESSOR_7300;
    }
  else
    {
      warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return nonzero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return nonzero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_MODE (op) == word_mode
          && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (op, 0)) == LABEL_REF)
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */
int
symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
          || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */
int
reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM)
    return 0;

  /* Until problems with management of the REG_POINTER flag are resolved,
     we need to delay creating move insns with unscaled indexed addresses
     until CSE is not expected.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (XEXP (op, 0)) == PLUS
      && REG_P (XEXP (XEXP (op, 0), 0))
      && REG_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  return (!symbolic_memory_operand (op, mode)
          && memory_address_p (mode, XEXP (op, 0)));
}

/* Return 1 if the operand is a register operand or a non-symbolic memory
   operand after reload.  This predicate is used for branch patterns that
   internally handle register reloading.  We need to accept non-symbolic
   memory operands after reload to ensure that the pattern is still valid
   if reload didn't find a hard register for the operand.  */
int
reg_before_reload_operand (rtx op, enum machine_mode mode)
{
  /* Don't accept a SUBREG since it will need a reload.  */
  if (GET_CODE (op) == SUBREG)
    return 0;

  if (register_operand (op, mode))
    return 1;

  if (reload_completed
      && memory_operand (op, mode)
      && !symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
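/* Added note (the constraint letters are defined in pa.h, not here):
   'J' appears to accept 14-bit signed constants (ldo displacements),
   'N' constants that ldil can build, and 'K' constants that zdepi can
   deposit; treat CONST_OK_FOR_LETTER_P as the authoritative test.  */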
int
cint_ok_for_move (HOST_WIDE_INT intval)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
          || CONST_OK_FOR_LETTER_P (intval, 'N')
          || CONST_OK_FOR_LETTER_P (intval, 'K'));
}

/* Return 1 iff OP is an indexed memory operand.  */
int
indexed_memory_operand (rtx op, enum machine_mode mode)
{
  if (GET_MODE (op) != mode)
    return 0;

  /* Before reload, a (SUBREG (MEM...)) forces reloading into a register.  */
  if (reload_completed && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
    return 0;

  op = XEXP (op, 0);

  return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
}

/* Accept anything that can be used as a destination operand for a
   move instruction.  We don't accept indexed memory operands since
   they are supported only for floating point stores.  */
int
move_dest_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (GET_MODE (op) != mode)
    return 0;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
    return 0;

  op = XEXP (op, 0);

  return (memory_address_p (mode, op)
          && !IS_INDEX_ADDR_P (op)
          && !IS_LO_SUM_DLT_ADDR_P (op));
}

/* Accept anything that can be used as a source operand for a move
   instruction.  */
int
move_src_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_MODE (op) != mode)
    return 0;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM)
    return 0;

  /* Until problems with management of the REG_POINTER flag are resolved,
     we need to delay creating move insns with unscaled indexed addresses
     until CSE is not expected.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (XEXP (op, 0)) == PLUS
      && REG_P (XEXP (XEXP (op, 0), 0))
      && REG_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  return memory_address_p (mode, XEXP (op, 0));
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
}

int
pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return reg_renumber && FP_REG_P (op);
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

int
arith_double_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_DOUBLE
              && GET_MODE (op) == mode
              && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
              && ((CONST_DOUBLE_HIGH (op) >= 0)
                  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */
int
ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
          || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */
int
int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
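  /* Added note: on a host whose HOST_WIDE_INT is only 32 bits wide,
     values with bit 31 set are presumably represented either as a
     (negative) CONST_INT or as a CONST_DOUBLE with a zero high word,
     which is what the test below accepts.  */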
  return (GET_CODE (op) == CONST_INT
          || (GET_CODE (op) == CONST_DOUBLE
              && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

int
arith5_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || int5_operand (op, mode);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
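/* Added worked example for the test below: for x = 0b11110000,
   lsb_mask = 0b00010000, (x >> 4) + lsb_mask = 0b00011111, and masking
   with ~(lsb_mask - 1) leaves 0b00010000, a power of two, so X is
   accepted.  The shift by 4 lines the field up with its lowest set
   bit, the add collapses the run of ones into a single carry bit, and
   the final mask discards the low-order leftovers.  */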
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
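/* Added note on the trick below: complementing MASK turns each accepted
   pattern into a single contiguous run of ones; adding the run's lowest
   set bit collapses it into one carry bit, so the result is zero or a
   power of two exactly when the run was contiguous.  E.g. for
   mask = ...11110011, ~mask = 0b1100, and 0b1100 + 0b0100 = 0b10000.  */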
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
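/* Added note: this is the same run-collapsing trick as in and_mask_p
   above, applied to MASK directly, so MASK must be a single contiguous
   run of ones.  */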
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
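/* Added note: after discarding the low four bits, (x & (x + 1)) == 0
   accepts exactly zero and all-ones values, i.e. OP >> 4 must look like
   0...01...1.  E.g. 0b01111010 passes (0b0111 & 0b1000 == 0) while
   0b10110000 fails (0b1011 & 0b1100 != 0).  */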
int
lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}

int
arith32_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */
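/* Added note: for a SYMBOL_REF the code below builds the usual two-part
   PIC reference, an add of the PIC offset table register and the HIGH
   part of the symbol, followed by a memory load through a LO_SUM; the
   precise relocations are chosen later by the assembler output code.  */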
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      if (reg == 0)
        abort ();

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
                      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
        = gen_rtx_MEM (Pmode,
                       gen_rtx_LO_SUM (Pmode, tmp_reg,
                                       gen_rtx_UNSPEC (Pmode,
                                                       gen_rtvec (1, orig),
                                                       0)));

      current_function_uses_pic_offset_table = 1;
      MEM_NOTRAP_P (pic_ref) = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
        {
          base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
          orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                         base == reg ? 0 : reg);
        }
      else
        abort ();

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
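/* Added worked example (numbers invented for illustration): for a
   MODE_INT reference to X + 0x12345, mask = 0x3fff, so 0x2345 >= 0x2000
   rounds the offset up to Y = 0x14000; Z = X + 0x14000 can be shared by
   CSE, and the reference becomes memory (Z + (0x12345 - 0x14000)), i.e.
   Z - 0x1cbb, whose displacement fits in 14 bits.  */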
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST addresses are cheapest (cost 1),
   addresses involving symbolic constants (HIGH) cost 2, and
   everything else, PIC addresses included, is very expensive (cost 4).

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
    abort ();

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */
int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      /* This is only safe up to the beginning of life analysis.  */
      if (no_new_pseudos)
        abort ();

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
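  /* Added illustration: a DFmode access to (mem (plus (reg) (const_int 64)))
     cannot use the short 5-bit FP displacement form, so the code below
     first builds the full address in SCRATCH_REG and then performs the
     access through (mem (reg scratch)).  */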
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p (DFmode, XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && ! memory_address_p (DFmode, XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p (DFmode,
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM
               || GET_CODE (operand1) == CONST_INT
               || (GET_CODE (operand1) == REG
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
                                                    scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
              else if (REG_POINTER (operand0)
                       && !REG_POINTER (operand1)
                       && !HARD_REGISTER_P (operand1))
                copy_reg_pointer (operand1, operand0);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  Fortran indirect argument references
                 are ignored.  */
              if (decl
                  && !(flag_argument_noalias > 1
                       && TREE_CODE (decl) == INDIRECT_REF
                       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  if (TREE_CODE (type) == ARRAY_TYPE)
                    type = get_inner_array_type (type);

                  if (POINTER_TYPE_P (type))
                    {
                      int align;

                      type = TREE_TYPE (type);
                      /* Using TYPE_ALIGN_OK is rather conservative as
                         only the ada frontend actually sets it.  */
                      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
                               : BITS_PER_UNIT);
                      mark_reg_pointer (operand0, align);
                    }
                }
            }

          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
          emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.

             So we force the plabel into memory, load operand0 from
             the memory location, then add in the constant part.  */
          if ((GET_CODE (operand1) == CONST
               && GET_CODE (XEXP (operand1, 0)) == PLUS
               && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
              || function_label_operand (operand1, mode))
            {
              rtx temp, const_part;

              /* Figure out what (if any) scratch register to use.  */
              if (reload_in_progress || reload_completed)
                {
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
                  /* SCRATCH_REG will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  scratch_reg = force_mode (word_mode, scratch_reg);
                }
              else if (flag_pic)
                scratch_reg = gen_reg_rtx (Pmode);

              if (GET_CODE (operand1) == CONST)
                {
                  /* Save away the constant part of the expression.  */
                  const_part = XEXP (XEXP (operand1, 0), 1);
                  if (GET_CODE (const_part) != CONST_INT)
                    abort ();

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
                }
              else
                {
                  /* No constant part.  */
                  const_part = NULL_RTX;

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, operand1);
                }

              /* Get the address of the memory location.  PIC-ify it if
                 necessary.  */
              temp = XEXP (temp, 0);
              if (flag_pic)
                temp = legitimize_pic_address (temp, mode, scratch_reg);

              /* Put the address of the memory location into our destination
                 register.  */
              operands[1] = temp;
              emit_move_sequence (operands, mode, scratch_reg);

              /* Now load from the memory location into our destination
                 register.  */
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
              emit_move_sequence (operands, mode, scratch_reg);

              /* And add back in the constant part.  */
              if (const_part != NULL_RTX)
                expand_inc (operand0, const_part);

              return 1;
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (Pmode);

              /* (const (plus (symbol) (const_int))) must be forced to
                 memory during/after reload if the const_int will not fit
                 in 14 bits.  */
              if (GET_CODE (operand1) == CONST
                  && GET_CODE (XEXP (operand1, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                  && (reload_completed || reload_in_progress)
                  && flag_pic)
                {
                  operands[1] = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
                                                        mode, temp);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  if (REG_P (operand0) && REG_P (operands[1]))
                    copy_reg_pointer (operand0, operands[1]);
                  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.  */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (operand0, BITS_PER_UNIT);
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (temp, BITS_PER_UNIT);

              if (ishighonly)
                set = gen_rtx_SET (mode, operand0, temp);
              else
                set = gen_rtx_SET (VOIDmode,
                                   operand0,
                                   gen_rtx_LO_SUM (mode, temp, operand1));

              emit_insn (gen_rtx_SET (VOIDmode,
                                      temp,
                                      gen_rtx_HIGH (mode, operand1)));
              emit_insn (set);
            }
          return 1;
        }
      else if (GET_CODE (operand1) != CONST_INT
               || !cint_ok_for_move (INTVAL (operand1)))
        {
          rtx insn, temp;
          rtx op1 = operand1;
          HOST_WIDE_INT value = 0;
          HOST_WIDE_INT insv = 0;
          int insert = 0;

          if (GET_CODE (operand1) == CONST_INT)
            value = INTVAL (operand1);

          if (TARGET_64BIT
              && GET_CODE (operand1) == CONST_INT
1987 && HOST_BITS_PER_WIDE_INT > 32
1988 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1990 HOST_WIDE_INT nval;
1992 /* Extract the low order 32 bits of the value and sign extend.
1993 If the new value is the same as the original value, we can
1994 use the original value as-is. If the new value is
1995 different, we use it and insert the most-significant 32-bits
1996 of the original value into the final result. */
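/* The XOR/subtract pair below is a portable sign extension: for
   example, value 0x123456789 gives nval 0x23456789 (bit 31 clear),
   so INSV becomes 0x1 and is deposited into the upper half below. */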
1997 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1998 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1999 if (value != nval)
2001 #if HOST_BITS_PER_WIDE_INT > 32
2002 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2003 #endif
2004 insert = 1;
2005 value = nval;
2006 operand1 = GEN_INT (nval);
2010 if (reload_in_progress || reload_completed)
2011 temp = scratch_reg ? scratch_reg : operand0;
2012 else
2013 temp = gen_reg_rtx (mode);
2015 /* We don't directly split DImode constants on 32-bit targets
2016 because PLUS uses an 11-bit immediate and the insn sequence
2017 generated is not as efficient as the one using HIGH/LO_SUM. */
2018 if (GET_CODE (operand1) == CONST_INT
2019 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2020 && !insert)
2022 /* Directly break constant into high and low parts. This
2023 provides better optimization opportunities because various
2024 passes recognize constants split with PLUS but not LO_SUM.
2025 We use a 14-bit signed low part except when the addition
2026 of 0x4000 to the high part might change the sign of the
2027 high part. */
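/* For example, 0x12345678 splits into high = 0x12344000 and
   low = 0x1678; for 0x12347fff the low field 0x3fff is >= 0x2000,
   so high is bumped to 0x12348000 and low becomes -1. */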
2028 HOST_WIDE_INT low = value & 0x3fff;
2029 HOST_WIDE_INT high = value & ~ 0x3fff;
2031 if (low >= 0x2000)
2033 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2034 high += 0x2000;
2035 else
2036 high += 0x4000;
2039 low = value - high;
2041 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2042 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2044 else
2046 emit_insn (gen_rtx_SET (VOIDmode, temp,
2047 gen_rtx_HIGH (mode, operand1)));
2048 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2051 insn = emit_move_insn (operands[0], operands[1]);
2053 /* Now insert the most significant 32 bits of the value
2054 into the register. When we don't have a second register
2055 available, it could take up to nine instructions to load
2056 a 64-bit integer constant. Prior to reload, we force
2057 constants that would take more than three instructions
2058 to load to the constant pool. During and after reload,
2059 we have to handle all possible values. */
2060 if (insert)
2062 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2063 register and the value to be inserted is outside the
2064 range that can be loaded with three depdi instructions. */
2065 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2067 operand1 = GEN_INT (insv);
2069 emit_insn (gen_rtx_SET (VOIDmode, temp,
2070 gen_rtx_HIGH (mode, operand1)));
2071 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2072 emit_insn (gen_insv (operand0, GEN_INT (32),
2073 const0_rtx, temp));
2075 else
2077 int len = 5, pos = 27;
2079 /* Insert the bits using the depdi instruction. */
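/* V5 below is the low five bits of INSV sign extended with the
   XOR/subtract idiom ((x & 31) ^ 16) - 16; depdi only takes a
   signed 5-bit immediate. */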
2080 while (pos >= 0)
2082 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2083 HOST_WIDE_INT sign = v5 < 0;
2085 /* Left extend the insertion. */
2086 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2087 while (pos > 0 && (insv & 1) == sign)
2089 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2090 len += 1;
2091 pos -= 1;
2094 emit_insn (gen_insv (operand0, GEN_INT (len),
2095 GEN_INT (pos), GEN_INT (v5)));
2097 len = pos > 0 && pos < 5 ? pos : 5;
2098 pos -= len;
2103 REG_NOTES (insn)
2104 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
2106 return 1;
2109 /* Now have insn-emit do whatever it normally does. */
2110 return 0;
2113 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2114 it will need a link/runtime reloc). */
2117 reloc_needed (tree exp)
2119 int reloc = 0;
2121 switch (TREE_CODE (exp))
2123 case ADDR_EXPR:
2124 return 1;
2126 case PLUS_EXPR:
2127 case MINUS_EXPR:
2128 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2129 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2130 break;
2132 case NOP_EXPR:
2133 case CONVERT_EXPR:
2134 case NON_LVALUE_EXPR:
2135 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2136 break;
2138 case CONSTRUCTOR:
2140 register tree link;
2141 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
2142 if (TREE_VALUE (link) != 0)
2143 reloc |= reloc_needed (TREE_VALUE (link));
2145 break;
2147 case ERROR_MARK:
2148 break;
2150 default:
2151 break;
2153 return reloc;
2156 /* Does operand (which is a symbolic_operand) live in text space?
2157 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2158 will be true. */
2161 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2163 if (GET_CODE (operand) == CONST)
2164 operand = XEXP (XEXP (operand, 0), 0);
2165 if (flag_pic)
2167 if (GET_CODE (operand) == SYMBOL_REF)
2168 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2170 else
2172 if (GET_CODE (operand) == SYMBOL_REF)
2173 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2175 return 1;
2179 /* Return the best assembler insn template
2180 for moving operands[1] into operands[0] as a fullword. */
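/* For instance, the constant 5 fits in 14 bits and yields "ldi";
   0x55554000 has its low 11 bits clear and yields "ldil"; a general
   constant falls back to the two-insn ldil/ldo pair. */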
2181 const char *
2182 singlemove_string (rtx *operands)
2184 HOST_WIDE_INT intval;
2186 if (GET_CODE (operands[0]) == MEM)
2187 return "stw %r1,%0";
2188 if (GET_CODE (operands[1]) == MEM)
2189 return "ldw %1,%0";
2190 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2192 long i;
2193 REAL_VALUE_TYPE d;
2195 if (GET_MODE (operands[1]) != SFmode)
2196 abort ();
2198 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2199 bit pattern. */
2200 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2201 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2203 operands[1] = GEN_INT (i);
2204 /* Fall through to CONST_INT case. */
2206 if (GET_CODE (operands[1]) == CONST_INT)
2208 intval = INTVAL (operands[1]);
2210 if (VAL_14_BITS_P (intval))
2211 return "ldi %1,%0";
2212 else if ((intval & 0x7ff) == 0)
2213 return "ldil L'%1,%0";
2214 else if (zdepi_cint_p (intval))
2215 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2216 else
2217 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2219 return "copy %1,%0";
2223 /* Compute position (in OP[1]) and width (in OP[2])
2224 useful for copying IMM to a register using the zdepi
2225 instructions. Store the immediate value to insert in OP[0]. */
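/* For instance, IMM = 0x3e0 gives lsb = 5 with a 5-bit field of all
   ones, so op[0] = -1, op[1] = 26, op[2] = 5, corresponding roughly
   to "zdepi -1,26,5,%reg". */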
2226 static void
2227 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2229 int lsb, len;
2231 /* Find the least significant set bit in IMM. */
2232 for (lsb = 0; lsb < 32; lsb++)
2234 if ((imm & 1) != 0)
2235 break;
2236 imm >>= 1;
2239 /* Choose variants based on *sign* of the 5-bit field. */
2240 if ((imm & 0x10) == 0)
2241 len = (lsb <= 28) ? 4 : 32 - lsb;
2242 else
2244 /* Find the width of the bitstring in IMM. */
2245 for (len = 5; len < 32; len++)
2247 if ((imm & (1 << len)) == 0)
2248 break;
2251 /* Sign extend IMM as a 5-bit value. */
2252 imm = (imm & 0xf) - 0x10;
2255 op[0] = imm;
2256 op[1] = 31 - lsb;
2257 op[2] = len;
2260 /* Compute position (in OP[1]) and width (in OP[2])
2261 useful for copying IMM to a register using the depdi,z
2262 instructions. Store the immediate value to insert in OP[0]. */
2263 void
2264 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2266 HOST_WIDE_INT lsb, len;
2268 /* Find the least significant set bit in IMM. */
2269 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2271 if ((imm & 1) != 0)
2272 break;
2273 imm >>= 1;
2276 /* Choose variants based on *sign* of the 5-bit field. */
2277 if ((imm & 0x10) == 0)
2278 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2279 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2280 else
2282 /* Find the width of the bitstring in IMM. */
2283 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2285 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2286 break;
2289 /* Sign extend IMM as a 5-bit value. */
2290 imm = (imm & 0xf) - 0x10;
2293 op[0] = imm;
2294 op[1] = 63 - lsb;
2295 op[2] = len;
2298 /* Output assembler code to perform a doubleword move insn
2299 with operands OPERANDS. */
2301 const char *
2302 output_move_double (rtx *operands)
2304 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2305 rtx latehalf[2];
2306 rtx addreg0 = 0, addreg1 = 0;
2308 /* First classify both operands. */
2310 if (REG_P (operands[0]))
2311 optype0 = REGOP;
2312 else if (offsettable_memref_p (operands[0]))
2313 optype0 = OFFSOP;
2314 else if (GET_CODE (operands[0]) == MEM)
2315 optype0 = MEMOP;
2316 else
2317 optype0 = RNDOP;
2319 if (REG_P (operands[1]))
2320 optype1 = REGOP;
2321 else if (CONSTANT_P (operands[1]))
2322 optype1 = CNSTOP;
2323 else if (offsettable_memref_p (operands[1]))
2324 optype1 = OFFSOP;
2325 else if (GET_CODE (operands[1]) == MEM)
2326 optype1 = MEMOP;
2327 else
2328 optype1 = RNDOP;
2330 /* Check for the cases that the operand constraints are not
2331 supposed to allow to happen. Abort if we get one,
2332 because generating code for these cases is painful. */
2334 if (optype0 != REGOP && optype1 != REGOP)
2335 abort ();
2337 /* Handle auto decrementing and incrementing loads and stores
2338 specifically, since the structure of the function doesn't work
2339 for them without major modification. Do this better once the
2340 port is taught about the general inc/dec addressing of the PA.
2341 (This was written by tege. Chide him if it doesn't work.) */
2343 if (optype0 == MEMOP)
2345 /* We have to output the address syntax ourselves, since print_operand
2346 doesn't deal with the addresses we want to use. Fix this later. */
2348 rtx addr = XEXP (operands[0], 0);
2349 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2351 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2353 operands[0] = XEXP (addr, 0);
2354 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2355 abort ();
2357 if (!reg_overlap_mentioned_p (high_reg, addr))
2359 /* No overlap between high target register and address
2360 register. (We do this in a non-obvious way to
2361 save a register file writeback) */
2362 if (GET_CODE (addr) == POST_INC)
2363 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2364 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2366 else
2367 abort ();
2369 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2371 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2373 operands[0] = XEXP (addr, 0);
2374 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2375 abort ();
2377 if (!reg_overlap_mentioned_p (high_reg, addr))
2379 /* No overlap between high target register and address
2380 register. (We do this in a non-obvious way to
2381 save a register file writeback) */
2382 if (GET_CODE (addr) == PRE_INC)
2383 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2384 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2386 else
2387 abort ();
2390 if (optype1 == MEMOP)
2392 /* We have to output the address syntax ourselves, since print_operand
2393 doesn't deal with the addresses we want to use. Fix this later. */
2395 rtx addr = XEXP (operands[1], 0);
2396 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2398 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2400 operands[1] = XEXP (addr, 0);
2401 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2402 abort ();
2404 if (!reg_overlap_mentioned_p (high_reg, addr))
2406 /* No overlap between high target register and address
2407 register. (We do this in a non-obvious way to
2408 save a register file writeback) */
2409 if (GET_CODE (addr) == POST_INC)
2410 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2411 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2413 else
2415 /* This is an undefined situation. We should load into the
2416 address register *and* update that register. Probably
2417 we don't need to handle this at all. */
2418 if (GET_CODE (addr) == POST_INC)
2419 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2420 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2423 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2425 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2427 operands[1] = XEXP (addr, 0);
2428 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2429 abort ();
2431 if (!reg_overlap_mentioned_p (high_reg, addr))
2433 /* No overlap between high target register and address
2434 register. (We do this in a non-obvious way to
2435 save a register file writeback) */
2436 if (GET_CODE (addr) == PRE_INC)
2437 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2438 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2440 else
2442 /* This is an undefined situation. We should load into the
2443 address register *and* update that register. Probably
2444 we don't need to handle this at all. */
2445 if (GET_CODE (addr) == PRE_INC)
2446 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2447 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2450 else if (GET_CODE (addr) == PLUS
2451 && GET_CODE (XEXP (addr, 0)) == MULT)
2453 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2455 if (!reg_overlap_mentioned_p (high_reg, addr))
2457 rtx xoperands[4];
2459 xoperands[0] = high_reg;
2460 xoperands[1] = XEXP (addr, 1);
2461 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2462 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2463 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2464 xoperands);
2465 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2467 else
2469 rtx xoperands[4];
2471 xoperands[0] = high_reg;
2472 xoperands[1] = XEXP (addr, 1);
2473 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2474 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2475 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2476 xoperands);
2477 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2482 /* If an operand is an unoffsettable memory ref, find a register
2483 we can increment temporarily to make it refer to the second word. */
2485 if (optype0 == MEMOP)
2486 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2488 if (optype1 == MEMOP)
2489 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2491 /* Ok, we can do one word at a time.
2492 Normally we do the low-numbered word first.
2494 In either case, set up in LATEHALF the operands to use
2495 for the high-numbered word and in some cases alter the
2496 operands in OPERANDS to be suitable for the low-numbered word. */
2498 if (optype0 == REGOP)
2499 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2500 else if (optype0 == OFFSOP)
2501 latehalf[0] = adjust_address (operands[0], SImode, 4);
2502 else
2503 latehalf[0] = operands[0];
2505 if (optype1 == REGOP)
2506 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2507 else if (optype1 == OFFSOP)
2508 latehalf[1] = adjust_address (operands[1], SImode, 4);
2509 else if (optype1 == CNSTOP)
2510 split_double (operands[1], &operands[1], &latehalf[1]);
2511 else
2512 latehalf[1] = operands[1];
2514 /* If the first move would clobber the source of the second one,
2515 do them in the other order.
2517 This can happen in two cases:
2519 mem -> register where the first half of the destination register
2520 is the same register used in the memory's address. Reload
2521 can create such insns.
2523 mem in this case will be either register indirect or register
2524 indirect plus a valid offset.
2526 register -> register move where REGNO(dst) == REGNO(src + 1)
2527 someone (Tim/Tege?) claimed this can happen for parameter loads.
2529 Handle mem -> register case first. */
2530 if (optype0 == REGOP
2531 && (optype1 == MEMOP || optype1 == OFFSOP)
2532 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2533 operands[1], 0))
2535 /* Do the late half first. */
2536 if (addreg1)
2537 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2538 output_asm_insn (singlemove_string (latehalf), latehalf);
2540 /* Then clobber. */
2541 if (addreg1)
2542 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2543 return singlemove_string (operands);
2546 /* Now handle register -> register case. */
2547 if (optype0 == REGOP && optype1 == REGOP
2548 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2550 output_asm_insn (singlemove_string (latehalf), latehalf);
2551 return singlemove_string (operands);
2554 /* Normal case: do the two words, low-numbered first. */
2556 output_asm_insn (singlemove_string (operands), operands);
2558 /* Make any unoffsettable addresses point at high-numbered word. */
2559 if (addreg0)
2560 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2561 if (addreg1)
2562 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2564 /* Do that word. */
2565 output_asm_insn (singlemove_string (latehalf), latehalf);
2567 /* Undo the adds we just did. */
2568 if (addreg0)
2569 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2570 if (addreg1)
2571 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2573 return "";
2576 const char *
2577 output_fp_move_double (rtx *operands)
2579 if (FP_REG_P (operands[0]))
2581 if (FP_REG_P (operands[1])
2582 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2583 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2584 else
2585 output_asm_insn ("fldd%F1 %1,%0", operands);
2587 else if (FP_REG_P (operands[1]))
2589 output_asm_insn ("fstd%F0 %1,%0", operands);
2591 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2593 if (GET_CODE (operands[0]) == REG)
2595 rtx xoperands[2];
2596 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2597 xoperands[0] = operands[0];
2598 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2600 /* This is a pain. You have to be prepared to deal with an
2601 arbitrary address here including pre/post increment/decrement.
2603 So avoid this in the MD. */
2604 else
2605 abort ();
2607 else abort ();
2608 return "";
2611 /* Return a REG that occurs in ADDR with coefficient 1.
2612 ADDR can be effectively incremented by incrementing REG. */
2614 static rtx
2615 find_addr_reg (rtx addr)
2617 while (GET_CODE (addr) == PLUS)
2619 if (GET_CODE (XEXP (addr, 0)) == REG)
2620 addr = XEXP (addr, 0);
2621 else if (GET_CODE (XEXP (addr, 1)) == REG)
2622 addr = XEXP (addr, 1);
2623 else if (CONSTANT_P (XEXP (addr, 0)))
2624 addr = XEXP (addr, 1);
2625 else if (CONSTANT_P (XEXP (addr, 1)))
2626 addr = XEXP (addr, 0);
2627 else
2628 abort ();
2630 if (GET_CODE (addr) == REG)
2631 return addr;
2632 abort ();
2635 /* Emit code to perform a block move.
2637 OPERANDS[0] is the destination pointer as a REG, clobbered.
2638 OPERANDS[1] is the source pointer as a REG, clobbered.
2639 OPERANDS[2] is a register for temporary storage.
2640 OPERANDS[3] is a register for temporary storage.
2641 OPERANDS[4] is the size as a CONST_INT
2642 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2643 OPERANDS[6] is another temporary register. */
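/* As an illustration, align == 4 and n_bytes == 23 produce the
   six-insn copy loop below (two words per iteration) plus a four-insn
   residual: one more word load/store pair for bytes 16-19 and an
   ldw/stby,e pair for the final three bytes. */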
2645 const char *
2646 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2648 int align = INTVAL (operands[5]);
2649 unsigned long n_bytes = INTVAL (operands[4]);
2651 /* We can't move more than a word at a time because the PA
2652 has no integer move insns wider than a word. (Could use fp mem ops?) */
2653 if (align > (TARGET_64BIT ? 8 : 4))
2654 align = (TARGET_64BIT ? 8 : 4);
2656 /* Note that we know each loop below will execute at least twice
2657 (else we would have open-coded the copy). */
2658 switch (align)
2660 case 8:
2661 /* Pre-adjust the loop counter. */
2662 operands[4] = GEN_INT (n_bytes - 16);
2663 output_asm_insn ("ldi %4,%2", operands);
2665 /* Copying loop. */
2666 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2667 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2668 output_asm_insn ("std,ma %3,8(%0)", operands);
2669 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2670 output_asm_insn ("std,ma %6,8(%0)", operands);
2672 /* Handle the residual. There could be up to 7 bytes of
2673 residual to copy! */
2674 if (n_bytes % 16 != 0)
2676 operands[4] = GEN_INT (n_bytes % 8);
2677 if (n_bytes % 16 >= 8)
2678 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2679 if (n_bytes % 8 != 0)
2680 output_asm_insn ("ldd 0(%1),%6", operands);
2681 if (n_bytes % 16 >= 8)
2682 output_asm_insn ("std,ma %3,8(%0)", operands);
2683 if (n_bytes % 8 != 0)
2684 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2686 return "";
2688 case 4:
2689 /* Pre-adjust the loop counter. */
2690 operands[4] = GEN_INT (n_bytes - 8);
2691 output_asm_insn ("ldi %4,%2", operands);
2693 /* Copying loop. */
2694 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2695 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2696 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2697 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2698 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2700 /* Handle the residual. There could be up to 7 bytes of
2701 residual to copy! */
2702 if (n_bytes % 8 != 0)
2704 operands[4] = GEN_INT (n_bytes % 4);
2705 if (n_bytes % 8 >= 4)
2706 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2707 if (n_bytes % 4 != 0)
2708 output_asm_insn ("ldw 0(%1),%6", operands);
2709 if (n_bytes % 8 >= 4)
2710 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2711 if (n_bytes % 4 != 0)
2712 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2714 return "";
2716 case 2:
2717 /* Pre-adjust the loop counter. */
2718 operands[4] = GEN_INT (n_bytes - 4);
2719 output_asm_insn ("ldi %4,%2", operands);
2721 /* Copying loop. */
2722 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2723 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2724 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2725 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2726 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2728 /* Handle the residual. */
2729 if (n_bytes % 4 != 0)
2731 if (n_bytes % 4 >= 2)
2732 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2733 if (n_bytes % 2 != 0)
2734 output_asm_insn ("ldb 0(%1),%6", operands);
2735 if (n_bytes % 4 >= 2)
2736 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2737 if (n_bytes % 2 != 0)
2738 output_asm_insn ("stb %6,0(%0)", operands);
2740 return "";
2742 case 1:
2743 /* Pre-adjust the loop counter. */
2744 operands[4] = GEN_INT (n_bytes - 2);
2745 output_asm_insn ("ldi %4,%2", operands);
2747 /* Copying loop. */
2748 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2749 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2750 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2751 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2752 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2754 /* Handle the residual. */
2755 if (n_bytes % 2 != 0)
2757 output_asm_insn ("ldb 0(%1),%3", operands);
2758 output_asm_insn ("stb %3,0(%0)", operands);
2760 return "";
2762 default:
2763 abort ();
2767 /* Count the number of insns necessary to handle this block move.
2769 Basic structure is the same as output_block_move, except that we
2770 count insns rather than emit them. */
2772 static int
2773 compute_movstr_length (rtx insn)
2775 rtx pat = PATTERN (insn);
2776 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2777 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2778 unsigned int n_insns = 0;
2780 /* We can't move more than a word at a time because the PA
2781 has no integer move insns wider than a word. (Could use fp mem ops?) */
2782 if (align > (TARGET_64BIT ? 8 : 4))
2783 align = (TARGET_64BIT ? 8 : 4);
2785 /* The basic copying loop. */
2786 n_insns = 6;
2788 /* Residuals. */
2789 if (n_bytes % (2 * align) != 0)
2791 if ((n_bytes % (2 * align)) >= align)
2792 n_insns += 2;
2794 if ((n_bytes % align) != 0)
2795 n_insns += 2;
2798 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2799 return n_insns * 4;
2802 /* Emit code to perform a block clear.
2804 OPERANDS[0] is the destination pointer as a REG, clobbered.
2805 OPERANDS[1] is a register for temporary storage.
2806 OPERANDS[2] is the size as a CONST_INT
2807 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2809 const char *
2810 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2812 int align = INTVAL (operands[3]);
2813 unsigned long n_bytes = INTVAL (operands[2]);
2815 /* We can't clear more than a word at a time because the PA
2816 has no integer move insns wider than a word. */
2817 if (align > (TARGET_64BIT ? 8 : 4))
2818 align = (TARGET_64BIT ? 8 : 4);
2820 /* Note that we know each loop below will execute at least twice
2821 (else we would have open-coded the clear). */
2822 switch (align)
2824 case 8:
2825 /* Pre-adjust the loop counter. */
2826 operands[2] = GEN_INT (n_bytes - 16);
2827 output_asm_insn ("ldi %2,%1", operands);
2829 /* Loop. */
2830 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2831 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2832 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2834 /* Handle the residual. There could be up to 7 bytes of
2835 residual to clear! */
2836 if (n_bytes % 16 != 0)
2838 operands[2] = GEN_INT (n_bytes % 8);
2839 if (n_bytes % 16 >= 8)
2840 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2841 if (n_bytes % 8 != 0)
2842 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2844 return "";
2846 case 4:
2847 /* Pre-adjust the loop counter. */
2848 operands[2] = GEN_INT (n_bytes - 8);
2849 output_asm_insn ("ldi %2,%1", operands);
2851 /* Loop. */
2852 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2853 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2854 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2856 /* Handle the residual. There could be up to 7 bytes of
2857 residual to clear! */
2858 if (n_bytes % 8 != 0)
2860 operands[2] = GEN_INT (n_bytes % 4);
2861 if (n_bytes % 8 >= 4)
2862 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2863 if (n_bytes % 4 != 0)
2864 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2866 return "";
2868 case 2:
2869 /* Pre-adjust the loop counter. */
2870 operands[2] = GEN_INT (n_bytes - 4);
2871 output_asm_insn ("ldi %2,%1", operands);
2873 /* Loop. */
2874 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2875 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2876 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2878 /* Handle the residual. */
2879 if (n_bytes % 4 != 0)
2881 if (n_bytes % 4 >= 2)
2882 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2883 if (n_bytes % 2 != 0)
2884 output_asm_insn ("stb %%r0,0(%0)", operands);
2886 return "";
2888 case 1:
2889 /* Pre-adjust the loop counter. */
2890 operands[2] = GEN_INT (n_bytes - 2);
2891 output_asm_insn ("ldi %2,%1", operands);
2893 /* Loop. */
2894 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2895 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2896 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2898 /* Handle the residual. */
2899 if (n_bytes % 2 != 0)
2900 output_asm_insn ("stb %%r0,0(%0)", operands);
2902 return "";
2904 default:
2905 abort ();
2909 /* Count the number of insns necessary to handle this block clear.
2911 Basic structure is the same as output_block_clear, except that we
2912 count insns rather than emit them. */
2914 static int
2915 compute_clrstr_length (rtx insn)
2917 rtx pat = PATTERN (insn);
2918 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2919 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2920 unsigned int n_insns = 0;
2922 /* We can't clear more than a word at a time because the PA
2923 has no integer move insns wider than a word. */
2924 if (align > (TARGET_64BIT ? 8 : 4))
2925 align = (TARGET_64BIT ? 8 : 4);
2927 /* The basic loop. */
2928 n_insns = 4;
2930 /* Residuals. */
2931 if (n_bytes % (2 * align) != 0)
2933 if ((n_bytes % (2 * align)) >= align)
2934 n_insns++;
2936 if ((n_bytes % align) != 0)
2937 n_insns++;
2940 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2941 return n_insns * 4;
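/* Return the best insn template for ANDing operands[1] with the
   constant mask operands[2] into operands[0]. A mask that is a low
   block of ones becomes a single extru; a mask with one contiguous
   hole of zeros becomes a single depi that clears the hole. */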
2945 const char *
2946 output_and (rtx *operands)
2948 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2950 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2951 int ls0, ls1, ms0, p, len;
2953 for (ls0 = 0; ls0 < 32; ls0++)
2954 if ((mask & (1 << ls0)) == 0)
2955 break;
2957 for (ls1 = ls0; ls1 < 32; ls1++)
2958 if ((mask & (1 << ls1)) != 0)
2959 break;
2961 for (ms0 = ls1; ms0 < 32; ms0++)
2962 if ((mask & (1 << ms0)) == 0)
2963 break;
2965 if (ms0 != 32)
2966 abort ();
2968 if (ls1 == 32)
2970 len = ls0;
2972 if (len == 0)
2973 abort ();
2975 operands[2] = GEN_INT (len);
2976 return "{extru|extrw,u} %1,31,%2,%0";
2978 else
2980 /* We could use this `depi' for the case above as well, but `depi'
2981 requires one more register file access than an `extru'. */
2983 p = 31 - ls0;
2984 len = ls1 - ls0;
2986 operands[2] = GEN_INT (p);
2987 operands[3] = GEN_INT (len);
2988 return "{depi|depwi} 0,%2,%3,%0";
2991 else
2992 return "and %1,%2,%0";
2995 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2996 storing the result in operands[0]. */
2997 const char *
2998 output_64bit_and (rtx *operands)
3000 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3002 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3003 int ls0, ls1, ms0, p, len;
3005 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3006 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3007 break;
3009 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3010 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3011 break;
3013 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3014 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3015 break;
3017 if (ms0 != HOST_BITS_PER_WIDE_INT)
3018 abort ();
3020 if (ls1 == HOST_BITS_PER_WIDE_INT)
3022 len = ls0;
3024 if (len == 0)
3025 abort ();
3027 operands[2] = GEN_INT (len);
3028 return "extrd,u %1,63,%2,%0";
3030 else
3032 /* We could use this `depdi' for the case above as well, but `depdi'
3033 requires one more register file access than an `extrd,u'. */
3035 p = 63 - ls0;
3036 len = ls1 - ls0;
3038 operands[2] = GEN_INT (p);
3039 operands[3] = GEN_INT (len);
3040 return "depdi 0,%2,%3,%0";
3043 else
3044 return "and %1,%2,%0";
3047 const char *
3048 output_ior (rtx *operands)
3050 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3051 int bs0, bs1, p, len;
3053 if (INTVAL (operands[2]) == 0)
3054 return "copy %1,%0";
3056 for (bs0 = 0; bs0 < 32; bs0++)
3057 if ((mask & (1 << bs0)) != 0)
3058 break;
3060 for (bs1 = bs0; bs1 < 32; bs1++)
3061 if ((mask & (1 << bs1)) == 0)
3062 break;
3064 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3065 abort ();
3067 p = 31 - bs0;
3068 len = bs1 - bs0;
3070 operands[2] = GEN_INT (p);
3071 operands[3] = GEN_INT (len);
3072 return "{depi|depwi} -1,%2,%3,%0";
3075 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3076 storing the result in operands[0]. */
3077 const char *
3078 output_64bit_ior (rtx *operands)
3080 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3081 int bs0, bs1, p, len;
3083 if (INTVAL (operands[2]) == 0)
3084 return "copy %1,%0";
3086 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3087 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3088 break;
3090 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3091 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3092 break;
3094 if (bs1 != HOST_BITS_PER_WIDE_INT
3095 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3096 abort ();
3098 p = 63 - bs0;
3099 len = bs1 - bs0;
3101 operands[2] = GEN_INT (p);
3102 operands[3] = GEN_INT (len);
3103 return "depdi -1,%2,%3,%0";
3106 /* Target hook for assembling integer objects. This code handles
3107 aligned SI and DI integers specially, since function references must
3108 be preceded by P%. */
3110 static bool
3111 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3113 if (size == UNITS_PER_WORD && aligned_p
3114 && function_label_operand (x, VOIDmode))
3116 fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3117 output_addr_const (asm_out_file, x);
3118 fputc ('\n', asm_out_file);
3119 return true;
3121 return default_assemble_integer (x, size, aligned_p);
3124 /* Output an ascii string. */
3125 void
3126 output_ascii (FILE *file, const char *p, int size)
3128 int i;
3129 int chars_output;
3130 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3132 /* The HP assembler can only take strings of 256 characters at one
3133 time. This is a limitation on input line length, *not* the
3134 length of the string. Sigh. Even worse, it seems that the
3135 restriction is in number of input characters (see \xnn &
3136 \whatever). So we have to do this very carefully. */
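/* The flush threshold of 243 used below keeps each emitted line at
   no more than 243 string characters, which together with the
   "\t.STRING \"" prefix and the closing quote stays safely within
   the 256-character input limit. */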
3138 fputs ("\t.STRING \"", file);
3140 chars_output = 0;
3141 for (i = 0; i < size; i += 4)
3143 int co = 0;
3144 int io = 0;
3145 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3147 register unsigned int c = (unsigned char) p[i + io];
3149 if (c == '\"' || c == '\\')
3150 partial_output[co++] = '\\';
3151 if (c >= ' ' && c < 0177)
3152 partial_output[co++] = c;
3153 else
3155 unsigned int hexd;
3156 partial_output[co++] = '\\';
3157 partial_output[co++] = 'x';
3158 hexd = c / 16 + '0';
3159 if (hexd > '9')
3160 hexd -= '9' - 'a' + 1;
3161 partial_output[co++] = hexd;
3162 hexd = c % 16 + '0';
3163 if (hexd > '9')
3164 hexd -= '9' - 'a' + 1;
3165 partial_output[co++] = hexd;
3168 if (chars_output + co > 243)
3170 fputs ("\"\n\t.STRING \"", file);
3171 chars_output = 0;
3173 fwrite (partial_output, 1, (size_t) co, file);
3174 chars_output += co;
3175 co = 0;
3177 fputs ("\"\n", file);
3180 /* Try to rewrite floating point comparisons & branches to avoid
3181 useless add,tr insns.
3183 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3184 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3185 first attempt to remove useless add,tr insns. It is zero
3186 for the second pass as reorg sometimes leaves bogus REG_DEAD
3187 notes lying around.
3189 When CHECK_NOTES is zero we can only eliminate add,tr insns
3190 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3191 instructions. */
3192 static void
3193 remove_useless_addtr_insns (int check_notes)
3195 rtx insn;
3196 static int pass = 0;
3198 /* This is fairly cheap, so always run it when optimizing. */
3199 if (optimize > 0)
3201 int fcmp_count = 0;
3202 int fbranch_count = 0;
3204 /* Walk all the insns in this function looking for fcmp & fbranch
3205 instructions. Keep track of how many of each we find. */
3206 for (insn = get_insns (); insn; insn = next_insn (insn))
3208 rtx tmp;
3210 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3211 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3212 continue;
3214 tmp = PATTERN (insn);
3216 /* It must be a set. */
3217 if (GET_CODE (tmp) != SET)
3218 continue;
3220 /* If the destination is CCFP, then we've found an fcmp insn. */
3221 tmp = SET_DEST (tmp);
3222 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3224 fcmp_count++;
3225 continue;
3228 tmp = PATTERN (insn);
3229 /* If this is an fbranch instruction, bump the fbranch counter. */
3230 if (GET_CODE (tmp) == SET
3231 && SET_DEST (tmp) == pc_rtx
3232 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3233 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3234 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3235 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3237 fbranch_count++;
3238 continue;
3243 /* Find all floating point compare + branch insns. If possible,
3244 reverse the comparison & the branch to avoid add,tr insns. */
3245 for (insn = get_insns (); insn; insn = next_insn (insn))
3247 rtx tmp, next;
3249 /* Ignore anything that isn't an INSN. */
3250 if (GET_CODE (insn) != INSN)
3251 continue;
3253 tmp = PATTERN (insn);
3255 /* It must be a set. */
3256 if (GET_CODE (tmp) != SET)
3257 continue;
3259 /* The destination must be CCFP, which is register zero. */
3260 tmp = SET_DEST (tmp);
3261 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3262 continue;
3264 /* INSN should be a set of CCFP.
3266 See if the result of this insn is used in a reversed FP
3267 conditional branch. If so, reverse our condition and
3268 the branch. Doing so avoids useless add,tr insns. */
3269 next = next_insn (insn);
3270 while (next)
3272 /* Jumps, calls and labels stop our search. */
3273 if (GET_CODE (next) == JUMP_INSN
3274 || GET_CODE (next) == CALL_INSN
3275 || GET_CODE (next) == CODE_LABEL)
3276 break;
3278 /* As does another fcmp insn. */
3279 if (GET_CODE (next) == INSN
3280 && GET_CODE (PATTERN (next)) == SET
3281 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3282 && REGNO (SET_DEST (PATTERN (next))) == 0)
3283 break;
3285 next = next_insn (next);
3288 /* Is NEXT_INSN a branch? */
3289 if (next
3290 && GET_CODE (next) == JUMP_INSN)
3292 rtx pattern = PATTERN (next);
3294 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3295 and CCFP dies, then reverse our conditional and the branch
3296 to avoid the add,tr. */
3297 if (GET_CODE (pattern) == SET
3298 && SET_DEST (pattern) == pc_rtx
3299 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3300 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3301 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3302 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3303 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3304 && (fcmp_count == fbranch_count
3305 || (check_notes
3306 && find_regno_note (next, REG_DEAD, 0))))
3308 /* Reverse the branch. */
3309 tmp = XEXP (SET_SRC (pattern), 1);
3310 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3311 XEXP (SET_SRC (pattern), 2) = tmp;
3312 INSN_CODE (next) = -1;
3314 /* Reverse our condition. */
3315 tmp = PATTERN (insn);
3316 PUT_CODE (XEXP (tmp, 1),
3317 (reverse_condition_maybe_unordered
3318 (GET_CODE (XEXP (tmp, 1)))));
3324 pass = !pass;
3328 /* You may have trouble believing this, but this is the 32 bit HP-PA
3329 stack layout. Wow.
3331 Offset Contents
3333 Variable arguments (optional; any number may be allocated)
3335 SP-(4*(N+9)) arg word N
3337 SP-56 arg word 5
3338 SP-52 arg word 4
3340 Fixed arguments (must be allocated; may remain unused)
3342 SP-48 arg word 3
3343 SP-44 arg word 2
3344 SP-40 arg word 1
3345 SP-36 arg word 0
3347 Frame Marker
3349 SP-32 External Data Pointer (DP)
3350 SP-28 External sr4
3351 SP-24 External/stub RP (RP')
3352 SP-20 Current RP
3353 SP-16 Static Link
3354 SP-12 Clean up
3355 SP-8 Calling Stub RP (RP'')
3356 SP-4 Previous SP
3358 Top of Frame
3360 SP-0 Stack Pointer (points to next available address)
3364 /* This function saves registers as follows. Registers marked with ' are
3365 this function's registers (as opposed to the previous function's).
3366 If a frame_pointer isn't needed, r4 is saved as a general register;
3367 the space for the frame pointer is still allocated, though, to keep
3368 things simple.
3371 Top of Frame
3373 SP (FP') Previous FP
3374 SP + 4 Alignment filler (sigh)
3375 SP + 8 Space for locals reserved here.
3379 SP + n All call saved registers used.
3383 SP + o All call saved fp registers used.
3387 SP + p (SP') points to next available address.
3391 /* Global variables set by output_function_prologue(). */
3392 /* Size of frame. Need to know this to emit return insns from
3393 leaf procedures. */
3394 static HOST_WIDE_INT actual_fsize, local_fsize;
3395 static int save_fregs;
3397 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3398 Handle case where DISP > 8k by using the add_high_const patterns.
3400 Note in DISP > 8k case, we will leave the high part of the address
3401 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
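/* For a large displacement this emits, roughly, "addil L'disp,%base"
   (leaving the high part in %r1) followed by a store using the
   R'disp low part as the offset. */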
3403 static void
3404 store_reg (int reg, HOST_WIDE_INT disp, int base)
3406 rtx insn, dest, src, basereg;
3408 src = gen_rtx_REG (word_mode, reg);
3409 basereg = gen_rtx_REG (Pmode, base);
3410 if (VAL_14_BITS_P (disp))
3412 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3413 insn = emit_move_insn (dest, src);
3415 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3417 rtx delta = GEN_INT (disp);
3418 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3420 emit_move_insn (tmpreg, delta);
3421 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3422 dest = gen_rtx_MEM (word_mode, tmpreg);
3423 insn = emit_move_insn (dest, src);
3424 if (DO_FRAME_NOTES)
3426 REG_NOTES (insn)
3427 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3428 gen_rtx_SET (VOIDmode,
3429 gen_rtx_MEM (word_mode,
3430 gen_rtx_PLUS (word_mode, basereg,
3431 delta)),
3432 src),
3433 REG_NOTES (insn));
3436 else
3438 rtx delta = GEN_INT (disp);
3439 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3440 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3442 emit_move_insn (tmpreg, high);
3443 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3444 insn = emit_move_insn (dest, src);
3445 if (DO_FRAME_NOTES)
3447 REG_NOTES (insn)
3448 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3449 gen_rtx_SET (VOIDmode,
3450 gen_rtx_MEM (word_mode,
3451 gen_rtx_PLUS (word_mode, basereg,
3452 delta)),
3453 src),
3454 REG_NOTES (insn));
3458 if (DO_FRAME_NOTES)
3459 RTX_FRAME_RELATED_P (insn) = 1;
3462 /* Emit RTL to store REG at the memory location specified by BASE and then
3463 add MOD to BASE. MOD must be <= 8k. */
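/* This produces a post-modify store, roughly "{stws|stw},ma
   %src,mod(%base)", which stores and bumps the base register in a
   single insn. */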
3465 static void
3466 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3468 rtx insn, basereg, srcreg, delta;
3470 if (!VAL_14_BITS_P (mod))
3471 abort ();
3473 basereg = gen_rtx_REG (Pmode, base);
3474 srcreg = gen_rtx_REG (word_mode, reg);
3475 delta = GEN_INT (mod);
3477 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3478 if (DO_FRAME_NOTES)
3480 RTX_FRAME_RELATED_P (insn) = 1;
3482 /* RTX_FRAME_RELATED_P must be set on each frame related set
3483 in a parallel with more than one element. Don't set
3484 RTX_FRAME_RELATED_P in the first set if reg is temporary
3485 register 1. The effect of this operation is recorded in
3486 the initial copy. */
3487 if (reg != 1)
3489 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3490 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3492 else
3494 /* The first element of a PARALLEL is always processed if it is
3495 a SET. Thus, we need an expression list for this case. */
3496 REG_NOTES (insn)
3497 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3498 gen_rtx_SET (VOIDmode, basereg,
3499 gen_rtx_PLUS (word_mode, basereg, delta)),
3500 REG_NOTES (insn));
3505 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3506 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3507 whether to add a frame note or not.
3509 In the DISP > 8k case, we leave the high part of the address in %r1.
3510 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3512 static void
3513 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3515 rtx insn;
3517 if (VAL_14_BITS_P (disp))
3519 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3520 plus_constant (gen_rtx_REG (Pmode, base), disp));
3522 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3524 rtx basereg = gen_rtx_REG (Pmode, base);
3525 rtx delta = GEN_INT (disp);
3526 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3528 emit_move_insn (tmpreg, delta);
3529 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3530 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3532 else
3534 rtx basereg = gen_rtx_REG (Pmode, base);
3535 rtx delta = GEN_INT (disp);
3536 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3538 emit_move_insn (tmpreg,
3539 gen_rtx_PLUS (Pmode, basereg,
3540 gen_rtx_HIGH (Pmode, delta)));
3541 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3542 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3545 if (DO_FRAME_NOTES && note)
3546 RTX_FRAME_RELATED_P (insn) = 1;
3549 HOST_WIDE_INT
3550 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3552 int freg_saved = 0;
3553 int i, j;
3555 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3556 be consistent with the rounding and size calculation done here.
3557 Change them at the same time. */
3559 /* We do our own stack alignment. First, round the size of the
3560 stack locals up to a word boundary. */
3561 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3563 /* Space for previous frame pointer + filler. If any frame is
3564 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3565 waste some space here for the sake of HP compatibility. The
3566 first slot is only used when the frame pointer is needed. */
3567 if (size || frame_pointer_needed)
3568 size += STARTING_FRAME_OFFSET;
3570 /* If the current function calls __builtin_eh_return, then we need
3571 to allocate stack space for registers that will hold data for
3572 the exception handler. */
3573 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3575 unsigned int i;
3577 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3578 continue;
3579 size += i * UNITS_PER_WORD;
3582 /* Account for space used by the callee general register saves. */
3583 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3584 if (regs_ever_live[i])
3585 size += UNITS_PER_WORD;
3587 /* Account for space used by the callee floating point register saves. */
3588 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3589 if (regs_ever_live[i]
3590 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3592 freg_saved = 1;
3594 /* We always save both halves of the FP register, so always
3595 increment the frame size by 8 bytes. */
3596 size += 8;
3599 /* If any of the floating registers are saved, account for the
3600 alignment needed for the floating point register save block. */
3601 if (freg_saved)
3603 size = (size + 7) & ~7;
3604 if (fregs_live)
3605 *fregs_live = 1;
3608 /* The various ABIs include space for the outgoing parameters in the
3609 size of the current function's stack frame. We don't need to align
3610 for the outgoing arguments as their alignment is set by the final
3611 rounding for the frame as a whole. */
3612 size += current_function_outgoing_args_size;
3614 /* Allocate space for the fixed frame marker. This space must be
3615 allocated for any function that makes calls or allocates
3616 stack space. */
3617 if (!current_function_is_leaf || size)
3618 size += TARGET_64BIT ? 48 : 32;
3620 /* Finally, round to the preferred stack boundary. */
3621 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3622 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
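/* A worked example on the 32-bit port: 13 bytes of locals round up
   to 16, STARTING_FRAME_OFFSET and any callee saves are added, a
   non-leaf function gets the 32-byte frame marker, and the total is
   rounded up to the preferred stack boundary (64 bytes there). */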
3625 /* Generate the assembly code for function entry. FILE is a stdio
3626 stream to output the code to. SIZE is an int: how many units of
3627 temporary storage to allocate.
3629 Refer to the array `regs_ever_live' to determine which registers to
3630 save; `regs_ever_live[I]' is nonzero if register number I is ever
3631 used in the function. This function is responsible for knowing
3632 which registers should not be saved even if used. */
3634 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3635 of memory. If any fpu reg is used in the function, we allocate
3636 such a block here, at the bottom of the frame, just in case it's needed.
3638 If this function is a leaf procedure, then we may choose not
3639 to do a "save" insn. The decision about whether or not
3640 to do this is made in regclass.c. */
3642 static void
3643 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3645 /* The function's label and associated .PROC must never be
3646 separated and must be output *after* any profiling declarations
3647 to avoid changing spaces/subspaces within a procedure. */
3648 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3649 fputs ("\t.PROC\n", file);
3651 /* hppa_expand_prologue does the dirty work now. We just need
3652 to output the assembler directives which denote the start
3653 of a function. */
3654 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3655 if (regs_ever_live[2])
3656 fputs (",CALLS,SAVE_RP", file);
3657 else
3658 fputs (",NO_CALLS", file);
3660 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3661 at the beginning of the frame and that it is used as the frame
3662 pointer for the frame. We do this because our current frame
3663 layout doesn't conform to that specified in the HP runtime
3664 documentation and we need a way to indicate to programs such as
3665 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3666 isn't used by HP compilers but is supported by the assembler.
3667 However, SAVE_SP is supposed to indicate that the previous stack
3668 pointer has been saved in the frame marker. */
3669 if (frame_pointer_needed)
3670 fputs (",SAVE_SP", file);
3672 /* Pass on information about the number of callee register saves
3673 performed in the prologue.
3675 The compiler is supposed to pass the highest register number
3676 saved, the assembler then has to adjust that number before
3677 entering it into the unwind descriptor (to account for any
3678 caller saved registers with lower register numbers than the
3679 first callee saved register). */
3680 if (gr_saved)
3681 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3683 if (fr_saved)
3684 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
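/* A typical directive produced by the code above might look like
   ".CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=5,ENTRY_FR=15". */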
3686 fputs ("\n\t.ENTRY\n", file);
3688 remove_useless_addtr_insns (0);
3691 void
3692 hppa_expand_prologue (void)
3694 int merge_sp_adjust_with_store = 0;
3695 HOST_WIDE_INT size = get_frame_size ();
3696 HOST_WIDE_INT offset;
3697 int i;
3698 rtx insn, tmpreg;
3700 gr_saved = 0;
3701 fr_saved = 0;
3702 save_fregs = 0;
3704 /* Compute total size for frame pointer, filler, locals and rounding to
3705 the next word boundary. Similar code appears in compute_frame_size
3706 and must be changed in tandem with this code. */
3707 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3708 if (local_fsize || frame_pointer_needed)
3709 local_fsize += STARTING_FRAME_OFFSET;
3711 actual_fsize = compute_frame_size (size, &save_fregs);
3713 /* Compute a few things we will use often. */
3714 tmpreg = gen_rtx_REG (word_mode, 1);
3716 /* Save RP first. The calling conventions manual states RP will
3717 always be stored into the caller's frame at sp - 20 or sp - 16
3718 depending on which ABI is in use. */
3719 if (regs_ever_live[2] || current_function_calls_eh_return)
3720 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3722 /* Allocate the local frame and set up the frame pointer if needed. */
3723 if (actual_fsize != 0)
3725 if (frame_pointer_needed)
3727 /* Copy the old frame pointer temporarily into %r1. Set up the
3728 new stack pointer, then store away the saved old frame pointer
3729 into the stack at sp and at the same time update the stack
3730 pointer by actual_fsize bytes. Two versions: the first
3731 handles small (<8k) frames, the second handles large (>=8k)
3732 frames. */
3733 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3734 if (DO_FRAME_NOTES)
3736 /* We need to record the frame pointer save here since the
3737 new frame pointer is set in the following insn. */
3738 RTX_FRAME_RELATED_P (insn) = 1;
3739 REG_NOTES (insn)
3740 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3741 gen_rtx_SET (VOIDmode,
3742 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3743 frame_pointer_rtx),
3744 REG_NOTES (insn));
3747 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3748 if (DO_FRAME_NOTES)
3749 RTX_FRAME_RELATED_P (insn) = 1;
3751 if (VAL_14_BITS_P (actual_fsize))
3752 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3753 else
3755 /* It is incorrect to store the saved frame pointer at *sp,
3756 then increment sp (writes beyond the current stack boundary).
3758 So instead use stwm to store at *sp and post-increment the
3759 stack pointer as an atomic operation. Then increment sp to
3760 finish allocating the new frame. */
3761 HOST_WIDE_INT adjust1 = 8192 - 64;
3762 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3764 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3765 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3766 adjust2, 1);
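/* For instance, with actual_fsize == 20000 the post-modify store
   above saves the old frame pointer and advances sp by 8128 in one
   insn, and set_reg_plus_d then adds the remaining 11872. */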
3769 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3770 we need to store the previous stack pointer (frame pointer)
3771 into the frame marker on targets that use the HP unwind
3772 library. This allows the HP unwind library to be used to
3773 unwind GCC frames. However, we are not fully compatible
3774 with the HP library because our frame layout differs from
3775 that specified in the HP runtime specification.
3777 We don't want a frame note on this instruction as the frame
3778 marker moves during dynamic stack allocation.
3780 This instruction also serves as a blockage to prevent
3781 register spills from being scheduled before the stack
3782 pointer is raised. This is necessary as we store
3783 registers using the frame pointer as a base register,
3784 and the frame pointer is set before sp is raised. */
3785 if (TARGET_HPUX_UNWIND_LIBRARY)
3787 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3788 GEN_INT (TARGET_64BIT ? -8 : -4));
3790 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3791 frame_pointer_rtx);
3793 else
3794 emit_insn (gen_blockage ());
3796 /* No frame pointer needed. */
3797 else
3799 /* In some cases we can perform the first callee register save
3800 and allocating the stack frame at the same time. If so, just
3801 make a note of it and defer allocating the frame until saving
3802 the callee registers. */
3803 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3804 merge_sp_adjust_with_store = 1;
3805 /* Cannot optimize. Adjust the stack frame by actual_fsize
3806 bytes. */
3807 else
3808 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3809 actual_fsize, 1);
3813 /* Normal register save.
3815 Do not save the frame pointer in the frame_pointer_needed case. It
3816 was done earlier. */
3817 if (frame_pointer_needed)
3819 offset = local_fsize;
3821 /* Saving the EH return data registers in the frame is the simplest
3822 way to get the frame unwind information emitted. We put them
3823 just before the general registers. */
3824 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3826 unsigned int i, regno;
3828 for (i = 0; ; ++i)
3830 regno = EH_RETURN_DATA_REGNO (i);
3831 if (regno == INVALID_REGNUM)
3832 break;
3834 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3835 offset += UNITS_PER_WORD;
3839 for (i = 18; i >= 4; i--)
3840 if (regs_ever_live[i] && ! call_used_regs[i])
3842 store_reg (i, offset, FRAME_POINTER_REGNUM);
3843 offset += UNITS_PER_WORD;
3844 gr_saved++;
3846 /* Account for %r3 which is saved in a special place. */
3847 gr_saved++;
3849 /* No frame pointer needed. */
3850 else
3852 offset = local_fsize - actual_fsize;
3854 /* Saving the EH return data registers in the frame is the simplest
3855 way to get the frame unwind information emitted. */
3856 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3858 unsigned int i, regno;
3860 for (i = 0; ; ++i)
3862 regno = EH_RETURN_DATA_REGNO (i);
3863 if (regno == INVALID_REGNUM)
3864 break;
3866 /* If merge_sp_adjust_with_store is nonzero, then we can
3867 optimize the first save. */
3868 if (merge_sp_adjust_with_store)
3870 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3871 merge_sp_adjust_with_store = 0;
3873 else
3874 store_reg (regno, offset, STACK_POINTER_REGNUM);
3875 offset += UNITS_PER_WORD;
3879 for (i = 18; i >= 3; i--)
3880 if (regs_ever_live[i] && ! call_used_regs[i])
3882 /* If merge_sp_adjust_with_store is nonzero, then we can
3883 optimize the first GR save. */
3884 if (merge_sp_adjust_with_store)
3886 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3887 merge_sp_adjust_with_store = 0;
3889 else
3890 store_reg (i, offset, STACK_POINTER_REGNUM);
3891 offset += UNITS_PER_WORD;
3892 gr_saved++;
3895 /* If we wanted to merge the SP adjustment with a GR save, but we never
3896 did any GR saves, then just emit the adjustment here. */
3897 if (merge_sp_adjust_with_store)
3898 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3899 actual_fsize, 1);
3902 /* The hppa calling conventions say that %r19, the pic offset
3903 register, is saved at sp - 32 (in this function's frame)
3904 when generating PIC code. FIXME: What is the correct thing
3905 to do for functions which make no calls and allocate no
3906 frame? Do we need to allocate a frame, or can we just omit
3907 the save? For now we'll just omit the save.
3909 We don't want a note on this insn as the frame marker can
3910 move if there is a dynamic stack allocation. */
3911 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3913 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3915 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3919 /* Align pointer properly (doubleword boundary). */
3920 offset = (offset + 7) & ~7;
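/* For example, if the general register saves above left offset == 92,
   then (92 + 7) & ~7 == 96, so the FP save area starts at the next
   doubleword boundary.  */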
3922 /* Floating point register store. */
3923 if (save_fregs)
3925 rtx base;
3927 /* First get the frame or stack pointer to the start of the FP register
3928 save area. */
3929 if (frame_pointer_needed)
3931 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3932 base = frame_pointer_rtx;
3934 else
3936 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3937 base = stack_pointer_rtx;
3940 /* Now actually save the FP registers. */
3941 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3943 if (regs_ever_live[i]
3944 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3946 rtx addr, insn, reg;
3947 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3948 reg = gen_rtx_REG (DFmode, i);
3949 insn = emit_move_insn (addr, reg);
3950 if (DO_FRAME_NOTES)
3952 RTX_FRAME_RELATED_P (insn) = 1;
3953 if (TARGET_64BIT)
3955 rtx mem = gen_rtx_MEM (DFmode,
3956 plus_constant (base, offset));
3957 REG_NOTES (insn)
3958 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3959 gen_rtx_SET (VOIDmode, mem, reg),
3960 REG_NOTES (insn));
3962 else
3964 rtx meml = gen_rtx_MEM (SFmode,
3965 plus_constant (base, offset));
3966 rtx memr = gen_rtx_MEM (SFmode,
3967 plus_constant (base, offset + 4));
3968 rtx regl = gen_rtx_REG (SFmode, i);
3969 rtx regr = gen_rtx_REG (SFmode, i + 1);
3970 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3971 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3972 rtvec vec;
3974 RTX_FRAME_RELATED_P (setl) = 1;
3975 RTX_FRAME_RELATED_P (setr) = 1;
3976 vec = gen_rtvec (2, setl, setr);
3977 REG_NOTES (insn)
3978 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3979 gen_rtx_SEQUENCE (VOIDmode, vec),
3980 REG_NOTES (insn));
3983 offset += GET_MODE_SIZE (DFmode);
3984 fr_saved++;
3990 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3991 Handle case where DISP > 8k by using the add_high_const patterns. */
3993 static void
3994 load_reg (int reg, HOST_WIDE_INT disp, int base)
3996 rtx dest = gen_rtx_REG (word_mode, reg);
3997 rtx basereg = gen_rtx_REG (Pmode, base);
3998 rtx src;
4000 if (VAL_14_BITS_P (disp))
4001 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4002 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4004 rtx delta = GEN_INT (disp);
4005 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4007 emit_move_insn (tmpreg, delta);
4008 if (TARGET_DISABLE_INDEXING)
4010 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4011 src = gen_rtx_MEM (word_mode, tmpreg);
4013 else
4014 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4016 else
4018 rtx delta = GEN_INT (disp);
4019 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4020 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4022 emit_move_insn (tmpreg, high);
4023 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4026 emit_move_insn (dest, src);
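/* Illustrative sketch (not in the original): for a displacement that
   does not fit in 14 bits, say DISP == 12288 with BASE %r3 and REG %r4,
   the final arm above produces the usual PA idiom, roughly

       addil L'12288,%r3        ; %r1 = %r3 + left (high) part of 12288
       ldw R'12288(%r1),%r4     ; right (low) part folded into the load

   where L' and R' are the standard field selectors.  */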
4029 /* Update the total code bytes output to the text section. */
4031 static void
4032 update_total_code_bytes (int nbytes)
4034 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4035 && !IN_NAMED_SECTION_P (cfun->decl))
4037 if (INSN_ADDRESSES_SET_P ())
4039 unsigned long old_total = total_code_bytes;
4041 total_code_bytes += nbytes;
4043 /* Be prepared to handle overflows. */
4044 if (old_total > total_code_bytes)
4045 total_code_bytes = -1;
4047 else
4048 total_code_bytes = -1;
4052 /* This function generates the assembly code for function exit.
4053 Args are as for output_function_prologue ().
4055 The function epilogue should not depend on the current stack
4056 pointer! It should use the frame pointer only. This is mandatory
4057 because of alloca; we also take advantage of it to omit stack
4058 adjustments before returning. */
4060 static void
4061 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4063 rtx insn = get_last_insn ();
4065 last_address = 0;
4067 /* hppa_expand_epilogue does the dirty work now. We just need
4068 to output the assembler directives which denote the end
4069 of a function.
4071 To make debuggers happy, emit a nop if the epilogue was completely
4072 eliminated due to a volatile call as the last insn in the
4073 current function. That way the return address (in %r2) will
4074 always point to a valid instruction in the current function. */
4076 /* Get the last real insn. */
4077 if (GET_CODE (insn) == NOTE)
4078 insn = prev_real_insn (insn);
4080 /* If it is a sequence, then look inside. */
4081 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4082 insn = XVECEXP (PATTERN (insn), 0, 0);
4084 /* If insn is a CALL_INSN, then it must be a call to a volatile
4085 function (otherwise there would be epilogue insns). */
4086 if (insn && GET_CODE (insn) == CALL_INSN)
4088 fputs ("\tnop\n", file);
4089 last_address += 4;
4092 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4094 if (INSN_ADDRESSES_SET_P ())
4096 insn = get_last_nonnote_insn ();
4097 last_address += INSN_ADDRESSES (INSN_UID (insn));
4098 if (INSN_P (insn))
4099 last_address += insn_default_length (insn);
4100 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4101 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4104 /* Finally, update the total number of code bytes output so far. */
4105 update_total_code_bytes (last_address);
4108 void
4109 hppa_expand_epilogue (void)
4111 rtx tmpreg;
4112 HOST_WIDE_INT offset;
4113 HOST_WIDE_INT ret_off = 0;
4114 int i;
4115 int merge_sp_adjust_with_load = 0;
4117 /* We will use this often. */
4118 tmpreg = gen_rtx_REG (word_mode, 1);
4120 /* Try to restore RP early to avoid load/use interlocks when
4121 RP gets used in the return (bv) instruction. This appears to still
4122 be necessary even when we schedule the prologue and epilogue. */
4123 if (regs_ever_live [2] || current_function_calls_eh_return)
4125 ret_off = TARGET_64BIT ? -16 : -20;
4126 if (frame_pointer_needed)
4128 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4129 ret_off = 0;
4131 else
4133 /* No frame pointer, and stack is smaller than 8k. */
4134 if (VAL_14_BITS_P (ret_off - actual_fsize))
4136 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4137 ret_off = 0;
4142 /* General register restores. */
4143 if (frame_pointer_needed)
4145 offset = local_fsize;
4147 /* If the current function calls __builtin_eh_return, then we need
4148 to restore the saved EH data registers. */
4149 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4151 unsigned int i, regno;
4153 for (i = 0; ; ++i)
4155 regno = EH_RETURN_DATA_REGNO (i);
4156 if (regno == INVALID_REGNUM)
4157 break;
4159 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4160 offset += UNITS_PER_WORD;
4164 for (i = 18; i >= 4; i--)
4165 if (regs_ever_live[i] && ! call_used_regs[i])
4167 load_reg (i, offset, FRAME_POINTER_REGNUM);
4168 offset += UNITS_PER_WORD;
4171 else
4173 offset = local_fsize - actual_fsize;
4175 /* If the current function calls __builtin_eh_return, then we need
4176 to restore the saved EH data registers. */
4177 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4179 unsigned int i, regno;
4181 for (i = 0; ; ++i)
4183 regno = EH_RETURN_DATA_REGNO (i);
4184 if (regno == INVALID_REGNUM)
4185 break;
4187 /* Only for the first load.
4188 merge_sp_adjust_with_load holds the register load
4189 with which we will merge the sp adjustment. */
4190 if (merge_sp_adjust_with_load == 0
4191 && local_fsize == 0
4192 && VAL_14_BITS_P (-actual_fsize))
4193 merge_sp_adjust_with_load = regno;
4194 else
4195 load_reg (regno, offset, STACK_POINTER_REGNUM);
4196 offset += UNITS_PER_WORD;
4200 for (i = 18; i >= 3; i--)
4202 if (regs_ever_live[i] && ! call_used_regs[i])
4204 /* Only for the first load.
4205 merge_sp_adjust_with_load holds the register load
4206 with which we will merge the sp adjustment. */
4207 if (merge_sp_adjust_with_load == 0
4208 && local_fsize == 0
4209 && VAL_14_BITS_P (-actual_fsize))
4210 merge_sp_adjust_with_load = i;
4211 else
4212 load_reg (i, offset, STACK_POINTER_REGNUM);
4213 offset += UNITS_PER_WORD;
4218 /* Align pointer properly (doubleword boundary). */
4219 offset = (offset + 7) & ~7;
4221 /* FP register restores. */
4222 if (save_fregs)
4224 /* Adjust the register to index off of. */
4225 if (frame_pointer_needed)
4226 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4227 else
4228 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4230 /* Actually do the restores now. */
4231 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4232 if (regs_ever_live[i]
4233 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4235 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4236 rtx dest = gen_rtx_REG (DFmode, i);
4237 emit_move_insn (dest, src);
4241 /* Emit a blockage insn here to keep these insns from being moved to
4242 an earlier spot in the epilogue, or into the main instruction stream.
4244 This is necessary as we must not cut the stack back before all the
4245 restores are finished. */
4246 emit_insn (gen_blockage ());
4248 /* Reset stack pointer (and possibly frame pointer). The stack
4249 pointer is initially set to fp + 64 to avoid a race condition. */
4250 if (frame_pointer_needed)
4252 rtx delta = GEN_INT (-64);
4254 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4255 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
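/* Net effect (illustrative): sp is first parked at fp + 64, then the
   pre_load performs a single modify-before load -- sp drops by 64 back
   to the old frame pointer value while %r3 is reloaded from the saved
   copy the prologue left at the frame base, so the frame cut-back and
   the restore happen in one insn.  */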
4257 /* If we were deferring a callee register restore, do it now. */
4258 else if (merge_sp_adjust_with_load)
4260 rtx delta = GEN_INT (-actual_fsize);
4261 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4263 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4265 else if (actual_fsize != 0)
4266 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4267 - actual_fsize, 0);
4269 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4270 frame greater than 8k), do so now. */
4271 if (ret_off != 0)
4272 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4274 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4276 rtx sa = EH_RETURN_STACKADJ_RTX;
4278 emit_insn (gen_blockage ());
4279 emit_insn (TARGET_64BIT
4280 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4281 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4285 rtx
4286 hppa_pic_save_rtx (void)
4288 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4291 void
4292 hppa_profile_hook (int label_no)
4294 /* We use SImode for the address of the function in both 32 and
4295 64-bit code to avoid having to provide DImode versions of the
4296 lcla2 and load_offset_label_address insn patterns. */
4297 rtx reg = gen_reg_rtx (SImode);
4298 rtx label_rtx = gen_label_rtx ();
4299 rtx begin_label_rtx, call_insn;
4300 char begin_label_name[16];
4302 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4303 label_no);
4304 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4306 if (TARGET_64BIT)
4307 emit_move_insn (arg_pointer_rtx,
4308 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4309 GEN_INT (64)));
4311 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4313 /* The address of the function is loaded into %r25 with an instruction-
4314 relative sequence that avoids the use of relocations. The sequence
4315 is split so that the load_offset_label_address instruction can
4316 occupy the delay slot of the call to _mcount. */
4317 if (TARGET_PA_20)
4318 emit_insn (gen_lcla2 (reg, label_rtx));
4319 else
4320 emit_insn (gen_lcla1 (reg, label_rtx));
4322 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4323 reg, begin_label_rtx, label_rtx));
4325 #ifndef NO_PROFILE_COUNTERS
4327 rtx count_label_rtx, addr, r24;
4328 char count_label_name[16];
4330 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4331 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4333 addr = force_reg (Pmode, count_label_rtx);
4334 r24 = gen_rtx_REG (Pmode, 24);
4335 emit_move_insn (r24, addr);
4337 call_insn =
4338 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4339 gen_rtx_SYMBOL_REF (Pmode,
4340 "_mcount")),
4341 GEN_INT (TARGET_64BIT ? 24 : 12)));
4343 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4345 #else
4347 call_insn =
4348 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4349 gen_rtx_SYMBOL_REF (Pmode,
4350 "_mcount")),
4351 GEN_INT (TARGET_64BIT ? 16 : 8)));
4353 #endif
4355 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4356 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4358 /* Indicate the _mcount call cannot throw, nor will it execute a
4359 non-local goto. */
4360 REG_NOTES (call_insn)
4361 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4364 /* Fetch the return address for the frame COUNT steps up from
4365 the current frame, after the prologue. FRAMEADDR is the
4366 frame pointer of the COUNT frame.
4368 We want to ignore any export stub remnants here. To handle this,
4369 we examine the code at the return address, and if it is an export
4370 stub, we return a memory rtx for the stub return address stored
4371 at frame-24.
4373 The value returned is used in two different ways:
4375 1. To find a function's caller.
4377 2. To change the return address for a function.
4379 This function handles most instances of case 1; however, it will
4380 fail if there are two levels of stubs to execute on the return
4381 path. The only way I believe that can happen is if the return value
4382 needs a parameter relocation, which never happens for C code.
4384 This function handles most instances of case 2; however, it will
4385 fail if we did not originally have stub code on the return path
4386 but will need stub code on the new return path. This can happen if
4387 the caller & callee are both in the main program, but the new
4388 return location is in a shared library. */
4390 rtx
4391 return_addr_rtx (int count, rtx frameaddr)
4393 rtx label;
4394 rtx rp;
4395 rtx saved_rp;
4396 rtx ins;
4398 if (count != 0)
4399 return NULL_RTX;
4401 rp = get_hard_reg_initial_val (Pmode, 2);
4403 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4404 return rp;
4406 saved_rp = gen_reg_rtx (Pmode);
4407 emit_move_insn (saved_rp, rp);
4409 /* Get pointer to the instruction stream. We have to mask out the
4410 privilege level from the two low order bits of the return address
4411 pointer here so that ins will point to the start of the first
4412 instruction that would have been executed if we returned. */
4413 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4414 label = gen_label_rtx ();
4416 /* Check the instruction stream at the normal return address for the
4417 export stub:
4419 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4420 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4421 0x00011820 | stub+16: mtsp r1,sr0
4422 0xe0400002 | stub+20: be,n 0(sr0,rp)
4424 If it is an export stub, then our return address is really in
4425 -24[frameaddr]. */
4427 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4428 NULL_RTX, SImode, 1);
4429 emit_jump_insn (gen_bne (label));
4431 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4432 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4433 emit_jump_insn (gen_bne (label));
4435 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4436 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4437 emit_jump_insn (gen_bne (label));
4439 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4440 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4442 /* If there is no export stub then just use the value saved from
4443 the return pointer register. */
4445 emit_jump_insn (gen_bne (label));
4447 /* Here we know that our return address points to an export
4448 stub. We don't want to return the address of the export stub,
4449 but rather the return address of the export stub. That return
4450 address is stored at -24[frameaddr]. */
4452 emit_move_insn (saved_rp,
4453 gen_rtx_MEM (Pmode,
4454 memory_address (Pmode,
4455 plus_constant (frameaddr,
4456 -24))));
4458 emit_label (label);
4459 return saved_rp;
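/* Usage sketch: this routine backs RETURN_ADDR_RTX, so a source-level

       void *ra = __builtin_return_address (0);

   yields either the saved %r2 value or, when the four stub words above
   all match, the real return address recovered from -24[frameaddr].  */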
4462 /* This is only valid once reload has completed because it depends on
4463 knowing exactly how much (if any) frame there is and...
4465 It's only valid if there is no frame marker to de-allocate and...
4467 It's only valid if %r2 hasn't been saved into the caller's frame
4468 (we're not profiling and %r2 isn't live anywhere). */
4469 int
4470 hppa_can_use_return_insn_p (void)
4472 return (reload_completed
4473 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4474 && ! regs_ever_live[2]
4475 && ! frame_pointer_needed);
4478 void
4479 emit_bcond_fp (enum rtx_code code, rtx operand0)
4481 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4482 gen_rtx_IF_THEN_ELSE (VOIDmode,
4483 gen_rtx_fmt_ee (code,
4484 VOIDmode,
4485 gen_rtx_REG (CCFPmode, 0),
4486 const0_rtx),
4487 gen_rtx_LABEL_REF (VOIDmode, operand0),
4488 pc_rtx)));
4492 rtx
4493 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4495 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4496 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4499 /* Adjust the cost of a scheduling dependency. Return the new cost of
4500 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4502 static int
4503 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4505 enum attr_type attr_type;
4507 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4508 true dependencies as they are described with bypasses now. */
4509 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4510 return cost;
4512 if (! recog_memoized (insn))
4513 return 0;
4515 attr_type = get_attr_type (insn);
4517 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4519 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4520 cycles later. */
4522 if (attr_type == TYPE_FPLOAD)
4524 rtx pat = PATTERN (insn);
4525 rtx dep_pat = PATTERN (dep_insn);
4526 if (GET_CODE (pat) == PARALLEL)
4528 /* This happens for the fldXs,mb patterns. */
4529 pat = XVECEXP (pat, 0, 0);
4531 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4532 /* If this happens, we have to extend this to schedule
4533 optimally. Return 0 for now. */
4534 return 0;
4536 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4538 if (! recog_memoized (dep_insn))
4539 return 0;
4540 switch (get_attr_type (dep_insn))
4542 case TYPE_FPALU:
4543 case TYPE_FPMULSGL:
4544 case TYPE_FPMULDBL:
4545 case TYPE_FPDIVSGL:
4546 case TYPE_FPDIVDBL:
4547 case TYPE_FPSQRTSGL:
4548 case TYPE_FPSQRTDBL:
4549 /* A fpload can't be issued until one cycle before a
4550 preceding arithmetic operation has finished if
4551 the target of the fpload is any of the sources
4552 (or destination) of the arithmetic operation. */
4553 return insn_default_latency (dep_insn) - 1;
4555 default:
4556 return 0;
4560 else if (attr_type == TYPE_FPALU)
4562 rtx pat = PATTERN (insn);
4563 rtx dep_pat = PATTERN (dep_insn);
4564 if (GET_CODE (pat) == PARALLEL)
4566 /* This happens for the fldXs,mb patterns. */
4567 pat = XVECEXP (pat, 0, 0);
4569 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4570 /* If this happens, we have to extend this to schedule
4571 optimally. Return 0 for now. */
4572 return 0;
4574 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4576 if (! recog_memoized (dep_insn))
4577 return 0;
4578 switch (get_attr_type (dep_insn))
4580 case TYPE_FPDIVSGL:
4581 case TYPE_FPDIVDBL:
4582 case TYPE_FPSQRTSGL:
4583 case TYPE_FPSQRTDBL:
4584 /* An ALU flop can't be issued until two cycles before a
4585 preceding divide or sqrt operation has finished if
4586 the target of the ALU flop is any of the sources
4587 (or destination) of the divide or sqrt operation. */
4588 return insn_default_latency (dep_insn) - 2;
4590 default:
4591 return 0;
4596 /* For other anti dependencies, the cost is 0. */
4597 return 0;
4599 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4601 /* Output dependency; DEP_INSN writes a register that INSN writes some
4602 cycles later. */
4603 if (attr_type == TYPE_FPLOAD)
4605 rtx pat = PATTERN (insn);
4606 rtx dep_pat = PATTERN (dep_insn);
4607 if (GET_CODE (pat) == PARALLEL)
4609 /* This happens for the fldXs,mb patterns. */
4610 pat = XVECEXP (pat, 0, 0);
4612 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4613 /* If this happens, we have to extend this to schedule
4614 optimally. Return 0 for now. */
4615 return 0;
4617 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4619 if (! recog_memoized (dep_insn))
4620 return 0;
4621 switch (get_attr_type (dep_insn))
4623 case TYPE_FPALU:
4624 case TYPE_FPMULSGL:
4625 case TYPE_FPMULDBL:
4626 case TYPE_FPDIVSGL:
4627 case TYPE_FPDIVDBL:
4628 case TYPE_FPSQRTSGL:
4629 case TYPE_FPSQRTDBL:
4630 /* A fpload can't be issued until one cycle before a
4631 preceding arithmetic operation has finished if
4632 the target of the fpload is the destination of the
4633 arithmetic operation.
4635 Exception: For PA7100LC, PA7200 and PA7300, the cost
4636 is 3 cycles, unless they bundle together. We also
4637 pay the penalty if the second insn is a fpload. */
4638 return insn_default_latency (dep_insn) - 1;
4640 default:
4641 return 0;
4645 else if (attr_type == TYPE_FPALU)
4647 rtx pat = PATTERN (insn);
4648 rtx dep_pat = PATTERN (dep_insn);
4649 if (GET_CODE (pat) == PARALLEL)
4651 /* This happens for the fldXs,mb patterns. */
4652 pat = XVECEXP (pat, 0, 0);
4654 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4655 /* If this happens, we have to extend this to schedule
4656 optimally. Return 0 for now. */
4657 return 0;
4659 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4661 if (! recog_memoized (dep_insn))
4662 return 0;
4663 switch (get_attr_type (dep_insn))
4665 case TYPE_FPDIVSGL:
4666 case TYPE_FPDIVDBL:
4667 case TYPE_FPSQRTSGL:
4668 case TYPE_FPSQRTDBL:
4669 /* An ALU flop can't be issued until two cycles before a
4670 preceding divide or sqrt operation has finished if
4671 the target of the ALU flop is also the target of
4672 the divide or sqrt operation. */
4673 return insn_default_latency (dep_insn) - 2;
4675 default:
4676 return 0;
4681 /* For other output dependencies, the cost is 0. */
4682 return 0;
4684 else
4685 abort ();
4688 /* Adjust scheduling priorities. We use this to try to keep addil
4689 and the next use of %r1 close together. */
4690 static int
4691 pa_adjust_priority (rtx insn, int priority)
4693 rtx set = single_set (insn);
4694 rtx src, dest;
4695 if (set)
4697 src = SET_SRC (set);
4698 dest = SET_DEST (set);
4699 if (GET_CODE (src) == LO_SUM
4700 && symbolic_operand (XEXP (src, 1), VOIDmode)
4701 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4702 priority >>= 3;
4704 else if (GET_CODE (src) == MEM
4705 && GET_CODE (XEXP (src, 0)) == LO_SUM
4706 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4707 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4708 priority >>= 1;
4710 else if (GET_CODE (dest) == MEM
4711 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4712 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4713 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4714 priority >>= 3;
4716 return priority;
4719 /* The 700 can only issue a single insn at a time.
4720 The 7XXX processors can issue two insns at a time.
4721 The 8000 can issue 4 insns at a time. */
4722 static int
4723 pa_issue_rate (void)
4725 switch (pa_cpu)
4727 case PROCESSOR_700: return 1;
4728 case PROCESSOR_7100: return 2;
4729 case PROCESSOR_7100LC: return 2;
4730 case PROCESSOR_7200: return 2;
4731 case PROCESSOR_7300: return 2;
4732 case PROCESSOR_8000: return 4;
4734 default:
4735 abort ();
4741 /* Return any length adjustment needed by INSN which already has its length
4742 computed as LENGTH. Return zero if no adjustment is necessary.
4744 For the PA: function calls, millicode calls, and backwards short
4745 conditional branches with unfilled delay slots need an adjustment by +1
4746 (to account for the NOP which will be inserted into the instruction stream).
4748 Also compute the length of an inline block move here as it is too
4749 complicated to express as a length attribute in pa.md. */
4750 int
4751 pa_adjust_insn_length (rtx insn, int length)
4753 rtx pat = PATTERN (insn);
4755 /* Jumps inside switch tables which have unfilled delay slots need
4756 adjustment. */
4757 if (GET_CODE (insn) == JUMP_INSN
4758 && GET_CODE (pat) == PARALLEL
4759 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4760 return 4;
4761 /* Millicode insn with an unfilled delay slot. */
4762 else if (GET_CODE (insn) == INSN
4763 && GET_CODE (pat) != SEQUENCE
4764 && GET_CODE (pat) != USE
4765 && GET_CODE (pat) != CLOBBER
4766 && get_attr_type (insn) == TYPE_MILLI)
4767 return 4;
4768 /* Block move pattern. */
4769 else if (GET_CODE (insn) == INSN
4770 && GET_CODE (pat) == PARALLEL
4771 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4772 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4773 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4774 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4775 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4776 return compute_movstr_length (insn) - 4;
4777 /* Block clear pattern. */
4778 else if (GET_CODE (insn) == INSN
4779 && GET_CODE (pat) == PARALLEL
4780 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4781 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4782 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4783 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4784 return compute_clrstr_length (insn) - 4;
4785 /* Conditional branch with an unfilled delay slot. */
4786 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4788 /* Adjust a short backwards conditional with an unfilled delay slot. */
4789 if (GET_CODE (pat) == SET
4790 && length == 4
4791 && ! forward_branch_p (insn))
4792 return 4;
4793 else if (GET_CODE (pat) == PARALLEL
4794 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4795 && length == 4)
4796 return 4;
4797 /* Adjust dbra insn with short backwards conditional branch with
4798 unfilled delay slot -- only for case where counter is in a
4799 general register. */
4800 else if (GET_CODE (pat) == PARALLEL
4801 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4802 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4803 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4804 && length == 4
4805 && ! forward_branch_p (insn))
4806 return 4;
4807 else
4808 return 0;
4810 return 0;
4813 /* Print operand X (an rtx) in assembler syntax to file FILE.
4814 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4815 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4817 void
4818 print_operand (FILE *file, rtx x, int code)
4820 switch (code)
4822 case '#':
4823 /* Output a 'nop' if there's nothing for the delay slot. */
4824 if (dbr_sequence_length () == 0)
4825 fputs ("\n\tnop", file);
4826 return;
4827 case '*':
4828 /* Output a nullification completer if there's nothing for the
4829 delay slot or nullification is requested. */
4830 if (dbr_sequence_length () == 0 ||
4831 (final_sequence &&
4832 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4833 fputs (",n", file);
4834 return;
4835 case 'R':
4836 /* Print out the second register name of a register pair.
4837 I.e., R (6) => 7. */
4838 fputs (reg_names[REGNO (x) + 1], file);
4839 return;
4840 case 'r':
4841 /* A register or zero. */
4842 if (x == const0_rtx
4843 || (x == CONST0_RTX (DFmode))
4844 || (x == CONST0_RTX (SFmode)))
4846 fputs ("%r0", file);
4847 return;
4849 else
4850 break;
4851 case 'f':
4852 /* A register or zero (floating point). */
4853 if (x == const0_rtx
4854 || (x == CONST0_RTX (DFmode))
4855 || (x == CONST0_RTX (SFmode)))
4857 fputs ("%fr0", file);
4858 return;
4860 else
4861 break;
4862 case 'A':
4864 rtx xoperands[2];
4866 xoperands[0] = XEXP (XEXP (x, 0), 0);
4867 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4868 output_global_address (file, xoperands[1], 0);
4869 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4870 return;
4873 case 'C': /* Plain (C)ondition */
4874 case 'X':
4875 switch (GET_CODE (x))
4877 case EQ:
4878 fputs ("=", file); break;
4879 case NE:
4880 fputs ("<>", file); break;
4881 case GT:
4882 fputs (">", file); break;
4883 case GE:
4884 fputs (">=", file); break;
4885 case GEU:
4886 fputs (">>=", file); break;
4887 case GTU:
4888 fputs (">>", file); break;
4889 case LT:
4890 fputs ("<", file); break;
4891 case LE:
4892 fputs ("<=", file); break;
4893 case LEU:
4894 fputs ("<<=", file); break;
4895 case LTU:
4896 fputs ("<<", file); break;
4897 default:
4898 abort ();
4900 return;
4901 case 'N': /* Condition, (N)egated */
4902 switch (GET_CODE (x))
4904 case EQ:
4905 fputs ("<>", file); break;
4906 case NE:
4907 fputs ("=", file); break;
4908 case GT:
4909 fputs ("<=", file); break;
4910 case GE:
4911 fputs ("<", file); break;
4912 case GEU:
4913 fputs ("<<", file); break;
4914 case GTU:
4915 fputs ("<<=", file); break;
4916 case LT:
4917 fputs (">=", file); break;
4918 case LE:
4919 fputs (">", file); break;
4920 case LEU:
4921 fputs (">>", file); break;
4922 case LTU:
4923 fputs (">>=", file); break;
4924 default:
4925 abort ();
4927 return;
4928 /* For floating point comparisons. Note that the output
4929 predicates are the complement of the desired mode. */
4930 case 'Y':
4931 switch (GET_CODE (x))
4933 case EQ:
4934 fputs ("!=", file); break;
4935 case NE:
4936 fputs ("=", file); break;
4937 case GT:
4938 fputs ("!>", file); break;
4939 case GE:
4940 fputs ("!>=", file); break;
4941 case LT:
4942 fputs ("!<", file); break;
4943 case LE:
4944 fputs ("!<=", file); break;
4945 case LTGT:
4946 fputs ("!<>", file); break;
4947 case UNLE:
4948 fputs (">", file); break;
4949 case UNLT:
4950 fputs (">=", file); break;
4951 case UNGE:
4952 fputs ("<", file); break;
4953 case UNGT:
4954 fputs ("<=", file); break;
4955 case UNEQ:
4956 fputs ("<>", file); break;
4957 case UNORDERED:
4958 fputs ("<=>", file); break;
4959 case ORDERED:
4960 fputs ("!<=>", file); break;
4961 default:
4962 abort ();
4964 return;
4965 case 'S': /* Condition, operands are (S)wapped. */
4966 switch (GET_CODE (x))
4968 case EQ:
4969 fputs ("=", file); break;
4970 case NE:
4971 fputs ("<>", file); break;
4972 case GT:
4973 fputs ("<", file); break;
4974 case GE:
4975 fputs ("<=", file); break;
4976 case GEU:
4977 fputs ("<<=", file); break;
4978 case GTU:
4979 fputs ("<<", file); break;
4980 case LT:
4981 fputs (">", file); break;
4982 case LE:
4983 fputs (">=", file); break;
4984 case LEU:
4985 fputs (">>=", file); break;
4986 case LTU:
4987 fputs (">>", file); break;
4988 default:
4989 abort ();
4991 return;
4992 case 'B': /* Condition, (B)oth swapped and negate. */
4993 switch (GET_CODE (x))
4995 case EQ:
4996 fputs ("<>", file); break;
4997 case NE:
4998 fputs ("=", file); break;
4999 case GT:
5000 fputs (">=", file); break;
5001 case GE:
5002 fputs (">", file); break;
5003 case GEU:
5004 fputs (">>", file); break;
5005 case GTU:
5006 fputs (">>=", file); break;
5007 case LT:
5008 fputs ("<=", file); break;
5009 case LE:
5010 fputs ("<", file); break;
5011 case LEU:
5012 fputs ("<<", file); break;
5013 case LTU:
5014 fputs ("<<=", file); break;
5015 default:
5016 abort ();
5018 return;
5019 case 'k':
5020 if (GET_CODE (x) == CONST_INT)
5022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5023 return;
5025 abort ();
5026 case 'Q':
5027 if (GET_CODE (x) == CONST_INT)
5029 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5030 return;
5032 abort ();
5033 case 'L':
5034 if (GET_CODE (x) == CONST_INT)
5036 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5037 return;
5039 abort ();
5040 case 'O':
5041 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
5043 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5044 return;
5046 abort ();
5047 case 'p':
5048 if (GET_CODE (x) == CONST_INT)
5050 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5051 return;
5053 abort ();
5054 case 'P':
5055 if (GET_CODE (x) == CONST_INT)
5057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5058 return;
5060 abort ();
5061 case 'I':
5062 if (GET_CODE (x) == CONST_INT)
5063 fputs ("i", file);
5064 return;
5065 case 'M':
5066 case 'F':
5067 switch (GET_CODE (XEXP (x, 0)))
5069 case PRE_DEC:
5070 case PRE_INC:
5071 if (ASSEMBLER_DIALECT == 0)
5072 fputs ("s,mb", file);
5073 else
5074 fputs (",mb", file);
5075 break;
5076 case POST_DEC:
5077 case POST_INC:
5078 if (ASSEMBLER_DIALECT == 0)
5079 fputs ("s,ma", file);
5080 else
5081 fputs (",ma", file);
5082 break;
5083 case PLUS:
5084 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5085 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5087 if (ASSEMBLER_DIALECT == 0)
5088 fputs ("x", file);
5090 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5091 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5093 if (ASSEMBLER_DIALECT == 0)
5094 fputs ("x,s", file);
5095 else
5096 fputs (",s", file);
5098 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5099 fputs ("s", file);
5100 break;
5101 default:
5102 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5103 fputs ("s", file);
5104 break;
5106 return;
5107 case 'G':
5108 output_global_address (file, x, 0);
5109 return;
5110 case 'H':
5111 output_global_address (file, x, 1);
5112 return;
5113 case 0: /* Don't do anything special */
5114 break;
5115 case 'Z':
5117 unsigned op[3];
5118 compute_zdepwi_operands (INTVAL (x), op);
5119 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5120 return;
5122 case 'z':
5124 unsigned op[3];
5125 compute_zdepdi_operands (INTVAL (x), op);
5126 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5127 return;
5129 case 'c':
5130 /* We can get here from a .vtable_inherit due to our
5131 CONSTANT_ADDRESS_P rejecting perfectly good constant
5132 addresses. */
5133 break;
5134 default:
5135 abort ();
5137 if (GET_CODE (x) == REG)
5139 fputs (reg_names [REGNO (x)], file);
5140 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5142 fputs ("R", file);
5143 return;
5145 if (FP_REG_P (x)
5146 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5147 && (REGNO (x) & 1) == 0)
5148 fputs ("L", file);
5150 else if (GET_CODE (x) == MEM)
5152 int size = GET_MODE_SIZE (GET_MODE (x));
5153 rtx base = NULL_RTX;
5154 switch (GET_CODE (XEXP (x, 0)))
5156 case PRE_DEC:
5157 case POST_DEC:
5158 base = XEXP (XEXP (x, 0), 0);
5159 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5160 break;
5161 case PRE_INC:
5162 case POST_INC:
5163 base = XEXP (XEXP (x, 0), 0);
5164 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5165 break;
5166 case PLUS:
5167 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5168 fprintf (file, "%s(%s)",
5169 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5170 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5171 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5172 fprintf (file, "%s(%s)",
5173 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5174 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5175 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5176 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5178 /* Because the REG_POINTER flag can get lost during reload,
5179 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5180 index and base registers in the combined move patterns. */
5181 rtx base = XEXP (XEXP (x, 0), 1);
5182 rtx index = XEXP (XEXP (x, 0), 0);
5184 fprintf (file, "%s(%s)",
5185 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5187 else
5188 output_address (XEXP (x, 0));
5189 break;
5190 default:
5191 output_address (XEXP (x, 0));
5192 break;
5195 else
5196 output_addr_const (file, x);
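/* Worked examples for the bit-field codes above (illustrative): with
   x == 5, '%L' prints 32 - (5 & 31) == 27 and '%P' prints
   31 - (5 & 31) == 26; with x == 8, '%O' prints exact_log2 (8) == 3.  */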
5199 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5201 void
5202 output_global_address (FILE *file, rtx x, int round_constant)
5205 /* Imagine (high (const (plus ...))). */
5206 if (GET_CODE (x) == HIGH)
5207 x = XEXP (x, 0);
5209 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5210 assemble_name (file, XSTR (x, 0));
5211 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5213 assemble_name (file, XSTR (x, 0));
5214 fputs ("-$global$", file);
5216 else if (GET_CODE (x) == CONST)
5218 const char *sep = "";
5219 int offset = 0; /* assembler wants -$global$ at end */
5220 rtx base = NULL_RTX;
5222 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5224 base = XEXP (XEXP (x, 0), 0);
5225 output_addr_const (file, base);
5227 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
5228 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5229 else abort ();
5231 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
5233 base = XEXP (XEXP (x, 0), 1);
5234 output_addr_const (file, base);
5236 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
5237 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5238 else abort ();
5240 /* How bogus. The compiler is apparently responsible for
5241 rounding the constant if it uses an LR field selector.
5243 The linker and/or assembler seem a better place since
5244 they have to do this kind of thing already.
5246 If we fail to do this, HP's optimizing linker may eliminate
5247 an addil, but not update the ldw/stw/ldo instruction that
5248 uses the result of the addil. */
5249 if (round_constant)
5250 offset = ((offset + 0x1000) & ~0x1fff);
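/* Worked example: offset == 0x1234 rounds to
   (0x1234 + 0x1000) & ~0x1fff == 0x2000, i.e. to the nearest multiple
   of 0x2000, mirroring the rounding an LR field selector applies.  */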
5252 if (GET_CODE (XEXP (x, 0)) == PLUS)
5254 if (offset < 0)
5256 offset = -offset;
5257 sep = "-";
5259 else
5260 sep = "+";
5262 else if (GET_CODE (XEXP (x, 0)) == MINUS
5263 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5264 sep = "-";
5265 else abort ();
5267 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5268 fputs ("-$global$", file);
5269 if (offset)
5270 fprintf (file, "%s%d", sep, offset);
5272 else
5273 output_addr_const (file, x);
5276 /* Output boilerplate text to appear at the beginning of the file.
5277 There are several possible versions. */
5278 #define aputs(x) fputs(x, asm_out_file)
5279 static inline void
5280 pa_file_start_level (void)
5282 if (TARGET_64BIT)
5283 aputs ("\t.LEVEL 2.0w\n");
5284 else if (TARGET_PA_20)
5285 aputs ("\t.LEVEL 2.0\n");
5286 else if (TARGET_PA_11)
5287 aputs ("\t.LEVEL 1.1\n");
5288 else
5289 aputs ("\t.LEVEL 1.0\n");
5292 static inline void
5293 pa_file_start_space (int sortspace)
5295 aputs ("\t.SPACE $PRIVATE$");
5296 if (sortspace)
5297 aputs (",SORT=16");
5298 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5299 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5300 "\n\t.SPACE $TEXT$");
5301 if (sortspace)
5302 aputs (",SORT=8");
5303 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5304 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5307 static inline void
5308 pa_file_start_file (int want_version)
5310 if (write_symbols != NO_DEBUG)
5312 output_file_directive (asm_out_file, main_input_filename);
5313 if (want_version)
5314 aputs ("\t.version\t\"01.01\"\n");
5318 static inline void
5319 pa_file_start_mcount (const char *aswhat)
5321 if (profile_flag)
5322 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5325 static void
5326 pa_elf_file_start (void)
5328 pa_file_start_level ();
5329 pa_file_start_mcount ("ENTRY");
5330 pa_file_start_file (0);
5333 static void
5334 pa_som_file_start (void)
5336 pa_file_start_level ();
5337 pa_file_start_space (0);
5338 aputs ("\t.IMPORT $global$,DATA\n"
5339 "\t.IMPORT $$dyncall,MILLICODE\n");
5340 pa_file_start_mcount ("CODE");
5341 pa_file_start_file (0);
5344 static void
5345 pa_linux_file_start (void)
5347 pa_file_start_file (1);
5348 pa_file_start_level ();
5349 pa_file_start_mcount ("CODE");
5352 static void
5353 pa_hpux64_gas_file_start (void)
5355 pa_file_start_level ();
5356 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5357 if (profile_flag)
5358 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5359 #endif
5360 pa_file_start_file (1);
5363 static void
5364 pa_hpux64_hpas_file_start (void)
5366 pa_file_start_level ();
5367 pa_file_start_space (1);
5368 pa_file_start_mcount ("CODE");
5369 pa_file_start_file (0);
5371 #undef aputs
5373 static struct deferred_plabel *
5374 get_plabel (const char *fname)
5376 size_t i;
5378 /* See if we have already put this function on the list of deferred
5379 plabels. This list is generally small, so a linear search is not
5380 too ugly. If it proves too slow, replace it with something faster. */
5381 for (i = 0; i < n_deferred_plabels; i++)
5382 if (strcmp (fname, deferred_plabels[i].name) == 0)
5383 break;
5385 /* If the deferred plabel list is empty, or this entry was not found
5386 on the list, create a new entry on the list. */
5387 if (deferred_plabels == NULL || i == n_deferred_plabels)
5389 const char *real_name;
5391 if (deferred_plabels == 0)
5392 deferred_plabels = (struct deferred_plabel *)
5393 ggc_alloc (sizeof (struct deferred_plabel));
5394 else
5395 deferred_plabels = (struct deferred_plabel *)
5396 ggc_realloc (deferred_plabels,
5397 ((n_deferred_plabels + 1)
5398 * sizeof (struct deferred_plabel)));
5400 i = n_deferred_plabels++;
5401 deferred_plabels[i].internal_label = gen_label_rtx ();
5402 deferred_plabels[i].name = ggc_strdup (fname);
5404 /* Gross. We have just implicitly taken the address of this function,
5405 so mark it as such. */
5406 real_name = (*targetm.strip_name_encoding) (fname);
5407 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5410 return &deferred_plabels[i];
5413 static void
5414 output_deferred_plabels (void)
5416 size_t i;
5417 /* If we have deferred plabels, then we need to switch into the data
5418 section and align it to a 4-byte boundary before we output the
5419 deferred plabels. */
5420 if (n_deferred_plabels)
5422 data_section ();
5423 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5426 /* Now output the deferred plabels. */
5427 for (i = 0; i < n_deferred_plabels; i++)
5429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5430 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5431 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5432 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5436 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5437 /* Initialize optabs to point to HPUX long double emulation routines. */
5438 static void
5439 pa_hpux_init_libfuncs (void)
5441 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5442 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5443 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5444 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5445 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5446 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5447 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5448 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5449 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5451 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5452 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5453 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5454 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5455 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5456 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5458 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5459 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5460 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5461 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5463 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5464 ? "__U_Qfcnvfxt_quad_to_sgl"
5465 : "_U_Qfcnvfxt_quad_to_sgl");
5466 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5467 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5468 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5470 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5471 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5473 #endif
5475 /* HP's millicode routines mean something special to the assembler.
5476 Keep track of which ones we have used. */
5478 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5479 static void import_milli (enum millicodes);
5480 static char imported[(int) end1000];
5481 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5482 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5483 #define MILLI_START 10
5485 static void
5486 import_milli (enum millicodes code)
5488 char str[sizeof (import_string)];
5490 if (!imported[(int) code])
5492 imported[(int) code] = 1;
5493 strcpy (str, import_string);
5494 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5495 output_asm_insn (str, 0);
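/* For example, import_milli (mulI) patches the template in place:

       ".IMPORT $$....,MILLICODE"  ->  ".IMPORT $$mulI,MILLICODE"

   MILLI_START (10) is exactly the offset of the "...." placeholder and
   every millicode name is four characters long.  */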
5499 /* The register constraints have put the operands and return value in
5500 the proper registers. */
5502 const char *
5503 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5505 import_milli (mulI);
5506 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5509 /* Emit the rtl for doing a division by a constant. */
5511 /* Do magic division millicodes exist for this value? */
5512 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5513 1, 1};
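/* Reading the table: the entries that are 1 mark the divisors with
   magic millicode routines, namely 3, 5, 6, 7, 9, 10, 12, 14 and 15.  */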
5515 /* We'll use an array to keep track of the magic millicodes and
5516 whether or not we've used them already. [n][0] is signed, [n][1] is
5517 unsigned. */
5519 static int div_milli[16][2];
5521 int
5522 div_operand (rtx op, enum machine_mode mode)
5524 return (mode == SImode
5525 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5526 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5527 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5530 int
5531 emit_hpdiv_const (rtx *operands, int unsignedp)
5533 if (GET_CODE (operands[2]) == CONST_INT
5534 && INTVAL (operands[2]) > 0
5535 && INTVAL (operands[2]) < 16
5536 && magic_milli[INTVAL (operands[2])])
5538 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5540 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5541 emit
5542 (gen_rtx
5543 (PARALLEL, VOIDmode,
5544 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5545 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5546 SImode,
5547 gen_rtx_REG (SImode, 26),
5548 operands[2])),
5549 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5550 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5551 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5552 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5553 gen_rtx_CLOBBER (VOIDmode, ret))));
5554 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5555 return 1;
5557 return 0;
5560 const char *
5561 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5563 int divisor;
5565 /* If the divisor is a constant, try to use one of the special
5566 opcodes. */
5567 if (GET_CODE (operands[0]) == CONST_INT)
5569 static char buf[100];
5570 divisor = INTVAL (operands[0]);
5571 if (!div_milli[divisor][unsignedp])
5573 div_milli[divisor][unsignedp] = 1;
5574 if (unsignedp)
5575 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5576 else
5577 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5579 if (unsignedp)
5581 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5582 INTVAL (operands[0]));
5583 return output_millicode_call (insn,
5584 gen_rtx_SYMBOL_REF (SImode, buf));
5586 else
5588 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5589 INTVAL (operands[0]));
5590 return output_millicode_call (insn,
5591 gen_rtx_SYMBOL_REF (SImode, buf));
5594 /* Divisor isn't a special constant. */
5595 else
5597 if (unsignedp)
5599 import_milli (divU);
5600 return output_millicode_call (insn,
5601 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5603 else
5605 import_milli (divI);
5606 return output_millicode_call (insn,
5607 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5612 /* Output a $$rem millicode to do mod. */
5614 const char *
5615 output_mod_insn (int unsignedp, rtx insn)
5617 if (unsignedp)
5619 import_milli (remU);
5620 return output_millicode_call (insn,
5621 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5623 else
5625 import_milli (remI);
5626 return output_millicode_call (insn,
5627 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5631 void
5632 output_arg_descriptor (rtx call_insn)
5634 const char *arg_regs[4];
5635 enum machine_mode arg_mode;
5636 rtx link;
5637 int i, output_flag = 0;
5638 int regno;
5640 /* We neither need nor want argument location descriptors for the
5641 64bit runtime environment or the ELF32 environment. */
5642 if (TARGET_64BIT || TARGET_ELF32)
5643 return;
5645 for (i = 0; i < 4; i++)
5646 arg_regs[i] = 0;
5648 /* Specify explicitly that no argument relocations should take place
5649 if using the portable runtime calling conventions. */
5650 if (TARGET_PORTABLE_RUNTIME)
5652 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5653 asm_out_file);
5654 return;
5657 if (GET_CODE (call_insn) != CALL_INSN)
5658 abort ();
5659 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5661 rtx use = XEXP (link, 0);
5663 if (! (GET_CODE (use) == USE
5664 && GET_CODE (XEXP (use, 0)) == REG
5665 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5666 continue;
5668 arg_mode = GET_MODE (XEXP (use, 0));
5669 regno = REGNO (XEXP (use, 0));
5670 if (regno >= 23 && regno <= 26)
5672 arg_regs[26 - regno] = "GR";
5673 if (arg_mode == DImode)
5674 arg_regs[25 - regno] = "GR";
5676 else if (regno >= 32 && regno <= 39)
5678 if (arg_mode == SFmode)
5679 arg_regs[(regno - 32) / 2] = "FR";
5680 else
5682 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5683 arg_regs[(regno - 34) / 2] = "FR";
5684 arg_regs[(regno - 34) / 2 + 1] = "FU";
5685 #else
5686 arg_regs[(regno - 34) / 2] = "FU";
5687 arg_regs[(regno - 34) / 2 + 1] = "FR";
5688 #endif
5692 fputs ("\t.CALL ", asm_out_file);
5693 for (i = 0; i < 4; i++)
5695 if (arg_regs[i])
5697 if (output_flag++)
5698 fputc (',', asm_out_file);
5699 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5702 fputc ('\n', asm_out_file);
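/* Example of the emitted descriptor (illustrative): for a call such as
   f (int a, int b), with a in %r26 and b in %r25, the loop above marks
   the first two argument words and the output is

       .CALL ARGW0=GR,ARGW1=GR  */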
5705 /* Return the class of any secondary reload register that is needed to
5706 move IN into a register in class CLASS using mode MODE.
5708 Profiling has shown this routine and its descendants account for
5709 a significant amount of compile time (~7%). So it has been
5710 optimized to reduce redundant computations and eliminate useless
5711 function calls.
5713 It might be worthwhile to try to make this a leaf function too. */
5715 enum reg_class
5716 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5718 int regno, is_symbolic;
5720 /* Trying to load a constant into a FP register during PIC code
5721 generation will require %r1 as a scratch register. */
5722 if (flag_pic
5723 && GET_MODE_CLASS (mode) == MODE_INT
5724 && FP_REG_CLASS_P (class)
5725 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5726 return R1_REGS;
5728 /* Profiling showed the PA port spends about 1.3% of its compilation
5729 time in true_regnum from calls inside secondary_reload_class. */
5731 if (GET_CODE (in) == REG)
5733 regno = REGNO (in);
5734 if (regno >= FIRST_PSEUDO_REGISTER)
5735 regno = true_regnum (in);
5737 else if (GET_CODE (in) == SUBREG)
5738 regno = true_regnum (in);
5739 else
5740 regno = -1;
5742 /* If we have something like (mem (mem (...)), we can safely assume the
5743 inner MEM will end up in a general register after reloading, so there's
5744 no need for a secondary reload. */
5745 if (GET_CODE (in) == MEM
5746 && GET_CODE (XEXP (in, 0)) == MEM)
5747 return NO_REGS;
5749 /* Handle out of range displacement for integer mode loads/stores of
5750 FP registers. */
5751 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5752 && GET_MODE_CLASS (mode) == MODE_INT
5753 && FP_REG_CLASS_P (class))
5754 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5755 return GENERAL_REGS;
5757 /* A SAR<->FP register copy requires a secondary register (GPR) as
5758 well as secondary memory. */
5759 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5760 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5761 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5762 return GENERAL_REGS;
5764 if (GET_CODE (in) == HIGH)
5765 in = XEXP (in, 0);
5767 /* Profiling has shown GCC spends about 2.6% of its compilation
5768 time in symbolic_operand from calls inside secondary_reload_class.
5770 We use an inline copy and only compute its return value once to avoid
5771 useless work. */
5772 switch (GET_CODE (in))
5774 rtx tmp;
5776 case SYMBOL_REF:
5777 case LABEL_REF:
5778 is_symbolic = 1;
5779 break;
5780 case CONST:
5781 tmp = XEXP (in, 0);
5782 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5783 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5784 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5785 break;
5787 default:
5788 is_symbolic = 0;
5789 break;
5792 if (!flag_pic
5793 && is_symbolic
5794 && read_only_operand (in, VOIDmode))
5795 return NO_REGS;
5797 if (class != R1_REGS && is_symbolic)
5798 return R1_REGS;
5800 return NO_REGS;
5803 enum direction
5804 function_arg_padding (enum machine_mode mode, tree type)
5806 if (mode == BLKmode
5807 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5809 /* Return none if justification is not required. */
5810 if (type
5811 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5812 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5813 return none;
5815 /* The directions set here are ignored when a BLKmode argument larger
5816 than a word is placed in a register. Different code is used for
5817 the stack and registers. This makes it difficult to have a
5818 consistent data representation for both the stack and registers.
5819 For both runtimes, the justification and padding for arguments on
5820 the stack and in registers should be identical. */
5821 if (TARGET_64BIT)
5822 /* The 64-bit runtime specifies left justification for aggregates. */
5823 return upward;
5824 else
5825 /* The 32-bit runtime architecture specifies right justification.
5826 When the argument is passed on the stack, the argument is padded
5827 with garbage on the left. The HP compiler pads with zeros. */
5828 return downward;
5831 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5832 return downward;
5833 else
5834 return none;
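/* Worked example (a sketch): a 3-byte aggregate has BLKmode and
   3 * BITS_PER_UNIT is not a multiple of PARM_BOUNDARY, so the 32-bit
   runtime pads it downward (right justified in its word) while the
   64-bit runtime pads it upward.  A 16-bit scalar is narrower than
   PARM_BOUNDARY and is likewise padded downward.  */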
5838 /* Do what is necessary for `va_start'. We look at the current function
5839 to determine if stdargs or varargs is used and fill in an initial
5840 va_list. A pointer to this constructor is returned. */
5842 struct rtx_def *
5843 hppa_builtin_saveregs (void)
5845 rtx offset, dest;
5846 tree fntype = TREE_TYPE (current_function_decl);
5847 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5848 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5849 != void_type_node)))
5850 ? UNITS_PER_WORD : 0);
5852 if (argadj)
5853 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5854 else
5855 offset = current_function_arg_offset_rtx;
5857 if (TARGET_64BIT)
5859 int i, off;
5861 /* Adjust for varargs/stdarg differences. */
5862 if (argadj)
5863 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5864 else
5865 offset = current_function_arg_offset_rtx;
5867 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5868 from the incoming arg pointer and growing to larger addresses. */
5869 for (i = 26, off = -64; i >= 19; i--, off += 8)
5870 emit_move_insn (gen_rtx_MEM (word_mode,
5871 plus_constant (arg_pointer_rtx, off)),
5872 gen_rtx_REG (word_mode, i));
5874 /* The incoming args pointer points just beyond the flushback area;
5875 normally this is not a serious concern. However, when we are doing
5876 varargs/stdargs we want to make the arg pointer point to the start
5877 of the incoming argument area. */
5878 emit_move_insn (virtual_incoming_args_rtx,
5879 plus_constant (arg_pointer_rtx, -64));
5881 /* Now return a pointer to the first anonymous argument. */
5882 return copy_to_reg (expand_binop (Pmode, add_optab,
5883 virtual_incoming_args_rtx,
5884 offset, 0, 0, OPTAB_LIB_WIDEN));
5887 /* Store general registers on the stack. */
5888 dest = gen_rtx_MEM (BLKmode,
5889 plus_constant (current_function_internal_arg_pointer,
5890 -16));
5891 set_mem_alias_set (dest, get_varargs_alias_set ());
5892 set_mem_align (dest, BITS_PER_WORD);
5893 move_block_from_reg (23, dest, 4);
5895 /* move_block_from_reg will emit code to store the argument registers
5896 individually as scalar stores.
5898 However, other insns may later load from the same addresses for
5899 a structure load (passing a struct to a varargs routine).
5901 The alias code assumes that such aliasing can never happen, so we
5902 have to keep memory referencing insns from moving up beyond the
5903 last argument register store. So we emit a blockage insn here. */
5904 emit_insn (gen_blockage ());
5906 return copy_to_reg (expand_binop (Pmode, add_optab,
5907 current_function_internal_arg_pointer,
5908 offset, 0, 0, OPTAB_LIB_WIDEN));
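/* A picture of the layout established above (a sketch derived from the
   stores this function emits, with AP standing for the incoming arg
   pointer):

   32-bit: move_block_from_reg stores %r23..%r26 at AP-16..AP-4, so the
   first anonymous argument word (%r26) sits at AP-4 and later words at
   successively lower addresses.

   64-bit: %r26 is stored at AP-64 and %r19 at AP-8, so anonymous
   arguments are read at increasing addresses from the start of the
   incoming argument area.  */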
5911 void
5912 hppa_va_start (tree valist, rtx nextarg)
5914 nextarg = expand_builtin_saveregs ();
5915 std_expand_builtin_va_start (valist, nextarg);
5918 struct rtx_def *
5919 hppa_va_arg (tree valist, tree type)
5921 HOST_WIDE_INT size = int_size_in_bytes (type);
5922 HOST_WIDE_INT ofs;
5923 tree t, ptr, pptr;
5925 if (TARGET_64BIT)
5927 /* Every argument in PA64 is supposed to be passed by value
5928 (including large structs). However, as a GCC extension, we
5929 pass zero and variable sized arguments by reference. Empty
5930 structures are a GCC extension not supported by the HP
5931 compilers. Thus, passing them by reference isn't likely
5932 to conflict with the ABI. For variable sized arguments,
5933 GCC doesn't have the infrastructure to allocate these to
5934 registers. */
5936 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5938 if (size > UNITS_PER_WORD)
5940 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5941 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5942 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5943 build_int_2 (-2 * UNITS_PER_WORD, -1));
5944 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5945 TREE_SIDE_EFFECTS (t) = 1;
5946 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5949 if (size > 0)
5950 return std_expand_builtin_va_arg (valist, type);
5951 else
5953 ptr = build_pointer_type (type);
5955 /* Args grow upward. */
5956 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5957 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5958 TREE_SIDE_EFFECTS (t) = 1;
5960 pptr = build_pointer_type (ptr);
5961 t = build1 (NOP_EXPR, pptr, t);
5962 TREE_SIDE_EFFECTS (t) = 1;
5964 t = build1 (INDIRECT_REF, ptr, t);
5965 TREE_SIDE_EFFECTS (t) = 1;
5968 else /* !TARGET_64BIT */
5970 ptr = build_pointer_type (type);
5972 /* "Large" and variable sized types are passed by reference. */
5973 if (size > 8 || size <= 0)
5975 /* Args grow downward. */
5976 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5977 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5978 TREE_SIDE_EFFECTS (t) = 1;
5980 pptr = build_pointer_type (ptr);
5981 t = build1 (NOP_EXPR, pptr, t);
5982 TREE_SIDE_EFFECTS (t) = 1;
5984 t = build1 (INDIRECT_REF, ptr, t);
5985 TREE_SIDE_EFFECTS (t) = 1;
5987 else
5989 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5990 build_int_2 (-size, -1));
5992 /* Copied from va-pa.h, but we probably don't need to align to
5993 word size, since we generate and preserve that invariant. */
5994 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5995 build_int_2 ((size > 4 ? -8 : -4), -1));
5997 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5998 TREE_SIDE_EFFECTS (t) = 1;
6000 ofs = (8 - size) % 4;
6001 if (ofs)
6003 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
6004 build_int_2 (ofs, 0));
6005 TREE_SIDE_EFFECTS (t) = 1;
6008 t = build1 (NOP_EXPR, ptr, t);
6009 TREE_SIDE_EFFECTS (t) = 1;
6013 /* Calculate! */
6014 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
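/* A minimal C model of the 32-bit pointer arithmetic built with trees
   above (an illustration only, kept under #if 0; it assumes ILP32 and
   the hypothetical name va_arg_model_32).  Like the tree expansion, it
   returns the address of the argument.  */
#if 0
static void *
va_arg_model_32 (char **ap, long size)
{
  if (size > 8 || size <= 0)
    /* "Large" and variable sized types are passed by reference; the
       slot holds a pointer and args grow downward.  */
    return *(void **) (*ap -= sizeof (void *));

  /* Move down by SIZE, aligning to 8 for doubleword-sized values.  */
  *ap = (char *) (((long) *ap - size) & (size > 4 ? -8 : -4));

  /* Small arguments are right justified within their slot.  */
  return *ap + (8 - size) % 4;
}
#endif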
6019 /* This routine handles all the normal conditional branch sequences we
6020 might need to generate. It handles compare immediate vs compare
6021 register, nullification of delay slots, varying length branches,
6022 negated branches, and all combinations of the above. It returns the
6023 output appropriate to emit the branch corresponding to all given
6024 parameters. */
6026 const char *
6027 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
6029 static char buf[100];
6030 int useskip = 0;
6031 rtx xoperands[5];
6033 /* A conditional branch to the following instruction (e.g., the delay slot)
6034 is asking for a disaster. This can happen when not optimizing and
6035 when jump optimization fails.
6037 While it is usually safe to emit nothing, this can fail if the
6038 preceding instruction is a nullified branch with an empty delay
6039 slot and the same branch target as this branch. We could check
6040 for this but jump optimization should eliminate nop jumps. It
6041 is always safe to emit a nop. */
6042 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6043 return "nop";
6045 /* The doubleword form of the cmpib instruction doesn't have the LEU
6046 and GTU conditions while the cmpb instruction does. Since we accept
6047 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6048 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6049 operands[2] = gen_rtx_REG (DImode, 0);
6051 /* If this is a long branch with its delay slot unfilled, set `nullify'
6052 as it can nullify the delay slot and save a nop. */
6053 if (length == 8 && dbr_sequence_length () == 0)
6054 nullify = 1;
6056 /* If this is a short forward conditional branch which did not get
6057 its delay slot filled, the delay slot can still be nullified. */
6058 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6059 nullify = forward_branch_p (insn);
6061 /* A forward branch over a single nullified insn can be done with a
6062 comclr instruction. This avoids the single cycle penalty due to
6063 a mis-predicted branch if we fall through (branch not taken). */
6064 if (length == 4
6065 && next_real_insn (insn) != 0
6066 && get_attr_length (next_real_insn (insn)) == 4
6067 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6068 && nullify)
6069 useskip = 1;
6071 switch (length)
6073 /* All short conditional branches except backwards with an unfilled
6074 delay slot. */
6075 case 4:
6076 if (useskip)
6077 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6078 else
6079 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6080 if (GET_MODE (operands[1]) == DImode)
6081 strcat (buf, "*");
6082 if (negated)
6083 strcat (buf, "%B3");
6084 else
6085 strcat (buf, "%S3");
6086 if (useskip)
6087 strcat (buf, " %2,%r1,%%r0");
6088 else if (nullify)
6089 strcat (buf, ",n %2,%r1,%0");
6090 else
6091 strcat (buf, " %2,%r1,%0");
6092 break;
6094 /* All long conditionals. Note a short backward branch with an
6095 unfilled delay slot is treated just like a long backward branch
6096 with an unfilled delay slot. */
6097 case 8:
6098 /* Handle a weird backwards branch with a filled delay slot
6099 which is nullified. */
6100 if (dbr_sequence_length () != 0
6101 && ! forward_branch_p (insn)
6102 && nullify)
6104 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6105 if (GET_MODE (operands[1]) == DImode)
6106 strcat (buf, "*");
6107 if (negated)
6108 strcat (buf, "%S3");
6109 else
6110 strcat (buf, "%B3");
6111 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6113 /* Handle short backwards branch with an unfilled delay slot.
6114 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6115 taken and untaken branches. */
6116 else if (dbr_sequence_length () == 0
6117 && ! forward_branch_p (insn)
6118 && INSN_ADDRESSES_SET_P ()
6119 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6120 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6122 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6123 if (GET_MODE (operands[1]) == DImode)
6124 strcat (buf, "*");
6125 if (negated)
6126 strcat (buf, "%B3 %2,%r1,%0%#");
6127 else
6128 strcat (buf, "%S3 %2,%r1,%0%#");
6130 else
6132 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6133 if (GET_MODE (operands[1]) == DImode)
6134 strcat (buf, "*");
6135 if (negated)
6136 strcat (buf, "%S3");
6137 else
6138 strcat (buf, "%B3");
6139 if (nullify)
6140 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6141 else
6142 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6144 break;
6146 case 20:
6147 case 28:
6148 xoperands[0] = operands[0];
6149 xoperands[1] = operands[1];
6150 xoperands[2] = operands[2];
6151 xoperands[3] = operands[3];
6153 /* The reversed conditional branch must branch over one additional
6154 instruction if the delay slot is filled. If the delay slot
6155 is empty, the instruction after the reversed condition branch
6156 must be nullified. */
6157 nullify = dbr_sequence_length () == 0;
6158 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6160 /* Create a reversed conditional branch which branches around
6161 the following insns. */
6162 if (GET_MODE (operands[1]) != DImode)
6164 if (nullify)
6166 if (negated)
6167 strcpy (buf,
6168 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6169 else
6170 strcpy (buf,
6171 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6173 else
6175 if (negated)
6176 strcpy (buf,
6177 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6178 else
6179 strcpy (buf,
6180 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6183 else
6185 if (nullify)
6187 if (negated)
6188 strcpy (buf,
6189 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6190 else
6191 strcpy (buf,
6192 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6194 else
6196 if (negated)
6197 strcpy (buf,
6198 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6199 else
6200 strcpy (buf,
6201 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6205 output_asm_insn (buf, xoperands);
6206 return output_lbranch (operands[0], insn);
6208 default:
6209 abort ();
6211 return buf;
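/* For example (a sketch of the length-4 templates above), a short
   word-mode compare-and-branch against %r0 with nullification
   assembles on PA 2.0 as something like

	cmpb,=,n %r0,%r26,L$0002

   and the skip form used when USESKIP is set,

	cmpclr,= %r0,%r26,%r0

   conditionally nullifies the single following instruction instead of
   branching over it.  */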
6214 /* This routine handles long unconditional branches that exceed the
6215 maximum range of a simple branch instruction. */
6217 const char *
6218 output_lbranch (rtx dest, rtx insn)
6220 rtx xoperands[2];
6222 xoperands[0] = dest;
6224 /* First, free up the delay slot. */
6225 if (dbr_sequence_length () != 0)
6227 /* We can't handle a jump in the delay slot. */
6228 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
6229 abort ();
6231 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6232 optimize, 0, 0, NULL);
6234 /* Now delete the delay insn. */
6235 PUT_CODE (NEXT_INSN (insn), NOTE);
6236 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6237 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6240 /* Output an insn to save %r1. The runtime documentation doesn't
6241 specify whether the "Clean Up" slot in the caller's frame can
6242 be clobbered by the callee. It isn't copied by HP's builtin
6243 alloca, so this suggests that it can be clobbered if necessary.
6244 The "Static Link" location is copied by HP builtin alloca, so
6245 we avoid using it. Using the cleanup slot might be a problem
6246 if we have to interoperate with languages that pass cleanup
6247 information. However, it should be possible to handle these
6248 situations with GCC's asm feature.
6250 The "Current RP" slot is reserved for the called procedure, so
6251 we try to use it when we don't have a frame of our own. It's
6252 rather unlikely that we won't have a frame when we need to emit
6253 a very long branch.
6255 Really the way to go long term is a register scavenger; go to
6256 the target of the jump and find a register which we can use
6257 as a scratch to hold the value in %r1. Then, we wouldn't have
6258 to free up the delay slot or clobber a slot that may be needed
6259 for other purposes. */
6260 if (TARGET_64BIT)
6262 if (actual_fsize == 0 && !regs_ever_live[2])
6263 /* Use the return pointer slot in the frame marker. */
6264 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6265 else
6266 /* Use the slot at -40 in the frame marker since HP builtin
6267 alloca doesn't copy it. */
6268 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6270 else
6272 if (actual_fsize == 0 && !regs_ever_live[2])
6273 /* Use the return pointer slot in the frame marker. */
6274 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6275 else
6276 /* Use the "Clean Up" slot in the frame marker. In GCC,
6277 the only other use of this location is for copying a
6278 floating point double argument from a floating-point
6279 register to two general registers. The copy is done
6280 as an "atomic" operation when outputting a call, so it
6281 won't interfere with our using the location here. */
6282 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6285 if (TARGET_PORTABLE_RUNTIME)
6287 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6288 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6289 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6291 else if (flag_pic)
6293 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6294 if (TARGET_SOM || !TARGET_GAS)
6296 xoperands[1] = gen_label_rtx ();
6297 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6298 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6299 CODE_LABEL_NUMBER (xoperands[1]));
6300 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6302 else
6304 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6305 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6307 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6309 else
6310 /* Now output a very long branch to the original target. */
6311 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6313 /* Now restore the value of %r1 in the delay slot. */
6314 if (TARGET_64BIT)
6316 if (actual_fsize == 0 && !regs_ever_live[2])
6317 return "ldd -16(%%r30),%%r1";
6318 else
6319 return "ldd -40(%%r30),%%r1";
6321 else
6323 if (actual_fsize == 0 && !regs_ever_live[2])
6324 return "ldw -20(%%r30),%%r1";
6325 else
6326 return "ldw -12(%%r30),%%r1";
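/* For example (derived from the templates above), a 32-bit PIC long
   branch assembled with GAS in a function with a frame comes out
   roughly as

	stw %r1,-12(%r30)
	bl .+8,%r1
	addil L'target-$PIC_pcrel$0+4,%r1
	ldo R'target-$PIC_pcrel$0+8(%r1),%r1
	bv %r0(%r1)
	ldw -12(%r30),%r1

   where `target' stands for the branch destination and the final load
   is the delay slot insn returned by this function.  */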
6330 /* This routine handles all the branch-on-bit conditional branch sequences we
6331 might need to generate. It handles nullification of delay slots,
6332 varying length branches, negated branches and all combinations of the
6333 above. It returns the appropriate output template to emit the branch. */
6335 const char *
6336 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6337 int negated, rtx insn, int which)
6339 static char buf[100];
6340 int useskip = 0;
6342 /* A conditional branch to the following instruction (e.g., the delay slot) is
6343 asking for a disaster. I do not think this can happen as this pattern
6344 is only used when optimizing; jump optimization should eliminate the
6345 jump. But be prepared just in case. */
6347 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6348 return "nop";
6350 /* If this is a long branch with its delay slot unfilled, set `nullify'
6351 as it can nullify the delay slot and save a nop. */
6352 if (length == 8 && dbr_sequence_length () == 0)
6353 nullify = 1;
6355 /* If this is a short forward conditional branch which did not get
6356 its delay slot filled, the delay slot can still be nullified. */
6357 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6358 nullify = forward_branch_p (insn);
6360 /* A forward branch over a single nullified insn can be done with an
6361 extrs instruction. This avoids the single cycle penalty due to
6362 a mis-predicted branch if we fall through (branch not taken). */
6364 if (length == 4
6365 && next_real_insn (insn) != 0
6366 && get_attr_length (next_real_insn (insn)) == 4
6367 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6368 && nullify)
6369 useskip = 1;
6371 switch (length)
6374 /* All short conditional branches except backwards with an unfilled
6375 delay slot. */
6376 case 4:
6377 if (useskip)
6378 strcpy (buf, "{extrs,|extrw,s,}");
6379 else
6380 strcpy (buf, "bb,");
6381 if (useskip && GET_MODE (operands[0]) == DImode)
6382 strcpy (buf, "extrd,s,*");
6383 else if (GET_MODE (operands[0]) == DImode)
6384 strcpy (buf, "bb,*");
6385 if ((which == 0 && negated)
6386 || (which == 1 && ! negated))
6387 strcat (buf, ">=");
6388 else
6389 strcat (buf, "<");
6390 if (useskip)
6391 strcat (buf, " %0,%1,1,%%r0");
6392 else if (nullify && negated)
6393 strcat (buf, ",n %0,%1,%3");
6394 else if (nullify && ! negated)
6395 strcat (buf, ",n %0,%1,%2");
6396 else if (! nullify && negated)
6397 strcat (buf, "%0,%1,%3");
6398 else if (! nullify && ! negated)
6399 strcat (buf, " %0,%1,%2");
6400 break;
6402 /* All long conditionals. Note a short backward branch with an
6403 unfilled delay slot is treated just like a long backward branch
6404 with an unfilled delay slot. */
6405 case 8:
6406 /* Handle a weird backwards branch with a filled delay slot
6407 which is nullified. */
6408 if (dbr_sequence_length () != 0
6409 && ! forward_branch_p (insn)
6410 && nullify)
6412 strcpy (buf, "bb,");
6413 if (GET_MODE (operands[0]) == DImode)
6414 strcat (buf, "*");
6415 if ((which == 0 && negated)
6416 || (which == 1 && ! negated))
6417 strcat (buf, "<");
6418 else
6419 strcat (buf, ">=");
6420 if (negated)
6421 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6422 else
6423 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6425 /* Handle short backwards branch with an unfilled delay slot.
6426 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6427 taken and untaken branches. */
6428 else if (dbr_sequence_length () == 0
6429 && ! forward_branch_p (insn)
6430 && INSN_ADDRESSES_SET_P ()
6431 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6432 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6434 strcpy (buf, "bb,");
6435 if (GET_MODE (operands[0]) == DImode)
6436 strcat (buf, "*");
6437 if ((which == 0 && negated)
6438 || (which == 1 && ! negated))
6439 strcat (buf, ">=");
6440 else
6441 strcat (buf, "<");
6442 if (negated)
6443 strcat (buf, " %0,%1,%3%#");
6444 else
6445 strcat (buf, " %0,%1,%2%#");
6447 else
6449 strcpy (buf, "{extrs,|extrw,s,}");
6450 if (GET_MODE (operands[0]) == DImode)
6451 strcpy (buf, "extrd,s,*");
6452 if ((which == 0 && negated)
6453 || (which == 1 && ! negated))
6454 strcat (buf, "<");
6455 else
6456 strcat (buf, ">=");
6457 if (nullify && negated)
6458 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6459 else if (nullify && ! negated)
6460 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6461 else if (negated)
6462 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6463 else
6464 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6466 break;
6468 default:
6469 abort ();
6471 return buf;
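/* For example (a sketch of the length-4 case above), branching when
   bit 5 of %r4 is set, with nullification, assembles as

	bb,<,n %r4,5,L$0003

   while the extrw,s skip form extracts the same bit into %r0 and
   conditionally nullifies the following instruction instead.  */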
6474 /* This routine handles all the branch-on-variable-bit conditional branch
6475 sequences we might need to generate. It handles nullification of delay
6476 slots, varying length branches, negated branches and all combinations
6477 of the above. It returns the appropriate output template to emit the
6478 branch. */
6480 const char *
6481 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6482 int negated, rtx insn, int which)
6484 static char buf[100];
6485 int useskip = 0;
6487 /* A conditional branch to the following instruction (e.g., the delay slot) is
6488 asking for a disaster. I do not think this can happen as this pattern
6489 is only used when optimizing; jump optimization should eliminate the
6490 jump. But be prepared just in case. */
6492 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6493 return "nop";
6495 /* If this is a long branch with its delay slot unfilled, set `nullify'
6496 as it can nullify the delay slot and save a nop. */
6497 if (length == 8 && dbr_sequence_length () == 0)
6498 nullify = 1;
6500 /* If this is a short forward conditional branch which did not get
6501 its delay slot filled, the delay slot can still be nullified. */
6502 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6503 nullify = forward_branch_p (insn);
6505 /* A forward branch over a single nullified insn can be done with an
6506 extrs instruction. This avoids the single cycle penalty due to
6507 a mis-predicted branch if we fall through (branch not taken). */
6509 if (length == 4
6510 && next_real_insn (insn) != 0
6511 && get_attr_length (next_real_insn (insn)) == 4
6512 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6513 && nullify)
6514 useskip = 1;
6516 switch (length)
6519 /* All short conditional branches except backwards with an unfilled
6520 delay slot. */
6521 case 4:
6522 if (useskip)
6523 strcpy (buf, "{vextrs,|extrw,s,}");
6524 else
6525 strcpy (buf, "{bvb,|bb,}");
6526 if (useskip && GET_MODE (operands[0]) == DImode)
6527 strcpy (buf, "extrd,s,*");
6528 else if (GET_MODE (operands[0]) == DImode)
6529 strcpy (buf, "bb,*");
6530 if ((which == 0 && negated)
6531 || (which == 1 && ! negated))
6532 strcat (buf, ">=");
6533 else
6534 strcat (buf, "<");
6535 if (useskip)
6536 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6537 else if (nullify && negated)
6538 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6539 else if (nullify && ! negated)
6540 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6541 else if (! nullify && negated)
6542 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6543 else if (! nullify && ! negated)
6544 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6545 break;
6547 /* All long conditionals. Note a short backward branch with an
6548 unfilled delay slot is treated just like a long backward branch
6549 with an unfilled delay slot. */
6550 case 8:
6551 /* Handle a weird backwards branch with a filled delay slot
6552 which is nullified. */
6553 if (dbr_sequence_length () != 0
6554 && ! forward_branch_p (insn)
6555 && nullify)
6557 strcpy (buf, "{bvb,|bb,}");
6558 if (GET_MODE (operands[0]) == DImode)
6559 strcat (buf, "*");
6560 if ((which == 0 && negated)
6561 || (which == 1 && ! negated))
6562 strcat (buf, "<");
6563 else
6564 strcat (buf, ">=");
6565 if (negated)
6566 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6567 else
6568 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6570 /* Handle short backwards branch with an unfilled delay slot.
6571 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6572 taken and untaken branches. */
6573 else if (dbr_sequence_length () == 0
6574 && ! forward_branch_p (insn)
6575 && INSN_ADDRESSES_SET_P ()
6576 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6577 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6579 strcpy (buf, "{bvb,|bb,}");
6580 if (GET_MODE (operands[0]) == DImode)
6581 strcat (buf, "*");
6582 if ((which == 0 && negated)
6583 || (which == 1 && ! negated))
6584 strcat (buf, ">=");
6585 else
6586 strcat (buf, "<");
6587 if (negated)
6588 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6589 else
6590 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6592 else
6594 strcpy (buf, "{vextrs,|extrw,s,}");
6595 if (GET_MODE (operands[0]) == DImode)
6596 strcpy (buf, "extrd,s,*");
6597 if ((which == 0 && negated)
6598 || (which == 1 && ! negated))
6599 strcat (buf, "<");
6600 else
6601 strcat (buf, ">=");
6602 if (nullify && negated)
6603 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6604 else if (nullify && ! negated)
6605 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6606 else if (negated)
6607 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6608 else
6609 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6611 break;
6613 default:
6614 abort ();
6616 return buf;
6619 /* Return the output template for emitting a dbra type insn.
6621 Note it may perform some output operations on its own before
6622 returning the final output string. */
6623 const char *
6624 output_dbra (rtx *operands, rtx insn, int which_alternative)
6627 /* A conditional branch to the following instruction (e.g., the delay slot) is
6628 asking for a disaster. Be prepared! */
6630 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6632 if (which_alternative == 0)
6633 return "ldo %1(%0),%0";
6634 else if (which_alternative == 1)
6636 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6637 output_asm_insn ("ldw -16(%%r30),%4", operands);
6638 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6639 return "{fldws|fldw} -16(%%r30),%0";
6641 else
6643 output_asm_insn ("ldw %0,%4", operands);
6644 return "ldo %1(%4),%4\n\tstw %4,%0";
6648 if (which_alternative == 0)
6650 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6651 int length = get_attr_length (insn);
6653 /* If this is a long branch with its delay slot unfilled, set `nullify'
6654 as it can nullify the delay slot and save a nop. */
6655 if (length == 8 && dbr_sequence_length () == 0)
6656 nullify = 1;
6658 /* If this is a short forward conditional branch which did not get
6659 its delay slot filled, the delay slot can still be nullified. */
6660 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6661 nullify = forward_branch_p (insn);
6663 /* Handle short versions first. */
6664 if (length == 4 && nullify)
6665 return "addib,%C2,n %1,%0,%3";
6666 else if (length == 4 && ! nullify)
6667 return "addib,%C2 %1,%0,%3";
6668 else if (length == 8)
6670 /* Handle a weird backwards branch with a filled delay slot
6671 which is nullified. */
6672 if (dbr_sequence_length () != 0
6673 && ! forward_branch_p (insn)
6674 && nullify)
6675 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6676 /* Handle short backwards branch with an unfilled delay slot.
6677 Using a addb;nop rather than addi;bl saves 1 cycle for both
6678 taken and untaken branches. */
6679 else if (dbr_sequence_length () == 0
6680 && ! forward_branch_p (insn)
6681 && INSN_ADDRESSES_SET_P ()
6682 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6683 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6684 return "addib,%C2 %1,%0,%3%#";
6686 /* Handle normal cases. */
6687 if (nullify)
6688 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6689 else
6690 return "addi,%N2 %1,%0,%0\n\tb %3";
6692 else
6693 abort ();
6695 /* Deal with gross reload from FP register case. */
6696 else if (which_alternative == 1)
6698 /* Move loop counter from FP register to MEM then into a GR,
6699 increment the GR, store the GR into MEM, and finally reload
6700 the FP register from MEM from within the branch's delay slot. */
6701 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6702 operands);
6703 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6704 if (get_attr_length (insn) == 24)
6705 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6706 else
6707 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6709 /* Deal with gross reload from memory case. */
6710 else
6712 /* Reload loop counter from memory, the store back to memory
6713 happens in the branch's delay slot. */
6714 output_asm_insn ("ldw %0,%4", operands);
6715 if (get_attr_length (insn) == 12)
6716 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6717 else
6718 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
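/* For example (a sketch of the short form above), the classic
   decrement-and-branch idiom produced by alternative 0 is

	addib,> -1,%r26,L$loop

   which adds -1 to %r26 and branches back while the result is still
   greater than zero.  */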
6722 /* Return the output template for emitting a movb type insn.
6724 Note it may perform some output operations on its own before
6725 returning the final output string. */
6726 const char *
6727 output_movb (rtx *operands, rtx insn, int which_alternative,
6728 int reverse_comparison)
6731 /* A conditional branch to the following instruction (e.g., the delay slot) is
6732 asking for a disaster. Be prepared! */
6734 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6736 if (which_alternative == 0)
6737 return "copy %1,%0";
6738 else if (which_alternative == 1)
6740 output_asm_insn ("stw %1,-16(%%r30)", operands);
6741 return "{fldws|fldw} -16(%%r30),%0";
6743 else if (which_alternative == 2)
6744 return "stw %1,%0";
6745 else
6746 return "mtsar %r1";
6749 /* Support the second variant. */
6750 if (reverse_comparison)
6751 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6753 if (which_alternative == 0)
6755 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6756 int length = get_attr_length (insn);
6758 /* If this is a long branch with its delay slot unfilled, set `nullify'
6759 as it can nullify the delay slot and save a nop. */
6760 if (length == 8 && dbr_sequence_length () == 0)
6761 nullify = 1;
6763 /* If this is a short forward conditional branch which did not get
6764 its delay slot filled, the delay slot can still be nullified. */
6765 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6766 nullify = forward_branch_p (insn);
6768 /* Handle short versions first. */
6769 if (length == 4 && nullify)
6770 return "movb,%C2,n %1,%0,%3";
6771 else if (length == 4 && ! nullify)
6772 return "movb,%C2 %1,%0,%3";
6773 else if (length == 8)
6775 /* Handle weird backwards branch with a filled delay slot
6776 which is nullified. */
6777 if (dbr_sequence_length () != 0
6778 && ! forward_branch_p (insn)
6779 && nullify)
6780 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6782 /* Handle short backwards branch with an unfilled delay slot.
6783 Using a movb;nop rather than or;bl saves 1 cycle for both
6784 taken and untaken branches. */
6785 else if (dbr_sequence_length () == 0
6786 && ! forward_branch_p (insn)
6787 && INSN_ADDRESSES_SET_P ()
6788 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6789 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6790 return "movb,%C2 %1,%0,%3%#";
6791 /* Handle normal cases. */
6792 if (nullify)
6793 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6794 else
6795 return "or,%N2 %1,%%r0,%0\n\tb %3";
6797 else
6798 abort ();
6800 /* Deal with gross reload from FP register case. */
6801 else if (which_alternative == 1)
6803 /* Move loop counter from FP register to MEM then into a GR,
6804 increment the GR, store the GR into MEM, and finally reload
6805 the FP register from MEM from within the branch's delay slot. */
6806 output_asm_insn ("stw %1,-16(%%r30)", operands);
6807 if (get_attr_length (insn) == 12)
6808 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6809 else
6810 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6812 /* Deal with gross reload from memory case. */
6813 else if (which_alternative == 2)
6815 /* Reload loop counter from memory, the store back to memory
6816 happens in the branch's delay slot. */
6817 if (get_attr_length (insn) == 8)
6818 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6819 else
6820 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6822 /* Handle SAR as a destination. */
6823 else
6825 if (get_attr_length (insn) == 8)
6826 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6827 else
6828 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6832 /* Copy any FP arguments in INSN into integer registers. */
6833 static void
6834 copy_fp_args (rtx insn)
6836 rtx link;
6837 rtx xoperands[2];
6839 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6841 int arg_mode, regno;
6842 rtx use = XEXP (link, 0);
6844 if (! (GET_CODE (use) == USE
6845 && GET_CODE (XEXP (use, 0)) == REG
6846 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6847 continue;
6849 arg_mode = GET_MODE (XEXP (use, 0));
6850 regno = REGNO (XEXP (use, 0));
6852 /* Is it a floating point register? */
6853 if (regno >= 32 && regno <= 39)
6855 /* Copy the FP register into an integer register via memory. */
6856 if (arg_mode == SFmode)
6858 xoperands[0] = XEXP (use, 0);
6859 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6860 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6861 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6863 else
6865 xoperands[0] = XEXP (use, 0);
6866 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6867 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6868 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6869 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
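/* For example (a sketch), an SFmode argument living in %fr4 (hard
   register 32) is bounced through the stack to its integer home %r26:

	fstw %fr4,-16(%sr0,%r30)
	ldw -16(%sr0,%r30),%r26

   DFmode arguments use the three-insn fstd/ldw/ldw form instead.  */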
6875 /* Compute length of the FP argument copy sequence for INSN. */
6876 static int
6877 length_fp_args (rtx insn)
6879 int length = 0;
6880 rtx link;
6882 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6884 int arg_mode, regno;
6885 rtx use = XEXP (link, 0);
6887 if (! (GET_CODE (use) == USE
6888 && GET_CODE (XEXP (use, 0)) == REG
6889 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6890 continue;
6892 arg_mode = GET_MODE (XEXP (use, 0));
6893 regno = REGNO (XEXP (use, 0));
6895 /* Is it a floating point register? */
6896 if (regno >= 32 && regno <= 39)
6898 if (arg_mode == SFmode)
6899 length += 8;
6900 else
6901 length += 12;
6905 return length;
6908 /* Return the attribute length for the millicode call instruction INSN.
6909 The length must match the code generated by output_millicode_call.
6910 We include the delay slot in the returned length as it is better to
6911 overestimate the length than to underestimate it. */
6913 int
6914 attr_length_millicode_call (rtx insn)
6916 unsigned long distance = -1;
6917 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6919 if (INSN_ADDRESSES_SET_P ())
6921 distance = (total + insn_current_reference_address (insn));
6922 if (distance < total)
6923 distance = -1;
6926 if (TARGET_64BIT)
6928 if (!TARGET_LONG_CALLS && distance < 7600000)
6929 return 8;
6931 return 20;
6933 else if (TARGET_PORTABLE_RUNTIME)
6934 return 24;
6935 else
6937 if (!TARGET_LONG_CALLS && distance < 240000)
6938 return 8;
6940 if (TARGET_LONG_ABS_CALL && !flag_pic)
6941 return 12;
6943 return 24;
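/* Worked example (a sketch): with INSN_ADDRESSES available,
   total_code_bytes = 65536 and a reference address of 1024 give
   distance = 66560.  On a 32-bit target without -mlong-calls that is
   below the 240000 limit, so the millicode call is assumed reachable
   and the length is 8 bytes (branch plus delay slot).  */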
6947 /* INSN is a function call. It may have an unconditional jump
6948 in its delay slot.
6950 CALL_DEST is the routine we are calling. */
6952 const char *
6953 output_millicode_call (rtx insn, rtx call_dest)
6955 int attr_length = get_attr_length (insn);
6956 int seq_length = dbr_sequence_length ();
6957 int distance;
6958 rtx seq_insn;
6959 rtx xoperands[3];
6961 xoperands[0] = call_dest;
6962 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6964 /* Handle the common case where we are sure that the branch will
6965 reach the beginning of the $CODE$ subspace. The within reach
6966 form of the $$sh_func_adrs call has a length of 28. Because
6967 it has an attribute type of multi, it never has a nonzero
6968 sequence length. The length of the $$sh_func_adrs is the same
6969 as certain out of reach PIC calls to other routines. */
6970 if (!TARGET_LONG_CALLS
6971 && ((seq_length == 0
6972 && (attr_length == 12
6973 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6974 || (seq_length != 0 && attr_length == 8)))
6976 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6978 else
6980 if (TARGET_64BIT)
6982 /* It might seem that one insn could be saved by accessing
6983 the millicode function using the linkage table. However,
6984 this doesn't work in shared libraries and other dynamically
6985 loaded objects. Using a pc-relative sequence also avoids
6986 problems related to the implicit use of the gp register. */
6987 output_asm_insn ("b,l .+8,%%r1", xoperands);
6989 if (TARGET_GAS)
6991 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6992 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6994 else
6996 xoperands[1] = gen_label_rtx ();
6997 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6998 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6999 CODE_LABEL_NUMBER (xoperands[1]));
7000 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7003 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7005 else if (TARGET_PORTABLE_RUNTIME)
7007 /* Pure portable runtime doesn't allow be/ble; we also don't
7008 have PIC support in the assembler/linker, so this sequence
7009 is needed. */
7011 /* Get the address of our target into %r1. */
7012 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7013 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7015 /* Get our return address into %r31. */
7016 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7017 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7019 /* Jump to our target address in %r1. */
7020 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7022 else if (!flag_pic)
7024 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7025 if (TARGET_PA_20)
7026 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7027 else
7028 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7030 else
7032 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7033 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7035 if (TARGET_SOM || !TARGET_GAS)
7037 /* The HP assembler can generate relocations for the
7038 difference of two symbols. GAS can do this for a
7039 millicode symbol but not an arbitrary external
7040 symbol when generating SOM output. */
7041 xoperands[1] = gen_label_rtx ();
7042 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7043 CODE_LABEL_NUMBER (xoperands[1]));
7044 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7045 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7047 else
7049 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7050 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7051 xoperands);
7054 /* Jump to our target address in %r1. */
7055 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7059 if (seq_length == 0)
7060 output_asm_insn ("nop", xoperands);
7062 /* We are done if there isn't a jump in the delay slot. */
7063 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7064 return "";
7066 /* This call has an unconditional jump in its delay slot. */
7067 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7069 /* See if the return address can be adjusted. Use the containing
7070 sequence insn's address. */
7071 if (INSN_ADDRESSES_SET_P ())
7073 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7074 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7075 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7077 if (VAL_14_BITS_P (distance))
7079 xoperands[1] = gen_label_rtx ();
7080 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7081 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7082 CODE_LABEL_NUMBER (xoperands[1]));
7084 else
7085 /* ??? This branch may not reach its target. */
7086 output_asm_insn ("nop\n\tb,n %0", xoperands);
7088 else
7089 /* ??? This branch may not reach its target. */
7090 output_asm_insn ("nop\n\tb,n %0", xoperands);
7092 /* Delete the jump. */
7093 PUT_CODE (NEXT_INSN (insn), NOTE);
7094 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7095 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7097 return "";
7100 /* Return the attribute length of the call instruction INSN. The SIBCALL
7101 flag indicates whether INSN is a regular call or a sibling call. The
7102 length returned must be longer than the code actually generated by
7103 output_call. Since branch shortening is done before delay branch
7104 sequencing, there is no way to determine whether or not the delay
7105 slot will be filled during branch shortening. Even when the delay
7106 slot is filled, we may have to add a nop if the delay slot contains
7107 a branch that can't reach its target. Thus, we always have to include
7108 the delay slot in the length estimate. This used to be done in
7109 pa_adjust_insn_length but we do it here now as some sequences always
7110 fill the delay slot and we can save four bytes in the estimate for
7111 these sequences. */
7113 int
7114 attr_length_call (rtx insn, int sibcall)
7116 int local_call;
7117 rtx call_dest;
7118 tree call_decl;
7119 int length = 0;
7120 rtx pat = PATTERN (insn);
7121 unsigned long distance = -1;
7123 if (INSN_ADDRESSES_SET_P ())
7125 unsigned long total;
7127 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7128 distance = (total + insn_current_reference_address (insn));
7129 if (distance < total)
7130 distance = -1;
7133 /* Determine if this is a local call. */
7134 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7135 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7136 else
7137 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7139 call_decl = SYMBOL_REF_DECL (call_dest);
7140 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7142 /* pc-relative branch. */
7143 if (!TARGET_LONG_CALLS
7144 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7145 || distance < 240000))
7146 length += 8;
7148 /* 64-bit plabel sequence. */
7149 else if (TARGET_64BIT && !local_call)
7150 length += sibcall ? 28 : 24;
7152 /* non-pic long absolute branch sequence. */
7153 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7154 length += 12;
7156 /* long pc-relative branch sequence. */
7157 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7158 || (TARGET_64BIT && !TARGET_GAS)
7159 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7161 length += 20;
7163 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7164 length += 8;
7167 /* 32-bit plabel sequence. */
7168 else
7170 length += 32;
7172 if (TARGET_SOM)
7173 length += length_fp_args (insn);
7175 if (flag_pic)
7176 length += 4;
7178 if (!TARGET_PA_20)
7180 if (!sibcall)
7181 length += 8;
7183 if (!TARGET_NO_SPACE_REGS)
7184 length += 8;
7188 return length;
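/* Worked example (a sketch): a PIC call using the 32-bit plabel
   sequence on a PA 1.x SOM target, not a sibcall and with space
   registers enabled, totals 32 (base) + 4 (PIC) + 8 (return address
   setup) + 8 (space register code) = 52 bytes, plus whatever
   length_fp_args reports for copying FP argument registers.  */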
7191 /* INSN is a function call. It may have an unconditional jump
7192 in its delay slot.
7194 CALL_DEST is the routine we are calling. */
7196 const char *
7197 output_call (rtx insn, rtx call_dest, int sibcall)
7199 int delay_insn_deleted = 0;
7200 int delay_slot_filled = 0;
7201 int seq_length = dbr_sequence_length ();
7202 tree call_decl = SYMBOL_REF_DECL (call_dest);
7203 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7204 rtx xoperands[2];
7206 xoperands[0] = call_dest;
7208 /* Handle the common case where we're sure that the branch will reach
7209 the beginning of the "$CODE$" subspace. This is the beginning of
7210 the current function if we are in a named section. */
7211 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7213 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7214 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7216 else
7218 if (TARGET_64BIT && !local_call)
7220 /* ??? As far as I can tell, the HP linker doesn't support the
7221 long pc-relative sequence described in the 64-bit runtime
7222 architecture. So, we use a slightly longer indirect call. */
7223 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7225 xoperands[0] = p->internal_label;
7226 xoperands[1] = gen_label_rtx ();
7228 /* If this isn't a sibcall, we put the load of %r27 into the
7229 delay slot. We can't do this in a sibcall as we don't
7230 have a second call-clobbered scratch register available. */
7231 if (seq_length != 0
7232 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7233 && !sibcall)
7235 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7236 optimize, 0, 0, NULL);
7238 /* Now delete the delay insn. */
7239 PUT_CODE (NEXT_INSN (insn), NOTE);
7240 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7241 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7242 delay_insn_deleted = 1;
7245 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7246 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7247 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7249 if (sibcall)
7251 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7252 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7253 output_asm_insn ("bve (%%r1)", xoperands);
7255 else
7257 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7258 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7259 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7260 delay_slot_filled = 1;
7263 else
7265 int indirect_call = 0;
7267 /* Emit a long call. There are several different sequences
7268 of increasing length and complexity. In most cases,
7269 they don't allow an instruction in the delay slot. */
7270 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7271 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7272 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7273 && !TARGET_64BIT)
7274 indirect_call = 1;
7276 if (seq_length != 0
7277 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7278 && !sibcall
7279 && (!TARGET_PA_20 || indirect_call))
7281 /* A non-jump insn in the delay slot. By definition we can
7282 emit this insn before the call (and in fact before argument
7283 relocating). */
7284 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0,
7285 NULL);
7287 /* Now delete the delay insn. */
7288 PUT_CODE (NEXT_INSN (insn), NOTE);
7289 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7290 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7291 delay_insn_deleted = 1;
7294 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7296 /* This is the best sequence for making long calls in
7297 non-pic code. Unfortunately, GNU ld doesn't provide
7298 the stub needed for external calls, and GAS's support
7299 for this with the SOM linker is buggy. It is safe
7300 to use this for local calls. */
7301 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7302 if (sibcall)
7303 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7304 else
7306 if (TARGET_PA_20)
7307 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7308 xoperands);
7309 else
7310 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7312 output_asm_insn ("copy %%r31,%%r2", xoperands);
7313 delay_slot_filled = 1;
7316 else
7318 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7319 || (TARGET_64BIT && !TARGET_GAS))
7321 /* The HP assembler and linker can handle relocations
7322 for the difference of two symbols. GAS and the HP
7323 linker can't do this when one of the symbols is
7324 external. */
7325 xoperands[1] = gen_label_rtx ();
7326 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7327 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7328 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7329 CODE_LABEL_NUMBER (xoperands[1]));
7330 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7332 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7334 /* GAS currently can't generate the relocations that
7335 are needed for the SOM linker under HP-UX using this
7336 sequence. The GNU linker doesn't generate the stubs
7337 that are needed for external calls on TARGET_ELF32
7338 with this sequence. For now, we have to use a
7339 longer plabel sequence when using GAS. */
7340 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7341 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7342 xoperands);
7343 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7344 xoperands);
7346 else
7348 /* Emit a long plabel-based call sequence. This is
7349 essentially an inline implementation of $$dyncall.
7350 We don't actually try to call $$dyncall as this is
7351 as difficult as calling the function itself. */
7352 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7354 xoperands[0] = p->internal_label;
7355 xoperands[1] = gen_label_rtx ();
7357 /* Since the call is indirect, FP arguments in registers
7358 need to be copied to the general registers. Then, the
7359 argument relocation stub will copy them back. */
7360 if (TARGET_SOM)
7361 copy_fp_args (insn);
7363 if (flag_pic)
7365 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7366 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7367 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7369 else
7371 output_asm_insn ("addil LR'%0-$global$,%%r27",
7372 xoperands);
7373 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7374 xoperands);
7377 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7378 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7379 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7380 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7382 if (!sibcall && !TARGET_PA_20)
7384 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7385 if (TARGET_NO_SPACE_REGS)
7386 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7387 else
7388 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7392 if (TARGET_PA_20)
7394 if (sibcall)
7395 output_asm_insn ("bve (%%r1)", xoperands);
7396 else
7398 if (indirect_call)
7400 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7401 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7402 delay_slot_filled = 1;
7404 else
7405 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7408 else
7410 if (!TARGET_NO_SPACE_REGS)
7411 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7412 xoperands);
7414 if (sibcall)
7416 if (TARGET_NO_SPACE_REGS)
7417 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7418 else
7419 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7421 else
7423 if (TARGET_NO_SPACE_REGS)
7424 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7425 else
7426 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7428 if (indirect_call)
7429 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7430 else
7431 output_asm_insn ("copy %%r31,%%r2", xoperands);
7432 delay_slot_filled = 1;
7439 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7440 output_asm_insn ("nop", xoperands);
7442 /* We are done if there isn't a jump in the delay slot. */
7443 if (seq_length == 0
7444 || delay_insn_deleted
7445 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7446 return "";
7448 /* A sibcall should never have a branch in the delay slot. */
7449 if (sibcall)
7450 abort ();
7452 /* This call has an unconditional jump in its delay slot. */
7453 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7455 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7457 /* See if the return address can be adjusted. Use the containing
7458 sequence insn's address. */
7459 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7460 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7461 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7463 if (VAL_14_BITS_P (distance))
7465 xoperands[1] = gen_label_rtx ();
7466 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7467 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7468 CODE_LABEL_NUMBER (xoperands[1]));
7470 else
7471 output_asm_insn ("nop\n\tb,n %0", xoperands);
7473 else
7474 output_asm_insn ("b,n %0", xoperands);
7476 /* Delete the jump. */
7477 PUT_CODE (NEXT_INSN (insn), NOTE);
7478 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7479 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7481 return "";
7484 /* Return the attribute length of the indirect call instruction INSN.
7485 The length must match the code generated by output_indirect_call.
7486 The returned length includes the delay slot. Currently, the delay
7487 slot of an indirect call sequence is not exposed and it is used by
7488 the sequence itself. */
7490 int
7491 attr_length_indirect_call (rtx insn)
7493 unsigned long distance = -1;
7494 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7496 if (INSN_ADDRESSES_SET_P ())
7498 distance = (total + insn_current_reference_address (insn));
7499 if (distance < total)
7500 distance = -1;
7503 if (TARGET_64BIT)
7504 return 12;
7506 if (TARGET_FAST_INDIRECT_CALLS
7507 || (!TARGET_PORTABLE_RUNTIME
7508 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7509 return 8;
7511 if (flag_pic)
7512 return 24;
7514 if (TARGET_PORTABLE_RUNTIME)
7515 return 20;
7517 /* Out of reach, can use ble. */
7518 return 12;
7521 const char *
7522 output_indirect_call (rtx insn, rtx call_dest)
7524 rtx xoperands[1];
7526 if (TARGET_64BIT)
7528 xoperands[0] = call_dest;
7529 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7530 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7531 return "";
7534 /* First the special case for kernels, level 0 systems, etc. */
7535 if (TARGET_FAST_INDIRECT_CALLS)
7536 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7538 /* Now the normal case -- we can reach $$dyncall directly or
7539 we're sure that we can get there via a long-branch stub.
7541 No need to check target flags as the length uniquely identifies
7542 the remaining cases. */
7543 if (attr_length_indirect_call (insn) == 8)
7544 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7546 /* Long millicode call, but we are not generating PIC or portable runtime
7547 code. */
7548 if (attr_length_indirect_call (insn) == 12)
7549 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7551 /* Long millicode call for portable runtime. */
7552 if (attr_length_indirect_call (insn) == 20)
7553 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7555 /* We need a long PIC call to $$dyncall. */
7556 xoperands[0] = NULL_RTX;
7557 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7558 if (TARGET_SOM || !TARGET_GAS)
7560 xoperands[0] = gen_label_rtx ();
7561 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7562 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7563 CODE_LABEL_NUMBER (xoperands[0]));
7564 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7566 else
7568 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7569 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7570 xoperands);
7572 output_asm_insn ("blr %%r0,%%r2", xoperands);
7573 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7574 return "";
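/* For example (assembled from the templates above), the long PIC form
   emitted with GAS is

	bl .+8,%r1
	addil L'$$dyncall-$PIC_pcrel$0+4,%r1
	ldo R'$$dyncall-$PIC_pcrel$0+8(%r1),%r1
	blr %r0,%r2
	bv,n %r0(%r1)
	nop

   six words, matching the 24-byte length returned for PIC code by
   attr_length_indirect_call.  */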
7577 /* Return the total length of the save and restore instructions needed for
7578 the data linkage table pointer (i.e., the PIC register) across the call
7579 instruction INSN. No-return calls do not require a save and restore.
7580 In addition, we may be able to avoid the save and restore for calls
7581 within the same translation unit. */
7583 int
7584 attr_length_save_restore_dltp (rtx insn)
7586 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7587 return 0;
7589 return 8;
7592 /* In HPUX 8.0's shared library scheme, special relocations are needed
7593 for function labels if they might be passed to a function
7594 in a shared library (because shared libraries don't live in code
7595 space), and special magic is needed to construct their address. */
7597 void
7598 hppa_encode_label (rtx sym)
7600 const char *str = XSTR (sym, 0);
7601 int len = strlen (str) + 1;
7602 char *newstr, *p;
7604 p = newstr = alloca (len + 1);
7605 *p++ = '@';
7606 strcpy (p, str);
7608 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7611 static void
7612 pa_encode_section_info (tree decl, rtx rtl, int first)
7614 if (first && TEXT_SPACE_P (decl))
7616 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7617 if (TREE_CODE (decl) == FUNCTION_DECL)
7618 hppa_encode_label (XEXP (rtl, 0));
7622 /* This is sort of the inverse of pa_encode_section_info. */
7624 static const char *
7625 pa_strip_name_encoding (const char *str)
7627 str += (*str == '@');
7628 str += (*str == '*');
7629 return str;
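/* Example of the encoding round trip (illustrative): hppa_encode_label
   turns a text-space function symbol "foo" into "@foo";
   pa_strip_name_encoding then removes the '@' marker (and a '*'
   user-label prefix, if present), yielding "foo" again.  */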
7632 int
7633 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7635 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7638 /* Returns 1 if OP is a function label involved in a simple addition
7639 with a constant. Used to keep certain patterns from matching
7640 during instruction combination. */
7641 int
7642 is_function_label_plus_const (rtx op)
7644 /* Strip off any CONST. */
7645 if (GET_CODE (op) == CONST)
7646 op = XEXP (op, 0);
7648 return (GET_CODE (op) == PLUS
7649 && function_label_operand (XEXP (op, 0), Pmode)
7650 && GET_CODE (XEXP (op, 1)) == CONST_INT);
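/* E.g. (illustrative RTL), this matches

       (const (plus (symbol_ref "@foo") (const_int 4)))

   after the outer CONST has been stripped.  */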
7653 /* Output assembly code for a thunk to FUNCTION. */
7655 static void
7656 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7657 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7658 tree function)
7660 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7661 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7662 int val_14 = VAL_14_BITS_P (delta);
7663 int nbytes = 0;
7664 static unsigned int current_thunk_number;
7665 char label[16];
7667 ASM_OUTPUT_LABEL (file, tname);
7668 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7670 fname = (*targetm.strip_name_encoding) (fname);
7671 tname = (*targetm.strip_name_encoding) (tname);
7673 /* Output the thunk. We know that the function is in the same
7674 translation unit (i.e., the same space) as the thunk, and that
7675 thunks are output after their method. Thus, we don't need an
7676 external branch to reach the function. With SOM and GAS,
7677 functions and thunks are effectively in different sections.
7678 Thus, we can always use an IA-relative branch and the linker
7679 will add a long branch stub if necessary.
7681 However, we have to be careful when generating PIC code on the
7682 SOM port to ensure that the sequence does not transfer to an
7683 import stub for the target function as this could clobber the
7684 return value saved at SP-24. This would also apply to the
7685 32-bit Linux port if the multi-space model is implemented. */
7686 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7687 && !(flag_pic && TREE_PUBLIC (function))
7688 && (TARGET_GAS || last_address < 262132))
7689 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7690 && ((targetm.have_named_sections
7691 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7692 /* The GNU 64-bit linker has rather poor stub management.
7693 So, we use a long branch from thunks that aren't in
7694 the same section as the target function. */
7695 && ((!TARGET_64BIT
7696 && (DECL_SECTION_NAME (thunk_fndecl)
7697 != DECL_SECTION_NAME (function)))
7698 || ((DECL_SECTION_NAME (thunk_fndecl)
7699 == DECL_SECTION_NAME (function))
7700 && last_address < 262132)))
7701 || (!targetm.have_named_sections && last_address < 262132))))
7703 if (val_14)
7705 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7706 "(%%r26),%%r26\n", fname, delta);
7707 nbytes += 8;
7709 else
7711 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7712 ",%%r26\n", delta);
7713 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7714 "(%%r1),%%r26\n", fname, delta);
7715 nbytes += 12;
7718 else if (TARGET_64BIT)
7720 /* We only have one call-clobbered scratch register, so we can't
7721 make use of the delay slot if delta doesn't fit in 14 bits. */
7722 if (!val_14)
7723 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7724 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7725 "(%%r1),%%r26\n", delta, delta);
7727 fprintf (file, "\tb,l .+8,%%r1\n");
7729 if (TARGET_GAS)
7731 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7732 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7734 else
7736 int off = val_14 ? 8 : 16;
7737 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7738 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7741 if (val_14)
7743 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7744 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7745 nbytes += 20;
7747 else
7749 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7750 nbytes += 24;
7753 else if (TARGET_PORTABLE_RUNTIME)
7755 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7756 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7758 if (val_14)
7760 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7761 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7762 nbytes += 16;
7764 else
7766 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7767 ",%%r26\n", delta);
7768 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7769 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7770 nbytes += 20;
7773 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7775 /* The function is accessible from outside this module. The only
7776 way to avoid an import stub between the thunk and function is to
7777 call the function directly with an indirect sequence similar to
7778 that used by $$dyncall. This is possible because $$dyncall acts
7779 as the import stub in an indirect call. */
7780 const char *lab;
7782 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7783 lab = (*targetm.strip_name_encoding) (label);
7785 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7786 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7787 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7788 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7789 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7790 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7791 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7792 if (!val_14)
7794 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7795 ",%%r26\n", delta);
7796 nbytes += 4;
7798 if (TARGET_PA_20)
7800 fprintf (file, "\tbve (%%r22)\n\tldo ");
7801 nbytes += 36;
7803 else
7805 if (TARGET_NO_SPACE_REGS)
7807 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7808 nbytes += 36;
7810 else
7812 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7813 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7814 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7815 nbytes += 44;
7819 if (val_14)
7820 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7821 else
7822 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7824 else if (flag_pic)
7826 if (TARGET_PA_20)
7827 fprintf (file, "\tb,l .+8,%%r1\n");
7828 else
7829 fprintf (file, "\tbl .+8,%%r1\n");
7831 if (TARGET_SOM || !TARGET_GAS)
7833 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7834 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7836 else
7838 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7839 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7842 if (val_14)
7844 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7845 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7846 nbytes += 20;
7848 else
7850 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7851 ",%%r26\n", delta);
7852 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7853 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7854 nbytes += 24;
7857 else
7859 if (!val_14)
7860 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7862 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7863 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7865 if (val_14)
7867 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7868 nbytes += 12;
7870 else
7872 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7873 nbytes += 16;
7877 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7879 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7881 data_section ();
7882 fprintf (file, "\t.align 4\n");
7883 ASM_OUTPUT_LABEL (file, label);
7884 fprintf (file, "\t.word P'%s\n", fname);
7885 function_section (thunk_fndecl);
7888 current_thunk_number++;
7889 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7890 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7891 last_address += nbytes;
7892 update_total_code_bytes (nbytes);
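/* As an illustration, in the simplest case above (direct branch with a
   14-bit delta) the thunk body is just

       b foo
       ldo delta(%r26),%r26

   adjusting the "this" pointer in the delay slot of the branch
   ("foo" and "delta" stand for the target name and offset).  */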
7895 /* Only direct calls to static functions are allowed to be sibling (tail)
7896 call optimized.
7898 This restriction is necessary because some linker-generated stubs will
7899 store return pointers into rp' in some cases, which might clobber a
7900 live value already in rp'.
7902 In a sibcall the current function and the target function share stack
7903 space. Thus if the path to the current function and the path to the
7904 target function save a value in rp', they save the value into the
7905 same stack slot, which has undesirable consequences.
7907 Because of the deferred binding nature of shared libraries, any function
7908 with external scope could be in a different load module and thus require
7909 rp' to be saved when calling that function. So sibcall optimizations
7910 can only be safe for static functions.
7912 Note that GCC never needs return value relocations, so we don't have to
7913 worry about static calls with return value relocations (which require
7914 saving rp').
7916 It is safe to perform a sibcall optimization when the target function
7917 will never return. */
7918 static bool
7919 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7921 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7922 single subspace mode and the call is not indirect. As far as I know,
7923 there is no operating system support for the multiple subspace mode.
7924 It might be possible to support indirect calls if we didn't use
7925 $$dyncall (see the indirect sequence generated in output_call). */
7926 if (TARGET_ELF32)
7927 return (decl != NULL_TREE);
7929 /* Sibcalls are not ok because the arg pointer register is not a fixed
7930 register. This prevents the sibcall optimization from occurring. In
7931 addition, there are problems with stub placement using GNU ld. This
7932 is because a normal sibcall branch uses a 17-bit relocation while
7933 a regular call branch uses a 22-bit relocation. As a result, more
7934 care needs to be taken in the placement of long-branch stubs. */
7935 if (TARGET_64BIT)
7936 return false;
7938 return (decl
7939 && !TARGET_PORTABLE_RUNTIME
7940 && !TREE_PUBLIC (decl));
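/* Illustrative C view of the 32-bit rule above:

       static int helper (int x) { return x + 1; }
       extern int public_fn (int);

       int f (int x) { return helper (x); }     // static: sibcall OK
       int g (int x) { return public_fn (x); }  // public: rejected

   Only the call in f may be tail-call optimized, since public_fn might
   bind to a different load module and need an import stub.  */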
7943 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7944 use in fmpyadd instructions. */
7945 int
7946 fmpyaddoperands (rtx *operands)
7948 enum machine_mode mode = GET_MODE (operands[0]);
7950 /* Must be a floating point mode. */
7951 if (mode != SFmode && mode != DFmode)
7952 return 0;
7954 /* All modes must be the same. */
7955 if (! (mode == GET_MODE (operands[1])
7956 && mode == GET_MODE (operands[2])
7957 && mode == GET_MODE (operands[3])
7958 && mode == GET_MODE (operands[4])
7959 && mode == GET_MODE (operands[5])))
7960 return 0;
7962 /* All operands must be registers. */
7963 if (! (GET_CODE (operands[1]) == REG
7964 && GET_CODE (operands[2]) == REG
7965 && GET_CODE (operands[3]) == REG
7966 && GET_CODE (operands[4]) == REG
7967 && GET_CODE (operands[5]) == REG))
7968 return 0;
7970 /* Only 2 real operands to the addition. One of the input operands must
7971 be the same as the output operand. */
7972 if (! rtx_equal_p (operands[3], operands[4])
7973 && ! rtx_equal_p (operands[3], operands[5]))
7974 return 0;
7976 /* The in/out operand of the add cannot conflict with any operands of the multiply. */
7977 if (rtx_equal_p (operands[3], operands[0])
7978 || rtx_equal_p (operands[3], operands[1])
7979 || rtx_equal_p (operands[3], operands[2]))
7980 return 0;
7982 /* The multiply result cannot feed into the addition operands. */
7983 if (rtx_equal_p (operands[4], operands[0])
7984 || rtx_equal_p (operands[5], operands[0]))
7985 return 0;
7987 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
7988 if (mode == SFmode
7989 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7990 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7991 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7992 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7993 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7994 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7995 return 0;
7997 /* Passed. Operands are suitable for fmpyadd. */
7998 return 1;
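/* An operand set that passes the checks above (illustrative, DFmode):

       fmpy: operands[0..2] = fr4, fr5, fr6    (fr4 = fr5 * fr6)
       fadd: operands[3..5] = fr7, fr7, fr8    (fr7 = fr7 + fr8)

   operands[3] matches operands[4], the add's in/out register is
   distinct from all multiply operands, and neither add input is the
   multiply destination, so the pair may become one fmpyadd.  */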
8001 #if !defined(USE_COLLECT2)
8002 static void
8003 pa_asm_out_constructor (rtx symbol, int priority)
8005 if (!function_label_operand (symbol, VOIDmode))
8006 hppa_encode_label (symbol);
8008 #ifdef CTORS_SECTION_ASM_OP
8009 default_ctor_section_asm_out_constructor (symbol, priority);
8010 #else
8011 # ifdef TARGET_ASM_NAMED_SECTION
8012 default_named_section_asm_out_constructor (symbol, priority);
8013 # else
8014 default_stabs_asm_out_constructor (symbol, priority);
8015 # endif
8016 #endif
8019 static void
8020 pa_asm_out_destructor (rtx symbol, int priority)
8022 if (!function_label_operand (symbol, VOIDmode))
8023 hppa_encode_label (symbol);
8025 #ifdef DTORS_SECTION_ASM_OP
8026 default_dtor_section_asm_out_destructor (symbol, priority);
8027 #else
8028 # ifdef TARGET_ASM_NAMED_SECTION
8029 default_named_section_asm_out_destructor (symbol, priority);
8030 # else
8031 default_stabs_asm_out_destructor (symbol, priority);
8032 # endif
8033 #endif
8035 #endif
8037 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8038 use in fmpysub instructions. */
8039 int
8040 fmpysuboperands (rtx *operands)
8042 enum machine_mode mode = GET_MODE (operands[0]);
8044 /* Must be a floating point mode. */
8045 if (mode != SFmode && mode != DFmode)
8046 return 0;
8048 /* All modes must be the same. */
8049 if (! (mode == GET_MODE (operands[1])
8050 && mode == GET_MODE (operands[2])
8051 && mode == GET_MODE (operands[3])
8052 && mode == GET_MODE (operands[4])
8053 && mode == GET_MODE (operands[5])))
8054 return 0;
8056 /* All operands must be registers. */
8057 if (! (GET_CODE (operands[1]) == REG
8058 && GET_CODE (operands[2]) == REG
8059 && GET_CODE (operands[3]) == REG
8060 && GET_CODE (operands[4]) == REG
8061 && GET_CODE (operands[5]) == REG))
8062 return 0;
8064 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8065 operation, so operands[4] must be the same as operands[3]. */
8066 if (! rtx_equal_p (operands[3], operands[4]))
8067 return 0;
8069 /* The multiply result cannot feed into the subtraction. */
8070 if (rtx_equal_p (operands[5], operands[0]))
8071 return 0;
8073 /* The in/out operand of the sub cannot conflict with any operands of the multiply. */
8074 if (rtx_equal_p (operands[3], operands[0])
8075 || rtx_equal_p (operands[3], operands[1])
8076 || rtx_equal_p (operands[3], operands[2]))
8077 return 0;
8079 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8080 if (mode == SFmode
8081 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8082 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8083 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8084 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8085 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8086 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8087 return 0;
8089 /* Passed. Operands are suitable for fmpysub. */
8090 return 1;
8093 int
8094 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8096 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
8097 || GET_CODE (op) == IOR);
8100 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8101 constants for shadd instructions. */
8102 static int
8103 shadd_constant_p (int val)
8105 if (val == 2 || val == 4 || val == 8)
8106 return 1;
8107 else
8108 return 0;
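/* The constants 2, 4 and 8 correspond to sh1add, sh2add and sh3add;
   e.g. (illustrative) "sh2add %r26,%r25,%r28" computes
   %r28 = (%r26 << 2) + %r25, scaling an index by 4.  */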
8111 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
8112 the valid constants for shadd instructions. */
8113 int
8114 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8116 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
8119 /* Return 1 if OP is valid as a base or index register in a
8120 REG+REG address. */
8122 int
8123 borx_reg_operand (rtx op, enum machine_mode mode)
8125 if (GET_CODE (op) != REG)
8126 return 0;
8128 /* We must reject virtual registers as the only expressions that
8129 can be instantiated are REG and REG+CONST. */
8130 if (op == virtual_incoming_args_rtx
8131 || op == virtual_stack_vars_rtx
8132 || op == virtual_stack_dynamic_rtx
8133 || op == virtual_outgoing_args_rtx
8134 || op == virtual_cfa_rtx)
8135 return 0;
8137 /* While it's always safe to index off the frame pointer, it's not
8138 profitable to do so when the frame pointer is being eliminated. */
8139 if (!reload_completed
8140 && flag_omit_frame_pointer
8141 && !current_function_calls_alloca
8142 && op == frame_pointer_rtx)
8143 return 0;
8145 return register_operand (op, mode);
8148 /* Return 1 if this operand is anything other than a hard register. */
8150 int
8151 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8153 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8156 /* Return 1 if INSN branches forward. We should be using INSN_ADDRESSES
8157 to avoid walking through all the insns... */
8158 static int
8159 forward_branch_p (rtx insn)
8161 rtx label = JUMP_LABEL (insn);
8163 while (insn)
8165 if (insn == label)
8166 break;
8167 else
8168 insn = NEXT_INSN (insn);
8171 return (insn == label);
8174 /* Return 1 if OP is an equality comparison, else return 0. */
8175 int
8176 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8178 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8181 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
8182 int
8183 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8185 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
8186 || GET_CODE (op) == LT || GET_CODE (op) == GE);
8189 /* Return 1 if INSN is in the delay slot of a call instruction. */
8190 int
8191 jump_in_call_delay (rtx insn)
8194 if (GET_CODE (insn) != JUMP_INSN)
8195 return 0;
8197 if (PREV_INSN (insn)
8198 && PREV_INSN (PREV_INSN (insn))
8199 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8201 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8203 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8204 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8207 else
8208 return 0;
8211 /* Output an unconditional move and branch insn. */
8213 const char *
8214 output_parallel_movb (rtx *operands, int length)
8216 /* These are the cases in which we win. */
8217 if (length == 4)
8218 return "mov%I1b,tr %1,%0,%2";
8220 /* None of these cases wins, but they don't lose either. */
8221 if (dbr_sequence_length () == 0)
8223 /* Nothing in the delay slot, fake it by putting the combined
8224 insn (the copy or add) in the delay slot of a bl. */
8225 if (GET_CODE (operands[1]) == CONST_INT)
8226 return "b %2\n\tldi %1,%0";
8227 else
8228 return "b %2\n\tcopy %1,%0";
8230 else
8232 /* Something in the delay slot, but we've got a long branch. */
8233 if (GET_CODE (operands[1]) == CONST_INT)
8234 return "ldi %1,%0\n\tb %2";
8235 else
8236 return "copy %1,%0\n\tb %2";
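/* For example (illustrative operands), the length-4 case emits
   "movb,tr %r4,%r5,L$17" for a register source; with a CONST_INT
   source the %I1 modifier selects the immediate form, "movib".  */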
8240 /* Output an unconditional add and branch insn. */
8242 const char *
8243 output_parallel_addb (rtx *operands, int length)
8245 /* To make life easy we want operand0 to be the shared input/output
8246 operand and operand1 to be the readonly operand. */
8247 if (operands[0] == operands[1])
8248 operands[1] = operands[2];
8250 /* These are the cases in which we win. */
8251 if (length == 4)
8252 return "add%I1b,tr %1,%0,%3";
8254 /* None of these cases wins, but they don't lose either. */
8255 if (dbr_sequence_length () == 0)
8257 /* Nothing in the delay slot, fake it by putting the combined
8258 insn (the copy or add) in the delay slot of a bl. */
8259 return "b %3\n\tadd%I1 %1,%0,%0";
8261 else
8263 /* Something in the delay slot, but we've got a long branch. */
8264 return "add%I1 %1,%0,%0\n\tb %3";
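/* Likewise (illustrative), the length-4 case emits something like
   "addb,tr %r4,%r5,L$17": add %r4 into %r5 and branch, in one insn.  */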
8268 /* Return nonzero if INSN (a jump insn) immediately follows a call
8269 to a named function. This is used to avoid filling the delay slot
8270 of the jump since it can usually be eliminated by modifying RP in
8271 the delay slot of the call. */
8273 int
8274 following_call (rtx insn)
8276 if (! TARGET_JUMP_IN_DELAY)
8277 return 0;
8279 /* Find the previous real insn, skipping NOTEs. */
8280 insn = PREV_INSN (insn);
8281 while (insn && GET_CODE (insn) == NOTE)
8282 insn = PREV_INSN (insn);
8284 /* Check for CALL_INSNs and millicode calls. */
8285 if (insn
8286 && ((GET_CODE (insn) == CALL_INSN
8287 && get_attr_type (insn) != TYPE_DYNCALL)
8288 || (GET_CODE (insn) == INSN
8289 && GET_CODE (PATTERN (insn)) != SEQUENCE
8290 && GET_CODE (PATTERN (insn)) != USE
8291 && GET_CODE (PATTERN (insn)) != CLOBBER
8292 && get_attr_type (insn) == TYPE_MILLI)))
8293 return 1;
8295 return 0;
8298 /* We use this hook to perform a PA-specific optimization which is difficult
8299 to do in earlier passes.
8301 We want the delay slots of branches within jump tables to be filled.
8302 None of the compiler passes at the moment even has the notion that a
8303 PA jump table doesn't contain addresses, but instead contains actual
8304 instructions!
8306 Because we actually jump into the table, the addresses of each entry
8307 must stay constant in relation to the beginning of the table (which
8308 itself must stay constant relative to the instruction to jump into
8309 it). I don't believe we can guarantee earlier passes of the compiler
8310 will adhere to those rules.
8312 So, late in the compilation process we find all the jump tables, and
8313 expand them into real code -- e.g., each entry in the jump table vector
8314 will get an appropriate label followed by a jump to the final target.
8316 Reorg and the final jump pass can then optimize these branches and
8317 fill their delay slots. We end up with smaller, more efficient code.
8319 The jump instructions within the table are special; we must be able
8320 to identify them during assembly output (if the jumps don't get filled
8321 we need to emit a nop rather than nullifying the delay slot). We
8322 identify jumps in switch tables by using insns with the attribute
8323 type TYPE_BTABLE_BRANCH.
8325 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8326 insns. This serves two purposes: first, it prevents jump.c from
8327 noticing that the last N entries in the table jump to the instruction
8328 immediately after the table and deleting the jumps. Second, those
8329 insns mark where we should emit .begin_brtab and .end_brtab directives
8330 when using GAS (allows for better link time optimizations). */
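/* As an illustration, a two-entry ADDR_VEC referencing L$4 and L$5 is
   rewritten into roughly

       begin_brtab
     L$100:  b L$4        ; delay slot filled by reorg, else a nop
       (barrier)
     L$101:  b L$5
       (barrier)
       end_brtab

   where L$100/L$101 are the labels jumped to from the dispatch code
   (label numbers are hypothetical).  */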
8332 static void
8333 pa_reorg (void)
8335 rtx insn;
8337 remove_useless_addtr_insns (1);
8339 if (pa_cpu < PROCESSOR_8000)
8340 pa_combine_instructions ();
8343 /* This is fairly cheap, so always run it if optimizing. */
8344 if (optimize > 0 && !TARGET_BIG_SWITCH)
8346 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8347 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8349 rtx pattern, tmp, location, label;
8350 unsigned int length, i;
8352 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8353 if (GET_CODE (insn) != JUMP_INSN
8354 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8355 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8356 continue;
8358 /* Emit marker for the beginning of the branch table. */
8359 emit_insn_before (gen_begin_brtab (), insn);
8361 pattern = PATTERN (insn);
8362 location = PREV_INSN (insn);
8363 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8365 for (i = 0; i < length; i++)
8367 /* Emit a label before each jump to keep jump.c from
8368 removing this code. */
8369 tmp = gen_label_rtx ();
8370 LABEL_NUSES (tmp) = 1;
8371 emit_label_after (tmp, location);
8372 location = NEXT_INSN (location);
8374 if (GET_CODE (pattern) == ADDR_VEC)
8375 label = XEXP (XVECEXP (pattern, 0, i), 0);
8376 else
8377 label = XEXP (XVECEXP (pattern, 1, i), 0);
8379 tmp = gen_short_jump (label);
8381 /* Emit the jump itself. */
8382 tmp = emit_jump_insn_after (tmp, location);
8383 JUMP_LABEL (tmp) = label;
8384 LABEL_NUSES (label)++;
8385 location = NEXT_INSN (location);
8387 /* Emit a BARRIER after the jump. */
8388 emit_barrier_after (location);
8389 location = NEXT_INSN (location);
8392 /* Emit marker for the end of the branch table. */
8393 emit_insn_before (gen_end_brtab (), location);
8394 location = NEXT_INSN (location);
8395 emit_barrier_after (location);
8397 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8398 delete_insn (insn);
8401 else
8403 /* Still need brtab marker insns. FIXME: the presence of these
8404 markers disables output of the branch table to readonly memory,
8405 and any alignment directives that might be needed. Possibly,
8406 the begin_brtab insn should be output before the label for the
8407 table. This doesn't matter at the moment since the tables are
8408 always output in the text section. */
8409 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8411 /* Find an ADDR_VEC insn. */
8412 if (GET_CODE (insn) != JUMP_INSN
8413 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8414 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8415 continue;
8417 /* Now generate markers for the beginning and end of the
8418 branch table. */
8419 emit_insn_before (gen_begin_brtab (), insn);
8420 emit_insn_after (gen_end_brtab (), insn);
8425 /* The PA has a number of odd instructions which can perform multiple
8426 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8427 it may be profitable to combine two instructions into one instruction
8428 with two outputs. It's not profitable on PA2.0 machines because the
8429 two outputs would take two slots in the reorder buffers.
8431 This routine finds instructions which can be combined and combines
8432 them. We only support some of the potential combinations, and we
8433 only try common ways to find suitable instructions.
8435 * addb can add two registers or a register and a small integer
8436 and jump to a nearby (+-8k) location. Normally the jump to the
8437 nearby location is conditional on the result of the add, but by
8438 using the "true" condition we can make the jump unconditional.
8439 Thus addb can perform two independent operations in one insn.
8441 * movb is similar to addb in that it can perform a reg->reg
8442 or small immediate->reg copy and jump to a nearby (+-8k) location.
8444 * fmpyadd and fmpysub can perform a FP multiply and either an
8445 FP add or FP sub if the operands of the multiply and add/sub are
8446 independent (there are other minor restrictions). Note both
8447 the fmpy and fadd/fsub can in theory move to better spots according
8448 to data dependencies, but for now we require the fmpy stay at a
8449 fixed location.
8451 * Many of the memory operations can perform pre & post updates
8452 of index registers. GCC's pre/post increment/decrement addressing
8453 is far too simple to take advantage of all the possibilities. This
8454 pass may not be suitable since those insns may not be independent.
8456 * comclr can compare two ints or an int and a register, nullify
8457 the following instruction and zero some other register. This
8458 is more difficult to use as it's harder to find an insn which
8459 will generate a comclr than finding something like an unconditional
8460 branch. (conditional moves & long branches create comclr insns).
8462 * Most arithmetic operations can conditionally skip the next
8463 instruction. They can be viewed as "perform this operation
8464 and conditionally jump to this nearby location" (where nearby
8465 is a few insns away). These are difficult to use due to the
8466 branch length restrictions. */
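/* As an illustration of the movb case, a register copy followed by a
   backward unconditional branch

       copy %r4,%r5
       b L$12

   can be combined into the single insn "movb,tr %r4,%r5,L$12"
   (see output_parallel_movb above); register numbers are illustrative.  */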
8468 static void
8469 pa_combine_instructions (void)
8471 rtx anchor, new;
8473 /* This can get expensive since the basic algorithm is on the
8474 order of O(n^2) (or worse). Only do it for -O2 or higher
8475 levels of optimization. */
8476 if (optimize < 2)
8477 return;
8479 /* Walk down the list of insns looking for "anchor" insns which
8480 may be combined with "floating" insns. As the name implies,
8481 "anchor" instructions don't move, while "floating" insns may
8482 move around. */
8483 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8484 new = make_insn_raw (new);
8486 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8488 enum attr_pa_combine_type anchor_attr;
8489 enum attr_pa_combine_type floater_attr;
8491 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8492 Also ignore any special USE insns. */
8493 if ((GET_CODE (anchor) != INSN
8494 && GET_CODE (anchor) != JUMP_INSN
8495 && GET_CODE (anchor) != CALL_INSN)
8496 || GET_CODE (PATTERN (anchor)) == USE
8497 || GET_CODE (PATTERN (anchor)) == CLOBBER
8498 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8499 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8500 continue;
8502 anchor_attr = get_attr_pa_combine_type (anchor);
8503 /* See if anchor is an insn suitable for combination. */
8504 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8505 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8506 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8507 && ! forward_branch_p (anchor)))
8509 rtx floater;
8511 for (floater = PREV_INSN (anchor);
8512 floater;
8513 floater = PREV_INSN (floater))
8515 if (GET_CODE (floater) == NOTE
8516 || (GET_CODE (floater) == INSN
8517 && (GET_CODE (PATTERN (floater)) == USE
8518 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8519 continue;
8521 /* Anything except a regular INSN will stop our search. */
8522 if (GET_CODE (floater) != INSN
8523 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8524 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8526 floater = NULL_RTX;
8527 break;
8530 /* See if FLOATER is suitable for combination with the
8531 anchor. */
8532 floater_attr = get_attr_pa_combine_type (floater);
8533 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8534 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8535 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8536 && floater_attr == PA_COMBINE_TYPE_FMPY))
8538 /* If ANCHOR and FLOATER can be combined, then we're
8539 done with this pass. */
8540 if (pa_can_combine_p (new, anchor, floater, 0,
8541 SET_DEST (PATTERN (floater)),
8542 XEXP (SET_SRC (PATTERN (floater)), 0),
8543 XEXP (SET_SRC (PATTERN (floater)), 1)))
8544 break;
8547 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8548 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8550 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8552 if (pa_can_combine_p (new, anchor, floater, 0,
8553 SET_DEST (PATTERN (floater)),
8554 XEXP (SET_SRC (PATTERN (floater)), 0),
8555 XEXP (SET_SRC (PATTERN (floater)), 1)))
8556 break;
8558 else
8560 if (pa_can_combine_p (new, anchor, floater, 0,
8561 SET_DEST (PATTERN (floater)),
8562 SET_SRC (PATTERN (floater)),
8563 SET_SRC (PATTERN (floater))))
8564 break;
8569 /* If we didn't find anything on the backwards scan try forwards. */
8570 if (!floater
8571 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8572 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8574 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8576 if (GET_CODE (floater) == NOTE
8577 || (GET_CODE (floater) == INSN
8578 && (GET_CODE (PATTERN (floater)) == USE
8579 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8581 continue;
8583 /* Anything except a regular INSN will stop our search. */
8584 if (GET_CODE (floater) != INSN
8585 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8586 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8588 floater = NULL_RTX;
8589 break;
8592 /* See if FLOATER is suitable for combination with the
8593 anchor. */
8594 floater_attr = get_attr_pa_combine_type (floater);
8595 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8596 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8597 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8598 && floater_attr == PA_COMBINE_TYPE_FMPY))
8600 /* If ANCHOR and FLOATER can be combined, then we're
8601 done with this pass. */
8602 if (pa_can_combine_p (new, anchor, floater, 1,
8603 SET_DEST (PATTERN (floater)),
8604 XEXP (SET_SRC (PATTERN (floater)),
8605 0),
8606 XEXP (SET_SRC (PATTERN (floater)),
8607 1)))
8608 break;
8613 /* FLOATER will be nonzero if we found a suitable floating
8614 insn for combination with ANCHOR. */
8615 if (floater
8616 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8617 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8619 /* Emit the new instruction and delete the old anchor. */
8620 emit_insn_before (gen_rtx_PARALLEL
8621 (VOIDmode,
8622 gen_rtvec (2, PATTERN (anchor),
8623 PATTERN (floater))),
8624 anchor);
8626 PUT_CODE (anchor, NOTE);
8627 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8628 NOTE_SOURCE_FILE (anchor) = 0;
8630 /* Emit a special USE insn for FLOATER, then delete
8631 the floating insn. */
8632 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8633 delete_insn (floater);
8635 continue;
8637 else if (floater
8638 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8640 rtx temp;
8641 /* Emit the new_jump instruction and delete the old anchor. */
8642 temp
8643 = emit_jump_insn_before (gen_rtx_PARALLEL
8644 (VOIDmode,
8645 gen_rtvec (2, PATTERN (anchor),
8646 PATTERN (floater))),
8647 anchor);
8649 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8650 PUT_CODE (anchor, NOTE);
8651 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8652 NOTE_SOURCE_FILE (anchor) = 0;
8654 /* Emit a special USE insn for FLOATER, then delete
8655 the floating insn. */
8656 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8657 delete_insn (floater);
8658 continue;
8664 static int
8665 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8666 rtx src1, rtx src2)
8668 int insn_code_number;
8669 rtx start, end;
8671 /* Create a PARALLEL with the patterns of ANCHOR and
8672 FLOATER, try to recognize it, then test constraints
8673 for the resulting pattern.
8675 If the pattern doesn't match or the constraints
8676 aren't met, keep searching for a suitable floater
8677 insn. */
8678 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8679 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8680 INSN_CODE (new) = -1;
8681 insn_code_number = recog_memoized (new);
8682 if (insn_code_number < 0
8683 || (extract_insn (new), ! constrain_operands (1)))
8684 return 0;
8686 if (reversed)
8688 start = anchor;
8689 end = floater;
8691 else
8693 start = floater;
8694 end = anchor;
8697 /* There are up to three operands to consider: one
8698 output and two inputs.
8700 The output must not be used between FLOATER and ANCHOR
8701 exclusive. The inputs must not be set between
8702 FLOATER and ANCHOR exclusive. */
8704 if (reg_used_between_p (dest, start, end))
8705 return 0;
8707 if (reg_set_between_p (src1, start, end))
8708 return 0;
8710 if (reg_set_between_p (src2, start, end))
8711 return 0;
8713 /* If we get here, then everything is good. */
8714 return 1;
8717 /* Return nonzero if references for INSN are delayed.
8719 Millicode insns are actually function calls with some special
8720 constraints on arguments and register usage.
8722 Millicode calls always expect their arguments in the integer argument
8723 registers, and always return their result in %r29 (ret1). They
8724 are expected to clobber their arguments, %r1, %r29, and the return
8725 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8727 This function tells reorg that the references to arguments and
8728 millicode calls do not appear to happen until after the millicode call.
8729 This allows reorg to put insns which set the argument registers into the
8730 delay slot of the millicode call -- thus they act more like traditional
8731 CALL_INSNs.
8733 Note we cannot consider side effects of the insn to be delayed because
8734 the branch and link insn will clobber the return pointer. If we happened
8735 to use the return pointer in the delay slot of the call, then we lose.
8737 get_attr_type will try to recognize the given insn, so make sure to
8738 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8739 in particular. */
8740 int
8741 insn_refs_are_delayed (rtx insn)
8743 return ((GET_CODE (insn) == INSN
8744 && GET_CODE (PATTERN (insn)) != SEQUENCE
8745 && GET_CODE (PATTERN (insn)) != USE
8746 && GET_CODE (PATTERN (insn)) != CLOBBER
8747 && get_attr_type (insn) == TYPE_MILLI));
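/* Illustrative effect: treating the argument references as delayed
   lets reorg produce, e.g.,

       bl $$remI,%r31
       ldi 7,%r25          ; argument setup moved into the delay slot

   (the millicode name and operands here are illustrative).  */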
8750 /* On the HP-PA the value is found in register(s) 28(-29), unless
8751 the mode is SF or DF. Then the value is returned in fr4 (32).
8753 This must perform the same promotions as PROMOTE_MODE, else
8754 PROMOTE_FUNCTION_RETURN will not work correctly.
8756 Small structures must be returned in a PARALLEL on PA64 in order
8757 to match the HP Compiler ABI. */
8759 rtx
8760 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8762 enum machine_mode valmode;
8764 /* Aggregates with a size less than or equal to 128 bits are returned
8765 in GR 28(-29). They are left justified. The pad bits are undefined.
8766 Larger aggregates are returned in memory. */
8767 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8769 rtx loc[2];
8770 int i, offset = 0;
8771 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8773 for (i = 0; i < ub; i++)
8775 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8776 gen_rtx_REG (DImode, 28 + i),
8777 GEN_INT (offset));
8778 offset += 8;
8781 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8784 if ((INTEGRAL_TYPE_P (valtype)
8785 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8786 || POINTER_TYPE_P (valtype))
8787 valmode = word_mode;
8788 else
8789 valmode = TYPE_MODE (valtype);
8791 if (TREE_CODE (valtype) == REAL_TYPE
8792 && TYPE_MODE (valtype) != TFmode
8793 && !TARGET_SOFT_FLOAT)
8794 return gen_rtx_REG (valmode, 32);
8796 return gen_rtx_REG (valmode, 28);
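/* Worked examples (illustrative): a 'double' result is returned in
   (reg:DF 32), i.e. %fr4; an 'int' is widened to word_mode and
   returned in register 28 (%r28); and on the 64-bit port a 16-byte
   struct yields the two-element PARALLEL over %r28/%r29 built above.  */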
8799 /* Return the location of a parameter that is passed in a register or NULL
8800 if the parameter has any component that is passed in memory.
8802 This is new code and will be pushed into the net sources after
8803 further testing.
8805 ??? We might want to restructure this so that it looks more like other
8806 ports. */
8807 rtx
8808 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8809 int named ATTRIBUTE_UNUSED)
8811 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8812 int alignment = 0;
8813 int arg_size;
8814 int fpr_reg_base;
8815 int gpr_reg_base;
8816 rtx retval;
8818 if (mode == VOIDmode)
8819 return NULL_RTX;
8821 arg_size = FUNCTION_ARG_SIZE (mode, type);
8823 /* If this arg would be passed partially or totally on the stack, then
8824 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8825 handle arguments which are split between regs and stack slots if
8826 the ABI mandates split arguments. */
8827 if (! TARGET_64BIT)
8829 /* The 32-bit ABI does not split arguments. */
8830 if (cum->words + arg_size > max_arg_words)
8831 return NULL_RTX;
8833 else
8835 if (arg_size > 1)
8836 alignment = cum->words & 1;
8837 if (cum->words + alignment >= max_arg_words)
8838 return NULL_RTX;
8841 /* The 32-bit ABIs and the 64-bit ABIs are rather different,
8842 particularly in their handling of FP registers. We might
8843 be able to cleverly share code between them, but I'm not
8844 going to bother in the hope that splitting them up results
8845 in code that is more easily understood. */
8847 if (TARGET_64BIT)
8849 /* Advance the base registers to their current locations.
8851 Remember, gprs grow towards smaller register numbers while
8852 fprs grow towards higher register numbers. Also remember that
8853 although FP regs are 32-bit addressable, we pretend that
8854 the registers are 64-bits wide. */
8855 gpr_reg_base = 26 - cum->words;
8856 fpr_reg_base = 32 + cum->words;
8858 /* Arguments wider than one word and small aggregates need special
8859 treatment. */
8860 if (arg_size > 1
8861 || mode == BLKmode
8862 || (type && AGGREGATE_TYPE_P (type)))
8864 /* Double-extended precision (80-bit), quad-precision (128-bit)
8865 and aggregates including complex numbers are aligned on
8866 128-bit boundaries. The first eight 64-bit argument slots
8867 are associated one-to-one, with general registers r26
8868 through r19, and also with floating-point registers fr4
8869 through fr11. Arguments larger than one word are always
8870 passed in general registers.
8872 Using a PARALLEL with a word mode register results in left
8873 justified data on a big-endian target. */
8875 rtx loc[8];
8876 int i, offset = 0, ub = arg_size;
8878 /* Align the base register. */
8879 gpr_reg_base -= alignment;
8881 ub = MIN (ub, max_arg_words - cum->words - alignment);
8882 for (i = 0; i < ub; i++)
8884 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8885 gen_rtx_REG (DImode, gpr_reg_base),
8886 GEN_INT (offset));
8887 gpr_reg_base -= 1;
8888 offset += 8;
8891 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8894 else
8896 /* If the argument is larger than a word, then we know precisely
8897 which registers we must use. */
8898 if (arg_size > 1)
8900 if (cum->words)
8902 gpr_reg_base = 23;
8903 fpr_reg_base = 38;
8905 else
8907 gpr_reg_base = 25;
8908 fpr_reg_base = 34;
8911 /* Structures 5 to 8 bytes in size are passed in the general
8912 registers in the same manner as other non floating-point
8913 objects. The data is right-justified and zero-extended
8914 to 64 bits.
8916 This is magic. Normally, using a PARALLEL results in left
8917 justified data on a big-endian target. However, using a
8918 single double-word register provides the required right
8919 justification for 5 to 8 byte structures. This has nothing
8920 to do with the direction of padding specified for the argument.
8921 It has to do with how the data is widened and shifted into
8922 and from the register.
8924 Aside from adding load_multiple and store_multiple patterns,
8925 this is the only way that I have found to obtain right
8926 justification of BLKmode data when it has a size greater
8927 than one word. Splitting the operation into two SImode loads
8928 or returning a DImode REG results in left justified data. */
8929 if (mode == BLKmode)
8931 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8932 gen_rtx_REG (DImode, gpr_reg_base),
8933 const0_rtx);
8934 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8937 else
8939 /* We have a single word (32 bits). A simple computation
8940 will get us the register #s we need. */
8941 gpr_reg_base = 26 - cum->words;
8942 fpr_reg_base = 32 + 2 * cum->words;
8946 /* Determine if the argument needs to be passed in both general and
8947 floating point registers. */
8948 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8949 /* If we are doing soft-float with portable runtime, then there
8950 is no need to worry about FP regs. */
8951 && !TARGET_SOFT_FLOAT
8952 /* The parameter must be some kind of float, else we can just
8953 pass it in integer registers. */
8954 && FLOAT_MODE_P (mode)
8955 /* The target function must not have a prototype. */
8956 && cum->nargs_prototype <= 0
8957 /* libcalls do not need to pass items in both FP and general
8958 registers. */
8959 && type != NULL_TREE
8960 /* All this hair applies to "outgoing" args only. This includes
8961 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
8962 && !cum->incoming)
8963 /* Also pass outgoing floating arguments in both registers in indirect
8964 calls with the 32-bit ABI and the HP assembler since there is no
8965 way to specify argument locations in static functions. */
8966 || (!TARGET_64BIT
8967 && !TARGET_GAS
8968 && !cum->incoming
8969 && cum->indirect
8970 && FLOAT_MODE_P (mode)))
8972 retval
8973 = gen_rtx_PARALLEL
8974 (mode,
8975 gen_rtvec (2,
8976 gen_rtx_EXPR_LIST (VOIDmode,
8977 gen_rtx_REG (mode, fpr_reg_base),
8978 const0_rtx),
8979 gen_rtx_EXPR_LIST (VOIDmode,
8980 gen_rtx_REG (mode, gpr_reg_base),
8981 const0_rtx)));
8983 else
8985 /* See if we should pass this parameter in a general register. */
8986 if (TARGET_SOFT_FLOAT
8987 /* Indirect calls in the normal 32-bit ABI require all arguments
8988 to be passed in general registers. */
8989 || (!TARGET_PORTABLE_RUNTIME
8990 && !TARGET_64BIT
8991 && !TARGET_ELF32
8992 && cum->indirect)
8993 /* If the parameter is not a floating point parameter, then
8994 it belongs in GPRs. */
8995 || !FLOAT_MODE_P (mode))
8996 retval = gen_rtx_REG (mode, gpr_reg_base);
8997 else
8998 retval = gen_rtx_REG (mode, fpr_reg_base);
9000 return retval;
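/* A worked 32-bit example (illustrative): a 6-byte struct as the first
   argument has arg_size == 2, so the BLKmode path above returns
   (parallel [(expr_list (reg:DI 25) (const_int 0))]), i.e. the
   %r26/%r25 argument-register pair with the data right-justified.  */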
9004 /* If this arg would be passed totally in registers or totally on the stack,
9005 then this routine should return zero. It is currently called only for
9006 the 64-bit target. */
9007 int
9008 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9009 tree type, int named ATTRIBUTE_UNUSED)
9011 unsigned int max_arg_words = 8;
9012 unsigned int offset = 0;
9014 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9015 offset = 1;
9017 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9018 /* Arg fits fully into registers. */
9019 return 0;
9020 else if (cum->words + offset >= max_arg_words)
9021 /* Arg fully on the stack. */
9022 return 0;
9023 else
9024 /* Arg is split. */
9025 return max_arg_words - cum->words - offset;
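/* Worked example (illustrative): with cum->words == 6 and a four-word
   argument, offset is 0; 6 + 0 + 4 > 8 while 6 < 8, so the argument is
   split and the function returns 8 - 6 - 0 = 2: two words go in
   registers, the remaining two on the stack.  */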
9029 /* Return 1 if this is a comparison operator. This allows the use of
9030 MATCH_OPERATOR to recognize all the branch insns. */
9032 int
9033 cmpib_comparison_operator (rtx op, enum machine_mode mode)
9035 return ((mode == VOIDmode || GET_MODE (op) == mode)
9036 && (GET_CODE (op) == EQ
9037 || GET_CODE (op) == NE
9038 || GET_CODE (op) == GT
9039 || GET_CODE (op) == GTU
9040 || GET_CODE (op) == GE
9041 || GET_CODE (op) == LT
9042 || GET_CODE (op) == LE
9043 || GET_CODE (op) == LEU));
9046 /* On hpux10, the linker will give an error if we have a reference
9047 in the read-only data section to a symbol defined in a shared
9048 library. Therefore, expressions that might require a reloc
9049 cannot be placed in the read-only data section. */
9051 static void
9052 pa_select_section (tree exp, int reloc,
9053 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9055 if (TREE_CODE (exp) == VAR_DECL
9056 && TREE_READONLY (exp)
9057 && !TREE_THIS_VOLATILE (exp)
9058 && DECL_INITIAL (exp)
9059 && (DECL_INITIAL (exp) == error_mark_node
9060 || TREE_CONSTANT (DECL_INITIAL (exp)))
9061 && !reloc)
9062 readonly_data_section ();
9063 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
9064 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
9065 && !reloc)
9066 readonly_data_section ();
9067 else
9068 data_section ();
9071 static void
9072 pa_globalize_label (FILE *stream, const char *name)
9074 /* We only handle DATA objects here; functions are globalized in
9075 ASM_DECLARE_FUNCTION_NAME. */
9076 if (! FUNCTION_NAME_P (name))
9078 fputs ("\t.EXPORT ", stream);
9079 assemble_name (stream, name);
9080 fputs (",DATA\n", stream);
9083 #include "gt-pa.h"