/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static int shadd_constant_p (int);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static struct deferred_plabel *get_plabel (const char *)
     ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *ca, enum machine_mode,
                                  tree, bool);
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel GTY(())
{
  rtx internal_label;
  const char *name;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END output_deferred_plabels

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

struct gcc_target targetm = TARGET_INITIALIZER;
void
override_options (void)
{
  if (pa_cpu_string == NULL)
    pa_cpu_string = TARGET_SCHED_DEFAULT;

  if (! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else if (! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (! strcmp (pa_cpu_string, "7300"))
    {
      pa_cpu_string = "7300";
      pa_cpu = PROCESSOR_7300;
    }
  else
    {
      warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return nonzero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return nonzero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand, they perform very similar functions.  */
int
call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_MODE (op) == word_mode
          && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (op, 0)) == LABEL_REF)
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
          || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM)
    return 0;

  /* Until problems with management of the REG_POINTER flag are resolved,
     we need to delay creating move insns with unscaled indexed addresses
     until CSE is not expected.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (XEXP (op, 0)) == PLUS
      && REG_P (XEXP (XEXP (op, 0), 0))
      && REG_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  return (!symbolic_memory_operand (op, mode)
          && memory_address_p (mode, XEXP (op, 0)));
}

/* Return 1 if the operand is a register operand or a non-symbolic memory
   operand after reload.  This predicate is used for branch patterns that
   internally handle register reloading.  We need to accept non-symbolic
   memory operands after reload to ensure that the pattern is still valid
   if reload didn't find a hard register for the operand.  */

int
reg_before_reload_operand (rtx op, enum machine_mode mode)
{
  /* Don't accept a SUBREG since it will need a reload.  */
  if (GET_CODE (op) == SUBREG)
    return 0;

  if (register_operand (op, mode))
    return 1;

  if (reload_completed
      && memory_operand (op, mode)
      && !symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT intval)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
          || CONST_OK_FOR_LETTER_P (intval, 'N')
          || CONST_OK_FOR_LETTER_P (intval, 'K'));
}

/* Return 1 iff OP is an indexed memory operand.  */
int
indexed_memory_operand (rtx op, enum machine_mode mode)
{
  if (GET_MODE (op) != mode)
    return 0;

  /* Before reload, a (SUBREG (MEM...)) forces reloading into a register.  */
  if (reload_completed && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
    return 0;

  op = XEXP (op, 0);

  return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
}

/* Accept anything that can be used as a destination operand for a
   move instruction.  We don't accept indexed memory operands since
   they are supported only for floating point stores.  */
int
move_dest_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (GET_MODE (op) != mode)
    return 0;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
    return 0;

  op = XEXP (op, 0);

  return (memory_address_p (mode, op)
          && !IS_INDEX_ADDR_P (op)
          && !IS_LO_SUM_DLT_ADDR_P (op));
}

/* Accept anything that can be used as a source operand for a move
   instruction.  */
int
move_src_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_MODE (op) != mode)
    return 0;

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) != MEM)
    return 0;

  /* Until problems with management of the REG_POINTER flag are resolved,
     we need to delay creating move insns with unscaled indexed addresses
     until CSE is not expected.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (XEXP (op, 0)) == PLUS
      && REG_P (XEXP (XEXP (op, 0), 0))
      && REG_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  return memory_address_p (mode, XEXP (op, 0));
}

/* Accept anything that can be used as the source operand for a prefetch
   instruction.  */
int
prefetch_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) != MEM)
    return 0;

  /* Until problems with management of the REG_POINTER flag are resolved,
     we need to delay creating prefetch insns with unscaled indexed addresses
     until CSE is not expected.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (XEXP (op, 0)) == PLUS
      && REG_P (XEXP (XEXP (op, 0), 0))
      && REG_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  return memory_address_p (mode, XEXP (op, 0));
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (rtx op, enum machine_mode mode)
{
  if (register_operand (op, mode))
    return 1;

  return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
}

int
pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return reg_renumber && FP_REG_P (op);
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}
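
/* Note that the two ranges above mirror each other: a PRE_MODIFY
   displacement may be a large negative or a small positive value,
   while a POST_MODIFY displacement may be a small negative or a
   large positive value.  */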
int
arith_double_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_DOUBLE
              && GET_MODE (op) == mode
              && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
              && ((CONST_DOUBLE_HIGH (op) >= 0)
                  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
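
/* The final clause above checks that the high word is the sign
   extension of the 14-bit low part: 0x1000 is the sign bit of a
   14-bit field, so the high word must be nonnegative exactly when
   that bit is clear.  */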
/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
          || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
  return (GET_CODE (op) == CONST_INT
          || (GET_CODE (op) == CONST_DOUBLE
              && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

int
arith5_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || int5_operand (op, mode);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
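
/* For example, x = 0x7f0 is the 5-bit value -1 sign extended to a
   7-bit field and deposited at bit 4: lsb_mask = 0x10,
   t = (0x7f + 0x10) & ~0xf = 0x80, a power of two, so it is
   accepted.  For x = 0x21, t = 3, which is not a power of two, so
   it is rejected.  */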
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
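
/* The complement of a valid mask is a single contiguous run of ones.
   Adding the run's lowest set bit carries across the whole run, so at
   most one bit remains set and the power-of-two test succeeds.  For
   example, mask = 0xff: ~mask is all ones from bit 8 up, and adding
   0x100 carries off the top, leaving 0, which passes the test.  */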
/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
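
/* After discarding the low four bits, the remainder must be of the
   form 0...01...1 (possibly all zeros), i.e. x & (x + 1) == 0.  For
   example, 0x1f5 >> 4 = 0x1f and 0x1f & 0x20 == 0, so 0x1f5 is
   accepted; 0x2f5 >> 4 = 0x2f and 0x2f & 0x30 != 0, so 0x2f5 is
   rejected.  */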
int
arith32_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      if (reg == 0)
        abort ();

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
                      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
        = gen_rtx_MEM (Pmode,
                       gen_rtx_LO_SUM (Pmode, tmp_reg,
                                       gen_rtx_UNSPEC (Pmode,
                                                       gen_rtvec (1, orig),
                                                       UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      MEM_NOTRAP_P (pic_ref) = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
        {
          base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
          orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                         base == reg ? 0 : reg);
        }
      else
        abort ();

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
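
/* As a worked example of the rounding scheme above: for a MODE_INT
   reference to X + 0x4010 the mask is 0x3fff, and 0x4010 & 0x3fff
   = 0x10 is below halfway, so Y = 0x4000.  We compute Z = X + 0x4000
   and address memory (Z + 0x10); nearby references such as X + 0x4020
   can then share the same Z.  */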
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
               (plus (mult (reg) (shadd_const))
                     (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1, a HIGH
   costs 2, and everything else costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
    abort ();

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      /* This is only safe up to the beginning of life analysis.  */
      if (no_new_pseudos)
        abort ();

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                       ? SFmode : DFmode),
                                      XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
                              operand1));
      return 1;
    }
1664 /* Handle secondary reloads for loads of FP registers from constant
1665 expressions by forcing the constant into memory.
1667 Use scratch_reg to hold the address of the memory location.
1669 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1670 NO_REGS when presented with a const_int and a register class
1671 containing only FP registers. Doing so unfortunately creates
1672 more problems than it solves. Fix this for 2.5. */
1673 else if (scratch_reg
1674 && CONSTANT_P (operand1)
1675 && fp_reg_operand (operand0, mode))
1677 rtx xoperands[2];
1679 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1680 it in WORD_MODE regardless of what mode it was originally given
1681 to us. */
1682 scratch_reg = force_mode (word_mode, scratch_reg);
1684 /* Force the constant into memory and put the address of the
1685 memory location into scratch_reg. */
1686 xoperands[0] = scratch_reg;
1687 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1688 emit_move_sequence (xoperands, Pmode, 0);
1690 /* Now load the destination register. */
1691 emit_insn (gen_rtx_SET (mode, operand0,
1692 gen_rtx_MEM (mode, scratch_reg)));
1693 return 1;
1695 /* Handle secondary reloads for SAR. These occur when trying to load
1696 the SAR from memory, FP register, or with a constant. */
1697 else if (scratch_reg
1698 && GET_CODE (operand0) == REG
1699 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1700 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1701 && (GET_CODE (operand1) == MEM
1702 || GET_CODE (operand1) == CONST_INT
1703 || (GET_CODE (operand1) == REG
1704 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1706 /* D might not fit in 14 bits either; for such cases load D into
1707 scratch reg. */
1708 if (GET_CODE (operand1) == MEM
1709 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1711 /* We are reloading the address into the scratch register, so we
1712 want to make sure the scratch register is a full register. */
1713 scratch_reg = force_mode (word_mode, scratch_reg);
1715 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1716 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1717 0)),
1718 Pmode,
1719 XEXP (XEXP (operand1, 0),
1721 scratch_reg));
1723 /* Now we are going to load the scratch register from memory,
1724 we want to load it in the same width as the original MEM,
1725 which must be the same as the width of the ultimate destination,
1726 OPERAND0. */
1727 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1729 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1730 scratch_reg));
1732 else
1734 /* We want to load the scratch register using the same mode as
1735 the ultimate destination. */
1736 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1738 emit_move_insn (scratch_reg, operand1);
1741 /* And emit the insn to set the ultimate destination. We know that
1742 the scratch register has the same mode as the destination at this
1743 point. */
1744 emit_move_insn (operand0, scratch_reg);
1745 return 1;
1747 /* Handle the most common case: storing into a register. */
1748 else if (register_operand (operand0, mode))
1750 if (register_operand (operand1, mode)
1751 || (GET_CODE (operand1) == CONST_INT
1752 && cint_ok_for_move (INTVAL (operand1)))
1753 || (operand1 == CONST0_RTX (mode))
1754 || (GET_CODE (operand1) == HIGH
1755 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1756 /* Only `general_operands' can come here, so MEM is ok. */
1757 || GET_CODE (operand1) == MEM)
1759 /* Various sets are created during RTL generation which don't
1760 have the REG_POINTER flag correctly set. After the CSE pass,
1761 instruction recognition can fail if we don't consistently
1762 set this flag when performing register copies. This should
1763 also improve the opportunities for creating insns that use
1764 unscaled indexing. */
1765 if (REG_P (operand0) && REG_P (operand1))
1767 if (REG_POINTER (operand1)
1768 && !REG_POINTER (operand0)
1769 && !HARD_REGISTER_P (operand0))
1770 copy_reg_pointer (operand0, operand1);
1771 else if (REG_POINTER (operand0)
1772 && !REG_POINTER (operand1)
1773 && !HARD_REGISTER_P (operand1))
1774 copy_reg_pointer (operand1, operand0);
1777 /* When MEMs are broken out, the REG_POINTER flag doesn't
1778 get set. In some cases, we can set the REG_POINTER flag
1779 from the declaration for the MEM. */
1780 if (REG_P (operand0)
1781 && GET_CODE (operand1) == MEM
1782 && !REG_POINTER (operand0))
1784 tree decl = MEM_EXPR (operand1);
1786 /* Set the register pointer flag and register alignment
1787 if the declaration for this memory reference is a
1788 pointer type. Fortran indirect argument references
1789 are ignored. */
1790 if (decl
1791 && !(flag_argument_noalias > 1
1792 && TREE_CODE (decl) == INDIRECT_REF
1793 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1795 tree type;
1797 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1798 tree operand 1. */
1799 if (TREE_CODE (decl) == COMPONENT_REF)
1800 decl = TREE_OPERAND (decl, 1);
1802 type = TREE_TYPE (decl);
1803 if (TREE_CODE (type) == ARRAY_TYPE)
1804 type = get_inner_array_type (type);
1806 if (POINTER_TYPE_P (type))
1808 int align;
1810 type = TREE_TYPE (type);
1811 /* Using TYPE_ALIGN_OK is rather conservative as
1812 only the ada frontend actually sets it. */
1813 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1814 : BITS_PER_UNIT);
1815 mark_reg_pointer (operand0, align);
1820 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1821 return 1;
1824 else if (GET_CODE (operand0) == MEM)
1826 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1827 && !(reload_in_progress || reload_completed))
1829 rtx temp = gen_reg_rtx (DFmode);
1831 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1832 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1833 return 1;
1835 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1837 /* Run this case quickly. */
1838 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1839 return 1;
1841 if (! (reload_in_progress || reload_completed))
1843 operands[0] = validize_mem (operand0);
1844 operands[1] = operand1 = force_reg (mode, operand1);
1848 /* Simplify the source if we need to.
1849 Note we do have to handle function labels here, even though we do
1850 not consider them legitimate constants. Loop optimizations can
1851 call the emit_move_xxx routines with one as a source. */
1852 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1853 || function_label_operand (operand1, mode)
1854 || (GET_CODE (operand1) == HIGH
1855 && symbolic_operand (XEXP (operand1, 0), mode)))
1857 int ishighonly = 0;
1859 if (GET_CODE (operand1) == HIGH)
1861 ishighonly = 1;
1862 operand1 = XEXP (operand1, 0);
1864 if (symbolic_operand (operand1, mode))
1866 /* Argh. The assembler and linker can't handle arithmetic
1867 involving plabels.
1869 So we force the plabel into memory, load operand0 from
1870 the memory location, then add in the constant part. */
1871 if ((GET_CODE (operand1) == CONST
1872 && GET_CODE (XEXP (operand1, 0)) == PLUS
1873 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1874 || function_label_operand (operand1, mode))
1876 rtx temp, const_part;
1878 /* Figure out what (if any) scratch register to use. */
1879 if (reload_in_progress || reload_completed)
1881 scratch_reg = scratch_reg ? scratch_reg : operand0;
1882 /* SCRATCH_REG will hold an address and maybe the actual
1883 data. We want it in WORD_MODE regardless of what mode it
1884 was originally given to us. */
1885 scratch_reg = force_mode (word_mode, scratch_reg);
1887 else if (flag_pic)
1888 scratch_reg = gen_reg_rtx (Pmode);
1890 if (GET_CODE (operand1) == CONST)
1892 /* Save away the constant part of the expression. */
1893 const_part = XEXP (XEXP (operand1, 0), 1);
1894 if (GET_CODE (const_part) != CONST_INT)
1895 abort ();
1897 /* Force the function label into memory. */
1898 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1900 else
1902 /* No constant part. */
1903 const_part = NULL_RTX;
1905 /* Force the function label into memory. */
1906 temp = force_const_mem (mode, operand1);
1910 /* Get the address of the memory location. PIC-ify it if
1911 necessary. */
1912 temp = XEXP (temp, 0);
1913 if (flag_pic)
1914 temp = legitimize_pic_address (temp, mode, scratch_reg);
1916 /* Put the address of the memory location into our destination
1917 register. */
1918 operands[1] = temp;
1919 emit_move_sequence (operands, mode, scratch_reg);
1921 /* Now load from the memory location into our destination
1922 register. */
1923 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1924 emit_move_sequence (operands, mode, scratch_reg);
1926 /* And add back in the constant part. */
1927 if (const_part != NULL_RTX)
1928 expand_inc (operand0, const_part);
1930 return 1;
1933 if (flag_pic)
1935 rtx temp;
1937 if (reload_in_progress || reload_completed)
1939 temp = scratch_reg ? scratch_reg : operand0;
1940 /* TEMP will hold an address and maybe the actual
1941 data. We want it in WORD_MODE regardless of what mode it
1942 was originally given to us. */
1943 temp = force_mode (word_mode, temp);
1945 else
1946 temp = gen_reg_rtx (Pmode);
1948 /* (const (plus (symbol) (const_int))) must be forced to
1949 memory during/after reload if the const_int will not fit
1950 in 14 bits. */
1951 if (GET_CODE (operand1) == CONST
1952 && GET_CODE (XEXP (operand1, 0)) == PLUS
1953 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1954 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1955 && (reload_completed || reload_in_progress)
1956 && flag_pic)
1958 operands[1] = force_const_mem (mode, operand1);
1959 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1960 mode, temp);
1961 operands[1] = gen_rtx_MEM (mode, operands[1]);
1962 emit_move_sequence (operands, mode, temp);
1964 else
1966 operands[1] = legitimize_pic_address (operand1, mode, temp);
1967 if (REG_P (operand0) && REG_P (operands[1]))
1968 copy_reg_pointer (operand0, operands[1]);
1969 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1972 /* On the HPPA, references to data space are supposed to use dp,
1973 register 27, but showing it in the RTL inhibits various cse
1974 and loop optimizations. */
1975 else
1977 rtx temp, set;
1979 if (reload_in_progress || reload_completed)
1981 temp = scratch_reg ? scratch_reg : operand0;
1982 /* TEMP will hold an address and maybe the actual
1983 data. We want it in WORD_MODE regardless of what mode it
1984 was originally given to us. */
1985 temp = force_mode (word_mode, temp);
1987 else
1988 temp = gen_reg_rtx (mode);
1990 /* Loading a SYMBOL_REF into a register makes that register
1991 safe to be used as the base in an indexed address.
1993 Don't mark hard registers though. That loses. */
1994 if (GET_CODE (operand0) == REG
1995 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1996 mark_reg_pointer (operand0, BITS_PER_UNIT);
1997 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1998 mark_reg_pointer (temp, BITS_PER_UNIT);
2000 if (ishighonly)
2001 set = gen_rtx_SET (mode, operand0, temp);
2002 else
2003 set = gen_rtx_SET (VOIDmode,
2004 operand0,
2005 gen_rtx_LO_SUM (mode, temp, operand1));
2007 emit_insn (gen_rtx_SET (VOIDmode,
2008 temp,
2009 gen_rtx_HIGH (mode, operand1)));
2010 emit_insn (set);
2013 return 1;
2015 else if (GET_CODE (operand1) != CONST_INT
2016 || !cint_ok_for_move (INTVAL (operand1)))
2018 rtx insn, temp;
2019 rtx op1 = operand1;
2020 HOST_WIDE_INT value = 0;
2021 HOST_WIDE_INT insv = 0;
2022 int insert = 0;
2024 if (GET_CODE (operand1) == CONST_INT)
2025 value = INTVAL (operand1);
2027 if (TARGET_64BIT
2028 && GET_CODE (operand1) == CONST_INT
2029 && HOST_BITS_PER_WIDE_INT > 32
2030 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2032 HOST_WIDE_INT nval;
2034 /* Extract the low order 32 bits of the value and sign extend.
2035 If the new value is the same as the original value, we can
2036 use the original value as-is. If the new value is
2037 different, we use it and insert the most-significant 32 bits
2038 of the original value into the final result. */
2039 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2040 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2041 if (value != nval)
2043 #if HOST_BITS_PER_WIDE_INT > 32
2044 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2045 #endif
2046 insert = 1;
2047 value = nval;
2048 operand1 = GEN_INT (nval);
2052 if (reload_in_progress || reload_completed)
2053 temp = scratch_reg ? scratch_reg : operand0;
2054 else
2055 temp = gen_reg_rtx (mode);
2057 /* We don't directly split DImode constants on 32-bit targets
2058 because PLUS uses an 11-bit immediate and the insn sequence
2059 generated is not as efficient as the one using HIGH/LO_SUM. */
2060 if (GET_CODE (operand1) == CONST_INT
2061 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2062 && !insert)
2064 /* Directly break constant into high and low parts. This
2065 provides better optimization opportunities because various
2066 passes recognize constants split with PLUS but not LO_SUM.
2067 We use a 14-bit signed low part except when the addition
2068 of 0x4000 to the high part might change the sign of the
2069 high part. */
2070 HOST_WIDE_INT low = value & 0x3fff;
2071 HOST_WIDE_INT high = value & ~ 0x3fff;
2073 if (low >= 0x2000)
2075 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2076 high += 0x2000;
2077 else
2078 high += 0x4000;
2081 low = value - high;
2083 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2084 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2086 else
2088 emit_insn (gen_rtx_SET (VOIDmode, temp,
2089 gen_rtx_HIGH (mode, operand1)));
2090 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
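	  /* Illustrative example (added commentary, not from the original
	     sources): for value = 0x12346789 the split above computes
	     low = 0x2789, which is >= 0x2000, so high becomes
	     0x12344000 + 0x4000 = 0x12348000 and low is recomputed as
	     0x12346789 - 0x12348000 = -0x1877, a value that fits in
	     ldo's signed 14-bit immediate.  The resulting two-insn
	     sequence is roughly

	         ldil L'0x12348000,%r1
	         ldo -6263(%r1),%r1

	     where %r1 stands in for whatever register TEMP ends up in.  */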
2093 insn = emit_move_insn (operands[0], operands[1]);
2095 /* Now insert the most significant 32 bits of the value
2096 into the register. When we don't have a second register
2097 available, it could take up to nine instructions to load
2098 a 64-bit integer constant. Prior to reload, we force
2099 constants that would take more than three instructions
2100 to load to the constant pool. During and after reload,
2101 we have to handle all possible values. */
2102 if (insert)
2104 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2105 register and the value to be inserted is outside the
2106 range that can be loaded with three depdi instructions. */
2107 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2109 operand1 = GEN_INT (insv);
2111 emit_insn (gen_rtx_SET (VOIDmode, temp,
2112 gen_rtx_HIGH (mode, operand1)));
2113 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2114 emit_insn (gen_insv (operand0, GEN_INT (32),
2115 const0_rtx, temp));
2117 else
2119 int len = 5, pos = 27;
2121 /* Insert the bits using the depdi instruction. */
2122 while (pos >= 0)
2124 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2125 HOST_WIDE_INT sign = v5 < 0;
2127 /* Left extend the insertion. */
2128 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2129 while (pos > 0 && (insv & 1) == sign)
2131 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2132 len += 1;
2133 pos -= 1;
2136 emit_insn (gen_insv (operand0, GEN_INT (len),
2137 GEN_INT (pos), GEN_INT (v5)));
2139 len = pos > 0 && pos < 5 ? pos : 5;
2140 pos -= len;
2145 REG_NOTES (insn)
2146 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
2148 return 1;
2151 /* Now have insn-emit do whatever it normally does. */
2152 return 0;
2155 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2156 it will need a link/runtime reloc). */
2158 int
2159 reloc_needed (tree exp)
2161 int reloc = 0;
2163 switch (TREE_CODE (exp))
2165 case ADDR_EXPR:
2166 return 1;
2168 case PLUS_EXPR:
2169 case MINUS_EXPR:
2170 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2171 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2172 break;
2174 case NOP_EXPR:
2175 case CONVERT_EXPR:
2176 case NON_LVALUE_EXPR:
2177 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2178 break;
2180 case CONSTRUCTOR:
2182 register tree link;
2183 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
2184 if (TREE_VALUE (link) != 0)
2185 reloc |= reloc_needed (TREE_VALUE (link));
2187 break;
2189 case ERROR_MARK:
2190 break;
2192 default:
2193 break;
2195 return reloc;
2198 /* Does operand (which is a symbolic_operand) live in text space?
2199 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2200 will be true. */
2202 int
2203 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2205 if (GET_CODE (operand) == CONST)
2206 operand = XEXP (XEXP (operand, 0), 0);
2207 if (flag_pic)
2209 if (GET_CODE (operand) == SYMBOL_REF)
2210 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2212 else
2214 if (GET_CODE (operand) == SYMBOL_REF)
2215 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2217 return 1;
2221 /* Return the best assembler insn template
2222 for moving operands[1] into operands[0] as a fullword. */
2223 const char *
2224 singlemove_string (rtx *operands)
2226 HOST_WIDE_INT intval;
2228 if (GET_CODE (operands[0]) == MEM)
2229 return "stw %r1,%0";
2230 if (GET_CODE (operands[1]) == MEM)
2231 return "ldw %1,%0";
2232 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2234 long i;
2235 REAL_VALUE_TYPE d;
2237 if (GET_MODE (operands[1]) != SFmode)
2238 abort ();
2240 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2241 bit pattern. */
2242 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2243 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2245 operands[1] = GEN_INT (i);
2246 /* Fall through to CONST_INT case. */
2248 if (GET_CODE (operands[1]) == CONST_INT)
2250 intval = INTVAL (operands[1]);
2252 if (VAL_14_BITS_P (intval))
2253 return "ldi %1,%0";
2254 else if ((intval & 0x7ff) == 0)
2255 return "ldil L'%1,%0";
2256 else if (zdepi_cint_p (intval))
2257 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2258 else
2259 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2261 return "copy %1,%0";
2265 /* Compute position (in OP[1]) and width (in OP[2])
2266 useful for copying IMM to a register using the zdepi
2267 instructions. Store the immediate value to insert in OP[0]. */
2268 static void
2269 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2271 int lsb, len;
2273 /* Find the least significant set bit in IMM. */
2274 for (lsb = 0; lsb < 32; lsb++)
2276 if ((imm & 1) != 0)
2277 break;
2278 imm >>= 1;
2281 /* Choose variants based on *sign* of the 5-bit field. */
2282 if ((imm & 0x10) == 0)
2283 len = (lsb <= 28) ? 4 : 32 - lsb;
2284 else
2286 /* Find the width of the bitstring in IMM. */
2287 for (len = 5; len < 32; len++)
2289 if ((imm & (1 << len)) == 0)
2290 break;
2293 /* Sign extend IMM as a 5-bit value. */
2294 imm = (imm & 0xf) - 0x10;
2297 op[0] = imm;
2298 op[1] = 31 - lsb;
2299 op[2] = len;
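/* Worked example (added commentary, not from the original sources):
   for imm = 0x0001ff80 the loop above finds lsb = 7 and shifts the
   bitstring down to 0x3ff.  Bit 4 of the shifted value is set, so the
   width scan gives len = 10 and imm sign extends to -1.  The results
   op[0] = -1, op[1] = 31 - 7 = 24, op[2] = 10 describe
   "zdepi -1,24,10", which deposits ten 1 bits into the field ending
   at (big-endian) position 24, i.e. bits 7..16 of the result.  */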
2302 /* Compute position (in OP[1]) and width (in OP[2])
2303 useful for copying IMM to a register using the depdi,z
2304 instructions. Store the immediate value to insert in OP[0]. */
2305 void
2306 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2308 HOST_WIDE_INT lsb, len;
2310 /* Find the least significant set bit in IMM. */
2311 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2313 if ((imm & 1) != 0)
2314 break;
2315 imm >>= 1;
2318 /* Choose variants based on *sign* of the 5-bit field. */
2319 if ((imm & 0x10) == 0)
2320 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2321 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2322 else
2324 /* Find the width of the bitstring in IMM. */
2325 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2327 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2328 break;
2331 /* Sign extend IMM as a 5-bit value. */
2332 imm = (imm & 0xf) - 0x10;
2335 op[0] = imm;
2336 op[1] = 63 - lsb;
2337 op[2] = len;
2340 /* Output assembler code to perform a doubleword move insn
2341 with operands OPERANDS. */
2343 const char *
2344 output_move_double (rtx *operands)
2346 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2347 rtx latehalf[2];
2348 rtx addreg0 = 0, addreg1 = 0;
2350 /* First classify both operands. */
2352 if (REG_P (operands[0]))
2353 optype0 = REGOP;
2354 else if (offsettable_memref_p (operands[0]))
2355 optype0 = OFFSOP;
2356 else if (GET_CODE (operands[0]) == MEM)
2357 optype0 = MEMOP;
2358 else
2359 optype0 = RNDOP;
2361 if (REG_P (operands[1]))
2362 optype1 = REGOP;
2363 else if (CONSTANT_P (operands[1]))
2364 optype1 = CNSTOP;
2365 else if (offsettable_memref_p (operands[1]))
2366 optype1 = OFFSOP;
2367 else if (GET_CODE (operands[1]) == MEM)
2368 optype1 = MEMOP;
2369 else
2370 optype1 = RNDOP;
2372 /* Check for the cases that the operand constraints are not
2373 supposed to allow to happen. Abort if we get one,
2374 because generating code for these cases is painful. */
2376 if (optype0 != REGOP && optype1 != REGOP)
2377 abort ();
2379 /* Handle auto decrementing and incrementing loads and stores
2380 specifically, since the structure of the function doesn't work
2381 for them without major modification. Do it better when we teach
2382 this port about the general inc/dec addressing of the PA.
2383 (This was written by tege. Chide him if it doesn't work.) */
2385 if (optype0 == MEMOP)
2387 /* We have to output the address syntax ourselves, since print_operand
2388 doesn't deal with the addresses we want to use. Fix this later. */
2390 rtx addr = XEXP (operands[0], 0);
2391 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2393 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2395 operands[0] = XEXP (addr, 0);
2396 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2397 abort ();
2399 if (!reg_overlap_mentioned_p (high_reg, addr))
2401 /* No overlap between high target register and address
2402 register. (We do this in a non-obvious way to
2403 save a register file writeback) */
2404 if (GET_CODE (addr) == POST_INC)
2405 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2406 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2408 else
2409 abort ();
2411 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2413 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2415 operands[0] = XEXP (addr, 0);
2416 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2417 abort ();
2419 if (!reg_overlap_mentioned_p (high_reg, addr))
2421 /* No overlap between high target register and address
2422 register. (We do this in a non-obvious way to
2423 save a register file writeback) */
2424 if (GET_CODE (addr) == PRE_INC)
2425 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2426 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2428 else
2429 abort ();
2432 if (optype1 == MEMOP)
2434 /* We have to output the address syntax ourselves, since print_operand
2435 doesn't deal with the addresses we want to use. Fix this later. */
2437 rtx addr = XEXP (operands[1], 0);
2438 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2440 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2442 operands[1] = XEXP (addr, 0);
2443 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2444 abort ();
2446 if (!reg_overlap_mentioned_p (high_reg, addr))
2448 /* No overlap between high target register and address
2449 register. (We do this in a non-obvious way to
2450 save a register file writeback) */
2451 if (GET_CODE (addr) == POST_INC)
2452 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2453 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2455 else
2457 /* This is an undefined situation. We should load into the
2458 address register *and* update that register. Probably
2459 we don't need to handle this at all. */
2460 if (GET_CODE (addr) == POST_INC)
2461 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2462 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2465 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2467 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2469 operands[1] = XEXP (addr, 0);
2470 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2471 abort ();
2473 if (!reg_overlap_mentioned_p (high_reg, addr))
2475 /* No overlap between high target register and address
2476 register. (We do this in a non-obvious way to
2477 save a register file writeback) */
2478 if (GET_CODE (addr) == PRE_INC)
2479 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2480 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2482 else
2484 /* This is an undefined situation. We should load into the
2485 address register *and* update that register. Probably
2486 we don't need to handle this at all. */
2487 if (GET_CODE (addr) == PRE_INC)
2488 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2489 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2492 else if (GET_CODE (addr) == PLUS
2493 && GET_CODE (XEXP (addr, 0)) == MULT)
2495 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2497 if (!reg_overlap_mentioned_p (high_reg, addr))
2499 rtx xoperands[4];
2501 xoperands[0] = high_reg;
2502 xoperands[1] = XEXP (addr, 1);
2503 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2504 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2505 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2506 xoperands);
2507 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2509 else
2511 rtx xoperands[4];
2513 xoperands[0] = high_reg;
2514 xoperands[1] = XEXP (addr, 1);
2515 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2516 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2517 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2518 xoperands);
2519 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2524 /* If an operand is an unoffsettable memory ref, find a register
2525 we can increment temporarily to make it refer to the second word. */
2527 if (optype0 == MEMOP)
2528 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2530 if (optype1 == MEMOP)
2531 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2533 /* Ok, we can do one word at a time.
2534 Normally we do the low-numbered word first.
2536 In either case, set up in LATEHALF the operands to use
2537 for the high-numbered word and in some cases alter the
2538 operands in OPERANDS to be suitable for the low-numbered word. */
2540 if (optype0 == REGOP)
2541 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2542 else if (optype0 == OFFSOP)
2543 latehalf[0] = adjust_address (operands[0], SImode, 4);
2544 else
2545 latehalf[0] = operands[0];
2547 if (optype1 == REGOP)
2548 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2549 else if (optype1 == OFFSOP)
2550 latehalf[1] = adjust_address (operands[1], SImode, 4);
2551 else if (optype1 == CNSTOP)
2552 split_double (operands[1], &operands[1], &latehalf[1]);
2553 else
2554 latehalf[1] = operands[1];
2556 /* If the first move would clobber the source of the second one,
2557 do them in the other order.
2559 This can happen in two cases:
2561 mem -> register where the first half of the destination register
2562 is the same register used in the memory's address. Reload
2563 can create such insns.
2565 mem in this case will be either register indirect or register
2566 indirect plus a valid offset.
2568 register -> register move where REGNO(dst) == REGNO(src) + 1
2569 someone (Tim/Tege?) claimed this can happen for parameter loads.
2571 Handle mem -> register case first. */
2572 if (optype0 == REGOP
2573 && (optype1 == MEMOP || optype1 == OFFSOP)
2574 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2575 operands[1], 0))
2577 /* Do the late half first. */
2578 if (addreg1)
2579 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2580 output_asm_insn (singlemove_string (latehalf), latehalf);
2582 /* Then clobber. */
2583 if (addreg1)
2584 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2585 return singlemove_string (operands);
2588 /* Now handle register -> register case. */
2589 if (optype0 == REGOP && optype1 == REGOP
2590 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2592 output_asm_insn (singlemove_string (latehalf), latehalf);
2593 return singlemove_string (operands);
2596 /* Normal case: do the two words, low-numbered first. */
2598 output_asm_insn (singlemove_string (operands), operands);
2600 /* Make any unoffsettable addresses point at high-numbered word. */
2601 if (addreg0)
2602 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2603 if (addreg1)
2604 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2606 /* Do that word. */
2607 output_asm_insn (singlemove_string (latehalf), latehalf);
2609 /* Undo the adds we just did. */
2610 if (addreg0)
2611 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2612 if (addreg1)
2613 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2615 return "";
2618 const char *
2619 output_fp_move_double (rtx *operands)
2621 if (FP_REG_P (operands[0]))
2623 if (FP_REG_P (operands[1])
2624 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2625 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2626 else
2627 output_asm_insn ("fldd%F1 %1,%0", operands);
2629 else if (FP_REG_P (operands[1]))
2631 output_asm_insn ("fstd%F0 %1,%0", operands);
2633 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2635 if (GET_CODE (operands[0]) == REG)
2637 rtx xoperands[2];
2638 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2639 xoperands[0] = operands[0];
2640 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2642 /* This is a pain. You have to be prepared to deal with an
2643 arbitrary address here including pre/post increment/decrement.
2645 So avoid this in the MD. */
2646 else
2647 abort ();
2649 else abort ();
2650 return "";
2653 /* Return a REG that occurs in ADDR with coefficient 1.
2654 ADDR can be effectively incremented by incrementing REG. */
2656 static rtx
2657 find_addr_reg (rtx addr)
2659 while (GET_CODE (addr) == PLUS)
2661 if (GET_CODE (XEXP (addr, 0)) == REG)
2662 addr = XEXP (addr, 0);
2663 else if (GET_CODE (XEXP (addr, 1)) == REG)
2664 addr = XEXP (addr, 1);
2665 else if (CONSTANT_P (XEXP (addr, 0)))
2666 addr = XEXP (addr, 1);
2667 else if (CONSTANT_P (XEXP (addr, 1)))
2668 addr = XEXP (addr, 0);
2669 else
2670 abort ();
2672 if (GET_CODE (addr) == REG)
2673 return addr;
2674 abort ();
2677 /* Emit code to perform a block move.
2679 OPERANDS[0] is the destination pointer as a REG, clobbered.
2680 OPERANDS[1] is the source pointer as a REG, clobbered.
2681 OPERANDS[2] is a register for temporary storage.
2682 OPERANDS[3] is a register for temporary storage.
2683 OPERANDS[4] is the size as a CONST_INT
2684 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2685 OPERANDS[6] is another temporary register. */
2687 const char *
2688 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2690 int align = INTVAL (operands[5]);
2691 unsigned long n_bytes = INTVAL (operands[4]);
2693 /* We can't move more than a word at a time because the PA
2694 has no integer move insns longer than a word. (Could use fp mem ops?) */
2695 if (align > (TARGET_64BIT ? 8 : 4))
2696 align = (TARGET_64BIT ? 8 : 4);
2698 /* Note that we know each loop below will execute at least twice
2699 (else we would have open-coded the copy). */
2700 switch (align)
2702 case 8:
2703 /* Pre-adjust the loop counter. */
2704 operands[4] = GEN_INT (n_bytes - 16);
2705 output_asm_insn ("ldi %4,%2", operands);
2707 /* Copying loop. */
2708 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2709 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2710 output_asm_insn ("std,ma %3,8(%0)", operands);
2711 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2712 output_asm_insn ("std,ma %6,8(%0)", operands);
2714 /* Handle the residual. There could be up to 7 bytes of
2715 residual to copy! */
2716 if (n_bytes % 16 != 0)
2718 operands[4] = GEN_INT (n_bytes % 8);
2719 if (n_bytes % 16 >= 8)
2720 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2721 if (n_bytes % 8 != 0)
2722 output_asm_insn ("ldd 0(%1),%6", operands);
2723 if (n_bytes % 16 >= 8)
2724 output_asm_insn ("std,ma %3,8(%0)", operands);
2725 if (n_bytes % 8 != 0)
2726 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2728 return "";
2730 case 4:
2731 /* Pre-adjust the loop counter. */
2732 operands[4] = GEN_INT (n_bytes - 8);
2733 output_asm_insn ("ldi %4,%2", operands);
2735 /* Copying loop. */
2736 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2737 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2738 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2739 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2740 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2742 /* Handle the residual. There could be up to 7 bytes of
2743 residual to copy! */
2744 if (n_bytes % 8 != 0)
2746 operands[4] = GEN_INT (n_bytes % 4);
2747 if (n_bytes % 8 >= 4)
2748 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2749 if (n_bytes % 4 != 0)
2750 output_asm_insn ("ldw 0(%1),%6", operands);
2751 if (n_bytes % 8 >= 4)
2752 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2753 if (n_bytes % 4 != 0)
2754 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2756 return "";
2758 case 2:
2759 /* Pre-adjust the loop counter. */
2760 operands[4] = GEN_INT (n_bytes - 4);
2761 output_asm_insn ("ldi %4,%2", operands);
2763 /* Copying loop. */
2764 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2765 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2766 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2767 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2768 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2770 /* Handle the residual. */
2771 if (n_bytes % 4 != 0)
2773 if (n_bytes % 4 >= 2)
2774 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2775 if (n_bytes % 2 != 0)
2776 output_asm_insn ("ldb 0(%1),%6", operands);
2777 if (n_bytes % 4 >= 2)
2778 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2779 if (n_bytes % 2 != 0)
2780 output_asm_insn ("stb %6,0(%0)", operands);
2782 return "";
2784 case 1:
2785 /* Pre-adjust the loop counter. */
2786 operands[4] = GEN_INT (n_bytes - 2);
2787 output_asm_insn ("ldi %4,%2", operands);
2789 /* Copying loop. */
2790 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2791 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2792 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2793 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2794 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2796 /* Handle the residual. */
2797 if (n_bytes % 2 != 0)
2799 output_asm_insn ("ldb 0(%1),%3", operands);
2800 output_asm_insn ("stb %3,0(%0)", operands);
2802 return "";
2804 default:
2805 abort ();
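/* Illustrative expansion (added commentary, not from the original
   sources): a 20-byte copy with 4-byte alignment, using hypothetical
   register assignments %r26/%r25 for the pointers, %r24 for the
   counter and %r23/%r22 for the data temporaries, emits

       ldi 12,%r24               ; 20 - 8
       ldw,ma 4(%r25),%r23       ; copying loop, 8 bytes/iteration
       ldw,ma 4(%r25),%r22
       stw,ma %r23,4(%r26)
       addib,>= -8,%r24,.-12
       stw,ma %r22,4(%r26)
       ldw,ma 4(%r25),%r23       ; 20 % 8 = 4 bytes of residual
       stw,ma %r23,4(%r26)  */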
2809 /* Count the number of insns necessary to handle this block move.
2811 Basic structure is the same as output_block_move, except that we
2812 count insns rather than emit them. */
2814 static int
2815 compute_movmem_length (rtx insn)
2817 rtx pat = PATTERN (insn);
2818 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2819 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2820 unsigned int n_insns = 0;
2822 /* We can't move more than a word at a time because the PA
2823 has no integer move insns longer than a word. (Could use fp mem ops?) */
2824 if (align > (TARGET_64BIT ? 8 : 4))
2825 align = (TARGET_64BIT ? 8 : 4);
2827 /* The basic copying loop. */
2828 n_insns = 6;
2830 /* Residuals. */
2831 if (n_bytes % (2 * align) != 0)
2833 if ((n_bytes % (2 * align)) >= align)
2834 n_insns += 2;
2836 if ((n_bytes % align) != 0)
2837 n_insns += 2;
2840 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2841 return n_insns * 4;
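/* Worked example (added commentary, not from the original sources):
   for the 20-byte, 4-byte-aligned copy sketched above, the loop costs
   6 insns and the residual satisfies 20 % 8 >= 4, adding 2 more,
   while 20 % 4 == 0 adds nothing.  The computed length is therefore
   8 * 4 = 32 bytes.  */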
2844 /* Emit code to perform a block clear.
2846 OPERANDS[0] is the destination pointer as a REG, clobbered.
2847 OPERANDS[1] is a register for temporary storage.
2848 OPERANDS[2] is the size as a CONST_INT
2849 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2851 const char *
2852 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2854 int align = INTVAL (operands[3]);
2855 unsigned long n_bytes = INTVAL (operands[2]);
2857 /* We can't clear more than a word at a time because the PA
2858 has no integer move insns longer than a word. */
2859 if (align > (TARGET_64BIT ? 8 : 4))
2860 align = (TARGET_64BIT ? 8 : 4);
2862 /* Note that we know each loop below will execute at least twice
2863 (else we would have open-coded the copy). */
2864 switch (align)
2866 case 8:
2867 /* Pre-adjust the loop counter. */
2868 operands[2] = GEN_INT (n_bytes - 16);
2869 output_asm_insn ("ldi %2,%1", operands);
2871 /* Loop. */
2872 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2873 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2874 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2876 /* Handle the residual. There could be up to 7 bytes of
2877 residual to copy! */
2878 if (n_bytes % 16 != 0)
2880 operands[2] = GEN_INT (n_bytes % 8);
2881 if (n_bytes % 16 >= 8)
2882 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2883 if (n_bytes % 8 != 0)
2884 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2886 return "";
2888 case 4:
2889 /* Pre-adjust the loop counter. */
2890 operands[2] = GEN_INT (n_bytes - 8);
2891 output_asm_insn ("ldi %2,%1", operands);
2893 /* Loop. */
2894 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2895 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2896 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2898 /* Handle the residual. There could be up to 7 bytes of
2899 residual to copy! */
2900 if (n_bytes % 8 != 0)
2902 operands[2] = GEN_INT (n_bytes % 4);
2903 if (n_bytes % 8 >= 4)
2904 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2905 if (n_bytes % 4 != 0)
2906 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2908 return "";
2910 case 2:
2911 /* Pre-adjust the loop counter. */
2912 operands[2] = GEN_INT (n_bytes - 4);
2913 output_asm_insn ("ldi %2,%1", operands);
2915 /* Loop. */
2916 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2917 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2918 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2920 /* Handle the residual. */
2921 if (n_bytes % 4 != 0)
2923 if (n_bytes % 4 >= 2)
2924 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2925 if (n_bytes % 2 != 0)
2926 output_asm_insn ("stb %%r0,0(%0)", operands);
2928 return "";
2930 case 1:
2931 /* Pre-adjust the loop counter. */
2932 operands[2] = GEN_INT (n_bytes - 2);
2933 output_asm_insn ("ldi %2,%1", operands);
2935 /* Loop. */
2936 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2937 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2938 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2940 /* Handle the residual. */
2941 if (n_bytes % 2 != 0)
2942 output_asm_insn ("stb %%r0,0(%0)", operands);
2944 return "";
2946 default:
2947 abort ();
2951 /* Count the number of insns necessary to handle this block clear.
2953 Basic structure is the same as output_block_clear, except that we
2954 count insns rather than emit them. */
2956 static int
2957 compute_clrmem_length (rtx insn)
2959 rtx pat = PATTERN (insn);
2960 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2961 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2962 unsigned int n_insns = 0;
2964 /* We can't clear more than a word at a time because the PA
2965 has no integer move insns longer than a word. */
2966 if (align > (TARGET_64BIT ? 8 : 4))
2967 align = (TARGET_64BIT ? 8 : 4);
2969 /* The basic loop. */
2970 n_insns = 4;
2972 /* Residuals. */
2973 if (n_bytes % (2 * align) != 0)
2975 if ((n_bytes % (2 * align)) >= align)
2976 n_insns++;
2978 if ((n_bytes % align) != 0)
2979 n_insns++;
2982 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2983 return n_insns * 4;
2987 const char *
2988 output_and (rtx *operands)
2990 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2992 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2993 int ls0, ls1, ms0, p, len;
2995 for (ls0 = 0; ls0 < 32; ls0++)
2996 if ((mask & (1 << ls0)) == 0)
2997 break;
2999 for (ls1 = ls0; ls1 < 32; ls1++)
3000 if ((mask & (1 << ls1)) != 0)
3001 break;
3003 for (ms0 = ls1; ms0 < 32; ms0++)
3004 if ((mask & (1 << ms0)) == 0)
3005 break;
3007 if (ms0 != 32)
3008 abort ();
3010 if (ls1 == 32)
3012 len = ls0;
3014 if (len == 0)
3015 abort ();
3017 operands[2] = GEN_INT (len);
3018 return "{extru|extrw,u} %1,31,%2,%0";
3020 else
3022 /* We could use this `depi' for the case above as well, but `depi'
3023 requires one more register file access than an `extru'. */
3025 p = 31 - ls0;
3026 len = ls1 - ls0;
3028 operands[2] = GEN_INT (p);
3029 operands[3] = GEN_INT (len);
3030 return "{depi|depwi} 0,%2,%3,%0";
3033 else
3034 return "and %1,%2,%0";
3037 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3038 storing the result in operands[0]. */
3039 const char *
3040 output_64bit_and (rtx *operands)
3042 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3044 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3045 int ls0, ls1, ms0, p, len;
3047 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3048 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3049 break;
3051 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3052 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3053 break;
3055 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3056 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3057 break;
3059 if (ms0 != HOST_BITS_PER_WIDE_INT)
3060 abort ();
3062 if (ls1 == HOST_BITS_PER_WIDE_INT)
3064 len = ls0;
3066 if (len == 0)
3067 abort ();
3069 operands[2] = GEN_INT (len);
3070 return "extrd,u %1,63,%2,%0";
3072 else
3074 /* We could use this `depdi' for the case above as well, but `depdi'
3075 requires one more register file access than an `extrd'. */
3077 p = 63 - ls0;
3078 len = ls1 - ls0;
3080 operands[2] = GEN_INT (p);
3081 operands[3] = GEN_INT (len);
3082 return "depdi 0,%2,%3,%0";
3085 else
3086 return "and %1,%2,%0";
3089 const char *
3090 output_ior (rtx *operands)
3092 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3093 int bs0, bs1, p, len;
3095 if (INTVAL (operands[2]) == 0)
3096 return "copy %1,%0";
3098 for (bs0 = 0; bs0 < 32; bs0++)
3099 if ((mask & (1 << bs0)) != 0)
3100 break;
3102 for (bs1 = bs0; bs1 < 32; bs1++)
3103 if ((mask & (1 << bs1)) == 0)
3104 break;
3106 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3107 abort ();
3109 p = 31 - bs0;
3110 len = bs1 - bs0;
3112 operands[2] = GEN_INT (p);
3113 operands[3] = GEN_INT (len);
3114 return "{depi|depwi} -1,%2,%3,%0";
3117 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3118 storing the result in operands[0]. */
3119 const char *
3120 output_64bit_ior (rtx *operands)
3122 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3123 int bs0, bs1, p, len;
3125 if (INTVAL (operands[2]) == 0)
3126 return "copy %1,%0";
3128 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3129 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3130 break;
3132 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3133 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3134 break;
3136 if (bs1 != HOST_BITS_PER_WIDE_INT
3137 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3138 abort ();
3140 p = 63 - bs0;
3141 len = bs1 - bs0;
3143 operands[2] = GEN_INT (p);
3144 operands[3] = GEN_INT (len);
3145 return "depdi -1,%2,%3,%0";
3148 /* Target hook for assembling integer objects. This code handles
3149 aligned SI and DI integers specially, since function references must
3150 be preceded by P%. */
3152 static bool
3153 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3155 if (size == UNITS_PER_WORD && aligned_p
3156 && function_label_operand (x, VOIDmode))
3158 fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3159 output_addr_const (asm_out_file, x);
3160 fputc ('\n', asm_out_file);
3161 return true;
3163 return default_assemble_integer (x, size, aligned_p);
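/* Example of the effect (added commentary, not from the original
   sources): a word-sized, aligned reference to a function `foo' is
   emitted as

       .word P%foo

   so that it resolves to the procedure label (plabel) rather than the
   raw code address.  */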
3166 /* Output an ASCII string. */
3167 void
3168 output_ascii (FILE *file, const char *p, int size)
3170 int i;
3171 int chars_output;
3172 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3174 /* The HP assembler can only take strings of 256 characters at one
3175 time. This is a limitation on input line length, *not* the
3176 length of the string. Sigh. Even worse, it seems that the
3177 restriction is in number of input characters (see \xnn &
3178 \whatever). So we have to do this very carefully. */
3180 fputs ("\t.STRING \"", file);
3182 chars_output = 0;
3183 for (i = 0; i < size; i += 4)
3185 int co = 0;
3186 int io = 0;
3187 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3189 register unsigned int c = (unsigned char) p[i + io];
3191 if (c == '\"' || c == '\\')
3192 partial_output[co++] = '\\';
3193 if (c >= ' ' && c < 0177)
3194 partial_output[co++] = c;
3195 else
3197 unsigned int hexd;
3198 partial_output[co++] = '\\';
3199 partial_output[co++] = 'x';
3200 hexd = c / 16 + '0';
3201 if (hexd > '9')
3202 hexd -= '9' - 'a' + 1;
3203 partial_output[co++] = hexd;
3204 hexd = c % 16 + '0';
3205 if (hexd > '9')
3206 hexd -= '9' - 'a' + 1;
3207 partial_output[co++] = hexd;
3210 if (chars_output + co > 243)
3212 fputs ("\"\n\t.STRING \"", file);
3213 chars_output = 0;
3215 fwrite (partial_output, 1, (size_t) co, file);
3216 chars_output += co;
3217 co = 0;
3219 fputs ("\"\n", file);
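/* Example output (added commentary, not from the original sources):
   output_ascii (file, "hi\n", 3) produces

       .STRING "hi\x0a"

   with the newline escaped via the hex path above; longer strings are
   split into multiple .STRING directives before the assembler's input
   line limit is reached.  */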
3222 /* Try to rewrite floating point comparisons & branches to avoid
3223 useless add,tr insns.
3225 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3226 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3227 first attempt to remove useless add,tr insns. It is zero
3228 for the second pass as reorg sometimes leaves bogus REG_DEAD
3229 notes lying around.
3231 When CHECK_NOTES is zero we can only eliminate add,tr insns
3232 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3233 instructions. */
3234 static void
3235 remove_useless_addtr_insns (int check_notes)
3237 rtx insn;
3238 static int pass = 0;
3240 /* This is fairly cheap, so always run it when optimizing. */
3241 if (optimize > 0)
3243 int fcmp_count = 0;
3244 int fbranch_count = 0;
3246 /* Walk all the insns in this function looking for fcmp & fbranch
3247 instructions. Keep track of how many of each we find. */
3248 for (insn = get_insns (); insn; insn = next_insn (insn))
3250 rtx tmp;
3252 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3253 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3254 continue;
3256 tmp = PATTERN (insn);
3258 /* It must be a set. */
3259 if (GET_CODE (tmp) != SET)
3260 continue;
3262 /* If the destination is CCFP, then we've found an fcmp insn. */
3263 tmp = SET_DEST (tmp);
3264 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3266 fcmp_count++;
3267 continue;
3270 tmp = PATTERN (insn);
3271 /* If this is an fbranch instruction, bump the fbranch counter. */
3272 if (GET_CODE (tmp) == SET
3273 && SET_DEST (tmp) == pc_rtx
3274 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3275 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3276 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3277 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3279 fbranch_count++;
3280 continue;
3285 /* Find all floating point compare + branch insns. If possible,
3286 reverse the comparison & the branch to avoid add,tr insns. */
3287 for (insn = get_insns (); insn; insn = next_insn (insn))
3289 rtx tmp, next;
3291 /* Ignore anything that isn't an INSN. */
3292 if (GET_CODE (insn) != INSN)
3293 continue;
3295 tmp = PATTERN (insn);
3297 /* It must be a set. */
3298 if (GET_CODE (tmp) != SET)
3299 continue;
3301 /* The destination must be CCFP, which is register zero. */
3302 tmp = SET_DEST (tmp);
3303 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3304 continue;
3306 /* INSN should be a set of CCFP.
3308 See if the result of this insn is used in a reversed FP
3309 conditional branch. If so, reverse our condition and
3310 the branch. Doing so avoids useless add,tr insns. */
3311 next = next_insn (insn);
3312 while (next)
3314 /* Jumps, calls and labels stop our search. */
3315 if (GET_CODE (next) == JUMP_INSN
3316 || GET_CODE (next) == CALL_INSN
3317 || GET_CODE (next) == CODE_LABEL)
3318 break;
3320 /* As does another fcmp insn. */
3321 if (GET_CODE (next) == INSN
3322 && GET_CODE (PATTERN (next)) == SET
3323 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3324 && REGNO (SET_DEST (PATTERN (next))) == 0)
3325 break;
3327 next = next_insn (next);
3330 /* Is NEXT_INSN a branch? */
3331 if (next
3332 && GET_CODE (next) == JUMP_INSN)
3334 rtx pattern = PATTERN (next);
3336 /* If it is a reversed fp conditional branch (e.g., uses add,tr)
3337 and CCFP dies, then reverse our conditional and the branch
3338 to avoid the add,tr. */
3339 if (GET_CODE (pattern) == SET
3340 && SET_DEST (pattern) == pc_rtx
3341 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3342 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3343 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3344 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3345 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3346 && (fcmp_count == fbranch_count
3347 || (check_notes
3348 && find_regno_note (next, REG_DEAD, 0))))
3350 /* Reverse the branch. */
3351 tmp = XEXP (SET_SRC (pattern), 1);
3352 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3353 XEXP (SET_SRC (pattern), 2) = tmp;
3354 INSN_CODE (next) = -1;
3356 /* Reverse our condition. */
3357 tmp = PATTERN (insn);
3358 PUT_CODE (XEXP (tmp, 1),
3359 (reverse_condition_maybe_unordered
3360 (GET_CODE (XEXP (tmp, 1)))));
3366 pass = !pass;
3370 /* You may have trouble believing this, but this is the 32 bit HP-PA
3371 stack layout. Wow.
3373 Offset Contents
3375 Variable arguments (optional; any number may be allocated)
3377 SP-(4*(N+9)) arg word N
3379 SP-56 arg word 5
3380 SP-52 arg word 4
3382 Fixed arguments (must be allocated; may remain unused)
3384 SP-48 arg word 3
3385 SP-44 arg word 2
3386 SP-40 arg word 1
3387 SP-36 arg word 0
3389 Frame Marker
3391 SP-32 External Data Pointer (DP)
3392 SP-28 External sr4
3393 SP-24 External/stub RP (RP')
3394 SP-20 Current RP
3395 SP-16 Static Link
3396 SP-12 Clean up
3397 SP-8 Calling Stub RP (RP'')
3398 SP-4 Previous SP
3400 Top of Frame
3402 SP-0 Stack Pointer (points to next available address)
3406 /* This function saves registers as follows. Registers marked with ' are
3407 this function's registers (as opposed to the previous function's).
3408 If a frame_pointer isn't needed, r4 is saved as a general register;
3409 the space for the frame pointer is still allocated, though, to keep
3410 things simple.
3413 Top of Frame
3415 SP (FP') Previous FP
3416 SP + 4 Alignment filler (sigh)
3417 SP + 8 Space for locals reserved here.
3421 SP + n All call saved registers used.
3425 SP + o All call saved fp registers used.
3429 SP + p (SP') points to next available address.
3433 /* Global variables set by output_function_prologue(). */
3434 /* Size of frame. Need to know this to emit return insns from
3435 leaf procedures. */
3436 static HOST_WIDE_INT actual_fsize, local_fsize;
3437 static int save_fregs;
3439 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3440 Handle the case where DISP > 8k by using the add_high_const patterns.
3442 Note that in the DISP > 8k case, we will leave the high part of the address
3443 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3445 static void
3446 store_reg (int reg, HOST_WIDE_INT disp, int base)
3448 rtx insn, dest, src, basereg;
3450 src = gen_rtx_REG (word_mode, reg);
3451 basereg = gen_rtx_REG (Pmode, base);
3452 if (VAL_14_BITS_P (disp))
3454 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3455 insn = emit_move_insn (dest, src);
3457 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3459 rtx delta = GEN_INT (disp);
3460 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3462 emit_move_insn (tmpreg, delta);
3463 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3464 dest = gen_rtx_MEM (word_mode, tmpreg);
3465 insn = emit_move_insn (dest, src);
3466 if (DO_FRAME_NOTES)
3468 REG_NOTES (insn)
3469 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3470 gen_rtx_SET (VOIDmode,
3471 gen_rtx_MEM (word_mode,
3472 gen_rtx_PLUS (word_mode, basereg,
3473 delta)),
3474 src),
3475 REG_NOTES (insn));
3478 else
3480 rtx delta = GEN_INT (disp);
3481 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3482 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3484 emit_move_insn (tmpreg, high);
3485 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3486 insn = emit_move_insn (dest, src);
3487 if (DO_FRAME_NOTES)
3489 REG_NOTES (insn)
3490 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3491 gen_rtx_SET (VOIDmode,
3492 gen_rtx_MEM (word_mode,
3493 gen_rtx_PLUS (word_mode, basereg,
3494 delta)),
3495 src),
3496 REG_NOTES (insn));
3500 if (DO_FRAME_NOTES)
3501 RTX_FRAME_RELATED_P (insn) = 1;
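/* Example sequences (added commentary; the sketch assumes a 32-bit
   target), storing %r4 at displacement DISP from the stack pointer:

       DISP fits in 14 bits:    stw %r4,-40(%r30)

       DISP too large:          addil L'DISP,%r30
                                stw %r4,R'DISP(%r1)

   The second form is why callers must know that %r1 is left holding
   the high part of the address.  */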
3504 /* Emit RTL to store REG at the memory location specified by BASE and then
3505 add MOD to BASE. MOD must be <= 8k. */
3507 static void
3508 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3510 rtx insn, basereg, srcreg, delta;
3512 if (!VAL_14_BITS_P (mod))
3513 abort ();
3515 basereg = gen_rtx_REG (Pmode, base);
3516 srcreg = gen_rtx_REG (word_mode, reg);
3517 delta = GEN_INT (mod);
3519 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3520 if (DO_FRAME_NOTES)
3522 RTX_FRAME_RELATED_P (insn) = 1;
3524 /* RTX_FRAME_RELATED_P must be set on each frame related set
3525 in a parallel with more than one element. Don't set
3526 RTX_FRAME_RELATED_P in the first set if reg is temporary
3527 register 1. The effect of this operation is recorded in
3528 the initial copy. */
3529 if (reg != 1)
3531 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3532 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3534 else
3536 /* The first element of a PARALLEL is always processed if it is
3537 a SET. Thus, we need an expression list for this case. */
3538 REG_NOTES (insn)
3539 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3540 gen_rtx_SET (VOIDmode, basereg,
3541 gen_rtx_PLUS (word_mode, basereg, delta)),
3542 REG_NOTES (insn));
3547 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle the case
3548 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3549 whether to add a frame note or not.
3551 In the DISP > 8k case, we leave the high part of the address in %r1.
3552 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3554 static void
3555 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3557 rtx insn;
3559 if (VAL_14_BITS_P (disp))
3561 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3562 plus_constant (gen_rtx_REG (Pmode, base), disp));
3564 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3566 rtx basereg = gen_rtx_REG (Pmode, base);
3567 rtx delta = GEN_INT (disp);
3568 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3570 emit_move_insn (tmpreg, delta);
3571 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3572 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3574 else
3576 rtx basereg = gen_rtx_REG (Pmode, base);
3577 rtx delta = GEN_INT (disp);
3578 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3580 emit_move_insn (tmpreg,
3581 gen_rtx_PLUS (Pmode, basereg,
3582 gen_rtx_HIGH (Pmode, delta)));
3583 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3584 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3587 if (DO_FRAME_NOTES && note)
3588 RTX_FRAME_RELATED_P (insn) = 1;
3591 HOST_WIDE_INT
3592 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3594 int freg_saved = 0;
3595 int i, j;
3597 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3598 be consistent with the rounding and size calculation done here.
3599 Change them at the same time. */
3601 /* We do our own stack alignment. First, round the size of the
3602 stack locals up to a word boundary. */
3603 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3605 /* Space for previous frame pointer + filler. If any frame is
3606 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3607 waste some space here for the sake of HP compatibility. The
3608 first slot is only used when the frame pointer is needed. */
3609 if (size || frame_pointer_needed)
3610 size += STARTING_FRAME_OFFSET;
3612 /* If the current function calls __builtin_eh_return, then we need
3613 to allocate stack space for registers that will hold data for
3614 the exception handler. */
3615 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3617 unsigned int i;
3619 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3620 continue;
3621 size += i * UNITS_PER_WORD;
3624 /* Account for space used by the callee general register saves. */
3625 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3626 if (regs_ever_live[i])
3627 size += UNITS_PER_WORD;
3629 /* Account for space used by the callee floating point register saves. */
3630 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3631 if (regs_ever_live[i]
3632 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3634 freg_saved = 1;
3636 /* We always save both halves of the FP register, so always
3637 increment the frame size by 8 bytes. */
3638 size += 8;
3641 /* If any of the floating registers are saved, account for the
3642 alignment needed for the floating point register save block. */
3643 if (freg_saved)
3645 size = (size + 7) & ~7;
3646 if (fregs_live)
3647 *fregs_live = 1;
3650 /* The various ABIs include space for the outgoing parameters in the
3651 size of the current function's stack frame. We don't need to align
3652 for the outgoing arguments as their alignment is set by the final
3653 rounding for the frame as a whole. */
3654 size += current_function_outgoing_args_size;
3656 /* Allocate space for the fixed frame marker. This space must be
3657 allocated for any function that makes calls or allocates
3658 stack space. */
3659 if (!current_function_is_leaf || size)
3660 size += TARGET_64BIT ? 48 : 32;
3662 /* Finally, round to the preferred stack boundary. */
3663 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3664 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
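/* Worked example (added commentary; the constants assume a 32-bit
   target where STARTING_FRAME_OFFSET is 8 and the preferred stack
   boundary is 64 bytes): a non-leaf function with 40 bytes of locals,
   two callee general register saves, no FP saves and no outgoing
   argument space needs

       40 (locals) + 8 (offset) + 8 (GR saves) + 32 (frame marker) = 88

   bytes, which the final rounding grows to a 128-byte frame.  */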
3667 /* Generate the assembly code for function entry. FILE is a stdio
3668 stream to output the code to. SIZE is an int: how many units of
3669 temporary storage to allocate.
3671 Refer to the array `regs_ever_live' to determine which registers to
3672 save; `regs_ever_live[I]' is nonzero if register number I is ever
3673 used in the function. This function is responsible for knowing
3674 which registers should not be saved even if used. */
3676 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3677 of memory. If any fpu reg is used in the function, we allocate
3678 such a block here, at the bottom of the frame, just in case it's needed.
3680 If this function is a leaf procedure, then we may choose not
3681 to do a "save" insn. The decision about whether or not
3682 to do this is made in regclass.c. */
3684 static void
3685 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3687 /* The function's label and associated .PROC must never be
3688 separated and must be output *after* any profiling declarations
3689 to avoid changing spaces/subspaces within a procedure. */
3690 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3691 fputs ("\t.PROC\n", file);
3693 /* hppa_expand_prologue does the dirty work now. We just need
3694 to output the assembler directives which denote the start
3695 of a function. */
3696 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3697 if (regs_ever_live[2])
3698 fputs (",CALLS,SAVE_RP", file);
3699 else
3700 fputs (",NO_CALLS", file);
3702 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3703 at the beginning of the frame and that it is used as the frame
3704 pointer for the frame. We do this because our current frame
3705 layout doesn't conform to that specified in the HP runtime
3706 documentation and we need a way to indicate to programs such as
3707 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3708 isn't used by HP compilers but is supported by the assembler.
3709 However, SAVE_SP is supposed to indicate that the previous stack
3710 pointer has been saved in the frame marker. */
3711 if (frame_pointer_needed)
3712 fputs (",SAVE_SP", file);
3714 /* Pass on information about the number of callee register saves
3715 performed in the prologue.
3717 The compiler is supposed to pass the highest register number
3718 saved, the assembler then has to adjust that number before
3719 entering it into the unwind descriptor (to account for any
3720 caller saved registers with lower register numbers than the
3721 first callee saved register). */
3722 if (gr_saved)
3723 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3725 if (fr_saved)
3726 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3728 fputs ("\n\t.ENTRY\n", file);
3730 remove_useless_addtr_insns (0);
3733 void
3734 hppa_expand_prologue (void)
3736 int merge_sp_adjust_with_store = 0;
3737 HOST_WIDE_INT size = get_frame_size ();
3738 HOST_WIDE_INT offset;
3739 int i;
3740 rtx insn, tmpreg;
3742 gr_saved = 0;
3743 fr_saved = 0;
3744 save_fregs = 0;
3746 /* Compute total size for frame pointer, filler, locals and rounding to
3747 the next word boundary. Similar code appears in compute_frame_size
3748 and must be changed in tandem with this code. */
3749 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3750 if (local_fsize || frame_pointer_needed)
3751 local_fsize += STARTING_FRAME_OFFSET;
3753 actual_fsize = compute_frame_size (size, &save_fregs);
3755 /* Compute a few things we will use often. */
3756 tmpreg = gen_rtx_REG (word_mode, 1);
3758 /* Save RP first. The calling conventions manual states RP will
3759 always be stored into the caller's frame at sp - 20 or sp - 16
3760 depending on which ABI is in use. */
3761 if (regs_ever_live[2] || current_function_calls_eh_return)
3762 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3764 /* Allocate the local frame and set up the frame pointer if needed. */
3765 if (actual_fsize != 0)
3767 if (frame_pointer_needed)
3769 /* Copy the old frame pointer temporarily into %r1. Set up the
3770 new stack pointer, then store away the saved old frame pointer
3771 into the stack at sp and at the same time update the stack
3772 pointer by actual_fsize bytes. Two versions: the first
3773 handles small (<8k) frames, the second handles large (>=8k)
3774 frames. */
3775 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3776 if (DO_FRAME_NOTES)
3778 /* We need to record the frame pointer save here since the
3779 new frame pointer is set in the following insn. */
3780 RTX_FRAME_RELATED_P (insn) = 1;
3781 REG_NOTES (insn)
3782 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3783 gen_rtx_SET (VOIDmode,
3784 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3785 frame_pointer_rtx),
3786 REG_NOTES (insn));
3789 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3790 if (DO_FRAME_NOTES)
3791 RTX_FRAME_RELATED_P (insn) = 1;
3793 if (VAL_14_BITS_P (actual_fsize))
3794 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3795 else
3797 /* It is incorrect to store the saved frame pointer at *sp,
3798 then increment sp (writes beyond the current stack boundary).
3800 So instead use stwm to store at *sp and post-increment the
3801 stack pointer as an atomic operation. Then increment sp to
3802 finish allocating the new frame. */
3803 HOST_WIDE_INT adjust1 = 8192 - 64;
3804 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3806 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3807 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3808 adjust2, 1);
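          /* Worked example (illustrative): with actual_fsize == 16384,
             adjust1 is 8192 - 64 == 8128 and adjust2 is 8256.  The
             store-with-modify displacement must fit in 14 signed bits
             (at most 8191), so adjust1 stays safely in range and the
             remainder is added with an ordinary register add.  */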
3811 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3812 we need to store the previous stack pointer (frame pointer)
3813 into the frame marker on targets that use the HP unwind
3814 library. This allows the HP unwind library to be used to
3815 unwind GCC frames. However, we are not fully compatible
3816 with the HP library because our frame layout differs from
3817 that specified in the HP runtime specification.
3819 We don't want a frame note on this instruction as the frame
3820 marker moves during dynamic stack allocation.
3822 This instruction also serves as a blockage to prevent
3823 register spills from being scheduled before the stack
3824 pointer is raised. This is necessary as we store
3825 registers using the frame pointer as a base register,
3826 and the frame pointer is set before sp is raised. */
3827 if (TARGET_HPUX_UNWIND_LIBRARY)
3829 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3830 GEN_INT (TARGET_64BIT ? -8 : -4));
3832 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3833 frame_pointer_rtx);
3835 else
3836 emit_insn (gen_blockage ());
3838 /* no frame pointer needed. */
3839 else
3841 /* In some cases we can perform the first callee register save
3842 and allocating the stack frame at the same time. If so, just
3843 make a note of it and defer allocating the frame until saving
3844 the callee registers. */
3845 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3846 merge_sp_adjust_with_store = 1;
3847       /* Cannot optimize.  Adjust the stack frame by actual_fsize
3848          bytes.  */
3849 else
3850 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3851 actual_fsize, 1);
3855 /* Normal register save.
3857 Do not save the frame pointer in the frame_pointer_needed case. It
3858 was done earlier. */
3859 if (frame_pointer_needed)
3861 offset = local_fsize;
3863 /* Saving the EH return data registers in the frame is the simplest
3864 way to get the frame unwind information emitted. We put them
3865 just before the general registers. */
3866 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3868 unsigned int i, regno;
3870 for (i = 0; ; ++i)
3872 regno = EH_RETURN_DATA_REGNO (i);
3873 if (regno == INVALID_REGNUM)
3874 break;
3876 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3877 offset += UNITS_PER_WORD;
3881 for (i = 18; i >= 4; i--)
3882 if (regs_ever_live[i] && ! call_used_regs[i])
3884 store_reg (i, offset, FRAME_POINTER_REGNUM);
3885 offset += UNITS_PER_WORD;
3886 gr_saved++;
3888 /* Account for %r3 which is saved in a special place. */
3889 gr_saved++;
3891 /* No frame pointer needed. */
3892 else
3894 offset = local_fsize - actual_fsize;
3896 /* Saving the EH return data registers in the frame is the simplest
3897 way to get the frame unwind information emitted. */
3898 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3900 unsigned int i, regno;
3902 for (i = 0; ; ++i)
3904 regno = EH_RETURN_DATA_REGNO (i);
3905 if (regno == INVALID_REGNUM)
3906 break;
3908 /* If merge_sp_adjust_with_store is nonzero, then we can
3909 optimize the first save. */
3910 if (merge_sp_adjust_with_store)
3912 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3913 merge_sp_adjust_with_store = 0;
3915 else
3916 store_reg (regno, offset, STACK_POINTER_REGNUM);
3917 offset += UNITS_PER_WORD;
3921 for (i = 18; i >= 3; i--)
3922 if (regs_ever_live[i] && ! call_used_regs[i])
3924 /* If merge_sp_adjust_with_store is nonzero, then we can
3925 optimize the first GR save. */
3926 if (merge_sp_adjust_with_store)
3928 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3929 merge_sp_adjust_with_store = 0;
3931 else
3932 store_reg (i, offset, STACK_POINTER_REGNUM);
3933 offset += UNITS_PER_WORD;
3934 gr_saved++;
3937 /* If we wanted to merge the SP adjustment with a GR save, but we never
3938 did any GR saves, then just emit the adjustment here. */
3939 if (merge_sp_adjust_with_store)
3940 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3941 actual_fsize, 1);
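  /* When the merge does happen, the first callee save is emitted as a
     single store-with-modify -- roughly "stwm %r18,FSIZE(%sp)" in
     32-bit code -- storing the register at the old top of stack and
     advancing %sp by the frame size in one instruction.  (Register and
     mnemonic shown are illustrative.)  */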
3944 /* The hppa calling conventions say that %r19, the pic offset
3945 register, is saved at sp - 32 (in this function's frame)
3946 when generating PIC code. FIXME: What is the correct thing
3947 to do for functions which make no calls and allocate no
3948 frame? Do we need to allocate a frame, or can we just omit
3949 the save? For now we'll just omit the save.
3951 We don't want a note on this insn as the frame marker can
3952 move if there is a dynamic stack allocation. */
3953 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3955 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3957 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3961 /* Align pointer properly (doubleword boundary). */
3962 offset = (offset + 7) & ~7;
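  /* E.g., offset == 20 becomes 24 here, while an already aligned value
     such as 24 is unchanged: (24 + 7) & ~7 == 24.  */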
3964 /* Floating point register store. */
3965 if (save_fregs)
3967 rtx base;
3969 /* First get the frame or stack pointer to the start of the FP register
3970 save area. */
3971 if (frame_pointer_needed)
3973 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3974 base = frame_pointer_rtx;
3976 else
3978 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3979 base = stack_pointer_rtx;
3982 /* Now actually save the FP registers. */
3983 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3985 if (regs_ever_live[i]
3986 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3988 rtx addr, insn, reg;
3989 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3990 reg = gen_rtx_REG (DFmode, i);
3991 insn = emit_move_insn (addr, reg);
3992 if (DO_FRAME_NOTES)
3994 RTX_FRAME_RELATED_P (insn) = 1;
3995 if (TARGET_64BIT)
3997 rtx mem = gen_rtx_MEM (DFmode,
3998 plus_constant (base, offset));
3999 REG_NOTES (insn)
4000 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4001 gen_rtx_SET (VOIDmode, mem, reg),
4002 REG_NOTES (insn));
4004 else
4006 rtx meml = gen_rtx_MEM (SFmode,
4007 plus_constant (base, offset));
4008 rtx memr = gen_rtx_MEM (SFmode,
4009 plus_constant (base, offset + 4));
4010 rtx regl = gen_rtx_REG (SFmode, i);
4011 rtx regr = gen_rtx_REG (SFmode, i + 1);
4012 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4013 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4014 rtvec vec;
4016 RTX_FRAME_RELATED_P (setl) = 1;
4017 RTX_FRAME_RELATED_P (setr) = 1;
4018 vec = gen_rtvec (2, setl, setr);
4019 REG_NOTES (insn)
4020 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4021 gen_rtx_SEQUENCE (VOIDmode, vec),
4022 REG_NOTES (insn));
4025 offset += GET_MODE_SIZE (DFmode);
4026 fr_saved++;
4032 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4033 Handle case where DISP > 8k by using the add_high_const patterns. */
4035 static void
4036 load_reg (int reg, HOST_WIDE_INT disp, int base)
4038 rtx dest = gen_rtx_REG (word_mode, reg);
4039 rtx basereg = gen_rtx_REG (Pmode, base);
4040 rtx src;
4042 if (VAL_14_BITS_P (disp))
4043 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4044 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4046 rtx delta = GEN_INT (disp);
4047 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4049 emit_move_insn (tmpreg, delta);
4050 if (TARGET_DISABLE_INDEXING)
4052 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4053 src = gen_rtx_MEM (word_mode, tmpreg);
4055 else
4056 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4058 else
4060 rtx delta = GEN_INT (disp);
4061 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4062 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4064 emit_move_insn (tmpreg, high);
4065 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4068 emit_move_insn (dest, src);
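/* Illustrative 32-bit expansion for a displacement that does not fit in
   14 bits, say disp == 0x12345: the high-part add comes out roughly as
   "addil L'0x12345,%base" (result in %r1), followed by the load
   "ldw R'0x12345(%r1),%dest".  Exact mnemonics depend on the assembler
   dialect; this sketch is only for orientation.  */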
4071 /* Update the total code bytes output to the text section. */
4073 static void
4074 update_total_code_bytes (int nbytes)
4076 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4077 && !IN_NAMED_SECTION_P (cfun->decl))
4079 if (INSN_ADDRESSES_SET_P ())
4081 unsigned long old_total = total_code_bytes;
4083 total_code_bytes += nbytes;
4085 /* Be prepared to handle overflows. */
4086 if (old_total > total_code_bytes)
4087 total_code_bytes = -1;
4089 else
4090 total_code_bytes = -1;
4094 /* This function generates the assembly code for function exit.
4095 Args are as for output_function_prologue ().
4097 The function epilogue should not depend on the current stack
4098 pointer! It should use the frame pointer only. This is mandatory
4099 because of alloca; we also take advantage of it to omit stack
4100 adjustments before returning. */
4102 static void
4103 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4105 rtx insn = get_last_insn ();
4107 last_address = 0;
4109 /* hppa_expand_epilogue does the dirty work now. We just need
4110 to output the assembler directives which denote the end
4111 of a function.
4113 To make debuggers happy, emit a nop if the epilogue was completely
4114 eliminated due to a volatile call as the last insn in the
4115 current function. That way the return address (in %r2) will
4116 always point to a valid instruction in the current function. */
4118 /* Get the last real insn. */
4119 if (GET_CODE (insn) == NOTE)
4120 insn = prev_real_insn (insn);
4122 /* If it is a sequence, then look inside. */
4123 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4124 insn = XVECEXP (PATTERN (insn), 0, 0);
4126 /* If insn is a CALL_INSN, then it must be a call to a volatile
4127 function (otherwise there would be epilogue insns). */
4128 if (insn && GET_CODE (insn) == CALL_INSN)
4130 fputs ("\tnop\n", file);
4131 last_address += 4;
4134 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4136 if (INSN_ADDRESSES_SET_P ())
4138 insn = get_last_nonnote_insn ();
4139 last_address += INSN_ADDRESSES (INSN_UID (insn));
4140 if (INSN_P (insn))
4141 last_address += insn_default_length (insn);
4142 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4143 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4146 /* Finally, update the total number of code bytes output so far. */
4147 update_total_code_bytes (last_address);
4150 void
4151 hppa_expand_epilogue (void)
4153 rtx tmpreg;
4154 HOST_WIDE_INT offset;
4155 HOST_WIDE_INT ret_off = 0;
4156 int i;
4157 int merge_sp_adjust_with_load = 0;
4159 /* We will use this often. */
4160 tmpreg = gen_rtx_REG (word_mode, 1);
4162 /* Try to restore RP early to avoid load/use interlocks when
4163 RP gets used in the return (bv) instruction. This appears to still
4164 be necessary even when we schedule the prologue and epilogue. */
4165 if (regs_ever_live [2] || current_function_calls_eh_return)
4167 ret_off = TARGET_64BIT ? -16 : -20;
4168 if (frame_pointer_needed)
4170 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4171 ret_off = 0;
4173 else
4175 /* No frame pointer, and stack is smaller than 8k. */
4176 if (VAL_14_BITS_P (ret_off - actual_fsize))
4178 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4179 ret_off = 0;
4184 /* General register restores. */
4185 if (frame_pointer_needed)
4187 offset = local_fsize;
4189 /* If the current function calls __builtin_eh_return, then we need
4190 to restore the saved EH data registers. */
4191 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4193 unsigned int i, regno;
4195 for (i = 0; ; ++i)
4197 regno = EH_RETURN_DATA_REGNO (i);
4198 if (regno == INVALID_REGNUM)
4199 break;
4201 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4202 offset += UNITS_PER_WORD;
4206 for (i = 18; i >= 4; i--)
4207 if (regs_ever_live[i] && ! call_used_regs[i])
4209 load_reg (i, offset, FRAME_POINTER_REGNUM);
4210 offset += UNITS_PER_WORD;
4213 else
4215 offset = local_fsize - actual_fsize;
4217 /* If the current function calls __builtin_eh_return, then we need
4218 to restore the saved EH data registers. */
4219 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4221 unsigned int i, regno;
4223 for (i = 0; ; ++i)
4225 regno = EH_RETURN_DATA_REGNO (i);
4226 if (regno == INVALID_REGNUM)
4227 break;
4229 /* Only for the first load.
4230 merge_sp_adjust_with_load holds the register load
4231 with which we will merge the sp adjustment. */
4232 if (merge_sp_adjust_with_load == 0
4233 && local_fsize == 0
4234 && VAL_14_BITS_P (-actual_fsize))
4235 merge_sp_adjust_with_load = regno;
4236 else
4237 load_reg (regno, offset, STACK_POINTER_REGNUM);
4238 offset += UNITS_PER_WORD;
4242 for (i = 18; i >= 3; i--)
4244 if (regs_ever_live[i] && ! call_used_regs[i])
4246 /* Only for the first load.
4247 merge_sp_adjust_with_load holds the register load
4248 with which we will merge the sp adjustment. */
4249 if (merge_sp_adjust_with_load == 0
4250 && local_fsize == 0
4251 && VAL_14_BITS_P (-actual_fsize))
4252 merge_sp_adjust_with_load = i;
4253 else
4254 load_reg (i, offset, STACK_POINTER_REGNUM);
4255 offset += UNITS_PER_WORD;
4260 /* Align pointer properly (doubleword boundary). */
4261 offset = (offset + 7) & ~7;
4263 /* FP register restores. */
4264 if (save_fregs)
4266 /* Adjust the register to index off of. */
4267 if (frame_pointer_needed)
4268 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4269 else
4270 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4272 /* Actually do the restores now. */
4273 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4274 if (regs_ever_live[i]
4275 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4277 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4278 rtx dest = gen_rtx_REG (DFmode, i);
4279 emit_move_insn (dest, src);
4283 /* Emit a blockage insn here to keep these insns from being moved to
4284 an earlier spot in the epilogue, or into the main instruction stream.
4286 This is necessary as we must not cut the stack back before all the
4287 restores are finished. */
4288 emit_insn (gen_blockage ());
4290 /* Reset stack pointer (and possibly frame pointer). The stack
4291 pointer is initially set to fp + 64 to avoid a race condition. */
4292 if (frame_pointer_needed)
4294 rtx delta = GEN_INT (-64);
4296 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4297 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4299 /* If we were deferring a callee register restore, do it now. */
4300 else if (merge_sp_adjust_with_load)
4302 rtx delta = GEN_INT (-actual_fsize);
4303 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4305 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4307 else if (actual_fsize != 0)
4308 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4309 - actual_fsize, 0);
4311 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4312 frame greater than 8k), do so now. */
4313 if (ret_off != 0)
4314 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4316 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4318 rtx sa = EH_RETURN_STACKADJ_RTX;
4320 emit_insn (gen_blockage ());
4321 emit_insn (TARGET_64BIT
4322 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4323 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4328 rtx hppa_pic_save_rtx (void)
4330 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4333 void
4334 hppa_profile_hook (int label_no)
4336 /* We use SImode for the address of the function in both 32 and
4337 64-bit code to avoid having to provide DImode versions of the
4338 lcla2 and load_offset_label_address insn patterns. */
4339 rtx reg = gen_reg_rtx (SImode);
4340 rtx label_rtx = gen_label_rtx ();
4341 rtx begin_label_rtx, call_insn;
4342 char begin_label_name[16];
4344 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4345 label_no);
4346 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4348 if (TARGET_64BIT)
4349 emit_move_insn (arg_pointer_rtx,
4350 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4351 GEN_INT (64)));
4353 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4355   /* The address of the function is loaded into %r25 with an instruction-
4356 relative sequence that avoids the use of relocations. The sequence
4357 is split so that the load_offset_label_address instruction can
4358 occupy the delay slot of the call to _mcount. */
4359 if (TARGET_PA_20)
4360 emit_insn (gen_lcla2 (reg, label_rtx));
4361 else
4362 emit_insn (gen_lcla1 (reg, label_rtx));
4364 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4365 reg, begin_label_rtx, label_rtx));
4367 #ifndef NO_PROFILE_COUNTERS
4369 rtx count_label_rtx, addr, r24;
4370 char count_label_name[16];
4372 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4373 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4375 addr = force_reg (Pmode, count_label_rtx);
4376 r24 = gen_rtx_REG (Pmode, 24);
4377 emit_move_insn (r24, addr);
4379 call_insn =
4380 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4381 gen_rtx_SYMBOL_REF (Pmode,
4382 "_mcount")),
4383 GEN_INT (TARGET_64BIT ? 24 : 12)));
4385 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4387 #else
4389 call_insn =
4390 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4391 gen_rtx_SYMBOL_REF (Pmode,
4392 "_mcount")),
4393 GEN_INT (TARGET_64BIT ? 16 : 8)));
4395 #endif
4397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4398 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4400 /* Indicate the _mcount call cannot throw, nor will it execute a
4401 non-local goto. */
4402 REG_NOTES (call_insn)
4403 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4406 /* Fetch the return address for the frame COUNT steps up from
4407 the current frame, after the prologue. FRAMEADDR is the
4408 frame pointer of the COUNT frame.
4410 We want to ignore any export stub remnants here. To handle this,
4411 we examine the code at the return address, and if it is an export
4412 stub, we return a memory rtx for the stub return address stored
4413 at frame-24.
4415 The value returned is used in two different ways:
4417 1. To find a function's caller.
4419 2. To change the return address for a function.
4421 This function handles most instances of case 1; however, it will
4422 fail if there are two levels of stubs to execute on the return
4423 path. The only way I believe that can happen is if the return value
4424 needs a parameter relocation, which never happens for C code.
4426 This function handles most instances of case 2; however, it will
4427 fail if we did not originally have stub code on the return path
4428 but will need stub code on the new return path. This can happen if
4429 the caller & callee are both in the main program, but the new
4430 return location is in a shared library. */
4433 rtx return_addr_rtx (int count, rtx frameaddr)
4435 rtx label;
4436 rtx rp;
4437 rtx saved_rp;
4438 rtx ins;
4440 if (count != 0)
4441 return NULL_RTX;
4443 rp = get_hard_reg_initial_val (Pmode, 2);
4445 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4446 return rp;
4448 saved_rp = gen_reg_rtx (Pmode);
4449 emit_move_insn (saved_rp, rp);
4451 /* Get pointer to the instruction stream. We have to mask out the
4452 privilege level from the two low order bits of the return address
4453 pointer here so that ins will point to the start of the first
4454 instruction that would have been executed if we returned. */
4455 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
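  /* MASK_RETURN_ADDR clears the privilege-level bits held in the low
     end of the return address; e.g., a return address of 0x40001003
     (privilege level 3) masks to 0x40001000, the true start of the
     instruction to examine.  (Address made up for illustration.)  */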
4456 label = gen_label_rtx ();
4458 /* Check the instruction stream at the normal return address for the
4459 export stub:
4461 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4462 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4463 0x00011820 | stub+16: mtsp r1,sr0
4464 0xe0400002 | stub+20: be,n 0(sr0,rp)
4466      If it is an export stub, then our return address is really in
4467 -24[frameaddr]. */
4469 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4470 NULL_RTX, SImode, 1);
4471 emit_jump_insn (gen_bne (label));
4473 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4474 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4475 emit_jump_insn (gen_bne (label));
4477 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4478 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4479 emit_jump_insn (gen_bne (label));
4481 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4482 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4484 /* If there is no export stub then just use the value saved from
4485 the return pointer register. */
4487 emit_jump_insn (gen_bne (label));
4489 /* Here we know that our return address points to an export
4490 stub. We don't want to return the address of the export stub,
4491 but rather the return address of the export stub. That return
4492 address is stored at -24[frameaddr]. */
4494 emit_move_insn (saved_rp,
4495 gen_rtx_MEM (Pmode,
4496 memory_address (Pmode,
4497 plus_constant (frameaddr,
4498 -24))));
4500 emit_label (label);
4501 return saved_rp;
4504 /* This is only valid once reload has completed because it depends on
4505 knowing exactly how much (if any) frame there is and...
4507 It's only valid if there is no frame marker to de-allocate and...
4509 It's only valid if %r2 hasn't been saved into the caller's frame
4510 (we're not profiling and %r2 isn't live anywhere). */
4512 int hppa_can_use_return_insn_p (void)
4514 return (reload_completed
4515 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4516 && ! regs_ever_live[2]
4517 && ! frame_pointer_needed);
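/* For instance, a small leaf function with no locals and no saved
   registers passes all of the tests above and can return with a bare
   "bv %r0(%r2)" rather than a full epilogue.  (Illustrative; the
   actual return pattern lives in pa.md.)  */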
4520 void
4521 emit_bcond_fp (enum rtx_code code, rtx operand0)
4523 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4524 gen_rtx_IF_THEN_ELSE (VOIDmode,
4525 gen_rtx_fmt_ee (code,
4526 VOIDmode,
4527 gen_rtx_REG (CCFPmode, 0),
4528 const0_rtx),
4529 gen_rtx_LABEL_REF (VOIDmode, operand0),
4530 pc_rtx)));
4535 rtx gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4537 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4538 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4541 /* Adjust the cost of a scheduling dependency. Return the new cost of
4542 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4544 static int
4545 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4547 enum attr_type attr_type;
4549   /* Don't adjust costs for a pa8000 chip, nor adjust any
4550      true dependencies, as they are described with bypasses now.  */
4551 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4552 return cost;
4554 if (! recog_memoized (insn))
4555 return 0;
4557 attr_type = get_attr_type (insn);
4559 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4561 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4562 cycles later. */
4564 if (attr_type == TYPE_FPLOAD)
4566 rtx pat = PATTERN (insn);
4567 rtx dep_pat = PATTERN (dep_insn);
4568 if (GET_CODE (pat) == PARALLEL)
4570 /* This happens for the fldXs,mb patterns. */
4571 pat = XVECEXP (pat, 0, 0);
4573 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4574 /* If this happens, we have to extend this to schedule
4575 optimally. Return 0 for now. */
4576 return 0;
4578 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4580 if (! recog_memoized (dep_insn))
4581 return 0;
4582 switch (get_attr_type (dep_insn))
4584 case TYPE_FPALU:
4585 case TYPE_FPMULSGL:
4586 case TYPE_FPMULDBL:
4587 case TYPE_FPDIVSGL:
4588 case TYPE_FPDIVDBL:
4589 case TYPE_FPSQRTSGL:
4590 case TYPE_FPSQRTDBL:
4591 /* A fpload can't be issued until one cycle before a
4592 preceding arithmetic operation has finished if
4593 the target of the fpload is any of the sources
4594 (or destination) of the arithmetic operation. */
4595 return insn_default_latency (dep_insn) - 1;
4597 default:
4598 return 0;
4602 else if (attr_type == TYPE_FPALU)
4604 rtx pat = PATTERN (insn);
4605 rtx dep_pat = PATTERN (dep_insn);
4606 if (GET_CODE (pat) == PARALLEL)
4608 /* This happens for the fldXs,mb patterns. */
4609 pat = XVECEXP (pat, 0, 0);
4611 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4612 /* If this happens, we have to extend this to schedule
4613 optimally. Return 0 for now. */
4614 return 0;
4616 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4618 if (! recog_memoized (dep_insn))
4619 return 0;
4620 switch (get_attr_type (dep_insn))
4622 case TYPE_FPDIVSGL:
4623 case TYPE_FPDIVDBL:
4624 case TYPE_FPSQRTSGL:
4625 case TYPE_FPSQRTDBL:
4626 /* An ALU flop can't be issued until two cycles before a
4627 preceding divide or sqrt operation has finished if
4628 the target of the ALU flop is any of the sources
4629 (or destination) of the divide or sqrt operation. */
4630 return insn_default_latency (dep_insn) - 2;
4632 default:
4633 return 0;
4638 /* For other anti dependencies, the cost is 0. */
4639 return 0;
4641 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4643 /* Output dependency; DEP_INSN writes a register that INSN writes some
4644 cycles later. */
4645 if (attr_type == TYPE_FPLOAD)
4647 rtx pat = PATTERN (insn);
4648 rtx dep_pat = PATTERN (dep_insn);
4649 if (GET_CODE (pat) == PARALLEL)
4651 /* This happens for the fldXs,mb patterns. */
4652 pat = XVECEXP (pat, 0, 0);
4654 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4655 /* If this happens, we have to extend this to schedule
4656 optimally. Return 0 for now. */
4657 return 0;
4659 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4661 if (! recog_memoized (dep_insn))
4662 return 0;
4663 switch (get_attr_type (dep_insn))
4665 case TYPE_FPALU:
4666 case TYPE_FPMULSGL:
4667 case TYPE_FPMULDBL:
4668 case TYPE_FPDIVSGL:
4669 case TYPE_FPDIVDBL:
4670 case TYPE_FPSQRTSGL:
4671 case TYPE_FPSQRTDBL:
4672 /* A fpload can't be issued until one cycle before a
4673 preceding arithmetic operation has finished if
4674 the target of the fpload is the destination of the
4675 arithmetic operation.
4677 Exception: For PA7100LC, PA7200 and PA7300, the cost
4678 is 3 cycles, unless they bundle together. We also
4679 pay the penalty if the second insn is a fpload. */
4680 return insn_default_latency (dep_insn) - 1;
4682 default:
4683 return 0;
4687 else if (attr_type == TYPE_FPALU)
4689 rtx pat = PATTERN (insn);
4690 rtx dep_pat = PATTERN (dep_insn);
4691 if (GET_CODE (pat) == PARALLEL)
4693 /* This happens for the fldXs,mb patterns. */
4694 pat = XVECEXP (pat, 0, 0);
4696 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4697 /* If this happens, we have to extend this to schedule
4698 optimally. Return 0 for now. */
4699 return 0;
4701 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4703 if (! recog_memoized (dep_insn))
4704 return 0;
4705 switch (get_attr_type (dep_insn))
4707 case TYPE_FPDIVSGL:
4708 case TYPE_FPDIVDBL:
4709 case TYPE_FPSQRTSGL:
4710 case TYPE_FPSQRTDBL:
4711 /* An ALU flop can't be issued until two cycles before a
4712 preceding divide or sqrt operation has finished if
4713 the target of the ALU flop is also the target of
4714 the divide or sqrt operation. */
4715 return insn_default_latency (dep_insn) - 2;
4717 default:
4718 return 0;
4723 /* For other output dependencies, the cost is 0. */
4724 return 0;
4726 else
4727 abort ();
4730 /* Adjust scheduling priorities. We use this to try and keep addil
4731 and the next use of %r1 close together. */
4732 static int
4733 pa_adjust_priority (rtx insn, int priority)
4735 rtx set = single_set (insn);
4736 rtx src, dest;
4737 if (set)
4739 src = SET_SRC (set);
4740 dest = SET_DEST (set);
4741 if (GET_CODE (src) == LO_SUM
4742 && symbolic_operand (XEXP (src, 1), VOIDmode)
4743 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4744 priority >>= 3;
4746 else if (GET_CODE (src) == MEM
4747 && GET_CODE (XEXP (src, 0)) == LO_SUM
4748 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4749 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4750 priority >>= 1;
4752 else if (GET_CODE (dest) == MEM
4753 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4754 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4755 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4756 priority >>= 3;
4758 return priority;
4761 /* The 700 can only issue a single insn at a time.
4762 The 7XXX processors can issue two insns at a time.
4763 The 8000 can issue 4 insns at a time. */
4764 static int
4765 pa_issue_rate (void)
4767 switch (pa_cpu)
4769 case PROCESSOR_700: return 1;
4770 case PROCESSOR_7100: return 2;
4771 case PROCESSOR_7100LC: return 2;
4772 case PROCESSOR_7200: return 2;
4773 case PROCESSOR_7300: return 2;
4774 case PROCESSOR_8000: return 4;
4776 default:
4777 abort ();
4783 /* Return any length adjustment needed by INSN which already has its length
4784 computed as LENGTH. Return zero if no adjustment is necessary.
4786 For the PA: function calls, millicode calls, and backwards short
4787 conditional branches with unfilled delay slots need an adjustment by +1
4788 (to account for the NOP which will be inserted into the instruction stream).
4790 Also compute the length of an inline block move here as it is too
4791 complicated to express as a length attribute in pa.md. */
4793 int pa_adjust_insn_length (rtx insn, int length)
4795 rtx pat = PATTERN (insn);
4797 /* Jumps inside switch tables which have unfilled delay slots need
4798 adjustment. */
4799 if (GET_CODE (insn) == JUMP_INSN
4800 && GET_CODE (pat) == PARALLEL
4801 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4802 return 4;
4803 /* Millicode insn with an unfilled delay slot. */
4804 else if (GET_CODE (insn) == INSN
4805 && GET_CODE (pat) != SEQUENCE
4806 && GET_CODE (pat) != USE
4807 && GET_CODE (pat) != CLOBBER
4808 && get_attr_type (insn) == TYPE_MILLI)
4809 return 4;
4810 /* Block move pattern. */
4811 else if (GET_CODE (insn) == INSN
4812 && GET_CODE (pat) == PARALLEL
4813 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4814 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4815 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4816 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4817 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4818 return compute_movmem_length (insn) - 4;
4819 /* Block clear pattern. */
4820 else if (GET_CODE (insn) == INSN
4821 && GET_CODE (pat) == PARALLEL
4822 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4823 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4824 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4825 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4826 return compute_clrmem_length (insn) - 4;
4827 /* Conditional branch with an unfilled delay slot. */
4828 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4830 /* Adjust a short backwards conditional with an unfilled delay slot. */
4831 if (GET_CODE (pat) == SET
4832 && length == 4
4833 && ! forward_branch_p (insn))
4834 return 4;
4835 else if (GET_CODE (pat) == PARALLEL
4836 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4837 && length == 4)
4838 return 4;
4839 /* Adjust dbra insn with short backwards conditional branch with
4840      unfilled delay slot -- only for the case where the counter is in a
4841      general register.  */
4842 else if (GET_CODE (pat) == PARALLEL
4843 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4844 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4845 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4846 && length == 4
4847 && ! forward_branch_p (insn))
4848 return 4;
4849 else
4850 return 0;
4852 return 0;
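/* Example: a short backwards conditional branch whose delay slot stays
   unfilled is counted as 4 bytes by the length attribute, but the
   assembler will insert a nop in the slot, so we report 4 extra bytes
   (8 in total) for branch distance computations.  */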
4855 /* Print operand X (an rtx) in assembler syntax to file FILE.
4856 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4857 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4859 void
4860 print_operand (FILE *file, rtx x, int code)
4862 switch (code)
4864 case '#':
4865 /* Output a 'nop' if there's nothing for the delay slot. */
4866 if (dbr_sequence_length () == 0)
4867 fputs ("\n\tnop", file);
4868 return;
4869 case '*':
4870       /* Output a nullification completer if there's nothing for the
4871          delay slot or nullification is requested.  */
4872 if (dbr_sequence_length () == 0 ||
4873 (final_sequence &&
4874 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4875 fputs (",n", file);
4876 return;
4877 case 'R':
4878 /* Print out the second register name of a register pair.
4879 I.e., R (6) => 7. */
4880 fputs (reg_names[REGNO (x) + 1], file);
4881 return;
4882 case 'r':
4883 /* A register or zero. */
4884 if (x == const0_rtx
4885 || (x == CONST0_RTX (DFmode))
4886 || (x == CONST0_RTX (SFmode)))
4888 fputs ("%r0", file);
4889 return;
4891 else
4892 break;
4893 case 'f':
4894 /* A register or zero (floating point). */
4895 if (x == const0_rtx
4896 || (x == CONST0_RTX (DFmode))
4897 || (x == CONST0_RTX (SFmode)))
4899 fputs ("%fr0", file);
4900 return;
4902 else
4903 break;
4904 case 'A':
4906 rtx xoperands[2];
4908 xoperands[0] = XEXP (XEXP (x, 0), 0);
4909 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4910 output_global_address (file, xoperands[1], 0);
4911 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4912 return;
4915 case 'C': /* Plain (C)ondition */
4916 case 'X':
4917 switch (GET_CODE (x))
4919 case EQ:
4920 fputs ("=", file); break;
4921 case NE:
4922 fputs ("<>", file); break;
4923 case GT:
4924 fputs (">", file); break;
4925 case GE:
4926 fputs (">=", file); break;
4927 case GEU:
4928 fputs (">>=", file); break;
4929 case GTU:
4930 fputs (">>", file); break;
4931 case LT:
4932 fputs ("<", file); break;
4933 case LE:
4934 fputs ("<=", file); break;
4935 case LEU:
4936 fputs ("<<=", file); break;
4937 case LTU:
4938 fputs ("<<", file); break;
4939 default:
4940 abort ();
4942 return;
4943 case 'N': /* Condition, (N)egated */
4944 switch (GET_CODE (x))
4946 case EQ:
4947 fputs ("<>", file); break;
4948 case NE:
4949 fputs ("=", file); break;
4950 case GT:
4951 fputs ("<=", file); break;
4952 case GE:
4953 fputs ("<", file); break;
4954 case GEU:
4955 fputs ("<<", file); break;
4956 case GTU:
4957 fputs ("<<=", file); break;
4958 case LT:
4959 fputs (">=", file); break;
4960 case LE:
4961 fputs (">", file); break;
4962 case LEU:
4963 fputs (">>", file); break;
4964 case LTU:
4965 fputs (">>=", file); break;
4966 default:
4967 abort ();
4969 return;
4970 /* For floating point comparisons. Note that the output
4971      predicates are the complement of the desired condition.  */
4972 case 'Y':
4973 switch (GET_CODE (x))
4975 case EQ:
4976 fputs ("!=", file); break;
4977 case NE:
4978 fputs ("=", file); break;
4979 case GT:
4980 fputs ("!>", file); break;
4981 case GE:
4982 fputs ("!>=", file); break;
4983 case LT:
4984 fputs ("!<", file); break;
4985 case LE:
4986 fputs ("!<=", file); break;
4987 case LTGT:
4988 fputs ("!<>", file); break;
4989 case UNLE:
4990 fputs (">", file); break;
4991 case UNLT:
4992 fputs (">=", file); break;
4993 case UNGE:
4994 fputs ("<", file); break;
4995 case UNGT:
4996 fputs ("<=", file); break;
4997 case UNEQ:
4998 fputs ("<>", file); break;
4999 case UNORDERED:
5000 fputs ("<=>", file); break;
5001 case ORDERED:
5002 fputs ("!<=>", file); break;
5003 default:
5004 abort ();
5006 return;
5007 case 'S': /* Condition, operands are (S)wapped. */
5008 switch (GET_CODE (x))
5010 case EQ:
5011 fputs ("=", file); break;
5012 case NE:
5013 fputs ("<>", file); break;
5014 case GT:
5015 fputs ("<", file); break;
5016 case GE:
5017 fputs ("<=", file); break;
5018 case GEU:
5019 fputs ("<<=", file); break;
5020 case GTU:
5021 fputs ("<<", file); break;
5022 case LT:
5023 fputs (">", file); break;
5024 case LE:
5025 fputs (">=", file); break;
5026 case LEU:
5027 fputs (">>=", file); break;
5028 case LTU:
5029 fputs (">>", file); break;
5030 default:
5031 abort ();
5033 return;
5034 case 'B': /* Condition, (B)oth swapped and negate. */
5035 switch (GET_CODE (x))
5037 case EQ:
5038 fputs ("<>", file); break;
5039 case NE:
5040 fputs ("=", file); break;
5041 case GT:
5042 fputs (">=", file); break;
5043 case GE:
5044 fputs (">", file); break;
5045 case GEU:
5046 fputs (">>", file); break;
5047 case GTU:
5048 fputs (">>=", file); break;
5049 case LT:
5050 fputs ("<=", file); break;
5051 case LE:
5052 fputs ("<", file); break;
5053 case LEU:
5054 fputs ("<<", file); break;
5055 case LTU:
5056 fputs ("<<=", file); break;
5057 default:
5058 abort ();
5060 return;
5061 case 'k':
5062 if (GET_CODE (x) == CONST_INT)
5064 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5065 return;
5067 abort ();
5068 case 'Q':
5069 if (GET_CODE (x) == CONST_INT)
5071 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5072 return;
5074 abort ();
5075 case 'L':
5076 if (GET_CODE (x) == CONST_INT)
5078 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5079 return;
5081 abort ();
5082 case 'O':
5083 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
5085 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5086 return;
5088 abort ();
5089 case 'p':
5090 if (GET_CODE (x) == CONST_INT)
5092 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5093 return;
5095 abort ();
5096 case 'P':
5097 if (GET_CODE (x) == CONST_INT)
5099 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5100 return;
5102 abort ();
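        /* To illustrate the complement codes above with plain arithmetic:
           %k of 5 prints -6 (~5), %Q of 3 prints 61 (64 - 3), %L of 3
           prints 29 (32 - 3), %p of 3 prints 60 (63 - 3), and %P of 3
           prints 28 (31 - 3).  These typically feed PA shift and deposit
           instructions that count bit positions from the opposite end.  */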
5103 case 'I':
5104 if (GET_CODE (x) == CONST_INT)
5105 fputs ("i", file);
5106 return;
5107 case 'M':
5108 case 'F':
5109 switch (GET_CODE (XEXP (x, 0)))
5111 case PRE_DEC:
5112 case PRE_INC:
5113 if (ASSEMBLER_DIALECT == 0)
5114 fputs ("s,mb", file);
5115 else
5116 fputs (",mb", file);
5117 break;
5118 case POST_DEC:
5119 case POST_INC:
5120 if (ASSEMBLER_DIALECT == 0)
5121 fputs ("s,ma", file);
5122 else
5123 fputs (",ma", file);
5124 break;
5125 case PLUS:
5126 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5127 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5129 if (ASSEMBLER_DIALECT == 0)
5130 fputs ("x", file);
5132 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5133 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5135 if (ASSEMBLER_DIALECT == 0)
5136 fputs ("x,s", file);
5137 else
5138 fputs (",s", file);
5140 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5141 fputs ("s", file);
5142 break;
5143 default:
5144 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5145 fputs ("s", file);
5146 break;
5148 return;
5149 case 'G':
5150 output_global_address (file, x, 0);
5151 return;
5152 case 'H':
5153 output_global_address (file, x, 1);
5154 return;
5155 case 0: /* Don't do anything special */
5156 break;
5157 case 'Z':
5159 unsigned op[3];
5160 compute_zdepwi_operands (INTVAL (x), op);
5161 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5162 return;
5164 case 'z':
5166 unsigned op[3];
5167 compute_zdepdi_operands (INTVAL (x), op);
5168 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5169 return;
5171 case 'c':
5172 /* We can get here from a .vtable_inherit due to our
5173 CONSTANT_ADDRESS_P rejecting perfectly good constant
5174 addresses. */
5175 break;
5176 default:
5177 abort ();
5179 if (GET_CODE (x) == REG)
5181 fputs (reg_names [REGNO (x)], file);
5182 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5184 fputs ("R", file);
5185 return;
5187 if (FP_REG_P (x)
5188 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5189 && (REGNO (x) & 1) == 0)
5190 fputs ("L", file);
5192 else if (GET_CODE (x) == MEM)
5194 int size = GET_MODE_SIZE (GET_MODE (x));
5195 rtx base = NULL_RTX;
5196 switch (GET_CODE (XEXP (x, 0)))
5198 case PRE_DEC:
5199 case POST_DEC:
5200 base = XEXP (XEXP (x, 0), 0);
5201 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5202 break;
5203 case PRE_INC:
5204 case POST_INC:
5205 base = XEXP (XEXP (x, 0), 0);
5206 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5207 break;
5208 case PLUS:
5209 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5210 fprintf (file, "%s(%s)",
5211 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5212 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5213 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5214 fprintf (file, "%s(%s)",
5215 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5216 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5217 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5218 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5220 /* Because the REG_POINTER flag can get lost during reload,
5221 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5222 index and base registers in the combined move patterns. */
5223 rtx base = XEXP (XEXP (x, 0), 1);
5224 rtx index = XEXP (XEXP (x, 0), 0);
5226 fprintf (file, "%s(%s)",
5227 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5229 else
5230 output_address (XEXP (x, 0));
5231 break;
5232 default:
5233 output_address (XEXP (x, 0));
5234 break;
5237 else
5238 output_addr_const (file, x);
5241 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5243 void
5244 output_global_address (FILE *file, rtx x, int round_constant)
5247 /* Imagine (high (const (plus ...))). */
5248 if (GET_CODE (x) == HIGH)
5249 x = XEXP (x, 0);
5251 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5252 assemble_name (file, XSTR (x, 0));
5253 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5255 assemble_name (file, XSTR (x, 0));
5256 fputs ("-$global$", file);
5258 else if (GET_CODE (x) == CONST)
5260 const char *sep = "";
5261 int offset = 0; /* assembler wants -$global$ at end */
5262 rtx base = NULL_RTX;
5264 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5266 base = XEXP (XEXP (x, 0), 0);
5267 output_addr_const (file, base);
5269 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
5270 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5271 else abort ();
5273 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
5275 base = XEXP (XEXP (x, 0), 1);
5276 output_addr_const (file, base);
5278 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
5279 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5280 else abort ();
5282 /* How bogus. The compiler is apparently responsible for
5283 rounding the constant if it uses an LR field selector.
5285 The linker and/or assembler seem a better place since
5286 they have to do this kind of thing already.
5288 If we fail to do this, HP's optimizing linker may eliminate
5289 an addil, but not update the ldw/stw/ldo instruction that
5290 uses the result of the addil. */
5291 if (round_constant)
5292 offset = ((offset + 0x1000) & ~0x1fff);
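      /* E.g., an offset of 0x1234 rounds up to 0x2000, while 0x0fff
         rounds down to 0 -- i.e., round to the nearest 0x2000 (8k)
         boundary.  */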
5294 if (GET_CODE (XEXP (x, 0)) == PLUS)
5296 if (offset < 0)
5298 offset = -offset;
5299 sep = "-";
5301 else
5302 sep = "+";
5304 else if (GET_CODE (XEXP (x, 0)) == MINUS
5305 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5306 sep = "-";
5307 else abort ();
5309 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5310 fputs ("-$global$", file);
5311 if (offset)
5312 fprintf (file, "%s%d", sep, offset);
5314 else
5315 output_addr_const (file, x);
5318 /* Output boilerplate text to appear at the beginning of the file.
5319 There are several possible versions. */
5320 #define aputs(x) fputs(x, asm_out_file)
5321 static inline void
5322 pa_file_start_level (void)
5324 if (TARGET_64BIT)
5325 aputs ("\t.LEVEL 2.0w\n");
5326 else if (TARGET_PA_20)
5327 aputs ("\t.LEVEL 2.0\n");
5328 else if (TARGET_PA_11)
5329 aputs ("\t.LEVEL 1.1\n");
5330 else
5331 aputs ("\t.LEVEL 1.0\n");
5334 static inline void
5335 pa_file_start_space (int sortspace)
5337 aputs ("\t.SPACE $PRIVATE$");
5338 if (sortspace)
5339 aputs (",SORT=16");
5340 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5341 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5342 "\n\t.SPACE $TEXT$");
5343 if (sortspace)
5344 aputs (",SORT=8");
5345 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5346 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5349 static inline void
5350 pa_file_start_file (int want_version)
5352 if (write_symbols != NO_DEBUG)
5354 output_file_directive (asm_out_file, main_input_filename);
5355 if (want_version)
5356 aputs ("\t.version\t\"01.01\"\n");
5360 static inline void
5361 pa_file_start_mcount (const char *aswhat)
5363 if (profile_flag)
5364 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5367 static void
5368 pa_elf_file_start (void)
5370 pa_file_start_level ();
5371 pa_file_start_mcount ("ENTRY");
5372 pa_file_start_file (0);
5375 static void
5376 pa_som_file_start (void)
5378 pa_file_start_level ();
5379 pa_file_start_space (0);
5380 aputs ("\t.IMPORT $global$,DATA\n"
5381 "\t.IMPORT $$dyncall,MILLICODE\n");
5382 pa_file_start_mcount ("CODE");
5383 pa_file_start_file (0);
5386 static void
5387 pa_linux_file_start (void)
5389 pa_file_start_file (1);
5390 pa_file_start_level ();
5391 pa_file_start_mcount ("CODE");
5394 static void
5395 pa_hpux64_gas_file_start (void)
5397 pa_file_start_level ();
5398 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5399 if (profile_flag)
5400 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5401 #endif
5402 pa_file_start_file (1);
5405 static void
5406 pa_hpux64_hpas_file_start (void)
5408 pa_file_start_level ();
5409 pa_file_start_space (1);
5410 pa_file_start_mcount ("CODE");
5411 pa_file_start_file (0);
5413 #undef aputs
5415 static struct deferred_plabel *
5416 get_plabel (const char *fname)
5418 size_t i;
5420 /* See if we have already put this function on the list of deferred
5421      plabels.  This list is generally small, so a linear search is not
5422      too ugly.  If it proves too slow, replace it with something faster.  */
5423 for (i = 0; i < n_deferred_plabels; i++)
5424 if (strcmp (fname, deferred_plabels[i].name) == 0)
5425 break;
5427 /* If the deferred plabel list is empty, or this entry was not found
5428 on the list, create a new entry on the list. */
5429 if (deferred_plabels == NULL || i == n_deferred_plabels)
5431 const char *real_name;
5433 if (deferred_plabels == 0)
5434 deferred_plabels = (struct deferred_plabel *)
5435 ggc_alloc (sizeof (struct deferred_plabel));
5436 else
5437 deferred_plabels = (struct deferred_plabel *)
5438 ggc_realloc (deferred_plabels,
5439 ((n_deferred_plabels + 1)
5440 * sizeof (struct deferred_plabel)));
5442 i = n_deferred_plabels++;
5443 deferred_plabels[i].internal_label = gen_label_rtx ();
5444 deferred_plabels[i].name = ggc_strdup (fname);
5446       /* Gross.  We have just implicitly taken the address of this
5447          function; mark it as such.  */
5448 real_name = (*targetm.strip_name_encoding) (fname);
5449 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5452 return &deferred_plabels[i];
5455 static void
5456 output_deferred_plabels (void)
5458 size_t i;
5459   /* If we have deferred plabels, then we need to switch into the data
5460      section and align it to a 4-byte (8-byte for TARGET_64BIT) boundary
5461      before we output the deferred plabels.  */
5462 if (n_deferred_plabels)
5464 data_section ();
5465 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5468 /* Now output the deferred plabels. */
5469 for (i = 0; i < n_deferred_plabels; i++)
5471 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5472 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5473 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5474 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5478 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5479 /* Initialize optabs to point to HPUX long double emulation routines. */
5480 static void
5481 pa_hpux_init_libfuncs (void)
5483 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5484 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5485 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5486 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5487 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5488 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5489 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5490 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5491 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5493 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5494 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5495 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5496 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5497 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5498 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5499 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5501 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5502 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5503 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5504 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5506 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5507 ? "__U_Qfcnvfxt_quad_to_sgl"
5508 : "_U_Qfcnvfxt_quad_to_sgl");
5509 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5510 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5511 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5513 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5514 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
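  /* With these registered, e.g., a TFmode (long double) addition is
     expanded as a call to _U_Qfadd instead of inline FP code, and a
     double-to-long-double conversion becomes a call to
     _U_Qfcnvff_dbl_to_quad.  */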
5516 #endif
5518 /* HP's millicode routines mean something special to the assembler.
5519 Keep track of which ones we have used. */
5521 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5522 static void import_milli (enum millicodes);
5523 static char imported[(int) end1000];
5524 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5525 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5526 #define MILLI_START 10
5528 static void
5529 import_milli (enum millicodes code)
5531 char str[sizeof (import_string)];
5533 if (!imported[(int) code])
5535 imported[(int) code] = 1;
5536 strcpy (str, import_string);
5537 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5538 output_asm_insn (str, 0);
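/* E.g., import_milli (mulI) patches the template in place: the four
   dots at offset MILLI_START are overwritten so that str becomes
   ".IMPORT $$mulI,MILLICODE", emitted at most once per output file.  */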
5542 /* The register constraints have put the operands and return value in
5543 the proper registers. */
5545 const char *
5546 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5548 import_milli (mulI);
5549 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5552 /* Emit the rtl for doing a division by a constant. */
5554 /* Do magic division millicodes exist for this value? */
5555 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5556 1, 1};
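/* Reading the table: a 1 at index n means a magic millicode routine
   exists for division by n -- divisors 3, 5, 6, 7, 9, 10, 12, 14 and
   15.  Powers of two are absent (they are handled by shift patterns
   elsewhere), and 11 and 13 have no special routine, so those fall
   back to the generic $$divI/$$divU millicode calls.  */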
5558 /* We'll use an array to keep track of the magic millicodes and
5559 whether or not we've used them already. [n][0] is signed, [n][1] is
5560 unsigned. */
5562 static int div_milli[16][2];
5565 int div_operand (rtx op, enum machine_mode mode)
5567 return (mode == SImode
5568 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5569 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5570 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5574 int emit_hpdiv_const (rtx *operands, int unsignedp)
5576 if (GET_CODE (operands[2]) == CONST_INT
5577 && INTVAL (operands[2]) > 0
5578 && INTVAL (operands[2]) < 16
5579 && magic_milli[INTVAL (operands[2])])
5581 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5583 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5584 emit
5585 (gen_rtx_PARALLEL
5586 (VOIDmode,
5587 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5588 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5589 SImode,
5590 gen_rtx_REG (SImode, 26),
5591 operands[2])),
5592 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5593 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5594 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5595 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5596 gen_rtx_CLOBBER (VOIDmode, ret))));
5597 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5598 return 1;
5600 return 0;
5603 const char *
5604 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5606 int divisor;
5608 /* If the divisor is a constant, try to use one of the special
5609      opcodes.  */
5610 if (GET_CODE (operands[0]) == CONST_INT)
5612 static char buf[100];
5613 divisor = INTVAL (operands[0]);
5614 if (!div_milli[divisor][unsignedp])
5616 div_milli[divisor][unsignedp] = 1;
5617 if (unsignedp)
5618 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5619 else
5620 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5622 if (unsignedp)
5624 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5625 INTVAL (operands[0]));
5626 return output_millicode_call (insn,
5627 gen_rtx_SYMBOL_REF (SImode, buf));
5629 else
5631 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5632 INTVAL (operands[0]));
5633 return output_millicode_call (insn,
5634 gen_rtx_SYMBOL_REF (SImode, buf));
5637 /* Divisor isn't a special constant. */
5638 else
5640 if (unsignedp)
5642 import_milli (divU);
5643 return output_millicode_call (insn,
5644 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5646 else
5648 import_milli (divI);
5649 return output_millicode_call (insn,
5650 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5655 /* Output a $$rem millicode to do mod. */
5657 const char *
5658 output_mod_insn (int unsignedp, rtx insn)
5660 if (unsignedp)
5662 import_milli (remU);
5663 return output_millicode_call (insn,
5664 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5666 else
5668 import_milli (remI);
5669 return output_millicode_call (insn,
5670 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5674 void
5675 output_arg_descriptor (rtx call_insn)
5677 const char *arg_regs[4];
5678 enum machine_mode arg_mode;
5679 rtx link;
5680 int i, output_flag = 0;
5681 int regno;
5683 /* We neither need nor want argument location descriptors for the
5684 64bit runtime environment or the ELF32 environment. */
5685 if (TARGET_64BIT || TARGET_ELF32)
5686 return;
5688 for (i = 0; i < 4; i++)
5689 arg_regs[i] = 0;
5691 /* Specify explicitly that no argument relocations should take place
5692 if using the portable runtime calling conventions. */
5693 if (TARGET_PORTABLE_RUNTIME)
5695 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5696 asm_out_file);
5697 return;
5700 if (GET_CODE (call_insn) != CALL_INSN)
5701 abort ();
5702 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5704 rtx use = XEXP (link, 0);
5706 if (! (GET_CODE (use) == USE
5707 && GET_CODE (XEXP (use, 0)) == REG
5708 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5709 continue;
5711 arg_mode = GET_MODE (XEXP (use, 0));
5712 regno = REGNO (XEXP (use, 0));
5713 if (regno >= 23 && regno <= 26)
5715 arg_regs[26 - regno] = "GR";
5716 if (arg_mode == DImode)
5717 arg_regs[25 - regno] = "GR";
5719 else if (regno >= 32 && regno <= 39)
5721 if (arg_mode == SFmode)
5722 arg_regs[(regno - 32) / 2] = "FR";
5723 else
5725 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5726 arg_regs[(regno - 34) / 2] = "FR";
5727 arg_regs[(regno - 34) / 2 + 1] = "FU";
5728 #else
5729 arg_regs[(regno - 34) / 2] = "FU";
5730 arg_regs[(regno - 34) / 2 + 1] = "FR";
5731 #endif
5735 fputs ("\t.CALL ", asm_out_file);
5736 for (i = 0; i < 4; i++)
5738 if (arg_regs[i])
5740 if (output_flag++)
5741 fputc (',', asm_out_file);
5742 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5745 fputc ('\n', asm_out_file);
5748 /* Return the class of any secondary reload register that is needed to
5749 move IN into a register in class CLASS using mode MODE.
5751    Profiling has shown that this routine and its descendants account for
5752 a significant amount of compile time (~7%). So it has been
5753 optimized to reduce redundant computations and eliminate useless
5754 function calls.
5756 It might be worthwhile to try and make this a leaf function too. */
5758 enum reg_class
5759 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5761 int regno, is_symbolic;
5763 /* Trying to load a constant into a FP register during PIC code
5764 generation will require %r1 as a scratch register. */
5765 if (flag_pic
5766 && GET_MODE_CLASS (mode) == MODE_INT
5767 && FP_REG_CLASS_P (class)
5768 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5769 return R1_REGS;
5771 /* Profiling showed the PA port spends about 1.3% of its compilation
5772 time in true_regnum from calls inside secondary_reload_class. */
5774 if (GET_CODE (in) == REG)
5776 regno = REGNO (in);
5777 if (regno >= FIRST_PSEUDO_REGISTER)
5778 regno = true_regnum (in);
5780 else if (GET_CODE (in) == SUBREG)
5781 regno = true_regnum (in);
5782 else
5783 regno = -1;
5785 /* If we have something like (mem (mem (...)), we can safely assume the
5786 inner MEM will end up in a general register after reloading, so there's
5787 no need for a secondary reload. */
5788 if (GET_CODE (in) == MEM
5789 && GET_CODE (XEXP (in, 0)) == MEM)
5790 return NO_REGS;
5792 /* Handle out of range displacement for integer mode loads/stores of
5793 FP registers. */
5794 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5795 && GET_MODE_CLASS (mode) == MODE_INT
5796 && FP_REG_CLASS_P (class))
5797 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5798 return GENERAL_REGS;
5800 /* A SAR<->FP register copy requires a secondary register (GPR) as
5801 well as secondary memory. */
5802 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5803 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5804 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5805 return GENERAL_REGS;
5807 if (GET_CODE (in) == HIGH)
5808 in = XEXP (in, 0);
5810 /* Profiling has shown that GCC spends about 2.6% of its compilation
5811 time in symbolic_operand from calls inside secondary_reload_class.
5813 We use an inline copy and only compute its return value once to avoid
5814 useless work. */
5815 switch (GET_CODE (in))
5817 rtx tmp;
5819 case SYMBOL_REF:
5820 case LABEL_REF:
5821 is_symbolic = 1;
5822 break;
5823 case CONST:
5824 tmp = XEXP (in, 0);
5825 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5826 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5827 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5828 break;
5830 default:
5831 is_symbolic = 0;
5832 break;
5835 if (!flag_pic
5836 && is_symbolic
5837 && read_only_operand (in, VOIDmode))
5838 return NO_REGS;
5840 if (class != R1_REGS && is_symbolic)
5841 return R1_REGS;
5843 return NO_REGS;
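/* A few illustrative cases of the rules above (a sketch, not an
   exhaustive enumeration):

     constant into an FP register class under -fpic   -> R1_REGS
     integer-mode load/store of a pseudo, FP class    -> GENERAL_REGS
     SAR (SHIFT_REGS) <-> FP register copy            -> GENERAL_REGS
     symbolic address, any class but R1_REGS          -> R1_REGS
       (unless it is a non-PIC read-only operand      -> NO_REGS)
     anything else                                    -> NO_REGS  */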
5846 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5847 by invisible reference. As a GCC extension, we also pass anything
5848 with a zero or variable size by reference.
5850 The 64-bit runtime does not describe passing any types by invisible
5851 reference. The internals of GCC can't currently handle passing
5852 empty structures, or zero or variable length arrays, when they are
5853 not passed entirely on the stack or by reference. Thus, as a GCC
5854 extension, we pass these types by reference. The HP compiler doesn't
5855 support these types, so hopefully there shouldn't be any compatibility
5856 issues. This may have to be revisited when HP releases a C99 compiler
5857 or updates the ABI. */
5859 static bool
5860 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5861 enum machine_mode mode, tree type,
5862 bool named ATTRIBUTE_UNUSED)
5864 HOST_WIDE_INT size;
5866 if (type)
5867 size = int_size_in_bytes (type);
5868 else
5869 size = GET_MODE_SIZE (mode);
5871 if (TARGET_64BIT)
5872 return size <= 0;
5873 else
5874 return size <= 0 || size > 8;
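/* For illustration (a sketch of the rule above): on the 32-bit runtime,
   a 12-byte struct (size > 8) and a zero-sized struct (size <= 0) are
   both passed by reference, while an 8-byte double is passed by value;
   on the 64-bit runtime, only the zero or variable size cases are
   passed by reference.  */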
5877 enum direction
5878 function_arg_padding (enum machine_mode mode, tree type)
5880 if (mode == BLKmode
5881 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5883 /* Return none if justification is not required. */
5884 if (type
5885 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5886 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5887 return none;
5889 /* The directions set here are ignored when a BLKmode argument larger
5890 than a word is placed in a register. Different code is used for
5891 the stack and registers. This makes it difficult to have a
5892 consistent data representation for both the stack and registers.
5893 For both runtimes, the justification and padding for arguments on
5894 the stack and in registers should be identical. */
5895 if (TARGET_64BIT)
5896 /* The 64-bit runtime specifies left justification for aggregates. */
5897 return upward;
5898 else
5899 /* The 32-bit runtime architecture specifies right justification.
5900 When the argument is passed on the stack, the argument is padded
5901 with garbage on the left. The HP compiler pads with zeros. */
5902 return downward;
5905 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5906 return downward;
5907 else
5908 return none;
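/* For example (illustrative): a 3-byte BLKmode struct is padded
   downward (right justified in its word) on the 32-bit runtime and
   upward (left justified) on the 64-bit runtime, while a 4-byte struct
   on the 32-bit runtime needs no justification at all since it exactly
   fills its PARM_BOUNDARY slot.  */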
5912 /* Do what is necessary for `va_start'. We look at the current function
5913 to determine if stdargs or varargs is used and fill in an initial
5914 va_list. A pointer to this constructor is returned. */
5916 static rtx
5917 hppa_builtin_saveregs (void)
5919 rtx offset, dest;
5920 tree fntype = TREE_TYPE (current_function_decl);
5921 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5922 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5923 != void_type_node)))
5924 ? UNITS_PER_WORD : 0);
5926 if (argadj)
5927 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5928 else
5929 offset = current_function_arg_offset_rtx;
5931 if (TARGET_64BIT)
5933 int i, off;
5935 /* Adjust for varargs/stdarg differences. */
5936 if (argadj)
5937 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5938 else
5939 offset = current_function_arg_offset_rtx;
5941 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5942 from the incoming arg pointer and growing to larger addresses. */
5943 for (i = 26, off = -64; i >= 19; i--, off += 8)
5944 emit_move_insn (gen_rtx_MEM (word_mode,
5945 plus_constant (arg_pointer_rtx, off)),
5946 gen_rtx_REG (word_mode, i));
5948 /* The incoming args pointer points just beyond the flushback area;
5949 normally this is not a serious concern. However, when we are doing
5950 varargs/stdargs we want to make the arg pointer point to the start
5951 of the incoming argument area. */
5952 emit_move_insn (virtual_incoming_args_rtx,
5953 plus_constant (arg_pointer_rtx, -64));
5955 /* Now return a pointer to the first anonymous argument. */
5956 return copy_to_reg (expand_binop (Pmode, add_optab,
5957 virtual_incoming_args_rtx,
5958 offset, 0, 0, OPTAB_LIB_WIDEN));
5961 /* Store general registers on the stack. */
5962 dest = gen_rtx_MEM (BLKmode,
5963 plus_constant (current_function_internal_arg_pointer,
5964 -16));
5965 set_mem_alias_set (dest, get_varargs_alias_set ());
5966 set_mem_align (dest, BITS_PER_WORD);
5967 move_block_from_reg (23, dest, 4);
5969 /* move_block_from_reg will emit code to store the argument registers
5970 individually as scalar stores.
5972 However, other insns may later load from the same addresses for
5973 a structure load (passing a struct to a varargs routine).
5975 The alias code assumes that such aliasing can never happen, so we
5976 have to keep memory referencing insns from moving up beyond the
5977 last argument register store. So we emit a blockage insn here. */
5978 emit_insn (gen_blockage ());
5980 return copy_to_reg (expand_binop (Pmode, add_optab,
5981 current_function_internal_arg_pointer,
5982 offset, 0, 0, OPTAB_LIB_WIDEN));
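/* To make the 32-bit layout above concrete (a sketch): the four
   argument registers are dumped into their home stack slots, %r23 at
   AP-16, %r24 at AP-12, %r25 at AP-8 and %r26 at AP-4, where AP denotes
   the internal argument pointer; a varargs routine can then walk the
   anonymous arguments as ordinary stack words.  */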
5985 void
5986 hppa_va_start (tree valist, rtx nextarg)
5988 nextarg = expand_builtin_saveregs ();
5989 std_expand_builtin_va_start (valist, nextarg);
5992 static tree
5993 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5995 if (TARGET_64BIT)
5997 /* Args grow upward. We can use the generic routines. */
5998 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6000 else /* !TARGET_64BIT */
6002 tree ptr = build_pointer_type (type);
6003 tree valist_type;
6004 tree t, u;
6005 unsigned int size, ofs;
6006 bool indirect;
6008 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6009 if (indirect)
6011 type = ptr;
6012 ptr = build_pointer_type (type);
6014 size = int_size_in_bytes (type);
6015 valist_type = TREE_TYPE (valist);
6017 /* Args grow down. Not handled by generic routines. */
6019 u = fold_convert (valist_type, size_in_bytes (type));
6020 t = build (MINUS_EXPR, valist_type, valist, u);
6022 /* Copied from va-pa.h, but we probably don't need to align to
6023 word size, since we generate and preserve that invariant. */
6024 u = build_int_2 ((size > 4 ? -8 : -4), -1);
6025 u = fold_convert (valist_type, u);
6026 t = build (BIT_AND_EXPR, valist_type, t, u);
6028 t = build (MODIFY_EXPR, valist_type, valist, t);
6030 ofs = (8 - size) % 4;
6031 if (ofs != 0)
6033 u = fold_convert (valist_type, size_int (ofs));
6034 t = build (PLUS_EXPR, valist_type, t, u);
6037 t = fold_convert (ptr, t);
6038 t = build_fold_indirect_ref (t);
6040 if (indirect)
6041 t = build_fold_indirect_ref (t);
6043 return t;
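/* Worked example for the 32-bit path above (illustrative): fetching a
   4-byte int computes valist = (valist - 4) & -4 and dereferences the
   result, while an 8-byte double computes valist = (valist - 8) & -8;
   the rounding offset (8 - size) % 4 is zero in both cases and only
   matters for odd-sized types.  */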
6047 /* This routine handles all the normal conditional branch sequences we
6048 might need to generate. It handles compare immediate vs compare
6049 register, nullification of delay slots, varying length branches,
6050 negated branches, and all combinations of the above. It returns the
6051 appropriate output to emit the branch corresponding to the given
6052 parameters. */
6054 const char *
6055 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
6057 static char buf[100];
6058 int useskip = 0;
6059 rtx xoperands[5];
6061 /* A conditional branch to the following instruction (e.g., the delay slot)
6062 is asking for a disaster. This can happen when not optimizing and
6063 when jump optimization fails.
6065 While it is usually safe to emit nothing, this can fail if the
6066 preceding instruction is a nullified branch with an empty delay
6067 slot and the same branch target as this branch. We could check
6068 for this but jump optimization should eliminate nop jumps. It
6069 is always safe to emit a nop. */
6070 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6071 return "nop";
6073 /* The doubleword form of the cmpib instruction doesn't have the LEU
6074 and GTU conditions while the cmpb instruction does. Since we accept
6075 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6076 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6077 operands[2] = gen_rtx_REG (DImode, 0);
6079 /* If this is a long branch with its delay slot unfilled, set `nullify'
6080 as it can nullify the delay slot and save a nop. */
6081 if (length == 8 && dbr_sequence_length () == 0)
6082 nullify = 1;
6084 /* If this is a short forward conditional branch which did not get
6085 its delay slot filled, the delay slot can still be nullified. */
6086 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6087 nullify = forward_branch_p (insn);
6089 /* A forward branch over a single nullified insn can be done with a
6090 comclr instruction. This avoids a single cycle penalty due to
6091 mis-predicted branch if we fall through (branch not taken). */
6092 if (length == 4
6093 && next_real_insn (insn) != 0
6094 && get_attr_length (next_real_insn (insn)) == 4
6095 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6096 && nullify)
6097 useskip = 1;
6099 switch (length)
6101 /* All short conditional branches except backwards with an unfilled
6102 delay slot. */
6103 case 4:
6104 if (useskip)
6105 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6106 else
6107 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6108 if (GET_MODE (operands[1]) == DImode)
6109 strcat (buf, "*");
6110 if (negated)
6111 strcat (buf, "%B3");
6112 else
6113 strcat (buf, "%S3");
6114 if (useskip)
6115 strcat (buf, " %2,%r1,%%r0");
6116 else if (nullify)
6117 strcat (buf, ",n %2,%r1,%0");
6118 else
6119 strcat (buf, " %2,%r1,%0");
6120 break;
6122 /* All long conditionals. Note a short backward branch with an
6123 unfilled delay slot is treated just like a long backward branch
6124 with an unfilled delay slot. */
6125 case 8:
6126 /* Handle weird backwards branch with a filled delay slot
6127 which is nullified. */
6128 if (dbr_sequence_length () != 0
6129 && ! forward_branch_p (insn)
6130 && nullify)
6132 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6133 if (GET_MODE (operands[1]) == DImode)
6134 strcat (buf, "*");
6135 if (negated)
6136 strcat (buf, "%S3");
6137 else
6138 strcat (buf, "%B3");
6139 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6141 /* Handle short backwards branch with an unfilled delay slot.
6142 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6143 taken and untaken branches. */
6144 else if (dbr_sequence_length () == 0
6145 && ! forward_branch_p (insn)
6146 && INSN_ADDRESSES_SET_P ()
6147 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6148 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6150 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6151 if (GET_MODE (operands[1]) == DImode)
6152 strcat (buf, "*");
6153 if (negated)
6154 strcat (buf, "%B3 %2,%r1,%0%#");
6155 else
6156 strcat (buf, "%S3 %2,%r1,%0%#");
6158 else
6160 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6161 if (GET_MODE (operands[1]) == DImode)
6162 strcat (buf, "*");
6163 if (negated)
6164 strcat (buf, "%S3");
6165 else
6166 strcat (buf, "%B3");
6167 if (nullify)
6168 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6169 else
6170 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6172 break;
6174 case 20:
6175 case 28:
6176 xoperands[0] = operands[0];
6177 xoperands[1] = operands[1];
6178 xoperands[2] = operands[2];
6179 xoperands[3] = operands[3];
6181 /* The reversed conditional branch must branch over one additional
6182 instruction if the delay slot is filled. If the delay slot
6183 is empty, the instruction after the reversed conditional branch
6184 must be nullified. */
6185 nullify = dbr_sequence_length () == 0;
6186 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6188 /* Create a reversed conditional branch which branches around
6189 the following insns. */
6190 if (GET_MODE (operands[1]) != DImode)
6192 if (nullify)
6194 if (negated)
6195 strcpy (buf,
6196 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6197 else
6198 strcpy (buf,
6199 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6201 else
6203 if (negated)
6204 strcpy (buf,
6205 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6206 else
6207 strcpy (buf,
6208 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6211 else
6213 if (nullify)
6215 if (negated)
6216 strcpy (buf,
6217 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6218 else
6219 strcpy (buf,
6220 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6222 else
6224 if (negated)
6225 strcpy (buf,
6226 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6227 else
6228 strcpy (buf,
6229 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6233 output_asm_insn (buf, xoperands);
6234 return output_lbranch (operands[0], insn);
6236 default:
6237 abort ();
6239 return buf;
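/* Illustrative sketch of the simplest template above: a nullified short
   forward branch comparing %r26 with the immediate 5 for equality might
   be output as

	cmpib,=,n 5,%r26,L$0002

   on PA 2.0 (comib on PA 1.x); the exact condition and operands come
   from the pattern being matched.  */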
6242 /* This routine handles long unconditional branches that exceed the
6243 maximum range of a simple branch instruction. */
6245 const char *
6246 output_lbranch (rtx dest, rtx insn)
6248 rtx xoperands[2];
6250 xoperands[0] = dest;
6252 /* First, free up the delay slot. */
6253 if (dbr_sequence_length () != 0)
6255 /* We can't handle a jump in the delay slot. */
6256 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
6257 abort ();
6259 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6260 optimize, 0, 0, NULL);
6262 /* Now delete the delay insn. */
6263 PUT_CODE (NEXT_INSN (insn), NOTE);
6264 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6265 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6268 /* Output an insn to save %r1. The runtime documentation doesn't
6269 specify whether the "Clean Up" slot in the caller's frame can
6270 be clobbered by the callee. It isn't copied by HP's builtin
6271 alloca, so this suggests that it can be clobbered if necessary.
6272 The "Static Link" location is copied by HP builtin alloca, so
6273 we avoid using it. Using the cleanup slot might be a problem
6274 if we have to interoperate with languages that pass cleanup
6275 information. However, it should be possible to handle these
6276 situations with GCC's asm feature.
6278 The "Current RP" slot is reserved for the called procedure, so
6279 we try to use it when we don't have a frame of our own. It's
6280 rather unlikely that we won't have a frame when we need to emit
6281 a very long branch.
6283 Really, the way to go long term is a register scavenger; go to
6284 the target of the jump and find a register which we can use
6285 as a scratch to hold the value in %r1. Then, we wouldn't have
6286 to free up the delay slot or clobber a slot that may be needed
6287 for other purposes. */
6288 if (TARGET_64BIT)
6290 if (actual_fsize == 0 && !regs_ever_live[2])
6291 /* Use the return pointer slot in the frame marker. */
6292 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6293 else
6294 /* Use the slot at -40 in the frame marker since HP builtin
6295 alloca doesn't copy it. */
6296 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6298 else
6300 if (actual_fsize == 0 && !regs_ever_live[2])
6301 /* Use the return pointer slot in the frame marker. */
6302 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6303 else
6304 /* Use the "Clean Up" slot in the frame marker. In GCC,
6305 the only other use of this location is for copying a
6306 floating point double argument from a floating-point
6307 register to two general registers. The copy is done
6308 as an "atomic" operation when outputting a call, so it
6309 won't interfere with our using the location here. */
6310 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6313 if (TARGET_PORTABLE_RUNTIME)
6315 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6316 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6317 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6319 else if (flag_pic)
6321 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6322 if (TARGET_SOM || !TARGET_GAS)
6324 xoperands[1] = gen_label_rtx ();
6325 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6326 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6327 CODE_LABEL_NUMBER (xoperands[1]));
6328 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6330 else
6332 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6333 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6335 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6337 else
6338 /* Now output a very long branch to the original target. */
6339 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6341 /* Now restore the value of %r1 in the delay slot. */
6342 if (TARGET_64BIT)
6344 if (actual_fsize == 0 && !regs_ever_live[2])
6345 return "ldd -16(%%r30),%%r1";
6346 else
6347 return "ldd -40(%%r30),%%r1";
6349 else
6351 if (actual_fsize == 0 && !regs_ever_live[2])
6352 return "ldw -20(%%r30),%%r1";
6353 else
6354 return "ldw -12(%%r30),%%r1";
6358 /* This routine handles all the branch-on-bit conditional branch sequences we
6359 might need to generate. It handles nullification of delay slots,
6360 varying length branches, negated branches and all combinations of the
6361 above. It returns the appropriate output template to emit the branch. */
6363 const char *
6364 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6365 int negated, rtx insn, int which)
6367 static char buf[100];
6368 int useskip = 0;
6370 /* A conditional branch to the following instruction (e.g., the delay slot) is
6371 asking for a disaster. I do not think this can happen as this pattern
6372 is only used when optimizing; jump optimization should eliminate the
6373 jump. But be prepared just in case. */
6375 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6376 return "nop";
6378 /* If this is a long branch with its delay slot unfilled, set `nullify'
6379 as it can nullify the delay slot and save a nop. */
6380 if (length == 8 && dbr_sequence_length () == 0)
6381 nullify = 1;
6383 /* If this is a short forward conditional branch which did not get
6384 its delay slot filled, the delay slot can still be nullified. */
6385 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6386 nullify = forward_branch_p (insn);
6388 /* A forward branch over a single nullified insn can be done with an
6389 extrs instruction. This avoids a single cycle penalty due to
6390 mis-predicted branch if we fall through (branch not taken). */
6392 if (length == 4
6393 && next_real_insn (insn) != 0
6394 && get_attr_length (next_real_insn (insn)) == 4
6395 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6396 && nullify)
6397 useskip = 1;
6399 switch (length)
6402 /* All short conditional branches except backwards with an unfilled
6403 delay slot. */
6404 case 4:
6405 if (useskip)
6406 strcpy (buf, "{extrs,|extrw,s,}");
6407 else
6408 strcpy (buf, "bb,");
6409 if (useskip && GET_MODE (operands[0]) == DImode)
6410 strcpy (buf, "extrd,s,*");
6411 else if (GET_MODE (operands[0]) == DImode)
6412 strcpy (buf, "bb,*");
6413 if ((which == 0 && negated)
6414 || (which == 1 && ! negated))
6415 strcat (buf, ">=");
6416 else
6417 strcat (buf, "<");
6418 if (useskip)
6419 strcat (buf, " %0,%1,1,%%r0");
6420 else if (nullify && negated)
6421 strcat (buf, ",n %0,%1,%3");
6422 else if (nullify && ! negated)
6423 strcat (buf, ",n %0,%1,%2");
6424 else if (! nullify && negated)
6425 strcat (buf, "%0,%1,%3");
6426 else if (! nullify && ! negated)
6427 strcat (buf, " %0,%1,%2");
6428 break;
6430 /* All long conditionals. Note a short backward branch with an
6431 unfilled delay slot is treated just like a long backward branch
6432 with an unfilled delay slot. */
6433 case 8:
6434 /* Handle weird backwards branch with a filled delay slot
6435 which is nullified. */
6436 if (dbr_sequence_length () != 0
6437 && ! forward_branch_p (insn)
6438 && nullify)
6440 strcpy (buf, "bb,");
6441 if (GET_MODE (operands[0]) == DImode)
6442 strcat (buf, "*");
6443 if ((which == 0 && negated)
6444 || (which == 1 && ! negated))
6445 strcat (buf, "<");
6446 else
6447 strcat (buf, ">=");
6448 if (negated)
6449 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6450 else
6451 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6453 /* Handle short backwards branch with an unfilled delay slot.
6454 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6455 taken and untaken branches. */
6456 else if (dbr_sequence_length () == 0
6457 && ! forward_branch_p (insn)
6458 && INSN_ADDRESSES_SET_P ()
6459 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6460 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6462 strcpy (buf, "bb,");
6463 if (GET_MODE (operands[0]) == DImode)
6464 strcat (buf, "*");
6465 if ((which == 0 && negated)
6466 || (which == 1 && ! negated))
6467 strcat (buf, ">=");
6468 else
6469 strcat (buf, "<");
6470 if (negated)
6471 strcat (buf, " %0,%1,%3%#");
6472 else
6473 strcat (buf, " %0,%1,%2%#");
6475 else
6477 strcpy (buf, "{extrs,|extrw,s,}");
6478 if (GET_MODE (operands[0]) == DImode)
6479 strcpy (buf, "extrd,s,*");
6480 if ((which == 0 && negated)
6481 || (which == 1 && ! negated))
6482 strcat (buf, "<");
6483 else
6484 strcat (buf, ">=");
6485 if (nullify && negated)
6486 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6487 else if (nullify && ! negated)
6488 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6489 else if (negated)
6490 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6491 else
6492 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6494 break;
6496 default:
6497 abort ();
6499 return buf;
6502 /* This routine handles all the branch-on-variable-bit conditional branch
6503 sequences we might need to generate. It handles nullification of delay
6504 slots, varying length branches, negated branches and all combinations
6505 of the above. It returns the appropriate output template to emit the
6506 branch. */
6508 const char *
6509 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6510 int negated, rtx insn, int which)
6512 static char buf[100];
6513 int useskip = 0;
6515 /* A conditional branch to the following instruction (e.g., the delay slot) is
6516 asking for a disaster. I do not think this can happen as this pattern
6517 is only used when optimizing; jump optimization should eliminate the
6518 jump. But be prepared just in case. */
6520 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6521 return "nop";
6523 /* If this is a long branch with its delay slot unfilled, set `nullify'
6524 as it can nullify the delay slot and save a nop. */
6525 if (length == 8 && dbr_sequence_length () == 0)
6526 nullify = 1;
6528 /* If this is a short forward conditional branch which did not get
6529 its delay slot filled, the delay slot can still be nullified. */
6530 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6531 nullify = forward_branch_p (insn);
6533 /* A forward branch over a single nullified insn can be done with an
6534 extrs instruction. This avoids a single cycle penalty due to
6535 mis-predicted branch if we fall through (branch not taken). */
6537 if (length == 4
6538 && next_real_insn (insn) != 0
6539 && get_attr_length (next_real_insn (insn)) == 4
6540 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6541 && nullify)
6542 useskip = 1;
6544 switch (length)
6547 /* All short conditional branches except backwards with an unfilled
6548 delay slot. */
6549 case 4:
6550 if (useskip)
6551 strcpy (buf, "{vextrs,|extrw,s,}");
6552 else
6553 strcpy (buf, "{bvb,|bb,}");
6554 if (useskip && GET_MODE (operands[0]) == DImode)
6555 strcpy (buf, "extrd,s,*");
6556 else if (GET_MODE (operands[0]) == DImode)
6557 strcpy (buf, "bb,*");
6558 if ((which == 0 && negated)
6559 || (which == 1 && ! negated))
6560 strcat (buf, ">=");
6561 else
6562 strcat (buf, "<");
6563 if (useskip)
6564 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6565 else if (nullify && negated)
6566 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6567 else if (nullify && ! negated)
6568 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6569 else if (! nullify && negated)
6570 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6571 else if (! nullify && ! negated)
6572 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6573 break;
6575 /* All long conditionals. Note a short backward branch with an
6576 unfilled delay slot is treated just like a long backward branch
6577 with an unfilled delay slot. */
6578 case 8:
6579 /* Handle weird backwards branch with a filled delay slot
6580 which is nullified. */
6581 if (dbr_sequence_length () != 0
6582 && ! forward_branch_p (insn)
6583 && nullify)
6585 strcpy (buf, "{bvb,|bb,}");
6586 if (GET_MODE (operands[0]) == DImode)
6587 strcat (buf, "*");
6588 if ((which == 0 && negated)
6589 || (which == 1 && ! negated))
6590 strcat (buf, "<");
6591 else
6592 strcat (buf, ">=");
6593 if (negated)
6594 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6595 else
6596 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6598 /* Handle short backwards branch with an unfilled delay slot.
6599 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6600 taken and untaken branches. */
6601 else if (dbr_sequence_length () == 0
6602 && ! forward_branch_p (insn)
6603 && INSN_ADDRESSES_SET_P ()
6604 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6605 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6607 strcpy (buf, "{bvb,|bb,}");
6608 if (GET_MODE (operands[0]) == DImode)
6609 strcat (buf, "*");
6610 if ((which == 0 && negated)
6611 || (which == 1 && ! negated))
6612 strcat (buf, ">=");
6613 else
6614 strcat (buf, "<");
6615 if (negated)
6616 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6617 else
6618 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6620 else
6622 strcpy (buf, "{vextrs,|extrw,s,}");
6623 if (GET_MODE (operands[0]) == DImode)
6624 strcpy (buf, "extrd,s,*");
6625 if ((which == 0 && negated)
6626 || (which == 1 && ! negated))
6627 strcat (buf, "<");
6628 else
6629 strcat (buf, ">=");
6630 if (nullify && negated)
6631 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6632 else if (nullify && ! negated)
6633 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6634 else if (negated)
6635 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6636 else
6637 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6639 break;
6641 default:
6642 abort ();
6644 return buf;
6647 /* Return the output template for emitting a dbra type insn.
6649 Note it may perform some output operations on its own before
6650 returning the final output string. */
6651 const char *
6652 output_dbra (rtx *operands, rtx insn, int which_alternative)
6655 /* A conditional branch to the following instruction (e.g., the delay slot) is
6656 asking for a disaster. Be prepared! */
6658 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6660 if (which_alternative == 0)
6661 return "ldo %1(%0),%0";
6662 else if (which_alternative == 1)
6664 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6665 output_asm_insn ("ldw -16(%%r30),%4", operands);
6666 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6667 return "{fldws|fldw} -16(%%r30),%0";
6669 else
6671 output_asm_insn ("ldw %0,%4", operands);
6672 return "ldo %1(%4),%4\n\tstw %4,%0";
6676 if (which_alternative == 0)
6678 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6679 int length = get_attr_length (insn);
6681 /* If this is a long branch with its delay slot unfilled, set `nullify'
6682 as it can nullify the delay slot and save a nop. */
6683 if (length == 8 && dbr_sequence_length () == 0)
6684 nullify = 1;
6686 /* If this is a short forward conditional branch which did not get
6687 its delay slot filled, the delay slot can still be nullified. */
6688 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6689 nullify = forward_branch_p (insn);
6691 /* Handle short versions first. */
6692 if (length == 4 && nullify)
6693 return "addib,%C2,n %1,%0,%3";
6694 else if (length == 4 && ! nullify)
6695 return "addib,%C2 %1,%0,%3";
6696 else if (length == 8)
6698 /* Handle weird backwards branch with a filled delay slot
6699 which is nullified. */
6700 if (dbr_sequence_length () != 0
6701 && ! forward_branch_p (insn)
6702 && nullify)
6703 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6704 /* Handle short backwards branch with an unfilled delay slot.
6705 Using an addb;nop rather than addi;bl saves 1 cycle for both
6706 taken and untaken branches. */
6707 else if (dbr_sequence_length () == 0
6708 && ! forward_branch_p (insn)
6709 && INSN_ADDRESSES_SET_P ()
6710 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6711 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6712 return "addib,%C2 %1,%0,%3%#";
6714 /* Handle normal cases. */
6715 if (nullify)
6716 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6717 else
6718 return "addi,%N2 %1,%0,%0\n\tb %3";
6720 else
6721 abort ();
6723 /* Deal with gross reload from FP register case. */
6724 else if (which_alternative == 1)
6726 /* Move loop counter from FP register to MEM then into a GR,
6727 increment the GR, store the GR into MEM, and finally reload
6728 the FP register from MEM from within the branch's delay slot. */
6729 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6730 operands);
6731 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6732 if (get_attr_length (insn) == 24)
6733 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6734 else
6735 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6737 /* Deal with gross reload from memory case. */
6738 else
6740 /* Reload loop counter from memory, the store back to memory
6741 happens in the branch's delay slot. */
6742 output_asm_insn ("ldw %0,%4", operands);
6743 if (get_attr_length (insn) == 12)
6744 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6745 else
6746 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6750 /* Return the output template for emitting a movb type insn.
6752 Note it may perform some output operations on its own before
6753 returning the final output string. */
6754 const char *
6755 output_movb (rtx *operands, rtx insn, int which_alternative,
6756 int reverse_comparison)
6759 /* A conditional branch to the following instruction (e.g., the delay slot) is
6760 asking for a disaster. Be prepared! */
6762 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6764 if (which_alternative == 0)
6765 return "copy %1,%0";
6766 else if (which_alternative == 1)
6768 output_asm_insn ("stw %1,-16(%%r30)", operands);
6769 return "{fldws|fldw} -16(%%r30),%0";
6771 else if (which_alternative == 2)
6772 return "stw %1,%0";
6773 else
6774 return "mtsar %r1";
6777 /* Support the second variant. */
6778 if (reverse_comparison)
6779 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6781 if (which_alternative == 0)
6783 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6784 int length = get_attr_length (insn);
6786 /* If this is a long branch with its delay slot unfilled, set `nullify'
6787 as it can nullify the delay slot and save a nop. */
6788 if (length == 8 && dbr_sequence_length () == 0)
6789 nullify = 1;
6791 /* If this is a short forward conditional branch which did not get
6792 its delay slot filled, the delay slot can still be nullified. */
6793 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6794 nullify = forward_branch_p (insn);
6796 /* Handle short versions first. */
6797 if (length == 4 && nullify)
6798 return "movb,%C2,n %1,%0,%3";
6799 else if (length == 4 && ! nullify)
6800 return "movb,%C2 %1,%0,%3";
6801 else if (length == 8)
6803 /* Handle weird backwards branch with a filled delay slot
6804 which is nullified. */
6805 if (dbr_sequence_length () != 0
6806 && ! forward_branch_p (insn)
6807 && nullify)
6808 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6810 /* Handle short backwards branch with an unfilled delay slot.
6811 Using a movb;nop rather than or;bl saves 1 cycle for both
6812 taken and untaken branches. */
6813 else if (dbr_sequence_length () == 0
6814 && ! forward_branch_p (insn)
6815 && INSN_ADDRESSES_SET_P ()
6816 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6817 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6818 return "movb,%C2 %1,%0,%3%#";
6819 /* Handle normal cases. */
6820 if (nullify)
6821 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6822 else
6823 return "or,%N2 %1,%%r0,%0\n\tb %3";
6825 else
6826 abort ();
6828 /* Deal with gross reload from FP register case. */
6829 else if (which_alternative == 1)
6831 /* Move loop counter from FP register to MEM then into a GR,
6832 increment the GR, store the GR into MEM, and finally reload
6833 the FP register from MEM from within the branch's delay slot. */
6834 output_asm_insn ("stw %1,-16(%%r30)", operands);
6835 if (get_attr_length (insn) == 12)
6836 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6837 else
6838 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6840 /* Deal with gross reload from memory case. */
6841 else if (which_alternative == 2)
6843 /* Reload loop counter from memory, the store back to memory
6844 happens in the branch's delay slot. */
6845 if (get_attr_length (insn) == 8)
6846 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6847 else
6848 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6850 /* Handle SAR as a destination. */
6851 else
6853 if (get_attr_length (insn) == 8)
6854 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6855 else
6856 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6860 /* Copy any FP arguments in INSN into integer registers. */
6861 static void
6862 copy_fp_args (rtx insn)
6864 rtx link;
6865 rtx xoperands[2];
6867 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6869 int arg_mode, regno;
6870 rtx use = XEXP (link, 0);
6872 if (! (GET_CODE (use) == USE
6873 && GET_CODE (XEXP (use, 0)) == REG
6874 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6875 continue;
6877 arg_mode = GET_MODE (XEXP (use, 0));
6878 regno = REGNO (XEXP (use, 0));
6880 /* Is it a floating point register? */
6881 if (regno >= 32 && regno <= 39)
6883 /* Copy the FP register into an integer register via memory. */
6884 if (arg_mode == SFmode)
6886 xoperands[0] = XEXP (use, 0);
6887 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6888 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6889 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6891 else
6893 xoperands[0] = XEXP (use, 0);
6894 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6895 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6896 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6897 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6903 /* Compute length of the FP argument copy sequence for INSN. */
6904 static int
6905 length_fp_args (rtx insn)
6907 int length = 0;
6908 rtx link;
6910 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6912 int arg_mode, regno;
6913 rtx use = XEXP (link, 0);
6915 if (! (GET_CODE (use) == USE
6916 && GET_CODE (XEXP (use, 0)) == REG
6917 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6918 continue;
6920 arg_mode = GET_MODE (XEXP (use, 0));
6921 regno = REGNO (XEXP (use, 0));
6923 /* Is it a floating point register? */
6924 if (regno >= 32 && regno <= 39)
6926 if (arg_mode == SFmode)
6927 length += 8;
6928 else
6929 length += 12;
6933 return length;
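/* These per-register lengths mirror copy_fp_args above: an SFmode
   argument costs two insns (fstw + ldw, 8 bytes) and anything wider
   costs three (fstd + two ldw's, 12 bytes).  */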
6936 /* Return the attribute length for the millicode call instruction INSN.
6937 The length must match the code generated by output_millicode_call.
6938 We include the delay slot in the returned length as it is better to
6939 overestimate the length than to underestimate it. */
6941 int
6942 attr_length_millicode_call (rtx insn)
6944 unsigned long distance = -1;
6945 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6947 if (INSN_ADDRESSES_SET_P ())
6949 distance = (total + insn_current_reference_address (insn));
6950 if (distance < total)
6951 distance = -1;
6954 if (TARGET_64BIT)
6956 if (!TARGET_LONG_CALLS && distance < 7600000)
6957 return 8;
6959 return 20;
6961 else if (TARGET_PORTABLE_RUNTIME)
6962 return 24;
6963 else
6965 if (!TARGET_LONG_CALLS && distance < 240000)
6966 return 8;
6968 if (TARGET_LONG_ABS_CALL && !flag_pic)
6969 return 12;
6971 return 24;
6975 /* INSN is a function call. It may have an unconditional jump
6976 in its delay slot.
6978 CALL_DEST is the routine we are calling. */
6980 const char *
6981 output_millicode_call (rtx insn, rtx call_dest)
6983 int attr_length = get_attr_length (insn);
6984 int seq_length = dbr_sequence_length ();
6985 int distance;
6986 rtx seq_insn;
6987 rtx xoperands[3];
6989 xoperands[0] = call_dest;
6990 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6992 /* Handle the common case where we are sure that the branch will
6993 reach the beginning of the $CODE$ subspace. The within reach
6994 form of the $$sh_func_adrs call has a length of 28. Because
6995 it has an attribute type of multi, it never has a nonzero
6996 sequence length. The length of the $$sh_func_adrs is the same
6997 as certain out of reach PIC calls to other routines. */
6998 if (!TARGET_LONG_CALLS
6999 && ((seq_length == 0
7000 && (attr_length == 12
7001 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7002 || (seq_length != 0 && attr_length == 8)))
7004 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7006 else
7008 if (TARGET_64BIT)
7010 /* It might seem that one insn could be saved by accessing
7011 the millicode function using the linkage table. However,
7012 this doesn't work in shared libraries and other dynamically
7013 loaded objects. Using a pc-relative sequence also avoids
7014 problems related to the implicit use of the gp register. */
7015 output_asm_insn ("b,l .+8,%%r1", xoperands);
7017 if (TARGET_GAS)
7019 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7020 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7022 else
7024 xoperands[1] = gen_label_rtx ();
7025 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7026 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7027 CODE_LABEL_NUMBER (xoperands[1]));
7028 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7031 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7033 else if (TARGET_PORTABLE_RUNTIME)
7035 /* Pure portable runtime doesn't allow be/ble; we also don't
7036 have PIC support in the assembler/linker, so this sequence
7037 is needed. */
7039 /* Get the address of our target into %r1. */
7040 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7041 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7043 /* Get our return address into %r31. */
7044 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7045 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7047 /* Jump to our target address in %r1. */
7048 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7050 else if (!flag_pic)
7052 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7053 if (TARGET_PA_20)
7054 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7055 else
7056 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7058 else
7060 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7061 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7063 if (TARGET_SOM || !TARGET_GAS)
7065 /* The HP assembler can generate relocations for the
7066 difference of two symbols. GAS can do this for a
7067 millicode symbol but not an arbitrary external
7068 symbol when generating SOM output. */
7069 xoperands[1] = gen_label_rtx ();
7070 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7071 CODE_LABEL_NUMBER (xoperands[1]));
7072 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7073 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7075 else
7077 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7078 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7079 xoperands);
7082 /* Jump to our target address in %r1. */
7083 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7087 if (seq_length == 0)
7088 output_asm_insn ("nop", xoperands);
7090 /* We are done if there isn't a jump in the delay slot. */
7091 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7092 return "";
7094 /* This call has an unconditional jump in its delay slot. */
7095 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7097 /* See if the return address can be adjusted. Use the containing
7098 sequence insn's address. */
7099 if (INSN_ADDRESSES_SET_P ())
7101 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7102 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7103 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7105 if (VAL_14_BITS_P (distance))
7107 xoperands[1] = gen_label_rtx ();
7108 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7109 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7110 CODE_LABEL_NUMBER (xoperands[1]));
7112 else
7113 /* ??? This branch may not reach its target. */
7114 output_asm_insn ("nop\n\tb,n %0", xoperands);
7116 else
7117 /* ??? This branch may not reach its target. */
7118 output_asm_insn ("nop\n\tb,n %0", xoperands);
7120 /* Delete the jump. */
7121 PUT_CODE (NEXT_INSN (insn), NOTE);
7122 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7123 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7125 return "";
7128 /* Return the attribute length of the call instruction INSN. The SIBCALL
7129 flag indicates whether INSN is a regular call or a sibling call. The
7130 length returned must be longer than the code actually generated by
7131 output_call. Since branch shortening is done before delay branch
7132 sequencing, there is no way to determine whether or not the delay
7133 slot will be filled during branch shortening. Even when the delay
7134 slot is filled, we may have to add a nop if the delay slot contains
7135 a branch that can't reach its target. Thus, we always have to include
7136 the delay slot in the length estimate. This used to be done in
7137 pa_adjust_insn_length but we do it here now as some sequences always
7138 fill the delay slot and we can save four bytes in the estimate for
7139 these sequences. */
7141 int
7142 attr_length_call (rtx insn, int sibcall)
7144 int local_call;
7145 rtx call_dest;
7146 tree call_decl;
7147 int length = 0;
7148 rtx pat = PATTERN (insn);
7149 unsigned long distance = -1;
7151 if (INSN_ADDRESSES_SET_P ())
7153 unsigned long total;
7155 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7156 distance = (total + insn_current_reference_address (insn));
7157 if (distance < total)
7158 distance = -1;
7161 /* Determine if this is a local call. */
7162 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7163 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7164 else
7165 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7167 call_decl = SYMBOL_REF_DECL (call_dest);
7168 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7170 /* pc-relative branch. */
7171 if (!TARGET_LONG_CALLS
7172 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7173 || distance < 240000))
7174 length += 8;
7176 /* 64-bit plabel sequence. */
7177 else if (TARGET_64BIT && !local_call)
7178 length += sibcall ? 28 : 24;
7180 /* non-pic long absolute branch sequence. */
7181 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7182 length += 12;
7184 /* long pc-relative branch sequence. */
7185 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7186 || (TARGET_64BIT && !TARGET_GAS)
7187 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7189 length += 20;
7191 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7192 length += 8;
7195 /* 32-bit plabel sequence. */
7196 else
7198 length += 32;
7200 if (TARGET_SOM)
7201 length += length_fp_args (insn);
7203 if (flag_pic)
7204 length += 4;
7206 if (!TARGET_PA_20)
7208 if (!sibcall)
7209 length += 8;
7211 if (!TARGET_NO_SPACE_REGS)
7212 length += 8;
7216 return length;
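/* Worked example of the estimate above (illustrative): a 32-bit PIC
   plabel call on PA 1.x with space registers enabled, not a sibcall,
   and with no FP arguments comes to 32 + 4 + 8 + 8 = 52 bytes.  */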
7219 /* INSN is a function call. It may have an unconditional jump
7220 in its delay slot.
7222 CALL_DEST is the routine we are calling. */
7224 const char *
7225 output_call (rtx insn, rtx call_dest, int sibcall)
7227 int delay_insn_deleted = 0;
7228 int delay_slot_filled = 0;
7229 int seq_length = dbr_sequence_length ();
7230 tree call_decl = SYMBOL_REF_DECL (call_dest);
7231 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7232 rtx xoperands[2];
7234 xoperands[0] = call_dest;
7236 /* Handle the common case where we're sure that the branch will reach
7237 the beginning of the "$CODE$" subspace. This is the beginning of
7238 the current function if we are in a named section. */
7239 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7241 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7242 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7244 else
7246 if (TARGET_64BIT && !local_call)
7248 /* ??? As far as I can tell, the HP linker doesn't support the
7249 long pc-relative sequence described in the 64-bit runtime
7250 architecture. So, we use a slightly longer indirect call. */
7251 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7253 xoperands[0] = p->internal_label;
7254 xoperands[1] = gen_label_rtx ();
7256 /* If this isn't a sibcall, we put the load of %r27 into the
7257 delay slot. We can't do this in a sibcall as we don't
7258 have a second call-clobbered scratch register available. */
7259 if (seq_length != 0
7260 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7261 && !sibcall)
7263 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7264 optimize, 0, 0, NULL);
7266 /* Now delete the delay insn. */
7267 PUT_CODE (NEXT_INSN (insn), NOTE);
7268 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7269 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7270 delay_insn_deleted = 1;
7273 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7274 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7275 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7277 if (sibcall)
7279 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7280 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7281 output_asm_insn ("bve (%%r1)", xoperands);
7283 else
7285 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7286 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7287 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7288 delay_slot_filled = 1;
7291 else
7293 int indirect_call = 0;
7295 /* Emit a long call. There are several different sequences
7296 of increasing length and complexity. In most cases,
7297 they don't allow an instruction in the delay slot. */
7298 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7299 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7300 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7301 && !TARGET_64BIT)
7302 indirect_call = 1;
7304 if (seq_length != 0
7305 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7306 && !sibcall
7307 && (!TARGET_PA_20 || indirect_call))
7309 /* A non-jump insn in the delay slot. By definition we can
7310 emit this insn before the call (and in fact before argument
7311 relocating). */
7312 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0,
7313 NULL);
7315 /* Now delete the delay insn. */
7316 PUT_CODE (NEXT_INSN (insn), NOTE);
7317 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7318 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7319 delay_insn_deleted = 1;
7322 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7324 /* This is the best sequence for making long calls in
7325 non-pic code. Unfortunately, GNU ld doesn't provide
7326 the stub needed for external calls, and GAS's support
7327 for this with the SOM linker is buggy. It is safe
7328 to use this for local calls. */
7329 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7330 if (sibcall)
7331 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7332 else
7334 if (TARGET_PA_20)
7335 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7336 xoperands);
7337 else
7338 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7340 output_asm_insn ("copy %%r31,%%r2", xoperands);
7341 delay_slot_filled = 1;
7344 else
7346 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7347 || (TARGET_64BIT && !TARGET_GAS))
7349 /* The HP assembler and linker can handle relocations
7350 for the difference of two symbols. GAS and the HP
7351 linker can't do this when one of the symbols is
7352 external. */
7353 xoperands[1] = gen_label_rtx ();
7354 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7355 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7356 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7357 CODE_LABEL_NUMBER (xoperands[1]));
7358 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7360 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7362 /* GAS currently can't generate the relocations that
7363 are needed for the SOM linker under HP-UX using this
7364 sequence. The GNU linker doesn't generate the stubs
7365 that are needed for external calls on TARGET_ELF32
7366 with this sequence. For now, we have to use a
7367 longer plabel sequence when using GAS. */
7368 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7369 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7370 xoperands);
7371 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7372 xoperands);
7374 else
7376 /* Emit a long plabel-based call sequence. This is
7377 essentially an inline implementation of $$dyncall.
7378 We don't actually try to call $$dyncall as this is
7379 as difficult as calling the function itself. */
7380 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7382 xoperands[0] = p->internal_label;
7383 xoperands[1] = gen_label_rtx ();
7385 /* Since the call is indirect, FP arguments in registers
7386 need to be copied to the general registers. Then, the
7387 argument relocation stub will copy them back. */
7388 if (TARGET_SOM)
7389 copy_fp_args (insn);
7391 if (flag_pic)
7393 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7394 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7395 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7397 else
7399 output_asm_insn ("addil LR'%0-$global$,%%r27",
7400 xoperands);
7401 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7402 xoperands);
7405 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7406 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7407 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7408 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7410 if (!sibcall && !TARGET_PA_20)
7412 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7413 if (TARGET_NO_SPACE_REGS)
7414 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7415 else
7416 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7420 if (TARGET_PA_20)
7422 if (sibcall)
7423 output_asm_insn ("bve (%%r1)", xoperands);
7424 else
7426 if (indirect_call)
7428 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7429 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7430 delay_slot_filled = 1;
7432 else
7433 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7436 else
7438 if (!TARGET_NO_SPACE_REGS)
7439 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7440 xoperands);
7442 if (sibcall)
7444 if (TARGET_NO_SPACE_REGS)
7445 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7446 else
7447 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7449 else
7451 if (TARGET_NO_SPACE_REGS)
7452 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7453 else
7454 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7456 if (indirect_call)
7457 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7458 else
7459 output_asm_insn ("copy %%r31,%%r2", xoperands);
7460 delay_slot_filled = 1;
7467 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7468 output_asm_insn ("nop", xoperands);
7470 /* We are done if there isn't a jump in the delay slot. */
7471 if (seq_length == 0
7472 || delay_insn_deleted
7473 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7474 return "";
7476 /* A sibcall should never have a branch in the delay slot. */
7477 if (sibcall)
7478 abort ();
7480 /* This call has an unconditional jump in its delay slot. */
7481 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7483 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7485 /* See if the return address can be adjusted. Use the containing
7486 sequence insn's address. */
7487 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7488 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7489 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7491 if (VAL_14_BITS_P (distance))
7493 xoperands[1] = gen_label_rtx ();
7494 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7495 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7496 CODE_LABEL_NUMBER (xoperands[1]));
7498 else
7499 output_asm_insn ("nop\n\tb,n %0", xoperands);
7501 else
7502 output_asm_insn ("b,n %0", xoperands);
7504 /* Delete the jump. */
7505 PUT_CODE (NEXT_INSN (insn), NOTE);
7506 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7507 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7509 return "";
7512 /* Return the attribute length of the indirect call instruction INSN.
7513 The length must match the code generated by output_indirect_call.
7514 The returned length includes the delay slot. Currently, the delay
7515 slot of an indirect call sequence is not exposed and it is used by
7516 the sequence itself. */
7518 int
7519 attr_length_indirect_call (rtx insn)
7521 unsigned long distance = -1;
7522 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7524 if (INSN_ADDRESSES_SET_P ())
7526 distance = (total + insn_current_reference_address (insn));
7527 if (distance < total)
7528 distance = -1;
7531 if (TARGET_64BIT)
7532 return 12;
7534 if (TARGET_FAST_INDIRECT_CALLS
7535 || (!TARGET_PORTABLE_RUNTIME
7536 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7537 return 8;
7539 if (flag_pic)
7540 return 24;
7542 if (TARGET_PORTABLE_RUNTIME)
7543 return 20;
7545 /* Out of reach, can use ble. */
7546 return 12;
7549 const char *
7550 output_indirect_call (rtx insn, rtx call_dest)
7552 rtx xoperands[1];
7554 if (TARGET_64BIT)
7556 xoperands[0] = call_dest;
7557 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7558 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7559 return "";
7562 /* First the special case for kernels, level 0 systems, etc. */
7563 if (TARGET_FAST_INDIRECT_CALLS)
7564 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7566 /* Now the normal case -- we can reach $$dyncall directly or
7567 we're sure that we can get there via a long-branch stub.
7569 No need to check target flags as the length uniquely identifies
7570 the remaining cases. */
7571 if (attr_length_indirect_call (insn) == 8)
7573 /* The HP linker substitutes a BLE for millicode calls using
7574 the short PIC PCREL form. Thus, we must use %r31 as the
7575 link register when generating PA 1.x code. */
7576 if (TARGET_PA_20)
7577 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7578 else
7579 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7582 /* Long millicode call, but we are not generating PIC or portable runtime
7583 code. */
7584 if (attr_length_indirect_call (insn) == 12)
7585 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7587 /* Long millicode call for portable runtime. */
7588 if (attr_length_indirect_call (insn) == 20)
7589 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7591 /* We need a long PIC call to $$dyncall. */
7592 xoperands[0] = NULL_RTX;
7593 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7594 if (TARGET_SOM || !TARGET_GAS)
7596 xoperands[0] = gen_label_rtx ();
7597 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7598 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7599 CODE_LABEL_NUMBER (xoperands[0]));
7600 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7602 else
7604 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7605 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7606 xoperands);
7608 output_asm_insn ("blr %%r0,%%r2", xoperands);
7609 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7610 return "";
7613 /* Return the total length of the save and restore instructions needed for
7614 the data linkage table pointer (i.e., the PIC register) across the call
7615 instruction INSN. No-return calls do not require a save and restore.
7616 In addition, we may be able to avoid the save and restore for calls
7617 within the same translation unit. */
7620 attr_length_save_restore_dltp (rtx insn)
7622 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7623 return 0;
7625 return 8;
7628 /* In HPUX 8.0's shared library scheme, special relocations are needed
7629 for function labels if they might be passed to a function
7630 in a shared library (because shared libraries don't live in code
7631 space), and special magic is needed to construct their address. */
7633 void
7634 hppa_encode_label (rtx sym)
7636 const char *str = XSTR (sym, 0);
7637 int len = strlen (str) + 1;
7638 char *newstr, *p;
7640 p = newstr = alloca (len + 1);
7641 *p++ = '@';
7642 strcpy (p, str);
7644 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7647 static void
7648 pa_encode_section_info (tree decl, rtx rtl, int first)
7650 if (first && TEXT_SPACE_P (decl))
7652 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7653 if (TREE_CODE (decl) == FUNCTION_DECL)
7654 hppa_encode_label (XEXP (rtl, 0));
7658 /* This is roughly the inverse of pa_encode_section_info. */
7660 static const char *
7661 pa_strip_name_encoding (const char *str)
7663 str += (*str == '@');
7664 str += (*str == '*');
7665 return str;
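/* Editor's sketch, not part of pa.c: hppa_encode_label prepends '@' to
   a symbol name; pa_strip_name_encoding drops a leading '@' and a
   leading '*' (the generic marker for a literal assembler name).  A
   round trip on a plain C string:  */
#if 0
#include <string.h>

static void
encoding_round_trip_sketch (void)
{
  char buf[8];
  const char *p;

  strcpy (buf, "@foo");	/* as produced by hppa_encode_label  */
  p = buf;
  p += (*p == '@');	/* as done by pa_strip_name_encoding  */
  p += (*p == '*');
  /* p now points at "foo" again.  */
}
#endif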
7669 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7671 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7674 /* Returns 1 if OP is a function label involved in a simple addition
7675 with a constant. Used to keep certain patterns from matching
7676 during instruction combination. */
7678 is_function_label_plus_const (rtx op)
7680 /* Strip off any CONST. */
7681 if (GET_CODE (op) == CONST)
7682 op = XEXP (op, 0);
7684 return (GET_CODE (op) == PLUS
7685 && function_label_operand (XEXP (op, 0), Pmode)
7686 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7689 /* Output assembly code for a thunk to FUNCTION. */
7691 static void
7692 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7693 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7694 tree function)
7696 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7697 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7698 int val_14 = VAL_14_BITS_P (delta);
7699 int nbytes = 0;
7700 static unsigned int current_thunk_number;
7701 char label[16];
7703 ASM_OUTPUT_LABEL (file, tname);
7704 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7706 fname = (*targetm.strip_name_encoding) (fname);
7707 tname = (*targetm.strip_name_encoding) (tname);
7709 /* Output the thunk. We know that the function is in the same
7710 translation unit (i.e., the same space) as the thunk, and that
7711 thunks are output after their method. Thus, we don't need an
7712 external branch to reach the function. With SOM and GAS,
7713 functions and thunks are effectively in different sections.
7714 Thus, we can always use an IA-relative branch and the linker
7715 will add a long branch stub if necessary.
7717 However, we have to be careful when generating PIC code on the
7718 SOM port to ensure that the sequence does not transfer to an
7719 import stub for the target function as this could clobber the
7720 return value saved at SP-24. This would also apply to the
7721 32-bit linux port if the multi-space model is implemented. */
7722 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7723 && !(flag_pic && TREE_PUBLIC (function))
7724 && (TARGET_GAS || last_address < 262132))
7725 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7726 && ((targetm.have_named_sections
7727 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7728 /* The GNU 64-bit linker has rather poor stub management.
7729 So, we use a long branch from thunks that aren't in
7730 the same section as the target function. */
7731 && ((!TARGET_64BIT
7732 && (DECL_SECTION_NAME (thunk_fndecl)
7733 != DECL_SECTION_NAME (function)))
7734 || ((DECL_SECTION_NAME (thunk_fndecl)
7735 == DECL_SECTION_NAME (function))
7736 && last_address < 262132)))
7737 || (!targetm.have_named_sections && last_address < 262132))))
7739 if (val_14)
7741 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7742 "(%%r26),%%r26\n", fname, delta);
7743 nbytes += 8;
7745 else
7747 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7748 ",%%r26\n", delta);
7749 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7750 "(%%r1),%%r26\n", fname, delta);
7751 nbytes += 12;
7754 else if (TARGET_64BIT)
7756 /* We only have one call-clobbered scratch register, so we can't
7757 make use of the delay slot if delta doesn't fit in 14 bits. */
7758 if (!val_14)
7759 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7760 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7761 "(%%r1),%%r26\n", delta, delta);
7763 fprintf (file, "\tb,l .+8,%%r1\n");
7765 if (TARGET_GAS)
7767 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7768 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7770 else
7772 int off = val_14 ? 8 : 16;
7773 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7774 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7777 if (val_14)
7779 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7780 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7781 nbytes += 20;
7783 else
7785 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7786 nbytes += 24;
7789 else if (TARGET_PORTABLE_RUNTIME)
7791 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7792 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7794 if (val_14)
7796 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7797 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7798 nbytes += 16;
7800 else
7802 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7803 ",%%r26\n", delta);
7804 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7805 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7806 nbytes += 20;
7809 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7811 /* The function is accessible from outside this module. The only
7812 way to avoid an import stub between the thunk and function is to
7813 call the function directly with an indirect sequence similar to
7814 that used by $$dyncall. This is possible because $$dyncall acts
7815 as the import stub in an indirect call. */
7816 const char *lab;
7818 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7819 lab = (*targetm.strip_name_encoding) (label);
7821 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7822 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7823 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7824 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7825 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7826 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7827 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7828 if (!val_14)
7830 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7831 ",%%r26\n", delta);
7832 nbytes += 4;
7834 if (TARGET_PA_20)
7836 fprintf (file, "\tbve (%%r22)\n\tldo ");
7837 nbytes += 36;
7839 else
7841 if (TARGET_NO_SPACE_REGS)
7843 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7844 nbytes += 36;
7846 else
7848 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7849 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7850 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7851 nbytes += 44;
7855 if (val_14)
7856 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7857 else
7858 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7860 else if (flag_pic)
7862 if (TARGET_PA_20)
7863 fprintf (file, "\tb,l .+8,%%r1\n");
7864 else
7865 fprintf (file, "\tbl .+8,%%r1\n");
7867 if (TARGET_SOM || !TARGET_GAS)
7869 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7870 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7872 else
7874 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7875 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7878 if (val_14)
7880 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7881 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7882 nbytes += 20;
7884 else
7886 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7887 ",%%r26\n", delta);
7888 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7889 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7890 nbytes += 24;
7893 else
7895 if (!val_14)
7896 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7898 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7899 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7901 if (val_14)
7903 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7904 nbytes += 12;
7906 else
7908 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7909 nbytes += 16;
7913 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7915 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7917 data_section ();
7918 fprintf (file, "\t.align 4\n");
7919 ASM_OUTPUT_LABEL (file, label);
7920 fprintf (file, "\t.word P'%s\n", fname);
7921 function_section (thunk_fndecl);
7924 current_thunk_number++;
7925 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7926 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7927 last_address += nbytes;
7928 update_total_code_bytes (nbytes);
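/* Editor's illustration, not from the original source: for the common
   short case above -- target in the same space and DELTA fitting in 14
   bits -- a thunk for a target foo with DELTA == 8 comes out as just

	b foo
	ldo 8(%r26),%r26

   so the `this' pointer in %r26 is adjusted in the delay slot of the
   branch and control falls straight into foo.  */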
7931 /* Only direct calls to static functions are allowed to be sibling (tail)
7932 call optimized.
7934 This restriction is necessary because some linker-generated stubs
7935 will, in some cases, store return pointers into rp', which might
7936 clobber a live value already in rp'.
7938 In a sibcall the current function and the target function share stack
7939 space. Thus if the path to the current function and the path to the
7940 target function save a value in rp', they save the value into the
7941 same stack slot, which has undesirable consequences.
7943 Because of the deferred binding nature of shared libraries any function
7944 with external scope could be in a different load module and thus require
7945 rp' to be saved when calling that function. So sibcall optimizations
7946 can only be safe for static functions.
7948 Note that GCC never needs return value relocations, so we don't have to
7949 worry about static calls with return value relocations (which require
7950 saving rp').
7952 It is safe to perform a sibcall optimization when the target function
7953 will never return. */
7954 static bool
7955 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7957 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7958 single subspace mode and the call is not indirect. As far as I know,
7959 there is no operating system support for the multiple subspace mode.
7960 It might be possible to support indirect calls if we didn't use
7961 $$dyncall (see the indirect sequence generated in output_call). */
7962 if (TARGET_ELF32)
7963 return (decl != NULL_TREE);
7965 /* Sibcalls are not ok because the arg pointer register is not a fixed
7966 register. This prevents the sibcall optimization from occurring. In
7967 addition, there are problems with stub placement using GNU ld. This
7968 is because a normal sibcall branch uses a 17-bit relocation while
7969 a regular call branch uses a 22-bit relocation. As a result, more
7970 care needs to be taken in the placement of long-branch stubs. */
7971 if (TARGET_64BIT)
7972 return false;
7974 return (decl
7975 && !TARGET_PORTABLE_RUNTIME
7976 && !TREE_PUBLIC (decl));
7979 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7980 use in fmpyadd instructions. */
7982 fmpyaddoperands (rtx *operands)
7984 enum machine_mode mode = GET_MODE (operands[0]);
7986 /* Must be a floating point mode. */
7987 if (mode != SFmode && mode != DFmode)
7988 return 0;
7990 /* All modes must be the same. */
7991 if (! (mode == GET_MODE (operands[1])
7992 && mode == GET_MODE (operands[2])
7993 && mode == GET_MODE (operands[3])
7994 && mode == GET_MODE (operands[4])
7995 && mode == GET_MODE (operands[5])))
7996 return 0;
7998 /* All operands must be registers. */
7999 if (! (GET_CODE (operands[1]) == REG
8000 && GET_CODE (operands[2]) == REG
8001 && GET_CODE (operands[3]) == REG
8002 && GET_CODE (operands[4]) == REG
8003 && GET_CODE (operands[5]) == REG))
8004 return 0;
8006 /* Only 2 real operands to the addition. One of the input operands must
8007 be the same as the output operand. */
8008 if (! rtx_equal_p (operands[3], operands[4])
8009 && ! rtx_equal_p (operands[3], operands[5]))
8010 return 0;
8012 /* Inout operand of add can not conflict with any operands from multiply. */
8013 if (rtx_equal_p (operands[3], operands[0])
8014 || rtx_equal_p (operands[3], operands[1])
8015 || rtx_equal_p (operands[3], operands[2]))
8016 return 0;
8018 /* The multiply cannot feed the addition operands. */
8019 if (rtx_equal_p (operands[4], operands[0])
8020 || rtx_equal_p (operands[5], operands[0]))
8021 return 0;
8023 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8024 if (mode == SFmode
8025 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8026 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8027 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8028 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8029 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8030 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8031 return 0;
8033 /* Passed. Operands are suitable for fmpyadd. */
8034 return 1;
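/* Editor's illustration, not from the original source: the six
   operands checked above describe

	operands[0] = operands[1] * operands[2]    (the fmpy)
	operands[3] = operands[4] + operands[5]    (the fadd)

   With DFmode operands (so the SFmode upper-register restriction does
   not apply), fr22 = fr24 * fr25 combined with fr26 = fr26 + fr27
   passes every test: the add's output equals one of its inputs, and
   nothing in the add overlaps the multiply.  By contrast, combining it
   with fr22 = fr22 + fr27 fails because the add reuses the multiply's
   destination.  */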
8037 #if !defined(USE_COLLECT2)
8038 static void
8039 pa_asm_out_constructor (rtx symbol, int priority)
8041 if (!function_label_operand (symbol, VOIDmode))
8042 hppa_encode_label (symbol);
8044 #ifdef CTORS_SECTION_ASM_OP
8045 default_ctor_section_asm_out_constructor (symbol, priority);
8046 #else
8047 # ifdef TARGET_ASM_NAMED_SECTION
8048 default_named_section_asm_out_constructor (symbol, priority);
8049 # else
8050 default_stabs_asm_out_constructor (symbol, priority);
8051 # endif
8052 #endif
8055 static void
8056 pa_asm_out_destructor (rtx symbol, int priority)
8058 if (!function_label_operand (symbol, VOIDmode))
8059 hppa_encode_label (symbol);
8061 #ifdef DTORS_SECTION_ASM_OP
8062 default_dtor_section_asm_out_destructor (symbol, priority);
8063 #else
8064 # ifdef TARGET_ASM_NAMED_SECTION
8065 default_named_section_asm_out_destructor (symbol, priority);
8066 # else
8067 default_stabs_asm_out_destructor (symbol, priority);
8068 # endif
8069 #endif
8071 #endif
8073 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8074 use in fmpysub instructions. */
8076 fmpysuboperands (rtx *operands)
8078 enum machine_mode mode = GET_MODE (operands[0]);
8080 /* Must be a floating point mode. */
8081 if (mode != SFmode && mode != DFmode)
8082 return 0;
8084 /* All modes must be the same. */
8085 if (! (mode == GET_MODE (operands[1])
8086 && mode == GET_MODE (operands[2])
8087 && mode == GET_MODE (operands[3])
8088 && mode == GET_MODE (operands[4])
8089 && mode == GET_MODE (operands[5])))
8090 return 0;
8092 /* All operands must be registers. */
8093 if (! (GET_CODE (operands[1]) == REG
8094 && GET_CODE (operands[2]) == REG
8095 && GET_CODE (operands[3]) == REG
8096 && GET_CODE (operands[4]) == REG
8097 && GET_CODE (operands[5]) == REG))
8098 return 0;
8100 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8101 operation, so operands[4] must be the same as operands[3]. */
8102 if (! rtx_equal_p (operands[3], operands[4]))
8103 return 0;
8105 /* The multiply cannot feed the subtraction. */
8106 if (rtx_equal_p (operands[5], operands[0]))
8107 return 0;
8109 /* Inout operand of sub can not conflict with any operands from multiply. */
8110 if (rtx_equal_p (operands[3], operands[0])
8111 || rtx_equal_p (operands[3], operands[1])
8112 || rtx_equal_p (operands[3], operands[2]))
8113 return 0;
8115 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8116 if (mode == SFmode
8117 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8118 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8119 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8120 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8121 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8122 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8123 return 0;
8125 /* Passed. Operands are suitable for fmpysub. */
8126 return 1;
8130 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8132 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
8133 || GET_CODE (op) == IOR);
8136 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8137 constants for shadd instructions. */
8138 static int
8139 shadd_constant_p (int val)
8141 if (val == 2 || val == 4 || val == 8)
8142 return 1;
8143 else
8144 return 0;
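/* Editor's sketch, not part of pa.c: the shXadd instructions compute
   (x << X) + y for X in {1, 2, 3}, so the constant multipliers they
   can fold are exactly 2, 4 and 8:  */
#if 0
static int
shadd_shift_amount_sketch (int val)
{
  switch (val)
    {
    case 2: return 1;	/* sh1add: (x << 1) + y  */
    case 4: return 2;	/* sh2add: (x << 2) + y  */
    case 8: return 3;	/* sh3add: (x << 3) + y  */
    default: return -1;	/* not a shadd constant  */
    }
}
#endif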
8147 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
8148 the valid constants for shadd instructions. */
8150 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8152 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
8155 /* Return 1 if OP is valid as a base or index register in a
8156 REG+REG address. */
8159 borx_reg_operand (rtx op, enum machine_mode mode)
8161 if (GET_CODE (op) != REG)
8162 return 0;
8164 /* We must reject virtual registers as the only expressions that
8165 can be instantiated are REG and REG+CONST. */
8166 if (op == virtual_incoming_args_rtx
8167 || op == virtual_stack_vars_rtx
8168 || op == virtual_stack_dynamic_rtx
8169 || op == virtual_outgoing_args_rtx
8170 || op == virtual_cfa_rtx)
8171 return 0;
8173 /* While it's always safe to index off the frame pointer, it's not
8174 profitable to do so when the frame pointer is being eliminated. */
8175 if (!reload_completed
8176 && flag_omit_frame_pointer
8177 && !current_function_calls_alloca
8178 && op == frame_pointer_rtx)
8179 return 0;
8181 return register_operand (op, mode);
8184 /* Return 1 if this operand is anything other than a hard register. */
8187 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8189 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8192 /* Return 1 if INSN branches forward. Should be using insn_addresses
8193 to avoid walking through all the insns... */
8194 static int
8195 forward_branch_p (rtx insn)
8197 rtx label = JUMP_LABEL (insn);
8199 while (insn)
8201 if (insn == label)
8202 break;
8203 else
8204 insn = NEXT_INSN (insn);
8207 return (insn == label);
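/* Editor's sketch, not part of pa.c: the address-based test the
   comment above alludes to might look like the following, assuming
   insn addresses have been computed (INSN_ADDRESSES_SET_P) as in
   attr_length_indirect_call:  */
#if 0
static int
forward_branch_addr_p_sketch (rtx insn)
{
  /* A branch is forward when its target label lies at a higher
     address than the branch itself.  */
  return (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
	  > INSN_ADDRESSES (INSN_UID (insn)));
}
#endif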
8210 /* Return 1 if OP is an equality comparison, else return 0. */
8212 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8214 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8217 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
8219 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8221 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
8222 || GET_CODE (op) == LT || GET_CODE (op) == GE);
8225 /* Return 1 if INSN is in the delay slot of a call instruction. */
8227 jump_in_call_delay (rtx insn)
8230 if (GET_CODE (insn) != JUMP_INSN)
8231 return 0;
8233 if (PREV_INSN (insn)
8234 && PREV_INSN (PREV_INSN (insn))
8235 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8237 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8239 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8240 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8243 else
8244 return 0;
8247 /* Output an unconditional move and branch insn. */
8249 const char *
8250 output_parallel_movb (rtx *operands, int length)
8252 /* These are the cases in which we win. */
8253 if (length == 4)
8254 return "mov%I1b,tr %1,%0,%2";
8256 /* None of these cases wins, but they don't lose either. */
8257 if (dbr_sequence_length () == 0)
8259 /* Nothing in the delay slot, fake it by putting the combined
8260 insn (the copy or add) in the delay slot of a bl. */
8261 if (GET_CODE (operands[1]) == CONST_INT)
8262 return "b %2\n\tldi %1,%0";
8263 else
8264 return "b %2\n\tcopy %1,%0";
8266 else
8268 /* Something in the delay slot, but we've got a long branch. */
8269 if (GET_CODE (operands[1]) == CONST_INT)
8270 return "ldi %1,%0\n\tb %2";
8271 else
8272 return "copy %1,%0\n\tb %2";
8276 /* Output an unconditional add and branch insn. */
8278 const char *
8279 output_parallel_addb (rtx *operands, int length)
8281 /* To make life easy we want operand0 to be the shared input/output
8282 operand and operand1 to be the readonly operand. */
8283 if (operands[0] == operands[1])
8284 operands[1] = operands[2];
8286 /* These are the cases in which we win. */
8287 if (length == 4)
8288 return "add%I1b,tr %1,%0,%3";
8290 /* None of these cases wins, but they don't lose either. */
8291 if (dbr_sequence_length () == 0)
8293 /* Nothing in the delay slot, fake it by putting the combined
8294 insn (the copy or add) in the delay slot of a bl. */
8295 return "b %3\n\tadd%I1 %1,%0,%0";
8297 else
8299 /* Something in the delay slot, but we've got a long branch. */
8300 return "add%I1 %1,%0,%0\n\tb %3";
8304 /* Return nonzero if INSN (a jump insn) immediately follows a call
8305 to a named function. This is used to avoid filling the delay slot
8306 of the jump since it can usually be eliminated by modifying RP in
8307 the delay slot of the call. */
8310 following_call (rtx insn)
8312 if (! TARGET_JUMP_IN_DELAY)
8313 return 0;
8315 /* Find the previous real insn, skipping NOTEs. */
8316 insn = PREV_INSN (insn);
8317 while (insn && GET_CODE (insn) == NOTE)
8318 insn = PREV_INSN (insn);
8320 /* Check for CALL_INSNs and millicode calls. */
8321 if (insn
8322 && ((GET_CODE (insn) == CALL_INSN
8323 && get_attr_type (insn) != TYPE_DYNCALL)
8324 || (GET_CODE (insn) == INSN
8325 && GET_CODE (PATTERN (insn)) != SEQUENCE
8326 && GET_CODE (PATTERN (insn)) != USE
8327 && GET_CODE (PATTERN (insn)) != CLOBBER
8328 && get_attr_type (insn) == TYPE_MILLI)))
8329 return 1;
8331 return 0;
8334 /* We use this hook to perform a PA specific optimization which is difficult
8335 to do in earlier passes.
8337 We want the delay slots of branches within jump tables to be filled.
8338 None of the compiler passes at the moment even has the notion that a
8339 PA jump table doesn't contain addresses, but instead contains actual
8340 instructions!
8342 Because we actually jump into the table, the addresses of each entry
8343 must stay constant in relation to the beginning of the table (which
8344 itself must stay constant relative to the instruction to jump into
8345 it). I don't believe we can guarantee earlier passes of the compiler
8346 will adhere to those rules.
8348 So, late in the compilation process we find all the jump tables, and
8349 expand them into real code -- e.g., each entry in the jump table vector
8350 will get an appropriate label followed by a jump to the final target.
8352 Reorg and the final jump pass can then optimize these branches and
8353 fill their delay slots. We end up with smaller, more efficient code.
8355 The jump instructions within the table are special; we must be able
8356 to identify them during assembly output (if the jumps don't get filled
8357 we need to emit a nop rather than nullifying the delay slot). We
8358 identify jumps in switch tables by using insns with the attribute
8359 type TYPE_BTABLE_BRANCH.
8361 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8362 insns. This serves two purposes: first, it prevents jump.c from
8363 noticing that the last N entries in the table jump to the instruction
8364 immediately after the table and deleting the jumps. Second, those
8365 insns mark where we should emit .begin_brtab and .end_brtab directives
8366 when using GAS (allows for better link time optimizations). */
8368 static void
8369 pa_reorg (void)
8371 rtx insn;
8373 remove_useless_addtr_insns (1);
8375 if (pa_cpu < PROCESSOR_8000)
8376 pa_combine_instructions ();
8379 /* This is fairly cheap, so always run it if optimizing. */
8380 if (optimize > 0 && !TARGET_BIG_SWITCH)
8382 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8383 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8385 rtx pattern, tmp, location, label;
8386 unsigned int length, i;
8388 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8389 if (GET_CODE (insn) != JUMP_INSN
8390 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8391 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8392 continue;
8394 /* Emit marker for the beginning of the branch table. */
8395 emit_insn_before (gen_begin_brtab (), insn);
8397 pattern = PATTERN (insn);
8398 location = PREV_INSN (insn);
8399 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8401 for (i = 0; i < length; i++)
8403 /* Emit a label before each jump to keep jump.c from
8404 removing this code. */
8405 tmp = gen_label_rtx ();
8406 LABEL_NUSES (tmp) = 1;
8407 emit_label_after (tmp, location);
8408 location = NEXT_INSN (location);
8410 if (GET_CODE (pattern) == ADDR_VEC)
8411 label = XEXP (XVECEXP (pattern, 0, i), 0);
8412 else
8413 label = XEXP (XVECEXP (pattern, 1, i), 0);
8415 tmp = gen_short_jump (label);
8417 /* Emit the jump itself. */
8418 tmp = emit_jump_insn_after (tmp, location);
8419 JUMP_LABEL (tmp) = label;
8420 LABEL_NUSES (label)++;
8421 location = NEXT_INSN (location);
8423 /* Emit a BARRIER after the jump. */
8424 emit_barrier_after (location);
8425 location = NEXT_INSN (location);
8428 /* Emit marker for the end of the branch table. */
8429 emit_insn_before (gen_end_brtab (), location);
8430 location = NEXT_INSN (location);
8431 emit_barrier_after (location);
8433 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8434 delete_insn (insn);
8437 else
8439 /* Still need brtab marker insns. FIXME: the presence of these
8440 markers disables output of the branch table to readonly memory,
8441 and any alignment directives that might be needed. Possibly,
8442 the begin_brtab insn should be output before the label for the
8443 table. This doesn't matter at the moment since the tables are
8444 always output in the text section. */
8445 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8447 /* Find an ADDR_VEC insn. */
8448 if (GET_CODE (insn) != JUMP_INSN
8449 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8450 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8451 continue;
8453 /* Now generate markers for the beginning and end of the
8454 branch table. */
8455 emit_insn_before (gen_begin_brtab (), insn);
8456 emit_insn_after (gen_end_brtab (), insn);
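/* Editor's illustration, not from the original source: the expansion
   above turns a three-entry table such as

	(addr_vec [L10 L20 L30])

   into real, schedulable instructions,

	L$0:	b L10		; delay slot fillable by reorg
		nop
	L$1:	b L20
		nop
	L$2:	b L30
		nop

   bracketed by begin_brtab/end_brtab markers.  Every entry occupies a
   fixed-size slot (branch plus delay slot), which is what keeps the
   entries at constant offsets from the start of the table for the
   computed jump into it.  */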
8461 /* The PA has a number of odd instructions which can perform multiple
8462 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8463 it may be profitable to combine two instructions into one instruction
8464 with two outputs. It's not profitable on PA2.0 machines because the
8465 two outputs would take two slots in the reorder buffers.
8467 This routine finds instructions which can be combined and combines
8468 them. We only support some of the potential combinations, and we
8469 only try common ways to find suitable instructions.
8471 * addb can add two registers or a register and a small integer
8472 and jump to a nearby (+-8k) location. Normally the jump to the
8473 nearby location is conditional on the result of the add, but by
8474 using the "true" condition we can make the jump unconditional.
8475 Thus addb can perform two independent operations in one insn.
8477 * movb is similar to addb in that it can perform a reg->reg
8478 or small immediate->reg copy and jump to a nearby (+-8k) location.
8480 * fmpyadd and fmpysub can perform a FP multiply and either an
8481 FP add or FP sub if the operands of the multiply and add/sub are
8482 independent (there are other minor restrictions). Note both
8483 the fmpy and fadd/fsub can in theory move to better spots according
8484 to data dependencies, but for now we require the fmpy stay at a
8485 fixed location.
8487 * Many of the memory operations can perform pre & post updates
8488 of index registers. GCC's pre/post increment/decrement addressing
8489 is far too simple to take advantage of all the possibilities. This
8490 pass may not be suitable since those insns may not be independent.
8492 * comclr can compare two registers or an integer and a register, nullify
8493 the following instruction and zero some other register. This
8494 is more difficult to use as it's harder to find an insn which
8495 will generate a comclr than finding something like an unconditional
8496 branch. (conditional moves & long branches create comclr insns).
8498 * Most arithmetic operations can conditionally skip the next
8499 instruction. They can be viewed as "perform this operation
8500 and conditionally jump to this nearby location" (where nearby
8501 is a few insns away). These are difficult to use due to the
8502 branch length restrictions. */
8504 static void
8505 pa_combine_instructions (void)
8507 rtx anchor, new;
8509 /* This can get expensive since the basic algorithm is on the
8510 order of O(n^2) (or worse). Only do it for -O2 or higher
8511 levels of optimization. */
8512 if (optimize < 2)
8513 return;
8515 /* Walk down the list of insns looking for "anchor" insns which
8516 may be combined with "floating" insns. As the name implies,
8517 "anchor" instructions don't move, while "floating" insns may
8518 move around. */
8519 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8520 new = make_insn_raw (new);
8522 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8524 enum attr_pa_combine_type anchor_attr;
8525 enum attr_pa_combine_type floater_attr;
8527 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8528 Also ignore any special USE insns. */
8529 if ((GET_CODE (anchor) != INSN
8530 && GET_CODE (anchor) != JUMP_INSN
8531 && GET_CODE (anchor) != CALL_INSN)
8532 || GET_CODE (PATTERN (anchor)) == USE
8533 || GET_CODE (PATTERN (anchor)) == CLOBBER
8534 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8535 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8536 continue;
8538 anchor_attr = get_attr_pa_combine_type (anchor);
8539 /* See if anchor is an insn suitable for combination. */
8540 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8541 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8542 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8543 && ! forward_branch_p (anchor)))
8545 rtx floater;
8547 for (floater = PREV_INSN (anchor);
8548 floater;
8549 floater = PREV_INSN (floater))
8551 if (GET_CODE (floater) == NOTE
8552 || (GET_CODE (floater) == INSN
8553 && (GET_CODE (PATTERN (floater)) == USE
8554 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8555 continue;
8557 /* Anything except a regular INSN will stop our search. */
8558 if (GET_CODE (floater) != INSN
8559 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8560 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8562 floater = NULL_RTX;
8563 break;
8566 /* See if FLOATER is suitable for combination with the
8567 anchor. */
8568 floater_attr = get_attr_pa_combine_type (floater);
8569 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8570 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8571 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8572 && floater_attr == PA_COMBINE_TYPE_FMPY))
8574 /* If ANCHOR and FLOATER can be combined, then we're
8575 done with this pass. */
8576 if (pa_can_combine_p (new, anchor, floater, 0,
8577 SET_DEST (PATTERN (floater)),
8578 XEXP (SET_SRC (PATTERN (floater)), 0),
8579 XEXP (SET_SRC (PATTERN (floater)), 1)))
8580 break;
8583 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8584 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8586 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8588 if (pa_can_combine_p (new, anchor, floater, 0,
8589 SET_DEST (PATTERN (floater)),
8590 XEXP (SET_SRC (PATTERN (floater)), 0),
8591 XEXP (SET_SRC (PATTERN (floater)), 1)))
8592 break;
8594 else
8596 if (pa_can_combine_p (new, anchor, floater, 0,
8597 SET_DEST (PATTERN (floater)),
8598 SET_SRC (PATTERN (floater)),
8599 SET_SRC (PATTERN (floater))))
8600 break;
8605 /* If we didn't find anything on the backwards scan try forwards. */
8606 if (!floater
8607 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8608 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8610 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8612 if (GET_CODE (floater) == NOTE
8613 || (GET_CODE (floater) == INSN
8614 && (GET_CODE (PATTERN (floater)) == USE
8615 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8617 continue;
8619 /* Anything except a regular INSN will stop our search. */
8620 if (GET_CODE (floater) != INSN
8621 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8622 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8624 floater = NULL_RTX;
8625 break;
8628 /* See if FLOATER is suitable for combination with the
8629 anchor. */
8630 floater_attr = get_attr_pa_combine_type (floater);
8631 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8632 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8633 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8634 && floater_attr == PA_COMBINE_TYPE_FMPY))
8636 /* If ANCHOR and FLOATER can be combined, then we're
8637 done with this pass. */
8638 if (pa_can_combine_p (new, anchor, floater, 1,
8639 SET_DEST (PATTERN (floater)),
8640 XEXP (SET_SRC (PATTERN (floater)),
8642 XEXP (SET_SRC (PATTERN (floater)),
8643 1)))
8644 break;
8649 /* FLOATER will be nonzero if we found a suitable floating
8650 insn for combination with ANCHOR. */
8651 if (floater
8652 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8653 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8655 /* Emit the new instruction and delete the old anchor. */
8656 emit_insn_before (gen_rtx_PARALLEL
8657 (VOIDmode,
8658 gen_rtvec (2, PATTERN (anchor),
8659 PATTERN (floater))),
8660 anchor);
8662 PUT_CODE (anchor, NOTE);
8663 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8664 NOTE_SOURCE_FILE (anchor) = 0;
8666 /* Emit a special USE insn for FLOATER, then delete
8667 the floating insn. */
8668 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8669 delete_insn (floater);
8671 continue;
8673 else if (floater
8674 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8676 rtx temp;
8677 /* Emit the new_jump instruction and delete the old anchor. */
8678 temp
8679 = emit_jump_insn_before (gen_rtx_PARALLEL
8680 (VOIDmode,
8681 gen_rtvec (2, PATTERN (anchor),
8682 PATTERN (floater))),
8683 anchor);
8685 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8686 PUT_CODE (anchor, NOTE);
8687 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8688 NOTE_SOURCE_FILE (anchor) = 0;
8690 /* Emit a special USE insn for FLOATER, then delete
8691 the floating insn. */
8692 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8693 delete_insn (floater);
8694 continue;
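/* Editor's illustration, not from the original source: a successful
   combination rewrites, say,

	fmpy,dbl %fr4,%fr5,%fr6
	fadd,dbl %fr7,%fr8,%fr7

   (one fmpy and one fadd with independent operands) into a single
   two-output insn matched by the fmpyadd pattern.  Concretely, a
   PARALLEL of the two original SETs is emitted in place of the anchor
   and a special USE is left where the floating insn used to live.  */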
8700 static int
8701 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8702 rtx src1, rtx src2)
8704 int insn_code_number;
8705 rtx start, end;
8707 /* Create a PARALLEL with the patterns of ANCHOR and
8708 FLOATER, try to recognize it, then test constraints
8709 for the resulting pattern.
8711 If the pattern doesn't match or the constraints
8712 aren't met keep searching for a suitable floater
8713 insn. */
8714 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8715 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8716 INSN_CODE (new) = -1;
8717 insn_code_number = recog_memoized (new);
8718 if (insn_code_number < 0
8719 || (extract_insn (new), ! constrain_operands (1)))
8720 return 0;
8722 if (reversed)
8724 start = anchor;
8725 end = floater;
8727 else
8729 start = floater;
8730 end = anchor;
8733 /* There are up to three operands to consider: one
8734 output and two inputs.
8736 The output must not be used between FLOATER and ANCHOR
8737 exclusive. The inputs must not be set between
8738 FLOATER and ANCHOR exclusive. */
8740 if (reg_used_between_p (dest, start, end))
8741 return 0;
8743 if (reg_set_between_p (src1, start, end))
8744 return 0;
8746 if (reg_set_between_p (src2, start, end))
8747 return 0;
8749 /* If we get here, then everything is good. */
8750 return 1;
8753 /* Return nonzero if references for INSN are delayed.
8755 Millicode insns are actually function calls with some special
8756 constraints on arguments and register usage.
8758 Millicode calls always expect their arguments in the integer argument
8759 registers, and always return their result in %r29 (ret1). They
8760 are expected to clobber their arguments, %r1, %r29, and the return
8761 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8763 This function tells reorg that references to the arguments of a
8764 millicode call do not appear to happen until after the millicode call.
8765 This allows reorg to put insns which set the argument registers into the
8766 delay slot of the millicode call -- thus they act more like traditional
8767 CALL_INSNs.
8769 Note we can not consider side effects of the insn to be delayed because
8770 the branch and link insn will clobber the return pointer. If we happened
8771 to use the return pointer in the delay slot of the call, then we lose.
8773 get_attr_type will try to recognize the given insn, so make sure to
8774 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8775 in particular. */
8777 insn_refs_are_delayed (rtx insn)
8779 return ((GET_CODE (insn) == INSN
8780 && GET_CODE (PATTERN (insn)) != SEQUENCE
8781 && GET_CODE (PATTERN (insn)) != USE
8782 && GET_CODE (PATTERN (insn)) != CLOBBER
8783 && get_attr_type (insn) == TYPE_MILLI));
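/* Editor's illustration, not from the original source: a 32-bit
   millicode multiply, for instance, looks roughly like

	ldi 7,%r26		; first argument
	bl $$mulI,%r31		; millicode call, link in %r31
	ldi 9,%r25		; second argument, in the delay slot

   with the result coming back in %r29 (ret1).  Treating the argument
   references as delayed is exactly what lets reorg move the load of
   %r25 into the delay slot, as shown.  */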
8786 /* On the HP-PA the value is found in register(s) 28(-29), unless
8787 the mode is SF or DF. Then the value is returned in fr4 (32).
8789 This must perform the same promotions as PROMOTE_MODE, else
8790 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8792 Small structures must be returned in a PARALLEL on PA64 in order
8793 to match the HP Compiler ABI. */
8796 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8798 enum machine_mode valmode;
8800 /* Aggregates with a size less than or equal to 128 bits are returned
8801 in GR 28(-29). They are left justified. The pad bits are undefined.
8802 Larger aggregates are returned in memory. */
8803 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8805 rtx loc[2];
8806 int i, offset = 0;
8807 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8809 for (i = 0; i < ub; i++)
8811 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8812 gen_rtx_REG (DImode, 28 + i),
8813 GEN_INT (offset));
8814 offset += 8;
8817 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8820 if ((INTEGRAL_TYPE_P (valtype)
8821 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8822 || POINTER_TYPE_P (valtype))
8823 valmode = word_mode;
8824 else
8825 valmode = TYPE_MODE (valtype);
8827 if (TREE_CODE (valtype) == REAL_TYPE
8828 && TYPE_MODE (valtype) != TFmode
8829 && !TARGET_SOFT_FLOAT)
8830 return gen_rtx_REG (valmode, 32);
8832 return gen_rtx_REG (valmode, 28);
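/* Editor's illustration, not from the original source: on PA64 a
   12-byte aggregate gives ub == 2, so the PARALLEL built above is
   roughly

	(parallel [(expr_list (reg:DI 28) (const_int 0))
		   (expr_list (reg:DI 29) (const_int 8))])

   i.e. the first eight bytes come back in %r28 and the rest,
   left-justified, in %r29.  */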
8835 /* Return the location of a parameter that is passed in a register or NULL
8836 if the parameter has any component that is passed in memory.
8838 This is new code and will be pushed into the net sources after
8839 further testing.
8841 ??? We might want to restructure this so that it looks more like other
8842 ports. */
8844 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8845 int named ATTRIBUTE_UNUSED)
8847 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8848 int alignment = 0;
8849 int arg_size;
8850 int fpr_reg_base;
8851 int gpr_reg_base;
8852 rtx retval;
8854 if (mode == VOIDmode)
8855 return NULL_RTX;
8857 arg_size = FUNCTION_ARG_SIZE (mode, type);
8859 /* If this arg would be passed partially or totally on the stack, then
8860 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8861 handle arguments which are split between regs and stack slots if
8862 the ABI mandates split arguments. */
8863 if (! TARGET_64BIT)
8865 /* The 32-bit ABI does not split arguments. */
8866 if (cum->words + arg_size > max_arg_words)
8867 return NULL_RTX;
8869 else
8871 if (arg_size > 1)
8872 alignment = cum->words & 1;
8873 if (cum->words + alignment >= max_arg_words)
8874 return NULL_RTX;
8877 /* The 32bit ABIs and the 64bit ABIs are rather different,
8878 particularly in their handling of FP registers. We might
8879 be able to cleverly share code between them, but I'm not
8880 going to bother in the hope that splitting them up results
8881 in code that is more easily understood. */
8883 if (TARGET_64BIT)
8885 /* Advance the base registers to their current locations.
8887 Remember, gprs grow towards smaller register numbers while
8888 fprs grow to higher register numbers. Also remember that
8889 although FP regs are 32-bit addressable, we pretend that
8890 the registers are 64-bits wide. */
8891 gpr_reg_base = 26 - cum->words;
8892 fpr_reg_base = 32 + cum->words;
8894 /* Arguments wider than one word and small aggregates need special
8895 treatment. */
8896 if (arg_size > 1
8897 || mode == BLKmode
8898 || (type && AGGREGATE_TYPE_P (type)))
8900 /* Double-extended precision (80-bit), quad-precision (128-bit)
8901 and aggregates including complex numbers are aligned on
8902 128-bit boundaries. The first eight 64-bit argument slots
8903 are associated one-to-one, with general registers r26
8904 through r19, and also with floating-point registers fr4
8905 through fr11. Arguments larger than one word are always
8906 passed in general registers.
8908 Using a PARALLEL with a word mode register results in left
8909 justified data on a big-endian target. */
8911 rtx loc[8];
8912 int i, offset = 0, ub = arg_size;
8914 /* Align the base register. */
8915 gpr_reg_base -= alignment;
8917 ub = MIN (ub, max_arg_words - cum->words - alignment);
8918 for (i = 0; i < ub; i++)
8920 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8921 gen_rtx_REG (DImode, gpr_reg_base),
8922 GEN_INT (offset));
8923 gpr_reg_base -= 1;
8924 offset += 8;
8927 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8930 else
8932 /* If the argument is larger than a word, then we know precisely
8933 which registers we must use. */
8934 if (arg_size > 1)
8936 if (cum->words)
8938 gpr_reg_base = 23;
8939 fpr_reg_base = 38;
8941 else
8943 gpr_reg_base = 25;
8944 fpr_reg_base = 34;
8947 /* Structures 5 to 8 bytes in size are passed in the general
8948 registers in the same manner as other non floating-point
8949 objects. The data is right-justified and zero-extended
8950 to 64 bits. This is opposite to the normal justification
8951 used on big endian targets and requires special treatment.
8952 We now define BLOCK_REG_PADDING to pad these objects. */
8953 if (mode == BLKmode)
8955 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8956 gen_rtx_REG (DImode, gpr_reg_base),
8957 const0_rtx);
8958 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8961 else
8963 /* We have a single word (32 bits). A simple computation
8964 will get us the register #s we need. */
8965 gpr_reg_base = 26 - cum->words;
8966 fpr_reg_base = 32 + 2 * cum->words;
8970 /* Determine if the argument needs to be passed in both general and
8971 floating point registers. */
8972 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8973 /* If we are doing soft-float with portable runtime, then there
8974 is no need to worry about FP regs. */
8975 && !TARGET_SOFT_FLOAT
8976 /* The parameter must be some kind of float, else we can just
8977 pass it in integer registers. */
8978 && FLOAT_MODE_P (mode)
8979 /* The target function must not have a prototype. */
8980 && cum->nargs_prototype <= 0
8981 /* libcalls do not need to pass items in both FP and general
8982 registers. */
8983 && type != NULL_TREE
8984 /* All this hair applies to "outgoing" args only. This includes
8985 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8986 && !cum->incoming)
8987 /* Also pass outgoing floating arguments in both registers in indirect
8988 calls with the 32 bit ABI and the HP assembler since there is no
8989 way to specify the argument locations in static functions.
8990 || (!TARGET_64BIT
8991 && !TARGET_GAS
8992 && !cum->incoming
8993 && cum->indirect
8994 && FLOAT_MODE_P (mode)))
8996 retval
8997 = gen_rtx_PARALLEL
8998 (mode,
8999 gen_rtvec (2,
9000 gen_rtx_EXPR_LIST (VOIDmode,
9001 gen_rtx_REG (mode, fpr_reg_base),
9002 const0_rtx),
9003 gen_rtx_EXPR_LIST (VOIDmode,
9004 gen_rtx_REG (mode, gpr_reg_base),
9005 const0_rtx)));
9007 else
9009 /* See if we should pass this parameter in a general register. */
9010 if (TARGET_SOFT_FLOAT
9011 /* Indirect calls in the normal 32bit ABI require all arguments
9012 to be passed in general registers. */
9013 || (!TARGET_PORTABLE_RUNTIME
9014 && !TARGET_64BIT
9015 && !TARGET_ELF32
9016 && cum->indirect)
9017 /* If the parameter is not a floating point parameter, then
9018 it belongs in GPRs. */
9019 || !FLOAT_MODE_P (mode))
9020 retval = gen_rtx_REG (mode, gpr_reg_base);
9021 else
9022 retval = gen_rtx_REG (mode, fpr_reg_base);
9024 return retval;
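/* Editor's sketch, not part of pa.c: the PA64 slot bookkeeping above
   boils down to two hypothetical helpers -- 64-bit argument slot N
   (0-based, N < 8) maps to GR 26-N and, for floats, to hard register
   32+N (fr4 through fr11):  */
#if 0
static int
pa64_gpr_for_slot_sketch (int slot)
{
  return 26 - slot;	/* r26, r25, ..., r19  */
}

static int
pa64_fpr_for_slot_sketch (int slot)
{
  return 32 + slot;	/* hard regs 32..39, i.e. fr4..fr11  */
}
#endif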
9028 /* If this arg would be passed totally in registers or totally on the stack,
9029 then this routine should return zero. It is currently called only for
9030 the 64-bit target. */
9032 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9033 tree type, int named ATTRIBUTE_UNUSED)
9035 unsigned int max_arg_words = 8;
9036 unsigned int offset = 0;
9038 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9039 offset = 1;
9041 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9042 /* Arg fits fully into registers. */
9043 return 0;
9044 else if (cum->words + offset >= max_arg_words)
9045 /* Arg fully on the stack. */
9046 return 0;
9047 else
9048 /* Arg is split. */
9049 return max_arg_words - cum->words - offset;
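/* Editor's illustration, not from the original source: with the eight
   64-bit argument slots above, a 4-word argument starting at
   cum->words == 6 needs no alignment padding (6 is even), does not fit
   entirely in registers (6 + 4 > 8) and has not already spilled
   (6 < 8), so 8 - 6 - 0 == 2 words are passed in registers and the
   remaining two go on the stack.  */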
9053 /* Return 1 if this is a comparison operator. This allows the use of
9054 MATCH_OPERATOR to recognize all the branch insns. */
9057 cmpib_comparison_operator (rtx op, enum machine_mode mode)
9059 return ((mode == VOIDmode || GET_MODE (op) == mode)
9060 && (GET_CODE (op) == EQ
9061 || GET_CODE (op) == NE
9062 || GET_CODE (op) == GT
9063 || GET_CODE (op) == GTU
9064 || GET_CODE (op) == GE
9065 || GET_CODE (op) == LT
9066 || GET_CODE (op) == LE
9067 || GET_CODE (op) == LEU));
9070 /* On hpux10, the linker will give an error if we have a reference
9071 in the read-only data section to a symbol defined in a shared
9072 library. Therefore, expressions that might require a reloc can
9073 not be placed in the read-only data section. */
9075 static void
9076 pa_select_section (tree exp, int reloc,
9077 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9079 if (TREE_CODE (exp) == VAR_DECL
9080 && TREE_READONLY (exp)
9081 && !TREE_THIS_VOLATILE (exp)
9082 && DECL_INITIAL (exp)
9083 && (DECL_INITIAL (exp) == error_mark_node
9084 || TREE_CONSTANT (DECL_INITIAL (exp)))
9085 && !reloc)
9086 readonly_data_section ();
9087 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
9088 && !reloc)
9089 readonly_data_section ();
9090 else
9091 data_section ();
9094 static void
9095 pa_globalize_label (FILE *stream, const char *name)
9097 /* We only handle DATA objects here, functions are globalized in
9098 ASM_DECLARE_FUNCTION_NAME. */
9099 if (! FUNCTION_NAME_P (name))
9101 fputs ("\t.EXPORT ", stream);
9102 assemble_name (stream, name);
9103 fputs (",DATA\n", stream);
9107 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9109 static rtx
9110 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9111 int incoming ATTRIBUTE_UNUSED)
9113 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9116 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9118 bool
9119 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9121 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9122 PA64 ABI says that objects larger than 128 bits are returned in memory.
9123 Note, int_size_in_bytes can return -1 if the size of the object is
9124 variable or larger than the maximum value that can be expressed as
9125 a HOST_WIDE_INT. It can also return zero for an empty type. The
9126 simplest way to handle variable and empty types is to pass them in
9127 memory. This avoids problems in defining the boundaries of argument
9128 slots, allocating registers, etc. */
9129 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9130 || int_size_in_bytes (type) <= 0);
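/* Editor's illustration, not from the original source: a 16-byte
   struct is therefore returned in memory on the 32-bit target
   (16 > 8) but in registers on PA64 (16 <= 16), while a
   variable-sized object, for which int_size_in_bytes returns -1,
   always goes through memory.  */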
9133 #include "gt-pa.h"