1 /* Subroutines for insn-output.cc for HPPA.
2 Copyright (C) 1992-2024 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
41 #include "insn-attr.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
58 /* This file should be included last. */
59 #include "target-def.h"
61 /* Return nonzero if there is a bypass for the output of
62 OUT_INSN and the fp store IN_INSN. */
64 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
66 machine_mode store_mode
;
67 machine_mode other_mode
;
70 if (recog_memoized (in_insn
) < 0
71 || (get_attr_type (in_insn
) != TYPE_FPSTORE
72 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
73 || recog_memoized (out_insn
) < 0)
76 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
78 set
= single_set (out_insn
);
82 other_mode
= GET_MODE (SET_SRC (set
));
84 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
88 #ifndef DO_FRAME_NOTES
89 #ifdef INCOMING_RETURN_ADDR_RTX
90 #define DO_FRAME_NOTES 1
92 #define DO_FRAME_NOTES 0
96 static void pa_option_override (void);
97 static void copy_reg_pointer (rtx
, rtx
);
98 static void fix_range (const char *);
99 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
101 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
102 static bool hppa_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
103 static inline rtx
force_mode (machine_mode
, rtx
);
104 static void pa_reorg (void);
105 static void pa_combine_instructions (void);
106 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
108 static bool forward_branch_p (rtx_insn
*);
109 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
110 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
111 static int compute_cpymem_length (rtx_insn
*);
112 static int compute_clrmem_length (rtx_insn
*);
113 static bool pa_assemble_integer (rtx
, unsigned int, int);
114 static void remove_useless_addtr_insns (int);
115 static void store_reg (int, HOST_WIDE_INT
, int);
116 static void store_reg_modify (int, int, HOST_WIDE_INT
);
117 static void load_reg (int, HOST_WIDE_INT
, int);
118 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
119 static rtx
pa_function_value (const_tree
, const_tree
, bool);
120 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
121 static bool pa_function_value_regno_p (const unsigned int);
122 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED
;
123 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED
;
124 static void update_total_code_bytes (unsigned int);
125 static void pa_output_function_epilogue (FILE *);
126 static int pa_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
127 static int pa_issue_rate (void);
128 static int pa_reloc_rw_mask (void);
129 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
130 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
131 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
133 static void pa_encode_section_info (tree
, rtx
, int);
134 static const char *pa_strip_name_encoding (const char *);
135 static bool pa_function_ok_for_sibcall (tree
, tree
);
136 static void pa_globalize_label (FILE *, const char *)
138 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
139 HOST_WIDE_INT
, tree
);
140 #if !defined(USE_COLLECT2)
141 static void pa_asm_out_constructor (rtx
, int);
142 static void pa_asm_out_destructor (rtx
, int);
144 static void pa_init_builtins (void);
145 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
146 static tree
pa_builtin_decl (unsigned, bool);
147 static rtx
hppa_builtin_saveregs (void);
148 static void hppa_va_start (tree
, rtx
);
149 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
150 static bool pa_scalar_mode_supported_p (scalar_mode
);
151 static bool pa_commutative_p (const_rtx x
, int outer_code
);
152 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
153 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
154 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
155 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
156 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
157 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
158 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
159 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
160 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
161 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
162 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
163 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
164 static void output_deferred_plabels (void);
165 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
166 static void pa_file_end (void);
167 static void pa_init_libfuncs (void);
168 static rtx
pa_struct_value_rtx (tree
, int);
169 static bool pa_pass_by_reference (cumulative_args_t
,
170 const function_arg_info
&);
171 static int pa_arg_partial_bytes (cumulative_args_t
, const function_arg_info
&);
172 static void pa_function_arg_advance (cumulative_args_t
,
173 const function_arg_info
&);
174 static rtx
pa_function_arg (cumulative_args_t
, const function_arg_info
&);
175 static pad_direction
pa_function_arg_padding (machine_mode
, const_tree
);
176 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
177 static struct machine_function
* pa_init_machine_status (void);
178 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
180 secondary_reload_info
*);
181 static bool pa_secondary_memory_needed (machine_mode
,
182 reg_class_t
, reg_class_t
);
183 static void pa_extra_live_on_entry (bitmap
);
184 static machine_mode
pa_promote_function_mode (const_tree
,
188 static void pa_asm_trampoline_template (FILE *);
189 static void pa_trampoline_init (rtx
, tree
, rtx
);
190 static rtx
pa_trampoline_adjust_address (rtx
);
191 static rtx
pa_delegitimize_address (rtx
);
192 static bool pa_print_operand_punct_valid_p (unsigned char);
193 static rtx
pa_internal_arg_pointer (void);
194 static bool pa_can_eliminate (const int, const int);
195 static void pa_conditional_register_usage (void);
196 static machine_mode
pa_c_mode_for_suffix (char);
197 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
198 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
199 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
200 static unsigned int pa_section_type_flags (tree
, const char *, int);
201 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool,
202 code_helper
= ERROR_MARK
);
203 static bool pa_callee_copies (cumulative_args_t
, const function_arg_info
&);
204 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode
);
205 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode
);
206 static bool pa_modes_tieable_p (machine_mode
, machine_mode
);
207 static bool pa_can_change_mode_class (machine_mode
, machine_mode
, reg_class_t
);
208 static HOST_WIDE_INT
pa_starting_frame_offset (void);
209 static section
* pa_elf_select_rtx_section(machine_mode
, rtx
, unsigned HOST_WIDE_INT
) ATTRIBUTE_UNUSED
;
210 static void pa_atomic_assign_expand_fenv (tree
*, tree
*, tree
*);
212 /* The following extra sections are only used for SOM. */
213 static GTY(()) section
*som_readonly_data_section
;
214 static GTY(()) section
*som_one_only_readonly_data_section
;
215 static GTY(()) section
*som_one_only_data_section
;
216 static GTY(()) section
*som_tm_clone_table_section
;
218 /* Counts for the number of callee-saved general and floating point
219 registers which were saved by the current function's prologue. */
220 static int gr_saved
, fr_saved
;
222 /* Boolean indicating whether the return pointer was saved by the
223 current function's prologue. */
224 static bool rp_saved
;
226 static rtx
find_addr_reg (rtx
);
228 /* Keep track of the number of bytes we have output in the CODE subspace
229 during this compilation so we'll know when to emit inline long-calls. */
230 unsigned long total_code_bytes
;
232 /* The last address of the previous function plus the number of bytes in
233 associated thunks that have been output. This is used to determine if
234 a thunk can use an IA-relative branch to reach its target function. */
235 static unsigned int last_address
;
237 /* Variables to handle plabels that we discover are necessary at assembly
238 output time. They are output after the current function. */
239 struct GTY(()) deferred_plabel
244 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
246 static size_t n_deferred_plabels
= 0;
248 /* Initialize the GCC target structure. */
250 #undef TARGET_OPTION_OVERRIDE
251 #define TARGET_OPTION_OVERRIDE pa_option_override
253 #undef TARGET_ASM_ALIGNED_HI_OP
254 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
255 #undef TARGET_ASM_ALIGNED_SI_OP
256 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
257 #undef TARGET_ASM_ALIGNED_DI_OP
258 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
259 #undef TARGET_ASM_UNALIGNED_HI_OP
260 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
261 #undef TARGET_ASM_UNALIGNED_SI_OP
262 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
263 #undef TARGET_ASM_UNALIGNED_DI_OP
264 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
265 #undef TARGET_ASM_INTEGER
266 #define TARGET_ASM_INTEGER pa_assemble_integer
268 #undef TARGET_ASM_FUNCTION_EPILOGUE
269 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
271 #undef TARGET_FUNCTION_VALUE
272 #define TARGET_FUNCTION_VALUE pa_function_value
273 #undef TARGET_LIBCALL_VALUE
274 #define TARGET_LIBCALL_VALUE pa_libcall_value
275 #undef TARGET_FUNCTION_VALUE_REGNO_P
276 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
278 #undef TARGET_LEGITIMIZE_ADDRESS
279 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
281 #undef TARGET_SCHED_ADJUST_COST
282 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
283 #undef TARGET_SCHED_ISSUE_RATE
284 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
286 #undef TARGET_ENCODE_SECTION_INFO
287 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
288 #undef TARGET_STRIP_NAME_ENCODING
289 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
291 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
292 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
294 #undef TARGET_COMMUTATIVE_P
295 #define TARGET_COMMUTATIVE_P pa_commutative_p
297 #undef TARGET_ASM_OUTPUT_MI_THUNK
298 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
302 #undef TARGET_ASM_FILE_END
303 #define TARGET_ASM_FILE_END pa_file_end
305 #undef TARGET_ASM_RELOC_RW_MASK
306 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
308 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
309 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
311 #if !defined(USE_COLLECT2)
312 #undef TARGET_ASM_CONSTRUCTOR
313 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
314 #undef TARGET_ASM_DESTRUCTOR
315 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
318 #undef TARGET_INIT_BUILTINS
319 #define TARGET_INIT_BUILTINS pa_init_builtins
320 #undef TARGET_EXPAND_BUILTIN
321 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
322 #undef TARGET_BUILTIN_DECL
323 #define TARGET_BUILTIN_DECL pa_builtin_decl
325 #undef TARGET_REGISTER_MOVE_COST
326 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
327 #undef TARGET_RTX_COSTS
328 #define TARGET_RTX_COSTS hppa_rtx_costs
329 #undef TARGET_ADDRESS_COST
330 #define TARGET_ADDRESS_COST hppa_address_cost
332 #undef TARGET_MACHINE_DEPENDENT_REORG
333 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
335 #undef TARGET_INIT_LIBFUNCS
336 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
338 #undef TARGET_PROMOTE_FUNCTION_MODE
339 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
340 #undef TARGET_PROMOTE_PROTOTYPES
341 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
343 #undef TARGET_STRUCT_VALUE_RTX
344 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
345 #undef TARGET_RETURN_IN_MEMORY
346 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
347 #undef TARGET_MUST_PASS_IN_STACK
348 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
349 #undef TARGET_PASS_BY_REFERENCE
350 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
351 #undef TARGET_CALLEE_COPIES
352 #define TARGET_CALLEE_COPIES pa_callee_copies
353 #undef TARGET_ARG_PARTIAL_BYTES
354 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
355 #undef TARGET_FUNCTION_ARG
356 #define TARGET_FUNCTION_ARG pa_function_arg
357 #undef TARGET_FUNCTION_ARG_ADVANCE
358 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
359 #undef TARGET_FUNCTION_ARG_PADDING
360 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
361 #undef TARGET_FUNCTION_ARG_BOUNDARY
362 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
364 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
365 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
366 #undef TARGET_EXPAND_BUILTIN_VA_START
367 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
368 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
369 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
371 #undef TARGET_SCALAR_MODE_SUPPORTED_P
372 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
374 #undef TARGET_CANNOT_FORCE_CONST_MEM
375 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
377 #undef TARGET_SECONDARY_RELOAD
378 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
379 #undef TARGET_SECONDARY_MEMORY_NEEDED
380 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
382 #undef TARGET_EXTRA_LIVE_ON_ENTRY
383 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
385 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
386 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
387 #undef TARGET_TRAMPOLINE_INIT
388 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
389 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
390 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
391 #undef TARGET_DELEGITIMIZE_ADDRESS
392 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
393 #undef TARGET_INTERNAL_ARG_POINTER
394 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
395 #undef TARGET_CAN_ELIMINATE
396 #define TARGET_CAN_ELIMINATE pa_can_eliminate
397 #undef TARGET_CONDITIONAL_REGISTER_USAGE
398 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
399 #undef TARGET_C_MODE_FOR_SUFFIX
400 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
401 #undef TARGET_ASM_FUNCTION_SECTION
402 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
404 #undef TARGET_LEGITIMATE_CONSTANT_P
405 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
406 #undef TARGET_SECTION_TYPE_FLAGS
407 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
408 #undef TARGET_LEGITIMATE_ADDRESS_P
409 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
412 #define TARGET_LRA_P hook_bool_void_false
414 #undef TARGET_HARD_REGNO_NREGS
415 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
416 #undef TARGET_HARD_REGNO_MODE_OK
417 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
418 #undef TARGET_MODES_TIEABLE_P
419 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
421 #undef TARGET_CAN_CHANGE_MODE_CLASS
422 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
424 #undef TARGET_CONSTANT_ALIGNMENT
425 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
427 #undef TARGET_STARTING_FRAME_OFFSET
428 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
430 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
431 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
433 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
434 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV pa_atomic_assign_expand_fenv
436 struct gcc_target targetm
= TARGET_INITIALIZER
;
438 /* Parse the -mfixed-range= option string. */
/* NOTE(review): this block was damaged in extraction — logical lines are
   split across physical lines, stray original line numbers are embedded,
   and interior lines (return type, braces, the dash/comma loop structure
   and error-return paths) appear to have been dropped.  The fragments
   below copy the argument string, split it on '-' and ',', decode the
   two register names, and mark the range fixed; restore from upstream
   pa.cc before compiling.  */
441 fix_range (const char *const_str
)
444 char *str
, *dash
, *comma
;
446 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
447 REG2 are either register names or register numbers. The effect
448 of this option is to mark the registers in the range from REG1 to
449 REG2 as ``fixed'' so they won't be used by the compiler. This is
450 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
/* Make a writable copy of the option argument.  */
452 i
= strlen (const_str
);
453 str
= (char *) alloca (i
+ 1);
454 memcpy (str
, const_str
, i
+ 1);
/* Locate the REG1-REG2 separator; warn when it is absent.  */
458 dash
= strchr (str
, '-');
461 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
466 comma
= strchr (dash
+ 1, ',');
/* Decode both endpoints, warning about unknown register names.  */
470 first
= decode_reg_name (str
);
473 warning (0, "unknown register name: %s", str
);
477 last
= decode_reg_name (dash
+ 1);
480 warning (0, "unknown register name: %s", dash
+ 1);
488 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
/* Mark every register in [first, last] as fixed and call-used.  */
492 for (i
= first
; i
<= last
; ++i
)
493 fixed_regs
[i
] = call_used_regs
[i
] = 1;
502 /* Check if all floating point registers have been fixed. */
503 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
/* If so, fall back to soft-float code generation.  */
508 target_flags
|= MASK_SOFT_FLOAT
;
511 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* NOTE(review): damaged extraction — statements are split across lines,
   embedded numbers are leftovers of the original line numbering, and
   several interior lines (braces, some assignments) were dropped.
   The visible logic: process deferred -mfixed-range options, diagnose
   unsupported flag combinations, then adjust target hooks and global
   flags.  Restore from upstream pa.cc before compiling.  */
514 pa_option_override (void)
517 cl_deferred_option
*opt
;
518 vec
<cl_deferred_option
> *v
519 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
/* Apply each deferred command-line option recorded by the driver.  */
522 FOR_EACH_VEC_ELT (*v
, i
, opt
)
524 switch (opt
->opt_index
)
526 case OPT_mfixed_range_
:
527 fix_range (opt
->arg
);
/* Diagnose option combinations this target cannot honor.  */
535 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
537 warning (0, "PIC code generation is not supported in the portable runtime model");
540 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
542 warning (0, "PIC code generation is not compatible with fast indirect calls");
545 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
547 warning (0, "%<-g%> is only supported when using GAS on this processor");
548 warning (0, "%<-g%> option disabled");
549 write_symbols
= NO_DEBUG
;
552 if (TARGET_64BIT
&& TARGET_HPUX
)
554 /* DWARF5 is not supported by gdb. Don't emit DWARF5 unless
555 specifically selected. */
556 if (!OPTION_SET_P (dwarf_strict
))
558 if (!OPTION_SET_P (dwarf_version
))
562 /* We only support the "big PIC" model now. And we always generate PIC
563 code when in 64bit mode. */
564 if (flag_pic
== 1 || TARGET_64BIT
)
567 /* 64-bit target is always PIE. */
571 /* Disable -freorder-blocks-and-partition as we don't support hot and
572 cold partitioning. */
573 if (flag_reorder_blocks_and_partition
)
575 inform (input_location
,
576 "%<-freorder-blocks-and-partition%> does not work "
577 "on this architecture");
578 flag_reorder_blocks_and_partition
= 0;
579 flag_reorder_blocks
= 1;
582 /* Disable -fstack-protector to suppress warning. */
583 flag_stack_protect
= 0;
585 /* We can't guarantee that .dword is available for 32-bit targets. */
586 if (UNITS_PER_WORD
== 4)
587 targetm
.asm_out
.aligned_op
.di
= NULL
;
589 /* The unaligned ops are only available when using GAS. */
592 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
593 targetm
.asm_out
.unaligned_op
.si
= NULL
;
594 targetm
.asm_out
.unaligned_op
.di
= NULL
;
/* Install the per-function machine_function allocator.  */
597 init_machine_status
= pa_init_machine_status
;
606 PA_BUILTIN_COPYSIGNQ
,
609 PA_BUILTIN_HUGE_VALQ
,
613 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
614 static GTY(()) enum insn_code pa_builtins_icode
[(int) PA_BUILTIN_max
];
616 /* Add a PA builtin function with NAME, ICODE, CODE and TYPE. Return the
617 function decl or NULL_TREE if the builtin was not added. */
/* NOTE(review): damaged extraction — the signature tail (the TYPE
   parameter), the local decl receiving add_builtin_function's result,
   and the return statement were dropped.  The surviving fragments
   register the builtin and record its decl and insn code in the
   pa_builtins / pa_builtins_icode tables, indexed by CODE.  */
620 def_builtin (const char *name
, enum insn_code icode
, enum pa_builtins code
,
624 = add_builtin_function (name
, type
, code
, BUILT_IN_MD
, NULL
, NULL_TREE
);
/* Cache the decl and matching insn code for later expansion.  */
628 pa_builtins
[code
] = t
;
629 pa_builtins_icode
[code
] = icode
;
635 /* Create builtin functions for FPU instructions. */
638 pa_fpu_init_builtins (void)
642 ftype
= build_function_type_list (unsigned_type_node
, 0);
643 def_builtin ("__builtin_get_fpsr", CODE_FOR_get_fpsr
,
644 PA_BUILTIN_GET_FPSR
, ftype
);
645 ftype
= build_function_type_list (void_type_node
, unsigned_type_node
, 0);
646 def_builtin ("__builtin_set_fpsr", CODE_FOR_set_fpsr
,
647 PA_BUILTIN_SET_FPSR
, ftype
);
/* Implement TARGET_INIT_BUILTINS (per the hook table earlier in this
   file).  NOTE(review): damaged extraction — return type, braces, the
   `tree decl`/`tree ftype` declarations for the later sections, and
   several argument-list tails were dropped.  The surviving fragments:
   register FPU builtins unless soft-float, wire fputc_unlocked to
   putc_unlocked where needed, rename isfinite entry points for HP-UX,
   and register __float128/TFmode support builtins when the HP-UX long
   double library is in use.  Restore from upstream pa.cc.  */
651 pa_init_builtins (void)
653 if (!TARGET_SOFT_FLOAT
)
654 pa_fpu_init_builtins ();
656 #ifdef DONT_HAVE_FPUTC_UNLOCKED
/* Map fputc_unlocked onto putc_unlocked on targets lacking it.  */
658 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
659 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
660 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
/* HP-UX spells isfinite as _Isfinite/_Isfinitef in its libm.  */
667 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
668 set_user_assembler_name (decl
, "_Isfinite");
669 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
670 set_user_assembler_name (decl
, "_Isfinitef");
674 if (HPUX_LONG_DOUBLE_LIBRARY
)
678 /* Under HPUX, the __float128 type is a synonym for "long double". */
679 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
682 /* TFmode support builtins. */
683 ftype
= build_function_type_list (long_double_type_node
,
684 long_double_type_node
,
686 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
687 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
688 "_U_Qfabs", NULL_TREE
);
689 TREE_READONLY (decl
) = 1;
690 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
692 ftype
= build_function_type_list (long_double_type_node
,
693 long_double_type_node
,
694 long_double_type_node
,
696 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
697 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
698 "_U_Qfcopysign", NULL_TREE
);
699 TREE_READONLY (decl
) = 1;
700 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
/* __builtin_infq / __builtin_huge_valq: nullary, expanded inline.  */
702 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
703 decl
= add_builtin_function ("__builtin_infq", ftype
,
704 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
706 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
708 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
709 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
711 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
715 /* Implement TARGET_BUILTIN_DECL. */
718 pa_builtin_decl (unsigned int code
, bool initialize_p ATTRIBUTE_UNUSED
)
720 if (code
>= PA_BUILTIN_max
)
721 return error_mark_node
;
722 return pa_builtins
[code
];
/* Generic expander for the direct-insn PA builtins: look up the insn
   code cached for this builtin, marshal TARGET and the call arguments
   into operands satisfying the insn's predicates, emit the pattern,
   and return the result (or const0_rtx for a void builtin).
   NOTE(review): damaged extraction — return type, braces, the `op`
   array and `pat`/`arg`/`arg_count` declarations, the arity switch
   skeleton, and the emit of PAT were dropped; the GEN_FCN fragments
   below are the surviving arms of that arity dispatch.  Restore from
   upstream pa.cc before compiling.  */
726 pa_expand_builtin_1 (tree exp
, rtx target
,
727 rtx subtarget ATTRIBUTE_UNUSED
,
728 machine_mode tmode ATTRIBUTE_UNUSED
,
729 int ignore ATTRIBUTE_UNUSED
)
731 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
732 enum pa_builtins code
733 = (enum pa_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
734 enum insn_code icode
= pa_builtins_icode
[code
];
735 bool nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
736 call_expr_arg_iterator iter
;
/* Ensure operand 0 (the result) matches the insn's mode/predicate;
   fall back to a fresh pseudo otherwise.  */
743 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
745 || GET_MODE (target
) != tmode
746 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
747 op
[0] = gen_reg_rtx (tmode
);
/* Expand each call argument into the matching insn operand.  */
754 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
756 const struct insn_operand_data
*insn_op
;
759 if (arg
== error_mark_node
)
763 idx
= arg_count
- !nonvoid
;
764 insn_op
= &insn_data
[icode
].operand
[idx
];
765 op
[arg_count
] = expand_normal (arg
);
/* Copy into a register of the required mode when the predicate
   rejects the expanded form.  */
767 if (! (*insn_data
[icode
].operand
[idx
].predicate
) (op
[arg_count
],
769 op
[arg_count
] = copy_to_mode_reg (insn_op
->mode
, op
[arg_count
]);
/* Arity dispatch: generate the pattern with 0..3 argument operands.  */
775 pat
= GEN_FCN (icode
) (op
[0]);
779 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
781 pat
= GEN_FCN (icode
) (op
[1]);
784 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
787 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
798 return (nonvoid
? op
[0] : const0_rtx
);
/* Implement TARGET_EXPAND_BUILTIN (per the hook table earlier in this
   file): dispatch on the builtin's function code.
   NOTE(review): damaged extraction — return type, braces, the switch
   head, the REAL_VALUE_TYPE `inf` setup for the INFQ/HUGE_VALQ case,
   and the final return were dropped.  Restore from upstream pa.cc.  */
802 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
803 machine_mode mode ATTRIBUTE_UNUSED
,
804 int ignore ATTRIBUTE_UNUSED
)
806 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
807 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
/* FPSR builtins expand directly to their insn patterns.  */
811 case PA_BUILTIN_GET_FPSR
:
812 case PA_BUILTIN_SET_FPSR
:
813 return pa_expand_builtin_1 (exp
, target
, subtarget
, mode
, ignore
);
/* fabsq/copysignq are real library calls (_U_Qfabs etc.).  */
815 case PA_BUILTIN_FABSQ
:
816 case PA_BUILTIN_COPYSIGNQ
:
817 return expand_call (exp
, target
, ignore
);
/* infq/huge_valq: materialize the infinity constant from memory.  */
819 case PA_BUILTIN_INFQ
:
820 case PA_BUILTIN_HUGE_VALQ
:
822 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
827 tmp
= const_double_from_real_value (inf
, target_mode
);
829 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
832 target
= gen_reg_rtx (target_mode
);
834 emit_move_insn (target
, tmp
);
845 /* Function to init struct machine_function.
846 This will be called, via a pointer variable,
847 from push_function_context. */
849 static struct machine_function
*
850 pa_init_machine_status (void)
852 return ggc_cleared_alloc
<machine_function
> ();
855 /* If FROM is a probable pointer register, mark TO as a probable
856 pointer register with the same pointer alignment as FROM. */
859 copy_reg_pointer (rtx to
, rtx from
)
861 if (REG_POINTER (from
))
862 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
865 /* Return 1 if X contains a symbolic expression. We know these
866 expressions will have one of a few well defined forms, so
867 we need only check those forms. */
869 pa_symbolic_expression_p (rtx x
)
872 /* Strip off any HIGH. */
873 if (GET_CODE (x
) == HIGH
)
876 return symbolic_operand (x
, VOIDmode
);
879 /* Accept any constant that can be moved in one instruction into a
882 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival
)
884 /* OK if ldo, ldil, or zdepi, can be used. */
885 return (VAL_14_BITS_P (ival
)
886 || pa_ldil_cint_p (ival
)
887 || pa_zdepi_cint_p (ival
));
890 /* True iff ldil can be used to load this CONST_INT. The least
891 significant 11 bits of the value must be zero and the value must
892 not change sign when extended from 32 to 64 bits. */
894 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival
)
896 unsigned HOST_WIDE_INT x
;
898 x
= ival
& (((unsigned HOST_WIDE_INT
) -1 << 31) | 0x7ff);
899 return x
== 0 || x
== ((unsigned HOST_WIDE_INT
) -1 << 31);
902 /* True iff zdepi can be used to generate this CONST_INT.
903 zdepi first sign extends a 5-bit signed number to a given field
904 length, then places this field anywhere in a zero. */
906 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
908 unsigned HOST_WIDE_INT lsb_mask
, t
;
910 /* This might not be obvious, but it's at least fast.
911 This function is critical; we don't have the time loops would take. */
913 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
914 /* Return true iff t is a power of two. */
915 return ((t
& (t
- 1)) == 0);
918 /* True iff depi or extru can be used to compute (reg & mask).
919 Accept bit pattern like these:
924 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
927 mask
+= mask
& -mask
;
928 return (mask
& (mask
- 1)) == 0;
931 /* True iff depi can be used to compute (reg | MASK). */
933 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
935 mask
+= mask
& -mask
;
936 return (mask
& (mask
- 1)) == 0;
939 /* Legitimize PIC addresses. If the address is already
940 position-independent, we return ORIG. Newly generated
941 position-independent addresses go to REG. If we need more
942 than one register, we lose. */
/* NOTE(review): this block was damaged in extraction — logical lines
   are split, stray original line numbers are embedded, and interior
   lines (return type, braces, the `pic_ref`/`insn`/`tmp_reg`/`base`
   declarations, several returns) were dropped.  The surviving
   fragments handle three cases: pic labels, SYMBOL_REFs loaded via
   the DLT, and CONST expressions split into base + offset.  Restore
   from upstream pa.cc before compiling.  */
945 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
949 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
951 /* Labels need special handling. */
952 if (pic_label_operand (orig
, mode
))
956 /* We do not want to go through the movXX expanders here since that
957 would create recursion.
959 Nor do we really want to call a generator for a named pattern
960 since that requires multiple patterns if we want to support
963 So instead we just emit the raw set, which avoids the movXX
964 expanders completely. */
965 mark_reg_pointer (reg
, BITS_PER_UNIT
);
966 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
968 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
969 add_reg_note (insn
, REG_EQUAL
, orig
);
971 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
972 and update LABEL_NUSES because this is not done automatically. */
973 if (reload_in_progress
|| reload_completed
)
975 /* Extract LABEL_REF. */
976 if (GET_CODE (orig
) == CONST
)
977 orig
= XEXP (XEXP (orig
, 0), 0);
978 /* Extract CODE_LABEL. */
979 orig
= XEXP (orig
, 0);
980 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
981 /* Make sure we have label and not a note. */
983 LABEL_NUSES (orig
)++;
985 crtl
->uses_pic_offset_table
= 1;
/* SYMBOL_REF case: load the address out of the DLT.  */
988 if (GET_CODE (orig
) == SYMBOL_REF
)
995 /* Before reload, allocate a temporary register for the intermediate
996 result. This allows the sequence to be deleted when the final
997 result is unused and the insns are trivially dead. */
998 tmp_reg
= ((reload_in_progress
|| reload_completed
)
999 ? reg
: gen_reg_rtx (Pmode
));
1001 if (function_label_operand (orig
, VOIDmode
))
1003 /* Force function label into memory in word mode. */
1004 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
1005 /* Load plabel address from DLT. */
1006 emit_move_insn (tmp_reg
,
1007 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
1008 gen_rtx_HIGH (word_mode
, orig
)));
1010 = gen_const_mem (Pmode
,
1011 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
1012 gen_rtx_UNSPEC (Pmode
,
1013 gen_rtvec (1, orig
),
1014 UNSPEC_DLTIND14R
)));
1015 emit_move_insn (reg
, pic_ref
);
1016 /* Now load address of function descriptor. */
1017 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
1021 /* Load symbol reference from DLT. */
1022 emit_move_insn (tmp_reg
,
1023 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
1024 gen_rtx_HIGH (word_mode
, orig
)));
1026 = gen_const_mem (Pmode
,
1027 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
1028 gen_rtx_UNSPEC (Pmode
,
1029 gen_rtvec (1, orig
),
1030 UNSPEC_DLTIND14R
)));
1033 crtl
->uses_pic_offset_table
= 1;
1034 mark_reg_pointer (reg
, BITS_PER_UNIT
);
1035 insn
= emit_move_insn (reg
, pic_ref
);
1037 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
1038 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
/* CONST case: legitimize base and offset separately, then recombine.  */
1042 else if (GET_CODE (orig
) == CONST
)
1046 if (GET_CODE (XEXP (orig
, 0)) == PLUS
1047 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
1051 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
1053 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
1054 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
1055 base
== reg
? 0 : reg
);
1057 if (GET_CODE (orig
) == CONST_INT
)
1059 if (INT_14_BITS (orig
))
1060 return plus_constant (Pmode
, base
, INTVAL (orig
));
1061 orig
= force_reg (Pmode
, orig
);
1063 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
1064 /* Likewise, should we set special REG_NOTEs here? */
1070 static GTY(()) rtx gen_tls_tga
;
1073 gen_tls_get_addr (void)
1076 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
1081 hppa_tls_call (rtx arg
)
1085 ret
= gen_reg_rtx (Pmode
);
1086 emit_library_call_value (gen_tls_get_addr (), ret
,
1087 LCT_CONST
, Pmode
, arg
, Pmode
);
/* Legitimize a TLS symbol address per its access model (the four
   TLS_MODEL_* cases below).  NOTE(review): damaged extraction —
   return type, braces, the `rtx_insn *insn` declaration, the
   flag_pic tests selecting the *_pic patterns, `break`s, the default
   case and the final return were dropped.  Restore from upstream
   pa.cc before compiling.  */
1093 legitimize_tls_address (rtx addr
)
1095 rtx ret
, tmp
, t1
, t2
, tp
;
1098 /* Currently, we can't handle anything but a SYMBOL_REF. */
1099 if (GET_CODE (addr
) != SYMBOL_REF
)
1102 switch (SYMBOL_REF_TLS_MODEL (addr
))
/* Global dynamic: __tls_get_addr on a tgd-relocated argument.  */
1104 case TLS_MODEL_GLOBAL_DYNAMIC
:
1105 tmp
= gen_reg_rtx (Pmode
);
1107 emit_insn (gen_tgd_load_pic (tmp
, addr
));
1109 emit_insn (gen_tgd_load (tmp
, addr
));
1110 ret
= hppa_tls_call (tmp
);
/* Local dynamic: one __tls_get_addr for the module base, then add
   the per-symbol dtprel offset.  */
1113 case TLS_MODEL_LOCAL_DYNAMIC
:
1114 ret
= gen_reg_rtx (Pmode
);
1115 tmp
= gen_reg_rtx (Pmode
);
1118 emit_insn (gen_tld_load_pic (tmp
, addr
));
1120 emit_insn (gen_tld_load (tmp
, addr
));
1121 t1
= hppa_tls_call (tmp
);
1122 insn
= get_insns ();
1124 t2
= gen_reg_rtx (Pmode
);
1125 emit_libcall_block (insn
, t2
, t1
,
1126 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1128 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
/* Initial exec: thread pointer plus a GOT-loaded tprel offset.  */
1131 case TLS_MODEL_INITIAL_EXEC
:
1132 tp
= gen_reg_rtx (Pmode
);
1133 tmp
= gen_reg_rtx (Pmode
);
1134 ret
= gen_reg_rtx (Pmode
);
1135 emit_insn (gen_tp_load (tp
));
1137 emit_insn (gen_tie_load_pic (tmp
, addr
));
1139 emit_insn (gen_tie_load (tmp
, addr
));
1140 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
/* Local exec: thread pointer plus a link-time tprel offset.  */
1143 case TLS_MODEL_LOCAL_EXEC
:
1144 tp
= gen_reg_rtx (Pmode
);
1145 ret
= gen_reg_rtx (Pmode
);
1146 emit_insn (gen_tp_load (tp
));
1147 emit_insn (gen_tle_load (ret
, addr
, tp
));
1157 /* Helper for hppa_legitimize_address. Given X, return true if it
1158 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1160 This respectively represent canonical shift-add rtxs or scaled
1161 memory addresses. */
1163 mem_shadd_or_shadd_rtx_p (rtx x
)
1165 return ((GET_CODE (x
) == ASHIFT
1166 || GET_CODE (x
) == MULT
)
1167 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1168 && ((GET_CODE (x
) == ASHIFT
1169 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
1170 || (GET_CODE (x
) == MULT
1171 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
1174 /* Try machine-dependent ways of modifying an illegitimate address
1175 to be legitimate. If we find one, return the new, valid address.
1176 This macro is used in only one place: `memory_address' in explow.cc.
1178 OLDX is the address as it was before break_out_memory_refs was called.
1179 In some cases it is useful to look at this to decide what needs to be done.
1181 It is always safe for this macro to do nothing. It exists to recognize
1182 opportunities to optimize the output.
1184 For the PA, transform:
1186 memory(X + <large int>)
1190 if (<large int> & mask) >= 16
1191 Y = (<large int> & ~mask) + mask + 1 Round up.
1193 Y = (<large int> & ~mask) Round down.
1195 memory (Z + (<large int> - Y));
1197 This is for CSE to find several similar references, and only use one Z.
1199 X can either be a SYMBOL_REF or REG, but because combine cannot
1200 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1201 D will not fit in 14 bits.
1203 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1206 MODE_INT references allow displacements which fit in 14 bits, so use
1209 This relies on the fact that most mode MODE_FLOAT references will use FP
1210 registers and most mode MODE_INT references will use integer registers.
1211 (In the rare case of an FP register used in an integer MODE, we depend
1212 on secondary reloads to clean things up.)
1215 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1216 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1217 addressing modes to be used).
1219 Note that the addresses passed into hppa_legitimize_address always
1220 come from a MEM, so we only have to match the MULT form on incoming
1221 addresses. But to be future proof we also match the ASHIFT form.
1223 However, this routine always places those shift-add sequences into
1224 registers, so we have to generate the ASHIFT form as our output.
1226 Put X and Z into registers. Then put the entire expression into
1230 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1235 /* We need to canonicalize the order of operands in unscaled indexed
1236 addresses since the code that checks if an address is valid doesn't
1237 always try both orders. */
1238 if (!TARGET_NO_SPACE_REGS
1239 && GET_CODE (x
) == PLUS
1240 && GET_MODE (x
) == Pmode
1241 && REG_P (XEXP (x
, 0))
1242 && REG_P (XEXP (x
, 1))
1243 && REG_POINTER (XEXP (x
, 0))
1244 && !REG_POINTER (XEXP (x
, 1)))
1245 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1247 if (tls_referenced_p (x
))
1248 return legitimize_tls_address (x
);
1250 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1252 /* Strip off CONST. */
1253 if (GET_CODE (x
) == CONST
)
1256 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1257 That should always be safe. */
1258 if (GET_CODE (x
) == PLUS
1259 && GET_CODE (XEXP (x
, 0)) == REG
1260 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1262 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1263 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1266 /* Note we must reject symbols which represent function addresses
1267 since the assembler/linker can't handle arithmetic on plabels. */
1268 if (GET_CODE (x
) == PLUS
1269 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1270 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1271 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1272 || GET_CODE (XEXP (x
, 0)) == REG
))
1274 rtx int_part
, ptr_reg
;
1276 int offset
= INTVAL (XEXP (x
, 1));
1279 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1280 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1282 /* Choose which way to round the offset. Round up if we
1283 are >= halfway to the next boundary. */
1284 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1285 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1287 newoffset
= (offset
& ~ mask
);
1289 /* If the newoffset will not fit in 14 bits (ldo), then
1290 handling this would take 4 or 5 instructions (2 to load
1291 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1292 add the new offset and the SYMBOL_REF.) Combine cannot
1293 handle 4->2 or 5->2 combinations, so do not create
1295 if (! VAL_14_BITS_P (newoffset
)
1296 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1298 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1301 gen_rtx_HIGH (Pmode
, const_part
));
1304 gen_rtx_LO_SUM (Pmode
,
1305 tmp_reg
, const_part
));
1309 if (! VAL_14_BITS_P (newoffset
))
1310 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1312 int_part
= GEN_INT (newoffset
);
1314 ptr_reg
= force_reg (Pmode
,
1315 gen_rtx_PLUS (Pmode
,
1316 force_reg (Pmode
, XEXP (x
, 0)),
1319 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1322 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1324 if (GET_CODE (x
) == PLUS
1325 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1326 && (OBJECT_P (XEXP (x
, 1))
1327 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1328 && GET_CODE (XEXP (x
, 1)) != CONST
)
1330 /* If we were given a MULT, we must fix the constant
1331 as we're going to create the ASHIFT form. */
1332 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1333 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1334 shift_val
= exact_log2 (shift_val
);
1338 if (GET_CODE (reg1
) != REG
)
1339 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1341 reg2
= XEXP (XEXP (x
, 0), 0);
1342 if (GET_CODE (reg2
) != REG
)
1343 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1345 return force_reg (Pmode
,
1346 gen_rtx_PLUS (Pmode
,
1347 gen_rtx_ASHIFT (Pmode
, reg2
,
1348 GEN_INT (shift_val
)),
1352 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1354 Only do so for floating point modes since this is more speculative
1355 and we lose if it's an integer store. */
1356 if (GET_CODE (x
) == PLUS
1357 && GET_CODE (XEXP (x
, 0)) == PLUS
1358 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1359 && (mode
== SFmode
|| mode
== DFmode
))
1361 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1363 /* If we were given a MULT, we must fix the constant
1364 as we're going to create the ASHIFT form. */
1365 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1366 shift_val
= exact_log2 (shift_val
);
1368 /* Try and figure out what to use as a base register. */
1369 rtx reg1
, reg2
, base
, idx
;
1371 reg1
= XEXP (XEXP (x
, 0), 1);
1376 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1377 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1378 it's a base register below. */
1379 if (GET_CODE (reg1
) != REG
)
1380 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1382 if (GET_CODE (reg2
) != REG
)
1383 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1385 /* Figure out what the base and index are. */
1387 if (GET_CODE (reg1
) == REG
1388 && REG_POINTER (reg1
))
1391 idx
= gen_rtx_PLUS (Pmode
,
1392 gen_rtx_ASHIFT (Pmode
,
1393 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1394 GEN_INT (shift_val
)),
1397 else if (GET_CODE (reg2
) == REG
1398 && REG_POINTER (reg2
))
1407 /* If the index adds a large constant, try to scale the
1408 constant so that it can be loaded with only one insn. */
1409 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1410 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1411 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1412 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1414 /* Divide the CONST_INT by the scale factor, then add it to A. */
1415 int val
= INTVAL (XEXP (idx
, 1));
1416 val
/= (1 << shift_val
);
1418 reg1
= XEXP (XEXP (idx
, 0), 0);
1419 if (GET_CODE (reg1
) != REG
)
1420 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1422 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1424 /* We can now generate a simple scaled indexed address. */
1427 (Pmode
, gen_rtx_PLUS (Pmode
,
1428 gen_rtx_ASHIFT (Pmode
, reg1
,
1429 GEN_INT (shift_val
)),
1433 /* If B + C is still a valid base register, then add them. */
1434 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1435 && INTVAL (XEXP (idx
, 1)) <= 4096
1436 && INTVAL (XEXP (idx
, 1)) >= -4096)
1440 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1442 reg2
= XEXP (XEXP (idx
, 0), 0);
1443 if (GET_CODE (reg2
) != CONST_INT
)
1444 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1446 return force_reg (Pmode
,
1447 gen_rtx_PLUS (Pmode
,
1448 gen_rtx_ASHIFT (Pmode
, reg2
,
1449 GEN_INT (shift_val
)),
1453 /* Get the index into a register, then add the base + index and
1454 return a register holding the result. */
1456 /* First get A into a register. */
1457 reg1
= XEXP (XEXP (idx
, 0), 0);
1458 if (GET_CODE (reg1
) != REG
)
1459 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1461 /* And get B into a register. */
1462 reg2
= XEXP (idx
, 1);
1463 if (GET_CODE (reg2
) != REG
)
1464 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1466 reg1
= force_reg (Pmode
,
1467 gen_rtx_PLUS (Pmode
,
1468 gen_rtx_ASHIFT (Pmode
, reg1
,
1469 GEN_INT (shift_val
)),
1472 /* Add the result to our base register and return. */
1473 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1477 /* Uh-oh. We might have an address for x[n-100000]. This needs
1478 special handling to avoid creating an indexed memory address
1479 with x-100000 as the base.
1481 If the constant part is small enough, then it's still safe because
1482 there is a guard page at the beginning and end of the data segment.
1484 Scaled references are common enough that we want to try and rearrange the
1485 terms so that we can use indexing for these addresses too. Only
1486 do the optimization for floatint point modes. */
1488 if (GET_CODE (x
) == PLUS
1489 && pa_symbolic_expression_p (XEXP (x
, 1)))
1491 /* Ugly. We modify things here so that the address offset specified
1492 by the index expression is computed first, then added to x to form
1493 the entire address. */
1495 rtx regx1
, regx2
, regy1
, regy2
, y
;
1497 /* Strip off any CONST. */
1499 if (GET_CODE (y
) == CONST
)
1502 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1504 /* See if this looks like
1505 (plus (mult (reg) (mem_shadd_const))
1506 (const (plus (symbol_ref) (const_int))))
1508 Where const_int is small. In that case the const
1509 expression is a valid pointer for indexing.
1511 If const_int is big, but can be divided evenly by shadd_const
1512 and added to (reg). This allows more scaled indexed addresses. */
1513 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1514 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1515 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1516 && INTVAL (XEXP (y
, 1)) >= -4096
1517 && INTVAL (XEXP (y
, 1)) <= 4095)
1519 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1521 /* If we were given a MULT, we must fix the constant
1522 as we're going to create the ASHIFT form. */
1523 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1524 shift_val
= exact_log2 (shift_val
);
1529 if (GET_CODE (reg1
) != REG
)
1530 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1532 reg2
= XEXP (XEXP (x
, 0), 0);
1533 if (GET_CODE (reg2
) != REG
)
1534 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1538 gen_rtx_PLUS (Pmode
,
1539 gen_rtx_ASHIFT (Pmode
,
1541 GEN_INT (shift_val
)),
1544 else if ((mode
== DFmode
|| mode
== SFmode
)
1545 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1546 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1547 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1548 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1550 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1552 /* If we were given a MULT, we must fix the constant
1553 as we're going to create the ASHIFT form. */
1554 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1555 shift_val
= exact_log2 (shift_val
);
1558 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1559 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1560 regx2
= XEXP (XEXP (x
, 0), 0);
1561 if (GET_CODE (regx2
) != REG
)
1562 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1563 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1567 gen_rtx_PLUS (Pmode
,
1568 gen_rtx_ASHIFT (Pmode
, regx2
,
1569 GEN_INT (shift_val
)),
1570 force_reg (Pmode
, XEXP (y
, 0))));
1572 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1573 && INTVAL (XEXP (y
, 1)) >= -4096
1574 && INTVAL (XEXP (y
, 1)) <= 4095)
1576 /* This is safe because of the guard page at the
1577 beginning and end of the data space. Just
1578 return the original address. */
1583 /* Doesn't look like one we can optimize. */
1584 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1585 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1586 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1587 regx1
= force_reg (Pmode
,
1588 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1590 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1598 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1600 Compute extra cost of moving data between one register class
1603 Make moves from SAR so expensive they should never happen. We used to
1604 have 0xffff here, but that generates overflow in rare cases.
1606 Copies involving a FP register and a non-FP register are relatively
1607 expensive because they must go through memory.
1609 Other copies are reasonably cheap. */
1612 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1613 reg_class_t from
, reg_class_t to
)
1615 if (from
== SHIFT_REGS
)
1617 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1619 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1620 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1626 /* For the HPPA, REG and REG+CONST is cost 0
1627 and addresses involving symbolic constants are cost 2.
1629 PIC addresses are very expensive.
1631 It is no coincidence that this has the same structure
1632 as pa_legitimate_address_p. */
1635 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1636 addr_space_t as ATTRIBUTE_UNUSED
,
1637 bool speed ATTRIBUTE_UNUSED
)
1639 switch (GET_CODE (X
))
1652 /* Return true if X represents a (possibly non-canonical) shNadd pattern.
1653 The machine mode of X is known to be SImode or DImode. */
1656 hppa_rtx_costs_shadd_p (rtx x
)
1658 if (GET_CODE (x
) != PLUS
1659 || !REG_P (XEXP (x
, 1)))
1661 rtx op0
= XEXP (x
, 0);
1662 if (GET_CODE (op0
) == ASHIFT
1663 && CONST_INT_P (XEXP (op0
, 1))
1664 && REG_P (XEXP (op0
, 0)))
1666 unsigned HOST_WIDE_INT x
= UINTVAL (XEXP (op0
, 1));
1667 return x
== 1 || x
== 2 || x
== 3;
1669 if (GET_CODE (op0
) == MULT
1670 && CONST_INT_P (XEXP (op0
, 1))
1671 && REG_P (XEXP (op0
, 0)))
1673 unsigned HOST_WIDE_INT x
= UINTVAL (XEXP (op0
, 1));
1674 return x
== 2 || x
== 4 || x
== 8;
1679 /* Compute a (partial) cost for rtx X. Return true if the complete
1680 cost has been computed, and false if subexpressions should be
1681 scanned. In either case, *TOTAL contains the cost result. */
1684 hppa_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
1685 int opno ATTRIBUTE_UNUSED
,
1686 int *total
, bool speed
)
1688 int code
= GET_CODE (x
);
1693 if (outer_code
== SET
)
1694 *total
= COSTS_N_INSNS (1);
1695 else if (INTVAL (x
) == 0)
1697 else if (INT_14_BITS (x
))
1714 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1715 && outer_code
!= SET
)
1722 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1724 *total
= COSTS_N_INSNS (3);
1726 else if (mode
== DImode
)
1728 if (TARGET_PA_11
&& !TARGET_SOFT_FLOAT
&& !TARGET_SOFT_MULT
)
1729 *total
= COSTS_N_INSNS (25);
1731 *total
= COSTS_N_INSNS (80);
1735 if (TARGET_PA_11
&& !TARGET_SOFT_FLOAT
&& !TARGET_SOFT_MULT
)
1736 *total
= COSTS_N_INSNS (8);
1738 *total
= COSTS_N_INSNS (20);
1740 return REG_P (XEXP (x
, 0)) && REG_P (XEXP (x
, 1));
1743 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1745 *total
= COSTS_N_INSNS (14);
1753 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1755 *total
= COSTS_N_INSNS (240);
1757 *total
= COSTS_N_INSNS (60);
1758 return REG_P (XEXP (x
, 0)) && REG_P (XEXP (x
, 1));
1760 case PLUS
: /* this includes shNadd insns */
1762 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1763 *total
= COSTS_N_INSNS (3);
1764 else if (mode
== DImode
)
1768 *total
= COSTS_N_INSNS (1);
1769 /* Handle shladd,l instructions. */
1770 if (hppa_rtx_costs_shadd_p (x
))
1774 *total
= COSTS_N_INSNS (2);
1778 *total
= COSTS_N_INSNS (1);
1779 /* Handle shNadd instructions. */
1780 if (hppa_rtx_costs_shadd_p (x
))
1783 return REG_P (XEXP (x
, 0))
1784 && (REG_P (XEXP (x
, 1))
1785 || CONST_INT_P (XEXP (x
, 1)));
1790 if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1793 *total
= COSTS_N_INSNS (1);
1795 *total
= COSTS_N_INSNS (2);
1798 else if (TARGET_64BIT
)
1799 *total
= COSTS_N_INSNS (3);
1801 *total
= COSTS_N_INSNS (13);
1803 *total
= COSTS_N_INSNS (18);
1805 else if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1808 *total
= COSTS_N_INSNS (2);
1810 *total
= COSTS_N_INSNS (1);
1813 else if (TARGET_64BIT
)
1814 *total
= COSTS_N_INSNS (4);
1816 *total
= COSTS_N_INSNS (2);
1817 return REG_P (XEXP (x
, 0))
1818 && (REG_P (XEXP (x
, 1))
1819 || CONST_INT_P (XEXP (x
, 1)));
1824 if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1827 *total
= COSTS_N_INSNS (1);
1829 *total
= COSTS_N_INSNS (2);
1832 else if (TARGET_64BIT
)
1833 *total
= COSTS_N_INSNS (3);
1835 *total
= COSTS_N_INSNS (14);
1837 *total
= COSTS_N_INSNS (19);
1839 else if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1842 *total
= COSTS_N_INSNS (2);
1844 *total
= COSTS_N_INSNS (1);
1847 else if (TARGET_64BIT
)
1848 *total
= COSTS_N_INSNS (4);
1850 *total
= COSTS_N_INSNS (2);
1851 return REG_P (XEXP (x
, 0))
1852 && (REG_P (XEXP (x
, 1))
1853 || CONST_INT_P (XEXP (x
, 1)));
1858 if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1861 *total
= COSTS_N_INSNS (1);
1863 *total
= COSTS_N_INSNS (2);
1866 else if (TARGET_64BIT
)
1867 *total
= COSTS_N_INSNS (2);
1869 *total
= COSTS_N_INSNS (12);
1871 *total
= COSTS_N_INSNS (15);
1873 else if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1875 *total
= COSTS_N_INSNS (1);
1878 else if (TARGET_64BIT
)
1879 *total
= COSTS_N_INSNS (3);
1881 *total
= COSTS_N_INSNS (2);
1882 return REG_P (XEXP (x
, 0))
1883 && (REG_P (XEXP (x
, 1))
1884 || CONST_INT_P (XEXP (x
, 1)));
1891 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1892 new rtx with the correct mode. */
1894 force_mode (machine_mode mode
, rtx orig
)
1896 if (mode
== GET_MODE (orig
))
1899 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1901 return gen_rtx_REG (mode
, REGNO (orig
));
1904 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1907 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1909 return tls_referenced_p (x
);
1912 /* Emit insns to move operands[1] into operands[0].
1914 Return 1 if we have written out everything that needs to be done to
1915 do the move. Otherwise, return 0 and the caller will emit the move
1918 Note SCRATCH_REG may not be in the proper mode depending on how it
1919 will be used. This routine is responsible for creating a new copy
1920 of SCRATCH_REG in the proper mode. */
1923 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1925 rtx operand0
= operands
[0];
1926 rtx operand1
= operands
[1];
1929 /* We can only handle indexed addresses in the destination operand
1930 of floating point stores. Thus, we need to break out indexed
1931 addresses from the destination operand. */
1932 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1934 gcc_assert (can_create_pseudo_p ());
1936 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1937 operand0
= replace_equiv_address (operand0
, tem
);
1940 /* On targets with non-equivalent space registers, break out unscaled
1941 indexed addresses from the source operand before the final CSE.
1942 We have to do this because the REG_POINTER flag is not correctly
1943 carried through various optimization passes and CSE may substitute
1944 a pseudo without the pointer set for one with the pointer set. As
1945 a result, we loose various opportunities to create insns with
1946 unscaled indexed addresses. */
1947 if (!TARGET_NO_SPACE_REGS
1948 && !cse_not_expected
1949 && GET_CODE (operand1
) == MEM
1950 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1951 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1952 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1954 = replace_equiv_address (operand1
,
1955 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1958 && reload_in_progress
&& GET_CODE (operand0
) == REG
1959 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1960 operand0
= reg_equiv_mem (REGNO (operand0
));
1961 else if (scratch_reg
1962 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1963 && GET_CODE (SUBREG_REG (operand0
)) == REG
1964 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1966 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1967 the code which tracks sets/uses for delete_output_reload. */
1968 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1969 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1970 SUBREG_BYTE (operand0
));
1971 operand0
= alter_subreg (&temp
, true);
1975 && reload_in_progress
&& GET_CODE (operand1
) == REG
1976 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1977 operand1
= reg_equiv_mem (REGNO (operand1
));
1978 else if (scratch_reg
1979 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1980 && GET_CODE (SUBREG_REG (operand1
)) == REG
1981 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1983 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1984 the code which tracks sets/uses for delete_output_reload. */
1985 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1986 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1987 SUBREG_BYTE (operand1
));
1988 operand1
= alter_subreg (&temp
, true);
1991 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1992 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1993 != XEXP (operand0
, 0)))
1994 operand0
= replace_equiv_address (operand0
, tem
);
1996 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1997 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1998 != XEXP (operand1
, 0)))
1999 operand1
= replace_equiv_address (operand1
, tem
);
2001 /* Handle secondary reloads for loads/stores of FP registers from
2002 REG+D addresses where D does not fit in 5 or 14 bits, including
2003 (subreg (mem (addr))) cases, and reloads for other unsupported
2006 && FP_REG_P (operand0
)
2007 && (MEM_P (operand1
)
2008 || (GET_CODE (operand1
) == SUBREG
2009 && MEM_P (XEXP (operand1
, 0)))))
2013 if (GET_CODE (op1
) == SUBREG
)
2014 op1
= XEXP (op1
, 0);
2016 if (reg_plus_base_memory_operand (op1
, GET_MODE (op1
)))
2018 if (!(INT14_OK_STRICT
&& INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
2019 && !INT_5_BITS (XEXP (XEXP (op1
, 0), 1)))
2021 /* SCRATCH_REG will hold an address and maybe the actual data.
2022 We want it in WORD_MODE regardless of what mode it was
2023 originally given to us. */
2024 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2026 /* D might not fit in 14 bits either; for such cases load D
2027 into scratch reg. */
2028 if (!INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
2030 emit_move_insn (scratch_reg
, XEXP (XEXP (op1
, 0), 1));
2031 emit_move_insn (scratch_reg
,
2032 gen_rtx_fmt_ee (GET_CODE (XEXP (op1
, 0)),
2034 XEXP (XEXP (op1
, 0), 0),
2038 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
2039 op1
= replace_equiv_address (op1
, scratch_reg
);
2042 else if (((TARGET_ELF32
|| !TARGET_PA_20
)
2043 && symbolic_memory_operand (op1
, VOIDmode
))
2044 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1
, 0))
2045 || IS_INDEX_ADDR_P (XEXP (op1
, 0)))
2047 /* Load memory address into SCRATCH_REG. */
2048 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2049 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
2050 op1
= replace_equiv_address (op1
, scratch_reg
);
2052 emit_insn (gen_rtx_SET (operand0
, op1
));
2055 else if (scratch_reg
2056 && FP_REG_P (operand1
)
2057 && (MEM_P (operand0
)
2058 || (GET_CODE (operand0
) == SUBREG
2059 && MEM_P (XEXP (operand0
, 0)))))
2063 if (GET_CODE (op0
) == SUBREG
)
2064 op0
= XEXP (op0
, 0);
2066 if (reg_plus_base_memory_operand (op0
, GET_MODE (op0
)))
2068 if (!(INT14_OK_STRICT
&& INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
2069 && !INT_5_BITS (XEXP (XEXP (op0
, 0), 1)))
2071 /* SCRATCH_REG will hold an address and maybe the actual data.
2072 We want it in WORD_MODE regardless of what mode it was
2073 originally given to us. */
2074 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2076 /* D might not fit in 14 bits either; for such cases load D
2077 into scratch reg. */
2078 if (!INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
2080 emit_move_insn (scratch_reg
, XEXP (XEXP (op0
, 0), 1));
2081 emit_move_insn (scratch_reg
,
2082 gen_rtx_fmt_ee (GET_CODE (XEXP (op0
, 0)),
2084 XEXP (XEXP (op0
, 0), 0),
2088 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
2089 op0
= replace_equiv_address (op0
, scratch_reg
);
2092 else if (((TARGET_ELF32
|| !TARGET_PA_20
)
2093 && symbolic_memory_operand (op0
, VOIDmode
))
2094 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0
, 0))
2095 || IS_INDEX_ADDR_P (XEXP (op0
, 0)))
2097 /* Load memory address into SCRATCH_REG. */
2098 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2099 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
2100 op0
= replace_equiv_address (op0
, scratch_reg
);
2102 emit_insn (gen_rtx_SET (op0
, operand1
));
2105 /* Handle secondary reloads for loads of FP registers from constant
2106 expressions by forcing the constant into memory. For the most part,
2107 this is only necessary for SImode and DImode.
2109 Use scratch_reg to hold the address of the memory location. */
2110 else if (scratch_reg
2111 && CONSTANT_P (operand1
)
2112 && FP_REG_P (operand0
))
2114 rtx const_mem
, xoperands
[2];
2116 if (operand1
== CONST0_RTX (mode
))
2118 emit_insn (gen_rtx_SET (operand0
, operand1
));
2122 /* SCRATCH_REG will hold an address and maybe the actual data. We want
2123 it in WORD_MODE regardless of what mode it was originally given
2125 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2127 /* Force the constant into memory and put the address of the
2128 memory location into scratch_reg. */
2129 const_mem
= force_const_mem (mode
, operand1
);
2130 xoperands
[0] = scratch_reg
;
2131 xoperands
[1] = XEXP (const_mem
, 0);
2132 pa_emit_move_sequence (xoperands
, Pmode
, 0);
2134 /* Now load the destination register. */
2135 emit_insn (gen_rtx_SET (operand0
,
2136 replace_equiv_address (const_mem
, scratch_reg
)));
2139 /* Handle secondary reloads for SAR. These occur when trying to load
2140 the SAR from memory or a constant. */
2141 else if (scratch_reg
2142 && GET_CODE (operand0
) == REG
2143 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
2144 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
2145 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
2147 /* D might not fit in 14 bits either; for such cases load D into
2149 if (GET_CODE (operand1
) == MEM
2150 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
2152 /* We are reloading the address into the scratch register, so we
2153 want to make sure the scratch register is a full register. */
2154 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2156 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
2157 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
2160 XEXP (XEXP (operand1
, 0),
2164 /* Now we are going to load the scratch register from memory,
2165 we want to load it in the same width as the original MEM,
2166 which must be the same as the width of the ultimate destination,
2168 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
2170 emit_move_insn (scratch_reg
,
2171 replace_equiv_address (operand1
, scratch_reg
));
2175 /* We want to load the scratch register using the same mode as
2176 the ultimate destination. */
2177 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
2179 emit_move_insn (scratch_reg
, operand1
);
2182 /* And emit the insn to set the ultimate destination. We know that
2183 the scratch register has the same mode as the destination at this
2185 emit_move_insn (operand0
, scratch_reg
);
2189 /* Handle the most common case: storing into a register. */
2190 if (register_operand (operand0
, mode
))
2192 /* Legitimize TLS symbol references. This happens for references
2193 that aren't a legitimate constant. */
2194 if (PA_SYMBOL_REF_TLS_P (operand1
))
2195 operand1
= legitimize_tls_address (operand1
);
2197 if (register_operand (operand1
, mode
)
2198 || (GET_CODE (operand1
) == CONST_INT
2199 && pa_cint_ok_for_move (UINTVAL (operand1
)))
2200 || (operand1
== CONST0_RTX (mode
))
2201 || (GET_CODE (operand1
) == HIGH
2202 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
2203 /* Only `general_operands' can come here, so MEM is ok. */
2204 || GET_CODE (operand1
) == MEM
)
2206 /* Various sets are created during RTL generation which don't
2207 have the REG_POINTER flag correctly set. After the CSE pass,
2208 instruction recognition can fail if we don't consistently
2209 set this flag when performing register copies. This should
2210 also improve the opportunities for creating insns that use
2211 unscaled indexing. */
2212 if (REG_P (operand0
) && REG_P (operand1
))
2214 if (REG_POINTER (operand1
)
2215 && !REG_POINTER (operand0
)
2216 && !HARD_REGISTER_P (operand0
))
2217 copy_reg_pointer (operand0
, operand1
);
2220 /* When MEMs are broken out, the REG_POINTER flag doesn't
2221 get set. In some cases, we can set the REG_POINTER flag
2222 from the declaration for the MEM. */
2223 if (REG_P (operand0
)
2224 && GET_CODE (operand1
) == MEM
2225 && !REG_POINTER (operand0
))
2227 tree decl
= MEM_EXPR (operand1
);
2229 /* Set the register pointer flag and register alignment
2230 if the declaration for this memory reference is a
2236 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2238 if (TREE_CODE (decl
) == COMPONENT_REF
)
2239 decl
= TREE_OPERAND (decl
, 1);
2241 type
= TREE_TYPE (decl
);
2242 type
= strip_array_types (type
);
2244 if (POINTER_TYPE_P (type
))
2245 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2249 emit_insn (gen_rtx_SET (operand0
, operand1
));
2253 else if (GET_CODE (operand0
) == MEM
)
2255 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
2256 && !(reload_in_progress
|| reload_completed
))
2258 rtx temp
= gen_reg_rtx (DFmode
);
2260 emit_insn (gen_rtx_SET (temp
, operand1
));
2261 emit_insn (gen_rtx_SET (operand0
, temp
));
2264 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
2266 /* Run this case quickly. */
2267 emit_insn (gen_rtx_SET (operand0
, operand1
));
2270 if (! (reload_in_progress
|| reload_completed
))
2272 operands
[0] = validize_mem (operand0
);
2273 operands
[1] = operand1
= force_reg (mode
, operand1
);
2277 /* Simplify the source if we need to.
2278 Note we do have to handle function labels here, even though we do
2279 not consider them legitimate constants. Loop optimizations can
2280 call the emit_move_xxx with one as a source. */
2281 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
2282 || (GET_CODE (operand1
) == HIGH
2283 && symbolic_operand (XEXP (operand1
, 0), mode
))
2284 || function_label_operand (operand1
, VOIDmode
)
2285 || tls_referenced_p (operand1
))
2289 if (GET_CODE (operand1
) == HIGH
)
2292 operand1
= XEXP (operand1
, 0);
2294 if (symbolic_operand (operand1
, mode
))
2296 /* Argh. The assembler and linker can't handle arithmetic
2299 So we force the plabel into memory, load operand0 from
2300 the memory location, then add in the constant part. */
2301 if ((GET_CODE (operand1
) == CONST
2302 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2303 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
2305 || function_label_operand (operand1
, VOIDmode
))
2307 rtx temp
, const_part
;
2309 /* Figure out what (if any) scratch register to use. */
2310 if (reload_in_progress
|| reload_completed
)
2312 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
2313 /* SCRATCH_REG will hold an address and maybe the actual
2314 data. We want it in WORD_MODE regardless of what mode it
2315 was originally given to us. */
2316 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2319 scratch_reg
= gen_reg_rtx (Pmode
);
2321 if (GET_CODE (operand1
) == CONST
)
2323 /* Save away the constant part of the expression. */
2324 const_part
= XEXP (XEXP (operand1
, 0), 1);
2325 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
2327 /* Force the function label into memory. */
2328 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2332 /* No constant part. */
2333 const_part
= NULL_RTX
;
2335 /* Force the function label into memory. */
2336 temp
= force_const_mem (mode
, operand1
);
2340 /* Get the address of the memory location. PIC-ify it if
2342 temp
= XEXP (temp
, 0);
2344 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2346 /* Put the address of the memory location into our destination
2349 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2351 /* Now load from the memory location into our destination
2353 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2354 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2356 /* And add back in the constant part. */
2357 if (const_part
!= NULL_RTX
)
2358 expand_inc (operand0
, const_part
);
2368 if (reload_in_progress
|| reload_completed
)
2370 temp
= scratch_reg
? scratch_reg
: operand0
;
2371 /* TEMP will hold an address and maybe the actual
2372 data. We want it in WORD_MODE regardless of what mode it
2373 was originally given to us. */
2374 temp
= force_mode (word_mode
, temp
);
2377 temp
= gen_reg_rtx (Pmode
);
2379 /* Force (const (plus (symbol) (const_int))) to memory
2380 if the const_int will not fit in 14 bits. Although
2381 this requires a relocation, the instruction sequence
2382 needed to load the value is shorter. */
2383 if (GET_CODE (operand1
) == CONST
2384 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2385 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2386 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2388 rtx x
, m
= force_const_mem (mode
, operand1
);
2390 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2391 x
= replace_equiv_address (m
, x
);
2392 insn
= emit_move_insn (operand0
, x
);
2396 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2397 if (REG_P (operand0
) && REG_P (operands
[1]))
2398 copy_reg_pointer (operand0
, operands
[1]);
2399 insn
= emit_move_insn (operand0
, operands
[1]);
2402 /* Put a REG_EQUAL note on this insn. */
2403 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2405 /* On the HPPA, references to data space are supposed to use dp,
2406 register 27, but showing it in the RTL inhibits various cse
2407 and loop optimizations. */
2412 if (reload_in_progress
|| reload_completed
)
2414 temp
= scratch_reg
? scratch_reg
: operand0
;
2415 /* TEMP will hold an address and maybe the actual
2416 data. We want it in WORD_MODE regardless of what mode it
2417 was originally given to us. */
2418 temp
= force_mode (word_mode
, temp
);
2421 temp
= gen_reg_rtx (mode
);
2423 /* Loading a SYMBOL_REF into a register makes that register
2424 safe to be used as the base in an indexed address.
2426 Don't mark hard registers though. That loses. */
2427 if (GET_CODE (operand0
) == REG
2428 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2429 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2430 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2431 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2434 set
= gen_rtx_SET (operand0
, temp
);
2436 set
= gen_rtx_SET (operand0
,
2437 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2439 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2445 else if (tls_referenced_p (operand1
))
2450 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2452 addend
= XEXP (XEXP (tmp
, 0), 1);
2453 tmp
= XEXP (XEXP (tmp
, 0), 0);
2456 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2457 tmp
= legitimize_tls_address (tmp
);
2460 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2461 tmp
= force_operand (tmp
, operands
[0]);
2465 else if (GET_CODE (operand1
) != CONST_INT
2466 || !pa_cint_ok_for_move (UINTVAL (operand1
)))
2471 HOST_WIDE_INT value
= 0;
2472 HOST_WIDE_INT insv
= 0;
2475 if (GET_CODE (operand1
) == CONST_INT
)
2476 value
= INTVAL (operand1
);
2479 && GET_CODE (operand1
) == CONST_INT
2480 && HOST_BITS_PER_WIDE_INT
> 32
2481 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2485 /* Extract the low order 32 bits of the value and sign extend.
2486 If the new value is the same as the original value, we can
2487 can use the original value as-is. If the new value is
2488 different, we use it and insert the most-significant 32-bits
2489 of the original value into the final result. */
2490 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2491 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2494 #if HOST_BITS_PER_WIDE_INT > 32
2495 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2499 operand1
= GEN_INT (nval
);
2503 if (reload_in_progress
|| reload_completed
)
2504 temp
= scratch_reg
? scratch_reg
: operand0
;
2506 temp
= gen_reg_rtx (mode
);
2508 /* We don't directly split DImode constants on 32-bit targets
2509 because PLUS uses an 11-bit immediate and the insn sequence
2510 generated is not as efficient as the one using HIGH/LO_SUM. */
2511 if (GET_CODE (operand1
) == CONST_INT
2512 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2513 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2516 /* Directly break constant into high and low parts. This
2517 provides better optimization opportunities because various
2518 passes recognize constants split with PLUS but not LO_SUM.
2519 We use a 14-bit signed low part except when the addition
2520 of 0x4000 to the high part might change the sign of the
2522 HOST_WIDE_INT low
= value
& 0x3fff;
2523 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2527 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2535 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2536 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2540 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2541 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2544 insn
= emit_move_insn (operands
[0], operands
[1]);
2546 /* Now insert the most significant 32 bits of the value
2547 into the register. When we don't have a second register
2548 available, it could take up to nine instructions to load
2549 a 64-bit integer constant. Prior to reload, we force
2550 constants that would take more than three instructions
2551 to load to the constant pool. During and after reload,
2552 we have to handle all possible values. */
2555 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2556 register and the value to be inserted is outside the
2557 range that can be loaded with three depdi instructions. */
2558 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2560 operand1
= GEN_INT (insv
);
2562 emit_insn (gen_rtx_SET (temp
,
2563 gen_rtx_HIGH (mode
, operand1
)));
2564 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2566 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2569 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2574 int len
= 5, pos
= 27;
2576 /* Insert the bits using the depdi instruction. */
2579 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2580 HOST_WIDE_INT sign
= v5
< 0;
2582 /* Left extend the insertion. */
2583 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2584 while (pos
> 0 && (insv
& 1) == sign
)
2586 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2592 insn
= emit_insn (gen_insvdi (operand0
,
2597 insn
= emit_insn (gen_insvsi (operand0
,
2602 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2608 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2613 /* Now have insn-emit do whatever it normally does. */
2617 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2618 it will need a link/runtime reloc). */
2621 pa_reloc_needed (tree exp
)
2625 switch (TREE_CODE (exp
))
2630 case POINTER_PLUS_EXPR
:
2633 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2634 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2638 case NON_LVALUE_EXPR
:
2639 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2645 unsigned HOST_WIDE_INT ix
;
2647 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2649 reloc
|= pa_reloc_needed (value
);
2663 /* Return the best assembler insn template
2664 for moving operands[1] into operands[0] as a fullword. */
2666 pa_singlemove_string (rtx
*operands
)
2668 HOST_WIDE_INT intval
;
2670 if (GET_CODE (operands
[0]) == MEM
)
2671 return "stw %r1,%0";
2672 if (GET_CODE (operands
[1]) == MEM
)
2674 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2678 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2680 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2682 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands
[1]), i
);
2684 operands
[1] = GEN_INT (i
);
2685 /* Fall through to CONST_INT case. */
2687 if (GET_CODE (operands
[1]) == CONST_INT
)
2689 intval
= INTVAL (operands
[1]);
2691 if (VAL_14_BITS_P (intval
))
2693 else if ((intval
& 0x7ff) == 0)
2694 return "ldil L'%1,%0";
2695 else if (pa_zdepi_cint_p (intval
))
2696 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2698 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2700 return "copy %1,%0";
2704 /* Compute position (in OP[1]) and width (in OP[2])
2705 useful for copying IMM to a register using the zdepi
2706 instructions. Store the immediate value to insert in OP[0]. */
2708 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2712 /* Find the least significant set bit in IMM. */
2713 for (lsb
= 0; lsb
< 32; lsb
++)
2720 /* Choose variants based on *sign* of the 5-bit field. */
2721 if ((imm
& 0x10) == 0)
2722 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2725 /* Find the width of the bitstring in IMM. */
2726 for (len
= 5; len
< 32 - lsb
; len
++)
2728 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2732 /* Sign extend IMM as a 5-bit value. */
2733 imm
= (imm
& 0xf) - 0x10;
2741 /* Compute position (in OP[1]) and width (in OP[2])
2742 useful for copying IMM to a register using the depdi,z
2743 instructions. Store the immediate value to insert in OP[0]. */
2746 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2748 int lsb
, len
, maxlen
;
2750 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2752 /* Find the least significant set bit in IMM. */
2753 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2760 /* Choose variants based on *sign* of the 5-bit field. */
2761 if ((imm
& 0x10) == 0)
2762 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2765 /* Find the width of the bitstring in IMM. */
2766 for (len
= 5; len
< maxlen
- lsb
; len
++)
2768 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2772 /* Extend length if host is narrow and IMM is negative. */
2773 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2776 /* Sign extend IMM as a 5-bit value. */
2777 imm
= (imm
& 0xf) - 0x10;
2785 /* Output assembler code to perform a doubleword move insn
2786 with operands OPERANDS. */
2789 pa_output_move_double (rtx
*operands
)
2791 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2793 rtx addreg0
= 0, addreg1
= 0;
2796 /* First classify both operands. */
2798 if (REG_P (operands
[0]))
2800 else if (offsettable_memref_p (operands
[0]))
2802 else if (GET_CODE (operands
[0]) == MEM
)
2807 if (REG_P (operands
[1]))
2809 else if (CONSTANT_P (operands
[1]))
2811 else if (offsettable_memref_p (operands
[1]))
2813 else if (GET_CODE (operands
[1]) == MEM
)
2818 /* Check for the cases that the operand constraints are not
2819 supposed to allow to happen. */
2820 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2822 /* Handle copies between general and floating registers. */
2824 if (optype0
== REGOP
&& optype1
== REGOP
2825 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2827 if (FP_REG_P (operands
[0]))
2829 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2830 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2831 return "{fldds|fldd} -16(%%sp),%0";
2835 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2836 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2837 return "{ldws|ldw} -12(%%sp),%R0";
2841 /* Handle auto decrementing and incrementing loads and stores
2842 specifically, since the structure of the function doesn't work
2843 for them without major modification. Do it better when we learn
2844 this port about the general inc/dec addressing of PA.
2845 (This was written by tege. Chide him if it doesn't work.) */
2847 if (optype0
== MEMOP
)
2849 /* We have to output the address syntax ourselves, since print_operand
2850 doesn't deal with the addresses we want to use. Fix this later. */
2852 rtx addr
= XEXP (operands
[0], 0);
2853 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2855 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2857 operands
[0] = XEXP (addr
, 0);
2858 gcc_assert (GET_CODE (operands
[1]) == REG
2859 && GET_CODE (operands
[0]) == REG
);
2861 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2863 /* No overlap between high target register and address
2864 register. (We do this in a non-obvious way to
2865 save a register file writeback) */
2866 if (GET_CODE (addr
) == POST_INC
)
2867 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2868 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2870 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2872 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2874 operands
[0] = XEXP (addr
, 0);
2875 gcc_assert (GET_CODE (operands
[1]) == REG
2876 && GET_CODE (operands
[0]) == REG
);
2878 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2879 /* No overlap between high target register and address
2880 register. (We do this in a non-obvious way to save a
2881 register file writeback) */
2882 if (GET_CODE (addr
) == PRE_INC
)
2883 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2884 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2887 if (optype1
== MEMOP
)
2889 /* We have to output the address syntax ourselves, since print_operand
2890 doesn't deal with the addresses we want to use. Fix this later. */
2892 rtx addr
= XEXP (operands
[1], 0);
2893 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2895 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2897 operands
[1] = XEXP (addr
, 0);
2898 gcc_assert (GET_CODE (operands
[0]) == REG
2899 && GET_CODE (operands
[1]) == REG
);
2901 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2903 /* No overlap between high target register and address
2904 register. (We do this in a non-obvious way to
2905 save a register file writeback) */
2906 if (GET_CODE (addr
) == POST_INC
)
2907 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2908 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2912 /* This is an undefined situation. We should load into the
2913 address register *and* update that register. Probably
2914 we don't need to handle this at all. */
2915 if (GET_CODE (addr
) == POST_INC
)
2916 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2917 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2920 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2922 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2924 operands
[1] = XEXP (addr
, 0);
2925 gcc_assert (GET_CODE (operands
[0]) == REG
2926 && GET_CODE (operands
[1]) == REG
);
2928 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2930 /* No overlap between high target register and address
2931 register. (We do this in a non-obvious way to
2932 save a register file writeback) */
2933 if (GET_CODE (addr
) == PRE_INC
)
2934 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2935 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2939 /* This is an undefined situation. We should load into the
2940 address register *and* update that register. Probably
2941 we don't need to handle this at all. */
2942 if (GET_CODE (addr
) == PRE_INC
)
2943 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2944 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2947 else if (GET_CODE (addr
) == PLUS
2948 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2952 /* Load address into left half of destination register. */
2953 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2954 xoperands
[1] = XEXP (addr
, 1);
2955 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2956 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2957 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2959 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2961 else if (GET_CODE (addr
) == PLUS
2962 && REG_P (XEXP (addr
, 0))
2963 && REG_P (XEXP (addr
, 1)))
2967 /* Load address into left half of destination register. */
2968 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2969 xoperands
[1] = XEXP (addr
, 0);
2970 xoperands
[2] = XEXP (addr
, 1);
2971 output_asm_insn ("{addl|add,l} %1,%2,%0",
2973 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2977 /* If an operand is an unoffsettable memory ref, find a register
2978 we can increment temporarily to make it refer to the second word. */
2980 if (optype0
== MEMOP
)
2981 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2983 if (optype1
== MEMOP
)
2984 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2986 /* Ok, we can do one word at a time.
2987 Normally we do the low-numbered word first.
2989 In either case, set up in LATEHALF the operands to use
2990 for the high-numbered word and in some cases alter the
2991 operands in OPERANDS to be suitable for the low-numbered word. */
2993 if (optype0
== REGOP
)
2994 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2995 else if (optype0
== OFFSOP
)
2996 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2998 latehalf
[0] = operands
[0];
3000 if (optype1
== REGOP
)
3001 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
3002 else if (optype1
== OFFSOP
)
3003 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
3004 else if (optype1
== CNSTOP
)
3006 if (GET_CODE (operands
[1]) == HIGH
)
3008 operands
[1] = XEXP (operands
[1], 0);
3011 split_double (operands
[1], &operands
[1], &latehalf
[1]);
3014 latehalf
[1] = operands
[1];
3016 /* If the first move would clobber the source of the second one,
3017 do them in the other order.
3019 This can happen in two cases:
3021 mem -> register where the first half of the destination register
3022 is the same register used in the memory's address. Reload
3023 can create such insns.
3025 mem in this case will be either register indirect or register
3026 indirect plus a valid offset.
3028 register -> register move where REGNO(dst) == REGNO(src + 1)
3029 someone (Tim/Tege?) claimed this can happen for parameter loads.
3031 Handle mem -> register case first. */
3032 if (optype0
== REGOP
3033 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
3034 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
3036 /* Do the late half first. */
3038 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
3039 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
3043 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
3044 return pa_singlemove_string (operands
);
3047 /* Now handle register -> register case. */
3048 if (optype0
== REGOP
&& optype1
== REGOP
3049 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
3051 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
3052 return pa_singlemove_string (operands
);
3055 /* Normal case: do the two words, low-numbered first. */
3057 output_asm_insn (pa_singlemove_string (operands
), operands
);
3059 /* Make any unoffsettable addresses point at high-numbered word. */
3061 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
3063 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
3065 /* Do high-numbered word. */
3067 output_asm_insn ("ldil L'%1,%0", latehalf
);
3069 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
3071 /* Undo the adds we just did. */
3073 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
3075 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
3081 pa_output_fp_move_double (rtx
*operands
)
3083 if (FP_REG_P (operands
[0]))
3085 if (FP_REG_P (operands
[1])
3086 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
3087 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
3089 output_asm_insn ("fldd%F1 %1,%0", operands
);
3091 else if (FP_REG_P (operands
[1]))
3093 output_asm_insn ("fstd%F0 %1,%0", operands
);
3099 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
3101 /* This is a pain. You have to be prepared to deal with an
3102 arbitrary address here including pre/post increment/decrement.
3104 so avoid this in the MD. */
3105 gcc_assert (GET_CODE (operands
[0]) == REG
);
3107 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
3108 xoperands
[0] = operands
[0];
3109 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
3114 /* Return a REG that occurs in ADDR with coefficient 1.
3115 ADDR can be effectively incremented by incrementing REG. */
3118 find_addr_reg (rtx addr
)
3120 while (GET_CODE (addr
) == PLUS
)
3122 if (GET_CODE (XEXP (addr
, 0)) == REG
)
3123 addr
= XEXP (addr
, 0);
3124 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
3125 addr
= XEXP (addr
, 1);
3126 else if (CONSTANT_P (XEXP (addr
, 0)))
3127 addr
= XEXP (addr
, 1);
3128 else if (CONSTANT_P (XEXP (addr
, 1)))
3129 addr
= XEXP (addr
, 0);
3133 gcc_assert (GET_CODE (addr
) == REG
);
3137 /* Emit code to perform a block move.
3139 OPERANDS[0] is the destination pointer as a REG, clobbered.
3140 OPERANDS[1] is the source pointer as a REG, clobbered.
3141 OPERANDS[2] is a register for temporary storage.
3142 OPERANDS[3] is a register for temporary storage.
3143 OPERANDS[4] is the size as a CONST_INT
3144 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3145 OPERANDS[6] is another temporary register. */
3148 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
3150 int align
= INTVAL (operands
[5]);
3151 unsigned long n_bytes
= INTVAL (operands
[4]);
3153 /* We can't move more than a word at a time because the PA
3154 has no longer integer move insns. (Could use fp mem ops?) */
3155 if (align
> (TARGET_64BIT
? 8 : 4))
3156 align
= (TARGET_64BIT
? 8 : 4);
3158 /* Note that we know each loop below will execute at least twice
3159 (else we would have open-coded the copy). */
3163 /* Pre-adjust the loop counter. */
3164 operands
[4] = GEN_INT (n_bytes
- 16);
3165 output_asm_insn ("ldi %4,%2", operands
);
3168 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
3169 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
3170 output_asm_insn ("std,ma %3,8(%0)", operands
);
3171 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
3172 output_asm_insn ("std,ma %6,8(%0)", operands
);
3174 /* Handle the residual. There could be up to 7 bytes of
3175 residual to copy! */
3176 if (n_bytes
% 16 != 0)
3178 operands
[4] = GEN_INT (n_bytes
% 8);
3179 if (n_bytes
% 16 >= 8)
3180 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
3181 if (n_bytes
% 8 != 0)
3182 output_asm_insn ("ldd 0(%1),%6", operands
);
3183 if (n_bytes
% 16 >= 8)
3184 output_asm_insn ("std,ma %3,8(%0)", operands
);
3185 if (n_bytes
% 8 != 0)
3186 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
3191 /* Pre-adjust the loop counter. */
3192 operands
[4] = GEN_INT (n_bytes
- 8);
3193 output_asm_insn ("ldi %4,%2", operands
);
3196 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
3197 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
3198 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
3199 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
3200 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
3202 /* Handle the residual. There could be up to 7 bytes of
3203 residual to copy! */
3204 if (n_bytes
% 8 != 0)
3206 operands
[4] = GEN_INT (n_bytes
% 4);
3207 if (n_bytes
% 8 >= 4)
3208 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
3209 if (n_bytes
% 4 != 0)
3210 output_asm_insn ("ldw 0(%1),%6", operands
);
3211 if (n_bytes
% 8 >= 4)
3212 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
3213 if (n_bytes
% 4 != 0)
3214 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
3219 /* Pre-adjust the loop counter. */
3220 operands
[4] = GEN_INT (n_bytes
- 4);
3221 output_asm_insn ("ldi %4,%2", operands
);
3224 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
3225 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
3226 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
3227 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
3228 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
3230 /* Handle the residual. */
3231 if (n_bytes
% 4 != 0)
3233 if (n_bytes
% 4 >= 2)
3234 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
3235 if (n_bytes
% 2 != 0)
3236 output_asm_insn ("ldb 0(%1),%6", operands
);
3237 if (n_bytes
% 4 >= 2)
3238 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
3239 if (n_bytes
% 2 != 0)
3240 output_asm_insn ("stb %6,0(%0)", operands
);
3245 /* Pre-adjust the loop counter. */
3246 operands
[4] = GEN_INT (n_bytes
- 2);
3247 output_asm_insn ("ldi %4,%2", operands
);
3250 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
3251 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
3252 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
3253 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
3254 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
3256 /* Handle the residual. */
3257 if (n_bytes
% 2 != 0)
3259 output_asm_insn ("ldb 0(%1),%3", operands
);
3260 output_asm_insn ("stb %3,0(%0)", operands
);
3269 /* Count the number of insns necessary to handle this block move.
3271 Basic structure is the same as emit_block_move, except that we
3272 count insns rather than emit them. */
3275 compute_cpymem_length (rtx_insn
*insn
)
3277 rtx pat
= PATTERN (insn
);
3278 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
3279 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
3280 unsigned int n_insns
= 0;
3282 /* We can't move more than four bytes at a time because the PA
3283 has no longer integer move insns. (Could use fp mem ops?) */
3284 if (align
> (TARGET_64BIT
? 8 : 4))
3285 align
= (TARGET_64BIT
? 8 : 4);
3287 /* The basic copying loop. */
3291 if (n_bytes
% (2 * align
) != 0)
3293 if ((n_bytes
% (2 * align
)) >= align
)
3296 if ((n_bytes
% align
) != 0)
3300 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3304 /* Emit code to perform a block clear.
3306 OPERANDS[0] is the destination pointer as a REG, clobbered.
3307 OPERANDS[1] is a register for temporary storage.
3308 OPERANDS[2] is the size as a CONST_INT
3309 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3312 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
3314 int align
= INTVAL (operands
[3]);
3315 unsigned long n_bytes
= INTVAL (operands
[2]);
3317 /* We can't clear more than a word at a time because the PA
3318 has no longer integer move insns. */
3319 if (align
> (TARGET_64BIT
? 8 : 4))
3320 align
= (TARGET_64BIT
? 8 : 4);
3322 /* Note that we know each loop below will execute at least twice
3323 (else we would have open-coded the copy). */
3327 /* Pre-adjust the loop counter. */
3328 operands
[2] = GEN_INT (n_bytes
- 16);
3329 output_asm_insn ("ldi %2,%1", operands
);
3332 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3333 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
3334 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3336 /* Handle the residual. There could be up to 7 bytes of
3337 residual to copy! */
3338 if (n_bytes
% 16 != 0)
3340 operands
[2] = GEN_INT (n_bytes
% 8);
3341 if (n_bytes
% 16 >= 8)
3342 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3343 if (n_bytes
% 8 != 0)
3344 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3349 /* Pre-adjust the loop counter. */
3350 operands
[2] = GEN_INT (n_bytes
- 8);
3351 output_asm_insn ("ldi %2,%1", operands
);
3354 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3355 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3356 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3358 /* Handle the residual. There could be up to 7 bytes of
3359 residual to copy! */
3360 if (n_bytes
% 8 != 0)
3362 operands
[2] = GEN_INT (n_bytes
% 4);
3363 if (n_bytes
% 8 >= 4)
3364 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3365 if (n_bytes
% 4 != 0)
3366 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3371 /* Pre-adjust the loop counter. */
3372 operands
[2] = GEN_INT (n_bytes
- 4);
3373 output_asm_insn ("ldi %2,%1", operands
);
3376 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3377 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3378 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3380 /* Handle the residual. */
3381 if (n_bytes
% 4 != 0)
3383 if (n_bytes
% 4 >= 2)
3384 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3385 if (n_bytes
% 2 != 0)
3386 output_asm_insn ("stb %%r0,0(%0)", operands
);
3391 /* Pre-adjust the loop counter. */
3392 operands
[2] = GEN_INT (n_bytes
- 2);
3393 output_asm_insn ("ldi %2,%1", operands
);
3396 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3397 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3398 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3400 /* Handle the residual. */
3401 if (n_bytes
% 2 != 0)
3402 output_asm_insn ("stb %%r0,0(%0)", operands
);
3411 /* Count the number of insns necessary to handle this block move.
3413 Basic structure is the same as emit_block_move, except that we
3414 count insns rather than emit them. */
3417 compute_clrmem_length (rtx_insn
*insn
)
3419 rtx pat
= PATTERN (insn
);
3420 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3421 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3422 unsigned int n_insns
= 0;
3424 /* We can't clear more than a word at a time because the PA
3425 has no longer integer move insns. */
3426 if (align
> (TARGET_64BIT
? 8 : 4))
3427 align
= (TARGET_64BIT
? 8 : 4);
3429 /* The basic loop. */
3433 if (n_bytes
% (2 * align
) != 0)
3435 if ((n_bytes
% (2 * align
)) >= align
)
3438 if ((n_bytes
% align
) != 0)
3442 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3448 pa_output_and (rtx
*operands
)
3450 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3452 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3453 int ls0
, ls1
, ms0
, p
, len
;
3455 for (ls0
= 0; ls0
< 32; ls0
++)
3456 if ((mask
& (1 << ls0
)) == 0)
3459 for (ls1
= ls0
; ls1
< 32; ls1
++)
3460 if ((mask
& (1 << ls1
)) != 0)
3463 for (ms0
= ls1
; ms0
< 32; ms0
++)
3464 if ((mask
& (1 << ms0
)) == 0)
3467 gcc_assert (ms0
== 32);
3475 operands
[2] = GEN_INT (len
);
3476 return "{extru|extrw,u} %1,31,%2,%0";
3480 /* We could use this `depi' for the case above as well, but `depi'
3481 requires one more register file access than an `extru'. */
3486 operands
[2] = GEN_INT (p
);
3487 operands
[3] = GEN_INT (len
);
3488 return "{depi|depwi} 0,%2,%3,%0";
3492 return "and %1,%2,%0";
3495 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3496 storing the result in operands[0]. */
3498 pa_output_64bit_and (rtx
*operands
)
3500 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3502 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3503 int ls0
, ls1
, ms0
, p
, len
;
3505 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3506 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3509 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3510 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3513 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3514 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3517 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3519 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3525 operands
[2] = GEN_INT (len
);
3526 return "extrd,u %1,63,%2,%0";
3530 /* We could use this `depi' for the case above as well, but `depi'
3531 requires one more register file access than an `extru'. */
3536 operands
[2] = GEN_INT (p
);
3537 operands
[3] = GEN_INT (len
);
3538 return "depdi 0,%2,%3,%0";
3542 return "and %1,%2,%0";
3546 pa_output_ior (rtx
*operands
)
3548 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3549 int bs0
, bs1
, p
, len
;
3551 if (INTVAL (operands
[2]) == 0)
3552 return "copy %1,%0";
3554 for (bs0
= 0; bs0
< 32; bs0
++)
3555 if ((mask
& (1 << bs0
)) != 0)
3558 for (bs1
= bs0
; bs1
< 32; bs1
++)
3559 if ((mask
& (1 << bs1
)) == 0)
3562 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3567 operands
[2] = GEN_INT (p
);
3568 operands
[3] = GEN_INT (len
);
3569 return "{depi|depwi} -1,%2,%3,%0";
3572 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3573 storing the result in operands[0]. */
3575 pa_output_64bit_ior (rtx
*operands
)
3577 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3578 int bs0
, bs1
, p
, len
;
3580 if (INTVAL (operands
[2]) == 0)
3581 return "copy %1,%0";
3583 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3584 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3587 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3588 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3591 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3592 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3597 operands
[2] = GEN_INT (p
);
3598 operands
[3] = GEN_INT (len
);
3599 return "depdi -1,%2,%3,%0";
3602 /* Target hook for assembling integer objects. This code handles
3603 aligned SI and DI integers specially since function references
3604 must be preceded by P%. */
3607 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3612 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3613 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3614 calling output_addr_const. Otherwise, it may call assemble_external
3615 in the midst of outputing the assembler code for the SYMBOL_REF.
3616 We restore the SYMBOL_REF_DECL after the output is done. */
3617 if (GET_CODE (x
) == SYMBOL_REF
)
3619 decl
= SYMBOL_REF_DECL (x
);
3622 assemble_external (decl
);
3623 SET_SYMBOL_REF_DECL (x
, NULL
);
3627 if (size
== UNITS_PER_WORD
3629 && function_label_operand (x
, VOIDmode
))
3631 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3633 /* We don't want an OPD when generating fast indirect calls. */
3634 if (!TARGET_FAST_INDIRECT_CALLS
)
3635 fputs ("P%", asm_out_file
);
3637 output_addr_const (asm_out_file
, x
);
3638 fputc ('\n', asm_out_file
);
3642 result
= default_assemble_integer (x
, size
, aligned_p
);
3645 SET_SYMBOL_REF_DECL (x
, decl
);
/* Output an ascii string.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      /* Escape each group of up to four input bytes into PARTIAL_OUTPUT;
	 CO counts the escaped output characters produced.  */
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Non-printable byte: emit a \xNN hex escape.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd =  c  / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd =  c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Start a fresh .STRING directive before the assembler's input
	 line length limit is reached.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3706 /* Try to rewrite floating point comparisons & branches to avoid
3707 useless add,tr insns.
3709 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3710 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3711 first attempt to remove useless add,tr insns. It is zero
3712 for the second pass as reorg sometimes leaves bogus REG_DEAD
3715 When CHECK_NOTES is zero we can only eliminate add,tr insns
3716 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3719 remove_useless_addtr_insns (int check_notes
)
3722 static int pass
= 0;
3724 /* This is fairly cheap, so always run it when optimizing. */
3728 int fbranch_count
= 0;
3730 /* Walk all the insns in this function looking for fcmp & fbranch
3731 instructions. Keep track of how many of each we find. */
3732 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3736 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3737 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3740 tmp
= PATTERN (insn
);
3742 /* It must be a set. */
3743 if (GET_CODE (tmp
) != SET
)
3746 /* If the destination is CCFP, then we've found an fcmp insn. */
3747 tmp
= SET_DEST (tmp
);
3748 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3754 tmp
= PATTERN (insn
);
3755 /* If this is an fbranch instruction, bump the fbranch counter. */
3756 if (GET_CODE (tmp
) == SET
3757 && SET_DEST (tmp
) == pc_rtx
3758 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3759 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3760 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3761 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3769 /* Find all floating point compare + branch insns. If possible,
3770 reverse the comparison & the branch to avoid add,tr insns. */
3771 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3776 /* Ignore anything that isn't an INSN. */
3777 if (! NONJUMP_INSN_P (insn
))
3780 tmp
= PATTERN (insn
);
3782 /* It must be a set. */
3783 if (GET_CODE (tmp
) != SET
)
3786 /* The destination must be CCFP, which is register zero. */
3787 tmp
= SET_DEST (tmp
);
3788 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3791 /* INSN should be a set of CCFP.
3793 See if the result of this insn is used in a reversed FP
3794 conditional branch. If so, reverse our condition and
3795 the branch. Doing so avoids useless add,tr insns. */
3796 next
= next_insn (insn
);
3799 /* Jumps, calls and labels stop our search. */
3800 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3803 /* As does another fcmp insn. */
3804 if (NONJUMP_INSN_P (next
)
3805 && GET_CODE (PATTERN (next
)) == SET
3806 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3807 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3810 next
= next_insn (next
);
3813 /* Is NEXT_INSN a branch? */
3814 if (next
&& JUMP_P (next
))
3816 rtx pattern
= PATTERN (next
);
3818 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3819 and CCFP dies, then reverse our conditional and the branch
3820 to avoid the add,tr. */
3821 if (GET_CODE (pattern
) == SET
3822 && SET_DEST (pattern
) == pc_rtx
3823 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3824 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3825 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3826 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3827 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3828 && (fcmp_count
== fbranch_count
3830 && find_regno_note (next
, REG_DEAD
, 0))))
3832 /* Reverse the branch. */
3833 tmp
= XEXP (SET_SRC (pattern
), 1);
3834 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3835 XEXP (SET_SRC (pattern
), 2) = tmp
;
3836 INSN_CODE (next
) = -1;
3838 /* Reverse our condition. */
3839 tmp
= PATTERN (insn
);
3840 PUT_CODE (XEXP (tmp
, 1),
3841 (reverse_condition_maybe_unordered
3842 (GET_CODE (XEXP (tmp
, 1)))));
3852 /* You may have trouble believing this, but this is the 32 bit HP-PA
3857 Variable arguments (optional; any number may be allocated)
3859 SP-(4*(N+9)) arg word N
3864 Fixed arguments (must be allocated; may remain unused)
3873 SP-32 External Data Pointer (DP)
3875 SP-24 External/stub RP (RP')
3879 SP-8 Calling Stub RP (RP'')
3884 SP-0 Stack Pointer (points to next available address)
3888 /* This function saves registers as follows. Registers marked with ' are
3889 this function's registers (as opposed to the previous function's).
3890 If a frame_pointer isn't needed, r4 is saved as a general register;
3891 the space for the frame pointer is still allocated, though, to keep
3897 SP (FP') Previous FP
3898 SP + 4 Alignment filler (sigh)
3899 SP + 8 Space for locals reserved here.
3903 SP + n All call saved register used.
3907 SP + o All call saved fp registers used.
3911 SP + p (SP') points to next available address.
3915 /* Global variables set by output_function_prologue(). */
3916 /* Size of frame. Need to know this to emit return insns from
3918 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3919 static int save_fregs
;
3921 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3922 Handle case where DISP > 8k by using the add_high_const patterns.
3924 Note in DISP > 8k case, we will leave the high part of the address
3925 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3928 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3930 rtx dest
, src
, basereg
;
3933 src
= gen_rtx_REG (word_mode
, reg
);
3934 basereg
= gen_rtx_REG (Pmode
, base
);
3935 if (VAL_14_BITS_P (disp
))
3937 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3938 insn
= emit_move_insn (dest
, src
);
3940 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3942 rtx delta
= GEN_INT (disp
);
3943 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3945 emit_move_insn (tmpreg
, delta
);
3946 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3949 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3950 gen_rtx_SET (tmpreg
,
3951 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3952 RTX_FRAME_RELATED_P (insn
) = 1;
3954 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3955 insn
= emit_move_insn (dest
, src
);
3959 rtx delta
= GEN_INT (disp
);
3960 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3961 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3963 emit_move_insn (tmpreg
, high
);
3964 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3965 insn
= emit_move_insn (dest
, src
);
3967 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3968 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3969 gen_rtx_PLUS (word_mode
,
3976 RTX_FRAME_RELATED_P (insn
) = 1;
3979 /* Emit RTL to store REG at the memory location specified by BASE and then
3980 add MOD to BASE. MOD must be <= 8k. */
3983 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3985 rtx basereg
, srcreg
, delta
;
3988 gcc_assert (VAL_14_BITS_P (mod
));
3990 basereg
= gen_rtx_REG (Pmode
, base
);
3991 srcreg
= gen_rtx_REG (word_mode
, reg
);
3992 delta
= GEN_INT (mod
);
3994 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3997 RTX_FRAME_RELATED_P (insn
) = 1;
3999 /* RTX_FRAME_RELATED_P must be set on each frame related set
4000 in a parallel with more than one element. */
4001 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
4002 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
4006 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
4007 where DISP > 8k by using the add_high_const patterns. NOTE indicates
4008 whether to add a frame note or not.
4010 In the DISP > 8k case, we leave the high part of the address in %r1.
4011 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
4014 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
4018 if (VAL_14_BITS_P (disp
))
4020 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
4021 plus_constant (Pmode
,
4022 gen_rtx_REG (Pmode
, base
), disp
));
4024 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4026 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4027 rtx delta
= GEN_INT (disp
);
4028 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4030 emit_move_insn (tmpreg
, delta
);
4031 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
4032 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4034 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4035 gen_rtx_SET (tmpreg
,
4036 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
4040 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4041 rtx delta
= GEN_INT (disp
);
4042 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4044 emit_move_insn (tmpreg
,
4045 gen_rtx_PLUS (Pmode
, basereg
,
4046 gen_rtx_HIGH (Pmode
, delta
)));
4047 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
4048 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4051 if (DO_FRAME_NOTES
&& note
)
4052 RTX_FRAME_RELATED_P (insn
) = 1;
4056 pa_compute_frame_size (poly_int64 size
, int *fregs_live
)
4061 /* The code in pa_expand_prologue and pa_expand_epilogue must
4062 be consistent with the rounding and size calculation done here.
4063 Change them at the same time. */
4065 /* We do our own stack alignment. First, round the size of the
4066 stack locals up to a word boundary. */
4067 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
4069 /* Space for previous frame pointer + filler. If any frame is
4070 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
4071 waste some space here for the sake of HP compatibility. The
4072 first slot is only used when the frame pointer is needed. */
4073 if (size
|| frame_pointer_needed
)
4074 size
+= pa_starting_frame_offset ();
4076 /* If the current function calls __builtin_eh_return, then we need
4077 to allocate stack space for registers that will hold data for
4078 the exception handler. */
4079 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4083 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
4085 size
+= i
* UNITS_PER_WORD
;
4088 /* Account for space used by the callee general register saves. */
4089 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
4090 if (df_regs_ever_live_p (i
))
4091 size
+= UNITS_PER_WORD
;
4093 /* Account for space used by the callee floating point register saves. */
4094 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4095 if (df_regs_ever_live_p (i
)
4096 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4100 /* We always save both halves of the FP register, so always
4101 increment the frame size by 8 bytes. */
4105 /* If any of the floating registers are saved, account for the
4106 alignment needed for the floating point register save block. */
4109 size
= (size
+ 7) & ~7;
4114 /* The various ABIs include space for the outgoing parameters in the
4115 size of the current function's stack frame. We don't need to align
4116 for the outgoing arguments as their alignment is set by the final
4117 rounding for the frame as a whole. */
4118 size
+= crtl
->outgoing_args_size
;
4120 /* Allocate space for the fixed frame marker. This space must be
4121 allocated for any function that makes calls or allocates
4123 if (!crtl
->is_leaf
|| size
)
4124 size
+= TARGET_64BIT
? 48 : 32;
4126 /* Finally, round to the preferred stack boundary. */
4127 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
4128 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
4131 /* Output function label, and associated .PROC and .CALLINFO statements. */
4134 pa_output_function_label (FILE *file
)
4136 /* The function's label and associated .PROC must never be
4137 separated and must be output *after* any profiling declarations
4138 to avoid changing spaces/subspaces within a procedure. */
4139 const char *name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
4140 ASM_OUTPUT_FUNCTION_LABEL (file
, name
, current_function_decl
);
4141 fputs ("\t.PROC\n", file
);
4143 /* pa_expand_prologue does the dirty work now. We just need
4144 to output the assembler directives which denote the start
4146 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
4148 fputs (",NO_CALLS", file
);
4150 fputs (",CALLS", file
);
4152 fputs (",SAVE_RP", file
);
4154 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4155 at the beginning of the frame and that it is used as the frame
4156 pointer for the frame. We do this because our current frame
4157 layout doesn't conform to that specified in the HP runtime
4158 documentation and we need a way to indicate to programs such as
4159 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4160 isn't used by HP compilers but is supported by the assembler.
4161 However, SAVE_SP is supposed to indicate that the previous stack
4162 pointer has been saved in the frame marker. */
4163 if (frame_pointer_needed
)
4164 fputs (",SAVE_SP", file
);
4166 /* Pass on information about the number of callee register saves
4167 performed in the prologue.
4169 The compiler is supposed to pass the highest register number
4170 saved, the assembler then has to adjust that number before
4171 entering it into the unwind descriptor (to account for any
4172 caller saved registers with lower register numbers than the
4173 first callee saved register). */
4175 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
4178 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
4180 fputs ("\n\t.ENTRY\n", file
);
/* Output function prologue.  */

static void
pa_output_function_prologue (FILE *file)
{
  pa_output_function_label (file);
  remove_useless_addtr_insns (0);
}
4192 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4195 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED
)
4197 remove_useless_addtr_insns (0);
4201 pa_expand_prologue (void)
4203 int merge_sp_adjust_with_store
= 0;
4204 HOST_WIDE_INT size
= get_frame_size ();
4205 HOST_WIDE_INT offset
;
4214 /* Compute total size for frame pointer, filler, locals and rounding to
4215 the next word boundary. Similar code appears in pa_compute_frame_size
4216 and must be changed in tandem with this code. */
4217 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
4218 if (local_fsize
|| frame_pointer_needed
)
4219 local_fsize
+= pa_starting_frame_offset ();
4221 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
4222 if (flag_stack_usage_info
)
4223 current_function_static_stack_size
= actual_fsize
;
4225 /* Compute a few things we will use often. */
4226 tmpreg
= gen_rtx_REG (word_mode
, 1);
4228 /* Save RP first. The calling conventions manual states RP will
4229 always be stored into the caller's frame at sp - 20 or sp - 16
4230 depending on which ABI is in use. */
4231 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
4233 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
4239 /* Allocate the local frame and set up the frame pointer if needed. */
4240 if (actual_fsize
!= 0)
4242 if (frame_pointer_needed
)
4244 /* Copy the old frame pointer temporarily into %r1. Set up the
4245 new stack pointer, then store away the saved old frame pointer
4246 into the stack at sp and at the same time update the stack
4247 pointer by actual_fsize bytes. Two versions, first
4248 handles small (<8k) frames. The second handles large (>=8k)
4250 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
4252 RTX_FRAME_RELATED_P (insn
) = 1;
4254 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4256 RTX_FRAME_RELATED_P (insn
) = 1;
4258 if (VAL_14_BITS_P (actual_fsize
))
4259 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
4262 /* It is incorrect to store the saved frame pointer at *sp,
4263 then increment sp (writes beyond the current stack boundary).
4265 So instead use stwm to store at *sp and post-increment the
4266 stack pointer as an atomic operation. Then increment sp to
4267 finish allocating the new frame. */
4268 HOST_WIDE_INT adjust1
= 8192 - 64;
4269 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
4271 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
4272 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4276 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4277 we need to store the previous stack pointer (frame pointer)
4278 into the frame marker on targets that use the HP unwind
4279 library. This allows the HP unwind library to be used to
4280 unwind GCC frames. However, we are not fully compatible
4281 with the HP library because our frame layout differs from
4282 that specified in the HP runtime specification.
4284 We don't want a frame note on this instruction as the frame
4285 marker moves during dynamic stack allocation.
4287 This instruction also serves as a blockage to prevent
4288 register spills from being scheduled before the stack
4289 pointer is raised. This is necessary as we store
4290 registers using the frame pointer as a base register,
4291 and the frame pointer is set before sp is raised. */
4292 if (TARGET_HPUX_UNWIND_LIBRARY
)
4294 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
4295 GEN_INT (TARGET_64BIT
? -8 : -4));
4297 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
4298 hard_frame_pointer_rtx
);
4301 emit_insn (gen_blockage ());
4303 /* no frame pointer needed. */
4306 /* In some cases we can perform the first callee register save
4307 and allocating the stack frame at the same time. If so, just
4308 make a note of it and defer allocating the frame until saving
4309 the callee registers. */
4310 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
4311 merge_sp_adjust_with_store
= 1;
4312 /* Cannot optimize. Adjust the stack frame by actual_fsize
4315 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4320 /* Normal register save.
4322 Do not save the frame pointer in the frame_pointer_needed case. It
4323 was done earlier. */
4324 if (frame_pointer_needed
)
4326 offset
= local_fsize
;
4328 /* Saving the EH return data registers in the frame is the simplest
4329 way to get the frame unwind information emitted. We put them
4330 just before the general registers. */
4331 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4333 unsigned int i
, regno
;
4337 regno
= EH_RETURN_DATA_REGNO (i
);
4338 if (regno
== INVALID_REGNUM
)
4341 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4342 offset
+= UNITS_PER_WORD
;
4346 for (i
= 18; i
>= 4; i
--)
4347 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
4349 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4350 offset
+= UNITS_PER_WORD
;
4353 /* Account for %r3 which is saved in a special place. */
4356 /* No frame pointer needed. */
4359 offset
= local_fsize
- actual_fsize
;
4361 /* Saving the EH return data registers in the frame is the simplest
4362 way to get the frame unwind information emitted. */
4363 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4365 unsigned int i
, regno
;
4369 regno
= EH_RETURN_DATA_REGNO (i
);
4370 if (regno
== INVALID_REGNUM
)
4373 /* If merge_sp_adjust_with_store is nonzero, then we can
4374 optimize the first save. */
4375 if (merge_sp_adjust_with_store
)
4377 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4378 merge_sp_adjust_with_store
= 0;
4381 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4382 offset
+= UNITS_PER_WORD
;
4386 for (i
= 18; i
>= 3; i
--)
4387 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
4389 /* If merge_sp_adjust_with_store is nonzero, then we can
4390 optimize the first GR save. */
4391 if (merge_sp_adjust_with_store
)
4393 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4394 merge_sp_adjust_with_store
= 0;
4397 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4398 offset
+= UNITS_PER_WORD
;
4402 /* If we wanted to merge the SP adjustment with a GR save, but we never
4403 did any GR saves, then just emit the adjustment here. */
4404 if (merge_sp_adjust_with_store
)
4405 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4409 /* The hppa calling conventions say that %r19, the pic offset
4410 register, is saved at sp - 32 (in this function's frame)
4411 when generating PIC code. FIXME: What is the correct thing
4412 to do for functions which make no calls and allocate no
4413 frame? Do we need to allocate a frame, or can we just omit
4414 the save? For now we'll just omit the save.
4416 We don't want a note on this insn as the frame marker can
4417 move if there is a dynamic stack allocation. */
4418 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4420 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4422 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4426 /* Align pointer properly (doubleword boundary). */
4427 offset
= (offset
+ 7) & ~7;
4429 /* Floating point register store. */
4434 /* First get the frame or stack pointer to the start of the FP register
4436 if (frame_pointer_needed
)
4438 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4439 base
= hard_frame_pointer_rtx
;
4443 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4444 base
= stack_pointer_rtx
;
4447 /* Now actually save the FP registers. */
4448 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4450 if (df_regs_ever_live_p (i
)
4451 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4455 addr
= gen_rtx_MEM (DFmode
,
4456 gen_rtx_POST_INC (word_mode
, tmpreg
));
4457 reg
= gen_rtx_REG (DFmode
, i
);
4458 insn
= emit_move_insn (addr
, reg
);
4461 RTX_FRAME_RELATED_P (insn
) = 1;
4464 rtx mem
= gen_rtx_MEM (DFmode
,
4465 plus_constant (Pmode
, base
,
4467 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4468 gen_rtx_SET (mem
, reg
));
4472 rtx meml
= gen_rtx_MEM (SFmode
,
4473 plus_constant (Pmode
, base
,
4475 rtx memr
= gen_rtx_MEM (SFmode
,
4476 plus_constant (Pmode
, base
,
4478 rtx regl
= gen_rtx_REG (SFmode
, i
);
4479 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4480 rtx setl
= gen_rtx_SET (meml
, regl
);
4481 rtx setr
= gen_rtx_SET (memr
, regr
);
4484 RTX_FRAME_RELATED_P (setl
) = 1;
4485 RTX_FRAME_RELATED_P (setr
) = 1;
4486 vec
= gen_rtvec (2, setl
, setr
);
4487 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4488 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4491 offset
+= GET_MODE_SIZE (DFmode
);
4498 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4499 Handle case where DISP > 8k by using the add_high_const patterns. */
4502 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4504 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4505 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4508 if (VAL_14_BITS_P (disp
))
4509 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
4510 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4512 rtx delta
= GEN_INT (disp
);
4513 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4515 emit_move_insn (tmpreg
, delta
);
4516 if (TARGET_DISABLE_INDEXING
)
4518 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4519 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4522 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4526 rtx delta
= GEN_INT (disp
);
4527 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4528 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4530 emit_move_insn (tmpreg
, high
);
4531 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4534 emit_move_insn (dest
, src
);
4537 /* Update the total code bytes output to the text section. */
4540 update_total_code_bytes (unsigned int nbytes
)
4542 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4543 && !IN_NAMED_SECTION_P (cfun
->decl
))
4545 unsigned int old_total
= total_code_bytes
;
4547 total_code_bytes
+= nbytes
;
4549 /* Be prepared to handle overflows. */
4550 if (old_total
> total_code_bytes
)
4551 total_code_bytes
= UINT_MAX
;
4555 /* This function generates the assembly code for function exit.
4556 Args are as for output_function_prologue ().
4558 The function epilogue should not depend on the current stack
4559 pointer! It should use the frame pointer only. This is mandatory
4560 because of alloca; we also take advantage of it to omit stack
4561 adjustments before returning. */
4564 pa_output_function_epilogue (FILE *file
)
4566 rtx_insn
*insn
= get_last_insn ();
4569 /* pa_expand_epilogue does the dirty work now. We just need
4570 to output the assembler directives which denote the end
4573 To make debuggers happy, emit a nop if the epilogue was completely
4574 eliminated due to a volatile call as the last insn in the
4575 current function. That way the return address (in %r2) will
4576 always point to a valid instruction in the current function. */
4578 /* Get the last real insn. */
4580 insn
= prev_real_insn (insn
);
4582 /* If it is a sequence, then look inside. */
4583 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4584 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4586 /* If insn is a CALL_INSN, then it must be a call to a volatile
4587 function (otherwise there would be epilogue insns). */
4588 if (insn
&& CALL_P (insn
))
4590 fputs ("\tnop\n", file
);
4596 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4598 if (TARGET_SOM
&& TARGET_GAS
)
4600 /* We are done with this subspace except possibly for some additional
4601 debug information. Forget that we are in this subspace to ensure
4602 that the next function is output in its own subspace. */
4604 cfun
->machine
->in_nsubspa
= 2;
4607 /* Thunks do their own insn accounting. */
4611 if (INSN_ADDRESSES_SET_P ())
4613 last_address
= extra_nop
? 4 : 0;
4614 insn
= get_last_nonnote_insn ();
4617 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4619 last_address
+= insn_default_length (insn
);
4621 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4622 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4625 last_address
= UINT_MAX
;
4627 /* Finally, update the total number of code bytes output so far. */
4628 update_total_code_bytes (last_address
);
4632 pa_expand_epilogue (void)
4635 HOST_WIDE_INT offset
;
4636 HOST_WIDE_INT ret_off
= 0;
4638 int merge_sp_adjust_with_load
= 0;
4640 /* We will use this often. */
4641 tmpreg
= gen_rtx_REG (word_mode
, 1);
4643 /* Try to restore RP early to avoid load/use interlocks when
4644 RP gets used in the return (bv) instruction. This appears to still
4645 be necessary even when we schedule the prologue and epilogue. */
4648 ret_off
= TARGET_64BIT
? -16 : -20;
4649 if (frame_pointer_needed
)
4651 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4656 /* No frame pointer, and stack is smaller than 8k. */
4657 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4659 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4665 /* General register restores. */
4666 if (frame_pointer_needed
)
4668 offset
= local_fsize
;
4670 /* If the current function calls __builtin_eh_return, then we need
4671 to restore the saved EH data registers. */
4672 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4674 unsigned int i
, regno
;
4678 regno
= EH_RETURN_DATA_REGNO (i
);
4679 if (regno
== INVALID_REGNUM
)
4682 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4683 offset
+= UNITS_PER_WORD
;
4687 for (i
= 18; i
>= 4; i
--)
4688 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
4690 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4691 offset
+= UNITS_PER_WORD
;
4696 offset
= local_fsize
- actual_fsize
;
4698 /* If the current function calls __builtin_eh_return, then we need
4699 to restore the saved EH data registers. */
4700 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4702 unsigned int i
, regno
;
4706 regno
= EH_RETURN_DATA_REGNO (i
);
4707 if (regno
== INVALID_REGNUM
)
4710 /* Only for the first load.
4711 merge_sp_adjust_with_load holds the register load
4712 with which we will merge the sp adjustment. */
4713 if (merge_sp_adjust_with_load
== 0
4715 && VAL_14_BITS_P (-actual_fsize
))
4716 merge_sp_adjust_with_load
= regno
;
4718 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4719 offset
+= UNITS_PER_WORD
;
4723 for (i
= 18; i
>= 3; i
--)
4725 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
4727 /* Only for the first load.
4728 merge_sp_adjust_with_load holds the register load
4729 with which we will merge the sp adjustment. */
4730 if (merge_sp_adjust_with_load
== 0
4732 && VAL_14_BITS_P (-actual_fsize
))
4733 merge_sp_adjust_with_load
= i
;
4735 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4736 offset
+= UNITS_PER_WORD
;
4741 /* Align pointer properly (doubleword boundary). */
4742 offset
= (offset
+ 7) & ~7;
4744 /* FP register restores. */
4747 /* Adjust the register to index off of. */
4748 if (frame_pointer_needed
)
4749 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4751 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4753 /* Actually do the restores now. */
4754 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4755 if (df_regs_ever_live_p (i
)
4756 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4758 rtx src
= gen_rtx_MEM (DFmode
,
4759 gen_rtx_POST_INC (word_mode
, tmpreg
));
4760 rtx dest
= gen_rtx_REG (DFmode
, i
);
4761 emit_move_insn (dest
, src
);
4765 /* Emit a blockage insn here to keep these insns from being moved to
4766 an earlier spot in the epilogue, or into the main instruction stream.
4768 This is necessary as we must not cut the stack back before all the
4769 restores are finished. */
4770 emit_insn (gen_blockage ());
4772 /* Reset stack pointer (and possibly frame pointer). The stack
4773 pointer is initially set to fp + 64 to avoid a race condition. */
4774 if (frame_pointer_needed
)
4776 rtx delta
= GEN_INT (-64);
4778 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
4779 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4780 stack_pointer_rtx
, delta
));
4782 /* If we were deferring a callee register restore, do it now. */
4783 else if (merge_sp_adjust_with_load
)
4785 rtx delta
= GEN_INT (-actual_fsize
);
4786 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4788 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4790 else if (actual_fsize
!= 0)
4791 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4794 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4795 frame greater than 8k), do so now. */
4797 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4799 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4801 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4803 emit_insn (gen_blockage ());
4804 emit_insn (TARGET_64BIT
4805 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4806 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4811 pa_can_use_return_insn (void)
4813 if (!reload_completed
)
4816 if (frame_pointer_needed
)
4819 if (df_regs_ever_live_p (2))
4825 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4829 hppa_pic_save_rtx (void)
4831 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4834 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4835 #define NO_DEFERRED_PROFILE_COUNTERS 0
4839 /* Vector of funcdef numbers. */
4840 static vec
<int> funcdef_nos
;
4842 /* Output deferred profile counters. */
4844 output_deferred_profile_counters (void)
4849 if (funcdef_nos
.is_empty ())
4852 switch_to_section (data_section
);
4853 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4854 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4856 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4858 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4859 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4862 funcdef_nos
.release ();
4866 hppa_profile_hook (int label_no
)
4868 rtx_code_label
*label_rtx
= gen_label_rtx ();
4869 int reg_parm_stack_space
= REG_PARM_STACK_SPACE (NULL_TREE
);
4870 rtx arg_bytes
, begin_label_rtx
, mcount
, sym
;
4871 rtx_insn
*call_insn
;
4872 char begin_label_name
[16];
4873 bool use_mcount_pcrel_call
;
4875 /* Set up call destination. */
4876 sym
= gen_rtx_SYMBOL_REF (Pmode
, "_mcount");
4877 pa_encode_label (sym
);
4878 mcount
= gen_rtx_MEM (Pmode
, sym
);
4880 /* If we can reach _mcount with a pc-relative call, we can optimize
4881 loading the address of the current function. This requires linker
4882 long branch stub support. */
4883 if (!TARGET_PORTABLE_RUNTIME
4884 && !TARGET_LONG_CALLS
4885 && (TARGET_SOM
|| flag_function_sections
))
4886 use_mcount_pcrel_call
= TRUE
;
4888 use_mcount_pcrel_call
= FALSE
;
4890 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4892 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4894 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4896 if (!use_mcount_pcrel_call
)
4898 /* The address of the function is loaded into %r25 with an instruction-
4899 relative sequence that avoids the use of relocations. We use SImode
4900 for the address of the function in both 32 and 64-bit code to avoid
4901 having to provide DImode versions of the lcla2 pattern. */
4903 emit_insn (gen_lcla2 (gen_rtx_REG (SImode
, 25), label_rtx
));
4905 emit_insn (gen_lcla1 (gen_rtx_REG (SImode
, 25), label_rtx
));
4908 if (!NO_DEFERRED_PROFILE_COUNTERS
)
4910 rtx count_label_rtx
, addr
, r24
;
4911 char count_label_name
[16];
4913 funcdef_nos
.safe_push (label_no
);
4914 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4915 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
,
4916 ggc_strdup (count_label_name
));
4918 addr
= force_reg (Pmode
, count_label_rtx
);
4919 r24
= gen_rtx_REG (Pmode
, 24);
4920 emit_move_insn (r24
, addr
);
4922 arg_bytes
= GEN_INT (TARGET_64BIT
? 24 : 12);
4923 if (use_mcount_pcrel_call
)
4924 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4927 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4929 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4933 arg_bytes
= GEN_INT (TARGET_64BIT
? 16 : 8);
4934 if (use_mcount_pcrel_call
)
4935 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4938 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4941 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4942 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4944 /* Indicate the _mcount call cannot throw, nor will it execute a
4946 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4948 /* Allocate space for fixed arguments. */
4949 if (reg_parm_stack_space
> crtl
->outgoing_args_size
)
4950 crtl
->outgoing_args_size
= reg_parm_stack_space
;
4953 /* Fetch the return address for the frame COUNT steps up from
4954 the current frame, after the prologue. FRAMEADDR is the
4955 frame pointer of the COUNT frame.
4957 We want to ignore any export stub remnants here. To handle this,
4958 we examine the code at the return address, and if it is an export
4959 stub, we return a memory rtx for the stub return address stored
4962 The value returned is used in two different ways:
4964 1. To find a function's caller.
4966 2. To change the return address for a function.
4968 This function handles most instances of case 1; however, it will
4969 fail if there are two levels of stubs to execute on the return
4970 path. The only way I believe that can happen is if the return value
4971 needs a parameter relocation, which never happens for C code.
4973 This function handles most instances of case 2; however, it will
4974 fail if we did not originally have stub code on the return path
4975 but will need stub code on the new return path. This can happen if
4976 the caller & callee are both in the main program, but the new
4977 return location is in a shared library. */
4980 pa_return_addr_rtx (int count
, rtx frameaddr
)
4987 /* The instruction stream at the return address of a PA1.X export stub is:
4989 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4990 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4991 0x00011820 | stub+16: mtsp r1,sr0
4992 0xe0400002 | stub+20: be,n 0(sr0,rp)
4994 0xe0400002 must be specified as -532676606 so that it won't be
4995 rejected as an invalid immediate operand on 64-bit hosts.
4997 The instruction stream at the return address of a PA2.0 export stub is:
4999 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
5000 0xe840d002 | stub+12: bve,n (rp)
5003 HOST_WIDE_INT insns
[4];
5009 rp
= get_hard_reg_initial_val (Pmode
, 2);
5011 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
5014 /* If there is no export stub then just use the value saved from
5015 the return pointer register. */
5017 saved_rp
= gen_reg_rtx (Pmode
);
5018 emit_move_insn (saved_rp
, rp
);
5020 /* Get pointer to the instruction stream. We have to mask out the
5021 privilege level from the two low order bits of the return address
5022 pointer here so that ins will point to the start of the first
5023 instruction that would have been executed if we returned. */
5024 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
5025 label
= gen_label_rtx ();
5029 insns
[0] = 0x4bc23fd1;
5030 insns
[1] = -398405630;
5035 insns
[0] = 0x4bc23fd1;
5036 insns
[1] = 0x004010a1;
5037 insns
[2] = 0x00011820;
5038 insns
[3] = -532676606;
5042 /* Check the instruction stream at the normal return address for the
5043 export stub. If it is an export stub, than our return address is
5044 really in -24[frameaddr]. */
5046 for (i
= 0; i
< len
; i
++)
5048 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
5049 rtx op1
= GEN_INT (insns
[i
]);
5050 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
5053 /* Here we know that our return address points to an export
5054 stub. We don't want to return the address of the export stub,
5055 but rather the return address of the export stub. That return
5056 address is stored at -24[frameaddr]. */
5058 emit_move_insn (saved_rp
,
5060 memory_address (Pmode
,
5061 plus_constant (Pmode
, frameaddr
,
5070 pa_emit_bcond_fp (rtx operands
[])
5072 enum rtx_code code
= GET_CODE (operands
[0]);
5073 rtx operand0
= operands
[1];
5074 rtx operand1
= operands
[2];
5075 rtx label
= operands
[3];
5077 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
5078 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
5080 emit_jump_insn (gen_rtx_SET (pc_rtx
,
5081 gen_rtx_IF_THEN_ELSE (VOIDmode
,
5084 gen_rtx_REG (CCFPmode
, 0),
5086 gen_rtx_LABEL_REF (VOIDmode
, label
),
5091 /* Adjust the cost of a scheduling dependency. Return the new cost of
5092 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5095 pa_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
5098 enum attr_type attr_type
;
5100 /* Don't adjust costs for a pa8000 chip, also do not adjust any
5101 true dependencies as they are described with bypasses now. */
5102 if (pa_cpu
>= PROCESSOR_8000
|| dep_type
== 0)
5105 if (! recog_memoized (insn
))
5108 attr_type
= get_attr_type (insn
);
5113 /* Anti dependency; DEP_INSN reads a register that INSN writes some
5116 if (attr_type
== TYPE_FPLOAD
)
5118 rtx pat
= PATTERN (insn
);
5119 rtx dep_pat
= PATTERN (dep_insn
);
5120 if (GET_CODE (pat
) == PARALLEL
)
5122 /* This happens for the fldXs,mb patterns. */
5123 pat
= XVECEXP (pat
, 0, 0);
5125 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
5126 /* If this happens, we have to extend this to schedule
5127 optimally. Return 0 for now. */
5130 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
5132 if (! recog_memoized (dep_insn
))
5134 switch (get_attr_type (dep_insn
))
5141 case TYPE_FPSQRTSGL
:
5142 case TYPE_FPSQRTDBL
:
5143 /* A fpload can't be issued until one cycle before a
5144 preceding arithmetic operation has finished if
5145 the target of the fpload is any of the sources
5146 (or destination) of the arithmetic operation. */
5147 return insn_default_latency (dep_insn
) - 1;
5154 else if (attr_type
== TYPE_FPALU
)
5156 rtx pat
= PATTERN (insn
);
5157 rtx dep_pat
= PATTERN (dep_insn
);
5158 if (GET_CODE (pat
) == PARALLEL
)
5160 /* This happens for the fldXs,mb patterns. */
5161 pat
= XVECEXP (pat
, 0, 0);
5163 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
5164 /* If this happens, we have to extend this to schedule
5165 optimally. Return 0 for now. */
5168 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
5170 if (! recog_memoized (dep_insn
))
5172 switch (get_attr_type (dep_insn
))
5176 case TYPE_FPSQRTSGL
:
5177 case TYPE_FPSQRTDBL
:
5178 /* An ALU flop can't be issued until two cycles before a
5179 preceding divide or sqrt operation has finished if
5180 the target of the ALU flop is any of the sources
5181 (or destination) of the divide or sqrt operation. */
5182 return insn_default_latency (dep_insn
) - 2;
5190 /* For other anti dependencies, the cost is 0. */
5193 case REG_DEP_OUTPUT
:
5194 /* Output dependency; DEP_INSN writes a register that INSN writes some
5196 if (attr_type
== TYPE_FPLOAD
)
5198 rtx pat
= PATTERN (insn
);
5199 rtx dep_pat
= PATTERN (dep_insn
);
5200 if (GET_CODE (pat
) == PARALLEL
)
5202 /* This happens for the fldXs,mb patterns. */
5203 pat
= XVECEXP (pat
, 0, 0);
5205 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
5206 /* If this happens, we have to extend this to schedule
5207 optimally. Return 0 for now. */
5210 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
5212 if (! recog_memoized (dep_insn
))
5214 switch (get_attr_type (dep_insn
))
5221 case TYPE_FPSQRTSGL
:
5222 case TYPE_FPSQRTDBL
:
5223 /* A fpload can't be issued until one cycle before a
5224 preceding arithmetic operation has finished if
5225 the target of the fpload is the destination of the
5226 arithmetic operation.
5228 Exception: For PA7100LC, PA7200 and PA7300, the cost
5229 is 3 cycles, unless they bundle together. We also
5230 pay the penalty if the second insn is a fpload. */
5231 return insn_default_latency (dep_insn
) - 1;
5238 else if (attr_type
== TYPE_FPALU
)
5240 rtx pat
= PATTERN (insn
);
5241 rtx dep_pat
= PATTERN (dep_insn
);
5242 if (GET_CODE (pat
) == PARALLEL
)
5244 /* This happens for the fldXs,mb patterns. */
5245 pat
= XVECEXP (pat
, 0, 0);
5247 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
5248 /* If this happens, we have to extend this to schedule
5249 optimally. Return 0 for now. */
5252 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
5254 if (! recog_memoized (dep_insn
))
5256 switch (get_attr_type (dep_insn
))
5260 case TYPE_FPSQRTSGL
:
5261 case TYPE_FPSQRTDBL
:
5262 /* An ALU flop can't be issued until two cycles before a
5263 preceding divide or sqrt operation has finished if
5264 the target of the ALU flop is also the target of
5265 the divide or sqrt operation. */
5266 return insn_default_latency (dep_insn
) - 2;
5274 /* For other output dependencies, the cost is 0. */
5282 /* The 700 can only issue a single insn at a time.
5283 The 7XXX processors can issue two insns at a time.
5284 The 8000 can issue 4 insns at a time. */
5286 pa_issue_rate (void)
5290 case PROCESSOR_700
: return 1;
5291 case PROCESSOR_7100
: return 2;
5292 case PROCESSOR_7100LC
: return 2;
5293 case PROCESSOR_7200
: return 2;
5294 case PROCESSOR_7300
: return 2;
5295 case PROCESSOR_8000
: return 4;
5304 /* Return any length plus adjustment needed by INSN which already has
5305 its length computed as LENGTH. Return LENGTH if no adjustment is
5308 Also compute the length of an inline block move here as it is too
5309 complicated to express as a length attribute in pa.md. */
5311 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
5313 rtx pat
= PATTERN (insn
);
5315 /* If length is negative or undefined, provide initial length. */
5316 if ((unsigned int) length
>= INT_MAX
)
5318 if (GET_CODE (pat
) == SEQUENCE
)
5319 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
5321 switch (get_attr_type (insn
))
5324 length
= pa_attr_length_millicode_call (insn
);
5327 length
= pa_attr_length_call (insn
, 0);
5330 length
= pa_attr_length_call (insn
, 1);
5333 length
= pa_attr_length_indirect_call (insn
);
5335 case TYPE_SH_FUNC_ADRS
:
5336 length
= pa_attr_length_millicode_call (insn
) + 20;
5343 /* Block move pattern. */
5344 if (NONJUMP_INSN_P (insn
)
5345 && GET_CODE (pat
) == PARALLEL
5346 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5347 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5348 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
5349 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5350 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5351 length
+= compute_cpymem_length (insn
) - 4;
5352 /* Block clear pattern. */
5353 else if (NONJUMP_INSN_P (insn
)
5354 && GET_CODE (pat
) == PARALLEL
5355 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5356 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5357 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5358 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5359 length
+= compute_clrmem_length (insn
) - 4;
5360 /* Conditional branch with an unfilled delay slot. */
5361 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5363 /* Adjust a short backwards conditional with an unfilled delay slot. */
5364 if (GET_CODE (pat
) == SET
5366 && JUMP_LABEL (insn
) != NULL_RTX
5367 && ! forward_branch_p (insn
))
5369 else if (GET_CODE (pat
) == PARALLEL
5370 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5373 /* Adjust dbra insn with short backwards conditional branch with
5374 unfilled delay slot -- only for case where counter is in a
5375 general register register. */
5376 else if (GET_CODE (pat
) == PARALLEL
5377 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5378 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5379 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5381 && ! forward_branch_p (insn
))
5387 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5390 pa_print_operand_punct_valid_p (unsigned char code
)
5401 /* Print operand X (an rtx) in assembler syntax to file FILE.
5402 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5403 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5406 pa_print_operand (FILE *file
, rtx x
, int code
)
5411 /* Output a 'nop' if there's nothing for the delay slot. */
5412 if (dbr_sequence_length () == 0)
5413 fputs ("\n\tnop", file
);
5416 /* Output a nullification completer if there's nothing for the */
5417 /* delay slot or nullification is requested. */
5418 if (dbr_sequence_length () == 0 ||
5420 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5424 /* Print out the second register name of a register pair.
5425 I.e., R (6) => 7. */
5426 fputs (reg_names
[REGNO (x
) + 1], file
);
5429 /* A register or zero. */
5431 || (x
== CONST0_RTX (DFmode
))
5432 || (x
== CONST0_RTX (SFmode
)))
5434 fputs ("%r0", file
);
5440 /* A register or zero (floating point). */
5442 || (x
== CONST0_RTX (DFmode
))
5443 || (x
== CONST0_RTX (SFmode
)))
5445 fputs ("%fr0", file
);
5454 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5455 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5456 pa_output_global_address (file
, xoperands
[1], 0);
5457 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5461 case 'C': /* Plain (C)ondition */
5463 switch (GET_CODE (x
))
5466 fputs ("=", file
); break;
5468 fputs ("<>", file
); break;
5470 fputs (">", file
); break;
5472 fputs (">=", file
); break;
5474 fputs (">>=", file
); break;
5476 fputs (">>", file
); break;
5478 fputs ("<", file
); break;
5480 fputs ("<=", file
); break;
5482 fputs ("<<=", file
); break;
5484 fputs ("<<", file
); break;
5489 case 'N': /* Condition, (N)egated */
5490 switch (GET_CODE (x
))
5493 fputs ("<>", file
); break;
5495 fputs ("=", file
); break;
5497 fputs ("<=", file
); break;
5499 fputs ("<", file
); break;
5501 fputs ("<<", file
); break;
5503 fputs ("<<=", file
); break;
5505 fputs (">=", file
); break;
5507 fputs (">", file
); break;
5509 fputs (">>", file
); break;
5511 fputs (">>=", file
); break;
5516 /* For floating point comparisons. Note that the output
5517 predicates are the complement of the desired mode. The
5518 conditions for GT, GE, LT, LE and LTGT cause an invalid
5519 operation exception if the result is unordered and this
5520 exception is enabled in the floating-point status register. */
5522 switch (GET_CODE (x
))
5525 fputs ("!=", file
); break;
5527 fputs ("=", file
); break;
5529 fputs ("!>", file
); break;
5531 fputs ("!>=", file
); break;
5533 fputs ("!<", file
); break;
5535 fputs ("!<=", file
); break;
5537 fputs ("!<>", file
); break;
5539 fputs ("!?<=", file
); break;
5541 fputs ("!?<", file
); break;
5543 fputs ("!?>=", file
); break;
5545 fputs ("!?>", file
); break;
5547 fputs ("!?=", file
); break;
5549 fputs ("!?", file
); break;
5551 fputs ("?", file
); break;
5556 case 'S': /* Condition, operands are (S)wapped. */
5557 switch (GET_CODE (x
))
5560 fputs ("=", file
); break;
5562 fputs ("<>", file
); break;
5564 fputs ("<", file
); break;
5566 fputs ("<=", file
); break;
5568 fputs ("<<=", file
); break;
5570 fputs ("<<", file
); break;
5572 fputs (">", file
); break;
5574 fputs (">=", file
); break;
5576 fputs (">>=", file
); break;
5578 fputs (">>", file
); break;
5583 case 'B': /* Condition, (B)oth swapped and negate. */
5584 switch (GET_CODE (x
))
5587 fputs ("<>", file
); break;
5589 fputs ("=", file
); break;
5591 fputs (">=", file
); break;
5593 fputs (">", file
); break;
5595 fputs (">>", file
); break;
5597 fputs (">>=", file
); break;
5599 fputs ("<=", file
); break;
5601 fputs ("<", file
); break;
5603 fputs ("<<", file
); break;
5605 fputs ("<<=", file
); break;
5611 gcc_assert (GET_CODE (x
) == CONST_INT
);
5612 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5615 gcc_assert (GET_CODE (x
) == CONST_INT
);
5616 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5619 gcc_assert (GET_CODE (x
) == CONST_INT
);
5620 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5623 gcc_assert (GET_CODE (x
) == CONST_INT
5624 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5625 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5628 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5629 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5632 gcc_assert (GET_CODE (x
) == CONST_INT
);
5633 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5636 gcc_assert (GET_CODE (x
) == CONST_INT
);
5637 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5640 if (GET_CODE (x
) == CONST_INT
)
5645 switch (GET_CODE (XEXP (x
, 0)))
5649 if (ASSEMBLER_DIALECT
== 0)
5650 fputs ("s,mb", file
);
5652 fputs (",mb", file
);
5656 if (ASSEMBLER_DIALECT
== 0)
5657 fputs ("s,ma", file
);
5659 fputs (",ma", file
);
5662 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5663 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5665 if (ASSEMBLER_DIALECT
== 0)
5668 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5669 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5671 if (ASSEMBLER_DIALECT
== 0)
5672 fputs ("x,s", file
);
5676 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5680 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5686 pa_output_global_address (file
, x
, 0);
5689 pa_output_global_address (file
, x
, 1);
5691 case 0: /* Don't do anything special */
5696 compute_zdepwi_operands (INTVAL (x
), op
);
5697 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5703 compute_zdepdi_operands (INTVAL (x
), op
);
5704 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5708 /* We can get here from a .vtable_inherit due to our
5709 CONSTANT_ADDRESS_P rejecting perfectly good constant
5715 if (GET_CODE (x
) == REG
)
5717 fputs (reg_names
[REGNO (x
)], file
);
5718 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5724 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5725 && (REGNO (x
) & 1) == 0)
5728 else if (GET_CODE (x
) == MEM
)
5730 int size
= GET_MODE_SIZE (GET_MODE (x
));
5731 rtx base
= NULL_RTX
;
5732 switch (GET_CODE (XEXP (x
, 0)))
5736 base
= XEXP (XEXP (x
, 0), 0);
5737 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5741 base
= XEXP (XEXP (x
, 0), 0);
5742 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5745 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5746 fprintf (file
, "%s(%s)",
5747 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5748 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5749 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5750 fprintf (file
, "%s(%s)",
5751 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5752 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5753 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5754 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5756 /* Because the REG_POINTER flag can get lost during reload,
5757 pa_legitimate_address_p canonicalizes the order of the
5758 index and base registers in the combined move patterns. */
5759 rtx base
= XEXP (XEXP (x
, 0), 1);
5760 rtx index
= XEXP (XEXP (x
, 0), 0);
5762 fprintf (file
, "%s(%s)",
5763 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5766 output_address (GET_MODE (x
), XEXP (x
, 0));
5769 output_address (GET_MODE (x
), XEXP (x
, 0));
5774 output_addr_const (file
, x
);
5777 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5780 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5783 /* Imagine (high (const (plus ...))). */
5784 if (GET_CODE (x
) == HIGH
)
5787 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5788 output_addr_const (file
, x
);
5789 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5791 output_addr_const (file
, x
);
5792 fputs ("-$global$", file
);
5794 else if (GET_CODE (x
) == CONST
)
5796 const char *sep
= "";
5797 int offset
= 0; /* assembler wants -$global$ at end */
5798 rtx base
= NULL_RTX
;
5800 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5804 base
= XEXP (XEXP (x
, 0), 0);
5805 output_addr_const (file
, base
);
5808 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5814 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5818 base
= XEXP (XEXP (x
, 0), 1);
5819 output_addr_const (file
, base
);
5822 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5828 /* How bogus. The compiler is apparently responsible for
5829 rounding the constant if it uses an LR field selector.
5831 The linker and/or assembler seem a better place since
5832 they have to do this kind of thing already.
5834 If we fail to do this, HP's optimizing linker may eliminate
5835 an addil, but not update the ldw/stw/ldo instruction that
5836 uses the result of the addil. */
5838 offset
= ((offset
+ 0x1000) & ~0x1fff);
5840 switch (GET_CODE (XEXP (x
, 0)))
5853 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5861 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5862 fputs ("-$global$", file
);
5864 fprintf (file
, "%s%d", sep
, offset
);
5867 output_addr_const (file
, x
);
5870 /* Output boilerplate text to appear at the beginning of the file.
5871 There are several possible versions. */
5872 #define aputs(x) fputs(x, asm_out_file)
5874 pa_file_start_level (void)
5877 aputs ("\t.LEVEL 2.0w\n");
5878 else if (TARGET_PA_20
)
5879 aputs ("\t.LEVEL 2.0\n");
5880 else if (TARGET_PA_11
)
5881 aputs ("\t.LEVEL 1.1\n");
5883 aputs ("\t.LEVEL 1.0\n");
5887 pa_file_start_space (int sortspace
)
5889 aputs ("\t.SPACE $PRIVATE$");
5892 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5894 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5895 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5896 "\n\t.SPACE $TEXT$");
5899 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5900 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5904 pa_file_start_file (int want_version
)
5906 if (write_symbols
!= NO_DEBUG
)
5908 output_file_directive (asm_out_file
, main_input_filename
);
5910 aputs ("\t.version\t\"01.01\"\n");
5915 pa_file_start_mcount (const char *aswhat
)
5918 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
5922 pa_elf_file_start (void)
5924 pa_file_start_level ();
5925 pa_file_start_mcount ("ENTRY");
5926 pa_file_start_file (0);
5930 pa_som_file_start (void)
5932 pa_file_start_level ();
5933 pa_file_start_space (0);
5934 aputs ("\t.IMPORT $global$,DATA\n"
5935 "\t.IMPORT $$dyncall,MILLICODE\n");
5936 pa_file_start_mcount ("CODE");
5937 pa_file_start_file (0);
5941 pa_linux_file_start (void)
5943 pa_file_start_file (1);
5944 pa_file_start_level ();
5945 pa_file_start_mcount ("CODE");
5949 pa_hpux64_gas_file_start (void)
5951 pa_file_start_level ();
5952 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5954 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file
, "_mcount", "function");
5956 pa_file_start_file (1);
5960 pa_hpux64_hpas_file_start (void)
5962 pa_file_start_level ();
5963 pa_file_start_space (1);
5964 pa_file_start_mcount ("CODE");
5965 pa_file_start_file (0);
5969 /* Search the deferred plabel list for SYMBOL and return its internal
5970 label. If an entry for SYMBOL is not found, a new entry is created. */
5973 pa_get_deferred_plabel (rtx symbol
)
5975 const char *fname
= XSTR (symbol
, 0);
5978 /* See if we have already put this function on the list of deferred
5979 plabels. This list is generally small, so a liner search is not
5980 too ugly. If it proves too slow replace it with something faster. */
5981 for (i
= 0; i
< n_deferred_plabels
; i
++)
5982 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5985 /* If the deferred plabel list is empty, or this entry was not found
5986 on the list, create a new entry on the list. */
5987 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5991 if (deferred_plabels
== 0)
5992 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5994 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5996 n_deferred_plabels
+ 1);
5998 i
= n_deferred_plabels
++;
5999 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
6000 deferred_plabels
[i
].symbol
= symbol
;
6002 /* Gross. We have just implicitly taken the address of this
6003 function. Mark it in the same manner as assemble_name. */
6004 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
6006 mark_referenced (id
);
6009 return deferred_plabels
[i
].internal_label
;
/* Emit all deferred plabels accumulated by pa_get_deferred_plabel:
   switch to the appropriate data section, align, then emit each
   internal label followed by a pointer-sized reference to its symbol.  */
6013 output_deferred_plabels (void)
6017 /* If we have some deferred plabels, then we need to switch into the
6018 data or readonly data section, and align it to a 4 byte boundary
6019 before outputting the deferred plabels. */
6020 if (n_deferred_plabels
)
6022 switch_to_section (flag_pic
? data_section
: readonly_data_section
)
;
6023 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
6026 /* Now output the deferred plabels. */
6027 for (i
= 0; i
< n_deferred_plabels
; i
++)
6029 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6030 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
6031 assemble_integer (deferred_plabels
[i
].symbol
,
6032 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
6036 /* Initialize optabs to point to emulation routines. */
/* Registers HP-UX quad-precision (TFmode) soft-float library names for
   arithmetic, comparison and conversion optabs, plus sync libcalls.
   NOTE(review): the asymmetric names "_U_Qmin" vs "_U_Qfmax" and the
   double-underscore "__U_Qfcnvfxt_quad_to_sgl" for 64-bit match the HP
   library; do not "normalize" them.  */
6039 pa_init_libfuncs (void)
6041 if (HPUX_LONG_DOUBLE_LIBRARY
)
6043 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
6044 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
6045 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
6046 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
6047 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
6048 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
6049 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
6050 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
6051 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
6053 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
6054 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
6055 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
6056 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
6057 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
6058 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
6059 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
6061 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
6062 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
6063 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
6064 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
6066 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
6067 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
6068 : "_U_Qfcnvfxt_quad_to_sgl");
6069 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
6070 "_U_Qfcnvfxt_quad_to_dbl");
6071 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
6072 "_U_Qfcnvfxt_quad_to_usgl");
6073 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
6074 "_U_Qfcnvfxt_quad_to_udbl");
6076 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
6077 "_U_Qfcnvxf_sgl_to_quad");
6078 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
6079 "_U_Qfcnvxf_dbl_to_quad");
6080 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
6081 "_U_Qfcnvxf_usgl_to_quad");
6082 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
6083 "_U_Qfcnvxf_udbl_to_quad");
6086 if (TARGET_SYNC_LIBCALLS
)
6087 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
6090 /* HP's millicode routines mean something special to the assembler.
6091 Keep track of which ones we have used. */
/* `imported' records (per enum value) whether the .IMPORT directive has
   already been emitted; `import_string' is a template whose "$$...."
   placeholder (at offset MILLI_START) is patched with a name from
   milli_names by import_milli below.  */
6093 enum millicodes
{ remI
, remU
, divI
, divU
, mulI
, end1000
};
6094 static void import_milli (enum millicodes
);
6095 static char imported
[(int) end1000
];
6096 static const char * const milli_names
[] = {"remI", "remU", "divI", "divU", "mulI"};
6097 static const char import_string
[] = ".IMPORT $$....,MILLICODE";
6098 #define MILLI_START 10
/* Emit a .IMPORT directive for millicode routine CODE, at most once per
   compilation: the 4-character name from milli_names overwrites the
   "$$...." placeholder in a local copy of import_string.  */
6101 import_milli (enum millicodes code
)
6103 char str
[sizeof (import_string
)];
6105 if (!imported
[(int) code
])
6107 imported
[(int) code
] = 1;
6108 strcpy (str
, import_string
);
6109 memcpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
6110 output_asm_insn (str
, 0);
6114 /* The register constraints have put the operands and return value in
6115 the proper registers. */
/* Emit the $$mulI millicode call for a multiply; ensures the .IMPORT
   for $$mulI has been output first.  */
6118 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
6120 import_milli (mulI
);
6121 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
6124 /* Emit the rtl for doing a division by a constant. */
6126 /* Do magic division millicodes exist for this value? */
6127 const int pa_magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}
;
6129 /* We'll use an array to keep track of the magic millicodes and
6130 whether or not we've used them already. [n][0] is signed, [n][1] is
unsigned.  */
6133 static int div_milli
[16][2];
/* If operands[2] is a small positive constant with a magic-divide
   millicode, emit the millicode division sequence: move the dividend
   into %r26, emit the PARALLEL describing the division with its
   clobbers (return-pointer register %r2/%r31, %r25, %r26, operands[3]
   and operands[4]), and copy the result from %r29.
   NOTE(review): lossy extraction — the emit_insn wrapping the PARALLEL
   and the surrounding braces/return were dropped; see upstream pa.cc.  */
6136 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
6138 if (GET_CODE (operands
[2]) == CONST_INT
6139 && INTVAL (operands
[2]) > 0
6140 && INTVAL (operands
[2]) < 16
6141 && pa_magic_milli
[INTVAL (operands
[2])])
6143 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
6145 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
6149 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
6150 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
6152 gen_rtx_REG (SImode
, 26),
6154 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
6155 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
6156 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
6157 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
6158 gen_rtx_CLOBBER (VOIDmode
, ret
))));
6159 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
/* Output a division millicode call. For a constant divisor with a
   magic millicode ($$divU_<n>/$$divI_<n>), emit a one-time .IMPORT and
   call the specialized routine; otherwise call generic $$divU/$$divI.  */
6166 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
6170 /* If the divisor is a constant, try to use one of the special
millicodes.  */
6172 if (GET_CODE (operands
[0]) == CONST_INT
)
6174 static char buf
[100];
6175 divisor
= INTVAL (operands
[0]);
6176 if (!div_milli
[divisor
][unsignedp
])
6178 div_milli
[divisor
][unsignedp
] = 1;
6180 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
6182 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
6186 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
6187 INTVAL (operands
[0]));
6188 return pa_output_millicode_call (insn
,
6189 gen_rtx_SYMBOL_REF (SImode
, buf
));
6193 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
6194 INTVAL (operands
[0]));
6195 return pa_output_millicode_call (insn
,
6196 gen_rtx_SYMBOL_REF (SImode
, buf
));
6199 /* Divisor isn't a special constant. */
6204 import_milli (divU
);
6205 return pa_output_millicode_call (insn
,
6206 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
6210 import_milli (divI
);
6211 return pa_output_millicode_call (insn
,
6212 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
6217 /* Output a $$rem millicode to do mod. */
/* Chooses $$remU for unsigned, $$remI for signed remainder; emits the
   corresponding .IMPORT on first use via import_milli.  */
6220 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
6224 import_milli (remU
);
6225 return pa_output_millicode_call (insn
,
6226 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
6230 import_milli (remI
);
6231 return pa_output_millicode_call (insn
,
6232 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
/* Emit the SOM ".CALL ARGWn=..." argument-relocation descriptor for
   CALL_INSN: scan CALL_INSN_FUNCTION_USAGE for argument registers and
   classify each word as GR (general), FR (float) or FU (float upper).
   Skipped entirely for the 64-bit and ELF32 environments.  */
6237 pa_output_arg_descriptor (rtx_insn
*call_insn
)
6239 const char *arg_regs
[4];
6240 machine_mode arg_mode
;
6242 int i
, output_flag
= 0;
6245 /* We neither need nor want argument location descriptors for the
6246 64bit runtime environment or the ELF32 environment. */
6247 if (TARGET_64BIT
|| TARGET_ELF32
)
6250 for (i
= 0; i
< 4; i
++)
6253 /* Specify explicitly that no argument relocations should take place
6254 if using the portable runtime calling conventions. */
6255 if (TARGET_PORTABLE_RUNTIME
)
6257 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6262 gcc_assert (CALL_P (call_insn
));
6263 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
6264 link
; link
= XEXP (link
, 1))
6266 rtx use
= XEXP (link
, 0);
6268 if (! (GET_CODE (use
) == USE
6269 && GET_CODE (XEXP (use
, 0)) == REG
6270 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
6273 arg_mode
= GET_MODE (XEXP (use
, 0));
6274 regno
= REGNO (XEXP (use
, 0));
6275 if (regno
>= 23 && regno
<= 26)
6277 arg_regs
[26 - regno
] = "GR";
6278 if (arg_mode
== DImode
)
6279 arg_regs
[25 - regno
] = "GR";
6281 else if (regno
>= 32 && regno
<= 39)
6283 if (arg_mode
== SFmode
)
6284 arg_regs
[(regno
- 32) / 2] = "FR";
6287 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6288 arg_regs
[(regno
- 34) / 2] = "FR";
6289 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
6291 arg_regs
[(regno
- 34) / 2] = "FU";
6292 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
6297 fputs ("\t.CALL ", asm_out_file
);
6298 for (i
= 0; i
< 4; i
++)
6303 fputc (',', asm_out_file
);
6304 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
6307 fputc ('\n', asm_out_file
);
6310 /* Inform reload about cases where moving X with a mode MODE to or from
6311 a register in RCLASS requires an extra scratch or immediate register.
6312 Return the class needed for the immediate register. */
6315 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
6316 machine_mode mode
, secondary_reload_info
*sri
)
6319 enum reg_class rclass
= (enum reg_class
) rclass_i
;
6321 /* Handle the easy stuff first. */
6322 if (rclass
== R1_REGS
)
6328 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
6334 /* If we have something like (mem (mem (...)), we can safely assume the
6335 inner MEM will end up in a general register after reloading, so there's
6336 no need for a secondary reload. */
6337 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
6340 /* Trying to load a constant into a FP register during PIC code
6341 generation requires %r1 as a scratch register. For float modes,
6342 the only legitimate constant is CONST0_RTX. However, there are
6343 a few patterns that accept constant double operands. */
6345 && FP_REG_CLASS_P (rclass
)
6346 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
6351 sri
->icode
= CODE_FOR_reload_insi_r1
;
6355 sri
->icode
= CODE_FOR_reload_indi_r1
;
6359 sri
->icode
= CODE_FOR_reload_insf_r1
;
6363 sri
->icode
= CODE_FOR_reload_indf_r1
;
6372 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6373 register when we're generating PIC code or when the operand isn't
readonly.  */
6375 if (pa_symbolic_expression_p (x
))
6377 if (GET_CODE (x
) == HIGH
)
6380 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
6385 sri
->icode
= CODE_FOR_reload_insi_r1
;
6389 sri
->icode
= CODE_FOR_reload_indi_r1
;
6399 /* Profiling showed the PA port spends about 1.3% of its compilation
6400 time in true_regnum from calls inside pa_secondary_reload_class. */
6401 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6402 regno
= true_regnum (x
);
6404 /* Handle reloads for floating point loads and stores. */
6405 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6406 && FP_REG_CLASS_P (rclass
))
6412 /* We don't need a secondary reload for indexed memory addresses.
6414 When INT14_OK_STRICT is true, it might appear that we could
6415 directly allow register indirect memory addresses. However,
6416 this doesn't work because we don't support SUBREGs in
6417 floating-point register copies and reload doesn't tell us
6418 when it's going to use a SUBREG. */
6419 if (IS_INDEX_ADDR_P (x
))
6423 /* Request a secondary reload with a general scratch register
6424 for everything else. ??? Could symbolic operands be handled
6425 directly when generating non-pic PA 2.0 code? */
6427 ? direct_optab_handler (reload_in_optab
, mode
)
6428 : direct_optab_handler (reload_out_optab
, mode
));
6432 /* A SAR<->FP register copy requires an intermediate general register
6433 and secondary memory. We need a secondary reload with a general
6434 scratch register for spills. */
6435 if (rclass
== SHIFT_REGS
)
6438 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6441 ? direct_optab_handler (reload_in_optab
, mode
)
6442 : direct_optab_handler (reload_out_optab
, mode
));
6446 /* Handle FP copy. */
6447 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6448 return GENERAL_REGS
;
6451 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6452 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6453 && FP_REG_CLASS_P (rclass
))
6454 return GENERAL_REGS
;
6459 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
/* Delegates to the PA_SECONDARY_MEMORY_NEEDED macro when defined;
   NOTE(review): the `#else`/default-return lines were dropped by the
   extraction — see upstream pa.cc.  */
6462 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED
,
6463 reg_class_t class1 ATTRIBUTE_UNUSED
,
6464 reg_class_t class2 ATTRIBUTE_UNUSED
)
6466 #ifdef PA_SECONDARY_MEMORY_NEEDED
6467 return PA_SECONDARY_MEMORY_NEEDED (mode
, class1
, class2
);
6473 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6474 is only marked as live on entry by df-scan when it is a fixed
6475 register. It isn't a fixed register in the 64-bit runtime,
6476 so we need to mark it here. */
6479 pa_extra_live_on_entry (bitmap regs
)
6482 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6485 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6486 to prevent it from being deleted. */
/* Builds a MEM at hard_frame_pointer - 16 (64-bit) or - 20 (32-bit);
   NOTE(review): the MEM_VOLATILE_P marking and return were dropped by
   the extraction.  */
6489 pa_eh_return_handler_rtx (void)
6493 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6494 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6495 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6500 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6501 by invisible reference. As a GCC extension, we also pass anything
6502 with a zero or variable size by reference.
6504 The 64-bit runtime does not describe passing any types by invisible
6505 reference. The internals of GCC can't currently handle passing
6506 empty structures, and zero or variable length arrays when they are
6507 not passed entirely on the stack or by reference. Thus, as a GCC
6508 extension, we pass these types by reference. The HP compiler doesn't
6509 support these types, so hopefully there shouldn't be any compatibility
6510 issues. This may have to be revisited when HP releases a C99 compiler
6511 or updates the ABI. */
6514 pa_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
6516 HOST_WIDE_INT size
= arg
.type_size_in_bytes ();
/* Zero/variable size (size <= 0) or larger than 8 bytes -> by
   reference, per the comment above.  */
6520 return size
<= 0 || size
> 8;
6523 /* Implement TARGET_FUNCTION_ARG_PADDING. */
/* NOTE(review): lossy extraction — the enclosing `if' conditions and
   several return statements were dropped; conditions below are
   fragments.  */
6525 static pad_direction
6526 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6531 && (AGGREGATE_TYPE_P (type
)
6532 || TREE_CODE (type
) == COMPLEX_TYPE
6533 || VECTOR_TYPE_P (type
))))
6535 /* Return PAD_NONE if justification is not required. */
6537 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6538 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6541 /* The directions set here are ignored when a BLKmode argument larger
6542 than a word is placed in a register. Different code is used for
6543 the stack and registers. This makes it difficult to have a
6544 consistent data representation for both the stack and registers.
6545 For both runtimes, the justification and padding for arguments on
6546 the stack and in registers should be identical. */
6548 /* The 64-bit runtime specifies left justification for aggregates. */
6551 /* The 32-bit runtime architecture specifies right justification.
6552 When the argument is passed on the stack, the argument is padded
6553 with garbage on the left. The HP compiler pads with zeros. */
6554 return PAD_DOWNWARD
;
6557 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6558 return PAD_DOWNWARD
;
6564 /* Do what is necessary for `va_start'. We look at the current function
6565 to determine if stdargs or varargs is used and fill in an initial
6566 va_list. A pointer to this constructor is returned. */
/* NOTE(review): lossy extraction — the TARGET_64BIT/32-bit branch
   structure and several declarations (offset, dest, i, off) were
   dropped; the 64-bit path saves %r19-%r26 below the arg pointer, the
   32-bit path block-stores from register 23.  */
6569 hppa_builtin_saveregs (void)
6572 tree fntype
= TREE_TYPE (current_function_decl
);
6573 int argadj
= ((!stdarg_p (fntype
))
6574 ? UNITS_PER_WORD
: 0);
6577 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6579 offset
= crtl
->args
.arg_offset_rtx
;
6585 /* Adjust for varargs/stdarg differences. */
6587 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6589 offset
= crtl
->args
.arg_offset_rtx
;
6591 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6592 from the incoming arg pointer and growing to larger addresses. */
6593 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6594 emit_move_insn (gen_rtx_MEM (word_mode
,
6595 plus_constant (Pmode
,
6596 arg_pointer_rtx
, off
)),
6597 gen_rtx_REG (word_mode
, i
));
6599 /* The incoming args pointer points just beyond the flushback area;
6600 normally this is not a serious concern. However, when we are doing
6601 varargs/stdargs we want to make the arg pointer point to the start
6602 of the incoming argument area. */
6603 emit_move_insn (virtual_incoming_args_rtx
,
6604 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6606 /* Now return a pointer to the first anonymous argument. */
6607 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6608 virtual_incoming_args_rtx
,
6609 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6612 /* Store general registers on the stack. */
6613 dest
= gen_rtx_MEM (BLKmode
,
6614 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6616 set_mem_alias_set (dest
, get_varargs_alias_set ());
6617 set_mem_align (dest
, BITS_PER_WORD
);
6618 move_block_from_reg (23, dest
, 4);
6620 /* move_block_from_reg will emit code to store the argument registers
6621 individually as scalar stores.
6623 However, other insns may later load from the same addresses for
6624 a structure load (passing a struct to a varargs routine).
6626 The alias code assumes that such aliasing can never happen, so we
6627 have to keep memory referencing insns from moving up beyond the
6628 last argument register store. So we emit a blockage insn here. */
6629 emit_insn (gen_blockage ());
6631 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6632 crtl
->args
.internal_arg_pointer
,
6633 offset
, 0, 0, OPTAB_LIB_WIDEN
));
/* Implement va_start: save the anonymous argument registers, then
   initialize VALIST with the pointer returned by
   expand_builtin_saveregs (the incoming NEXTARG is overwritten).  */
6637 hppa_va_start (tree valist
, rtx nextarg
)
6639 nextarg
= expand_builtin_saveregs ();
6640 std_expand_builtin_va_start (valist
, nextarg
);
/* Gimplify va_arg: 64-bit runtime uses the generic upward-growing
   routine; 32-bit runtime grows args downward, so the pointer is
   decremented by the argument size, aligned down to 4 or 8 bytes, and
   small arguments are right-justified within their slot (ofs).
   Indirect (pass-by-reference) arguments get an extra dereference.
   NOTE(review): lossy extraction — the `if (TARGET_64BIT)' test, some
   declarations (t, u, valist_type, indirect, post_p parameter) and the
   gimplify_expr/return tail were dropped.  */
6644 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6649 /* Args grow upward. We can use the generic routines. */
6650 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6652 else /* !TARGET_64BIT */
6654 tree ptr
= build_pointer_type (type
);
6657 unsigned int size
, ofs
;
6660 indirect
= pass_va_arg_by_reference (type
);
6664 ptr
= build_pointer_type (type
);
6666 size
= int_size_in_bytes (type
);
6667 valist_type
= TREE_TYPE (valist
);
6669 /* Args grow down. Not handled by generic routines. */
6671 u
= fold_convert (sizetype
, size_in_bytes (type
));
6672 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6673 t
= fold_build_pointer_plus (valist
, u
);
6675 /* Align to 4 or 8 byte boundary depending on argument size. */
6677 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6678 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6679 t
= fold_convert (valist_type
, t
);
6681 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6683 ofs
= (8 - size
) % 4;
6685 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6687 t
= fold_convert (ptr
, t
);
6688 t
= build_va_arg_indirect_ref (t
);
6691 t
= build_va_arg_indirect_ref (t
);
6697 /* True if MODE is valid for the target. By "valid", we mean able to
6698 be manipulated in non-trivial ways. In particular, this means all
6699 the arithmetic is supported. */
/* Accepts integer modes matching a C type size, float modes matching
   float/double/long double, and TImode on 64-bit; rejects
   MODE_DECIMAL_FLOAT. NOTE(review): `return' keywords and case labels
   were partly dropped by the extraction.  */
6702 pa_scalar_mode_supported_p (scalar_mode mode
)
6704 int precision
= GET_MODE_PRECISION (mode
);
6706 if (TARGET_64BIT
&& mode
== TImode
)
6709 switch (GET_MODE_CLASS (mode
))
6711 case MODE_PARTIAL_INT
:
6713 if (precision
== CHAR_TYPE_SIZE
)
6715 if (precision
== SHORT_TYPE_SIZE
)
6717 if (precision
== INT_TYPE_SIZE
)
6719 if (precision
== LONG_TYPE_SIZE
)
6721 if (precision
== LONG_LONG_TYPE_SIZE
)
6726 if (precision
== FLOAT_TYPE_SIZE
)
6728 if (precision
== DOUBLE_TYPE_SIZE
)
6730 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6734 case MODE_DECIMAL_FLOAT
:
6742 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6743 it branches into the delay slot. Otherwise, return FALSE. */
6746 branch_to_delay_slot_p (rtx_insn
*insn
)
6748 rtx_insn
*jump_insn
;
6750 if (dbr_sequence_length ())
6753 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6756 insn
= next_active_insn (insn
);
6757 if (jump_insn
== insn
)
6760 /* We can't rely on the length of asms. So, we return FALSE when
6761 the branch is followed by an asm. */
6763 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6764 || asm_noperands (PATTERN (insn
)) >= 0
6765 || get_attr_length (insn
) > 0)
6772 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6774 This occurs when INSN has an unfilled delay slot and is followed
6775 by an asm. Disaster can occur if the asm is empty and the jump
6776 branches into the delay slot. So, we add a nop in the delay slot
6777 when this occurs. */
6780 branch_needs_nop_p (rtx_insn
*insn
)
6782 rtx_insn
*jump_insn
;
6784 if (dbr_sequence_length ())
6787 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6790 insn
= next_active_insn (insn
);
6791 if (!insn
|| jump_insn
== insn
)
6794 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6795 || asm_noperands (PATTERN (insn
)) >= 0)
6796 && get_attr_length (insn
) > 0)
6803 /* Return TRUE if INSN, a forward jump insn, can use nullification
6804 to skip the following instruction. This avoids an extra cycle due
6805 to a mis-predicted branch when we fall through. */
6808 use_skip_p (rtx_insn
*insn
)
6810 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
/* Walk forward from the branch; the skip is usable only when exactly
   one 4-byte non-asm insn lies between the branch and its target.  */
6814 insn
= next_active_insn (insn
);
6816 /* We can't rely on the length of asms, so we can't skip asms. */
6818 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6819 || asm_noperands (PATTERN (insn
)) >= 0)
6821 if (get_attr_length (insn
) == 4
6822 && jump_insn
== next_active_insn (insn
))
6824 if (get_attr_length (insn
) > 0)
6831 /* This routine handles all the normal conditional branch sequences we
6832 might need to generate. It handles compare immediate vs compare
6833 register, nullification of delay slots, varying length branches,
6834 negated branches, and all combinations of the above. It returns the
6835 output appropriate to emit the branch corresponding to all given
operands.  */
/* NOTE(review): lossy extraction — the `switch (length)' skeleton,
   case labels, `xdelay' declaration and many braces were dropped;
   only a doc pass is safe here, see upstream pa.cc for structure.  */
6839 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6841 static char buf
[100];
6843 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6844 int length
= get_attr_length (insn
);
6847 /* A conditional branch to the following instruction (e.g. the delay slot)
6848 is asking for a disaster. This can happen when not optimizing and
6849 when jump optimization fails.
6851 While it is usually safe to emit nothing, this can fail if the
6852 preceding instruction is a nullified branch with an empty delay
6853 slot and the same branch target as this branch. We could check
6854 for this but jump optimization should eliminate nop jumps. It
6855 is always safe to emit a nop. */
6856 if (branch_to_delay_slot_p (insn
))
6859 /* The doubleword form of the cmpib instruction doesn't have the LEU
6860 and GTU conditions while the cmpb instruction does. Since we accept
6861 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6862 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6863 operands
[2] = gen_rtx_REG (DImode
, 0);
6864 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6865 operands
[1] = gen_rtx_REG (DImode
, 0);
6867 /* If this is a long branch with its delay slot unfilled, set `nullify'
6868 as it can nullify the delay slot and save a nop. */
6869 if (length
== 8 && dbr_sequence_length () == 0)
6872 /* If this is a short forward conditional branch which did not get
6873 its delay slot filled, the delay slot can still be nullified. */
6874 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6875 nullify
= forward_branch_p (insn
);
6877 /* A forward branch over a single nullified insn can be done with a
6878 comclr instruction. This avoids a single cycle penalty due to
6879 mis-predicted branch if we fall through (branch not taken). */
6880 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6884 /* All short conditional branches except backwards with an unfilled
delay slot.  */
6888 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6890 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6891 if (GET_MODE (operands
[1]) == DImode
)
6894 strcat (buf
, "%B3");
6896 strcat (buf
, "%S3");
6898 strcat (buf
, " %2,%r1,%%r0");
6901 if (branch_needs_nop_p (insn
))
6902 strcat (buf
, ",n %2,%r1,%0%#");
6904 strcat (buf
, ",n %2,%r1,%0");
6907 strcat (buf
, " %2,%r1,%0");
6910 /* All long conditionals. Note a short backward branch with an
6911 unfilled delay slot is treated just like a long backward branch
6912 with an unfilled delay slot. */
6914 /* Handle weird backwards branch with a filled delay slot
6915 which is nullified. */
6916 if (dbr_sequence_length () != 0
6917 && ! forward_branch_p (insn
)
6920 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6921 if (GET_MODE (operands
[1]) == DImode
)
6924 strcat (buf
, "%S3");
6926 strcat (buf
, "%B3");
6927 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6929 /* Handle short backwards branch with an unfilled delay slot.
6930 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6931 taken and untaken branches. */
6932 else if (dbr_sequence_length () == 0
6933 && ! forward_branch_p (insn
)
6934 && INSN_ADDRESSES_SET_P ()
6935 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6936 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6938 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6939 if (GET_MODE (operands
[1]) == DImode
)
6942 strcat (buf
, "%B3 %2,%r1,%0%#");
6944 strcat (buf
, "%S3 %2,%r1,%0%#");
6948 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6949 if (GET_MODE (operands
[1]) == DImode
)
6952 strcat (buf
, "%S3");
6954 strcat (buf
, "%B3");
6956 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6958 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6963 /* The reversed conditional branch must branch over one additional
6964 instruction if the delay slot is filled and needs to be extracted
6965 by pa_output_lbranch. If the delay slot is empty or this is a
6966 nullified forward branch, the instruction after the reversed
6967 condition branch must be nullified. */
6968 if (dbr_sequence_length () == 0
6969 || (nullify
&& forward_branch_p (insn
)))
6973 operands
[4] = GEN_INT (length
);
6978 operands
[4] = GEN_INT (length
+ 4);
6981 /* Create a reversed conditional branch which branches around
6982 the following insns. */
6983 if (GET_MODE (operands
[1]) != DImode
)
6989 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6992 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6998 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
7001 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
7010 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
7013 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
7019 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
7022 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
7026 output_asm_insn (buf
, operands
);
7027 return pa_output_lbranch (operands
[0], insn
, xdelay
);
7032 /* Output a PIC pc-relative instruction sequence to load the address of
7033 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
7034 or a code label. OPERANDS[1] specifies the register to use to load
7035 the program counter. OPERANDS[3] may be used for label generation.
7036 The sequence is always three instructions in length. The program
7037 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
7038 Register %r1 is clobbered. */
/* NOTE(review): lossy extraction — the TARGET_PA_20 test selecting
   between the mfia and bl paths, plus braces, were dropped.  */
7041 pa_output_pic_pcrel_sequence (rtx
*operands
)
7043 gcc_assert (SYMBOL_REF_P (operands
[0]) || LABEL_P (operands
[0]));
7046 /* We can use mfia to determine the current program counter. */
7047 if (TARGET_SOM
|| !TARGET_GAS
)
7049 operands
[3] = gen_label_rtx ();
7050 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7051 CODE_LABEL_NUMBER (operands
[3]));
7052 output_asm_insn ("mfia %1", operands
);
7053 output_asm_insn ("addil L'%0-%l3,%1", operands
);
7054 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
7058 output_asm_insn ("mfia %1", operands
);
7059 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands
);
7060 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands
);
7065 /* We need to use a branch to determine the current program counter. */
7066 output_asm_insn ("{bl|b,l} .+8,%1", operands
);
7067 if (TARGET_SOM
|| !TARGET_GAS
)
7069 operands
[3] = gen_label_rtx ();
7070 output_asm_insn ("addil L'%0-%l3,%1", operands
);
7071 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7072 CODE_LABEL_NUMBER (operands
[3]));
7073 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
7077 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands
);
7078 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands
);
7083 /* This routine handles output of long unconditional branches that
7084 exceed the maximum range of a simple branch instruction. Since
7085 we don't have a register available for the branch, we save register
7086 %r1 in the frame marker, load the branch destination DEST into %r1,
7087 execute the branch, and restore %r1 in the delay slot of the branch.
7089 Since long branches may have an insn in the delay slot and the
7090 delay slot is used to restore %r1, we in general need to extract
7091 this insn and execute it before the branch. However, to facilitate
7092 use of this function by conditional branches, we also provide an
7093 option to not extract the delay insn so that it will be emitted
7094 after the long branch. So, if there is an insn in the delay slot,
7095 it is extracted if XDELAY is nonzero.
7097 The lengths of the various long-branch sequences are 20, 16 and 24
7098 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
/* NOTE(review): lossy extraction — the TARGET_64BIT/flag_pic branch
   structure, the xoperands declaration and braces were dropped.  */
7101 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
7105 xoperands
[0] = dest
;
7107 /* First, free up the delay slot. */
7108 if (xdelay
&& dbr_sequence_length () != 0)
7110 /* We can't handle a jump in the delay slot. */
7111 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
7113 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7116 /* Now delete the delay insn. */
7117 SET_INSN_DELETED (NEXT_INSN (insn
));
7120 /* Output an insn to save %r1. The runtime documentation doesn't
7121 specify whether the "Clean Up" slot in the callers frame can
7122 be clobbered by the callee. It isn't copied by HP's builtin
7123 alloca, so this suggests that it can be clobbered if necessary.
7124 The "Static Link" location is copied by HP builtin alloca, so
7125 we avoid using it. Using the cleanup slot might be a problem
7126 if we have to interoperate with languages that pass cleanup
7127 information. However, it should be possible to handle these
7128 situations with GCC's asm feature.
7130 The "Current RP" slot is reserved for the called procedure, so
7131 we try to use it when we don't have a frame of our own. It's
7132 rather unlikely that we won't have a frame when we need to emit
7135 Really the way to go long term is a register scavenger; goto
7136 the target of the jump and find a register which we can use
7137 as a scratch to hold the value in %r1. Then, we wouldn't have
7138 to free up the delay slot or clobber a slot that may be needed
7139 for other purposes. */
7142 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
7143 /* Use the return pointer slot in the frame marker. */
7144 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
7146 /* Use the slot at -40 in the frame marker since HP builtin
7147 alloca doesn't copy it. */
7148 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
7152 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
7153 /* Use the return pointer slot in the frame marker. */
7154 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
7156 /* Use the "Clean Up" slot in the frame marker. In GCC,
7157 the only other use of this location is for copying a
7158 floating point double argument from a floating-point
7159 register to two general registers. The copy is done
7160 as an "atomic" operation when outputting a call, so it
7161 won't interfere with our using the location here. */
7162 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
7165 if (TARGET_PORTABLE_RUNTIME
)
7167 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7168 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7169 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7173 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
7174 xoperands
[2] = xoperands
[1];
7175 pa_output_pic_pcrel_sequence (xoperands
);
7176 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7179 /* Now output a very long branch to the original target. */
7180 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
7182 /* Now restore the value of %r1 in the delay slot. */
7185 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
7186 return "ldd -16(%%r30),%%r1";
7188 return "ldd -40(%%r30),%%r1";
7192 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
7193 return "ldw -20(%%r30),%%r1";
7195 return "ldw -12(%%r30),%%r1";
7199 /* This routine handles all the branch-on-bit conditional branch sequences we
7200 might need to generate. It handles nullification of delay slots,
7201 varying length branches, negated branches and all combinations of the
7202 above. it returns the appropriate output template to emit the branch. */
7205 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
7207 static char buf
[100];
7209 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7210 int length
= get_attr_length (insn
);
7213 /* A conditional branch to the following instruction (e.g. the delay slot) is
7214 asking for a disaster. I do not think this can happen as this pattern
7215 is only used when optimizing; jump optimization should eliminate the
7216 jump. But be prepared just in case. */
7218 if (branch_to_delay_slot_p (insn
))
7221 /* If this is a long branch with its delay slot unfilled, set `nullify'
7222 as it can nullify the delay slot and save a nop. */
7223 if (length
== 8 && dbr_sequence_length () == 0)
7226 /* If this is a short forward conditional branch which did not get
7227 its delay slot filled, the delay slot can still be nullified. */
7228 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7229 nullify
= forward_branch_p (insn
);
7231 /* A forward branch over a single nullified insn can be done with a
7232 extrs instruction. This avoids a single cycle penalty due to
7233 mis-predicted branch if we fall through (branch not taken). */
7234 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7239 /* All short conditional branches except backwards with an unfilled
7243 strcpy (buf
, "{extrs,|extrw,s,}");
7245 strcpy (buf
, "bb,");
7246 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7247 strcpy (buf
, "extrd,s,*");
7248 else if (GET_MODE (operands
[0]) == DImode
)
7249 strcpy (buf
, "bb,*");
7250 if ((which
== 0 && negated
)
7251 || (which
== 1 && ! negated
))
7256 strcat (buf
, " %0,%1,1,%%r0");
7257 else if (nullify
&& negated
)
7259 if (branch_needs_nop_p (insn
))
7260 strcat (buf
, ",n %0,%1,%3%#");
7262 strcat (buf
, ",n %0,%1,%3");
7264 else if (nullify
&& ! negated
)
7266 if (branch_needs_nop_p (insn
))
7267 strcat (buf
, ",n %0,%1,%2%#");
7269 strcat (buf
, ",n %0,%1,%2");
7271 else if (! nullify
&& negated
)
7272 strcat (buf
, " %0,%1,%3");
7273 else if (! nullify
&& ! negated
)
7274 strcat (buf
, " %0,%1,%2");
7277 /* All long conditionals. Note a short backward branch with an
7278 unfilled delay slot is treated just like a long backward branch
7279 with an unfilled delay slot. */
7281 /* Handle weird backwards branch with a filled delay slot
7282 which is nullified. */
7283 if (dbr_sequence_length () != 0
7284 && ! forward_branch_p (insn
)
7287 strcpy (buf
, "bb,");
7288 if (GET_MODE (operands
[0]) == DImode
)
7290 if ((which
== 0 && negated
)
7291 || (which
== 1 && ! negated
))
7296 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
7298 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
7300 /* Handle short backwards branch with an unfilled delay slot.
7301 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7302 taken and untaken branches. */
7303 else if (dbr_sequence_length () == 0
7304 && ! forward_branch_p (insn
)
7305 && INSN_ADDRESSES_SET_P ()
7306 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7307 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7309 strcpy (buf
, "bb,");
7310 if (GET_MODE (operands
[0]) == DImode
)
7312 if ((which
== 0 && negated
)
7313 || (which
== 1 && ! negated
))
7318 strcat (buf
, " %0,%1,%3%#");
7320 strcat (buf
, " %0,%1,%2%#");
7324 if (GET_MODE (operands
[0]) == DImode
)
7325 strcpy (buf
, "extrd,s,*");
7327 strcpy (buf
, "{extrs,|extrw,s,}");
7328 if ((which
== 0 && negated
)
7329 || (which
== 1 && ! negated
))
7333 if (nullify
&& negated
)
7334 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
7335 else if (nullify
&& ! negated
)
7336 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
7338 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
7340 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
7345 /* The reversed conditional branch must branch over one additional
7346 instruction if the delay slot is filled and needs to be extracted
7347 by pa_output_lbranch. If the delay slot is empty or this is a
7348 nullified forward branch, the instruction after the reversed
7349 condition branch must be nullified. */
7350 if (dbr_sequence_length () == 0
7351 || (nullify
&& forward_branch_p (insn
)))
7355 operands
[4] = GEN_INT (length
);
7360 operands
[4] = GEN_INT (length
+ 4);
7363 if (GET_MODE (operands
[0]) == DImode
)
7364 strcpy (buf
, "bb,*");
7366 strcpy (buf
, "bb,");
7367 if ((which
== 0 && negated
)
7368 || (which
== 1 && !negated
))
7373 strcat (buf
, ",n %0,%1,.+%4");
7375 strcat (buf
, " %0,%1,.+%4");
7376 output_asm_insn (buf
, operands
);
7377 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7383 /* This routine handles all the branch-on-variable-bit conditional branch
7384 sequences we might need to generate. It handles nullification of delay
7385 slots, varying length branches, negated branches and all combinations
7386 of the above. it returns the appropriate output template to emit the
7390 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
7393 static char buf
[100];
7395 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7396 int length
= get_attr_length (insn
);
7399 /* A conditional branch to the following instruction (e.g. the delay slot) is
7400 asking for a disaster. I do not think this can happen as this pattern
7401 is only used when optimizing; jump optimization should eliminate the
7402 jump. But be prepared just in case. */
7404 if (branch_to_delay_slot_p (insn
))
7407 /* If this is a long branch with its delay slot unfilled, set `nullify'
7408 as it can nullify the delay slot and save a nop. */
7409 if (length
== 8 && dbr_sequence_length () == 0)
7412 /* If this is a short forward conditional branch which did not get
7413 its delay slot filled, the delay slot can still be nullified. */
7414 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7415 nullify
= forward_branch_p (insn
);
7417 /* A forward branch over a single nullified insn can be done with a
7418 extrs instruction. This avoids a single cycle penalty due to
7419 mis-predicted branch if we fall through (branch not taken). */
7420 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7425 /* All short conditional branches except backwards with an unfilled
7429 strcpy (buf
, "{vextrs,|extrw,s,}");
7431 strcpy (buf
, "{bvb,|bb,}");
7432 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7433 strcpy (buf
, "extrd,s,*");
7434 else if (GET_MODE (operands
[0]) == DImode
)
7435 strcpy (buf
, "bb,*");
7436 if ((which
== 0 && negated
)
7437 || (which
== 1 && ! negated
))
7442 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7443 else if (nullify
&& negated
)
7445 if (branch_needs_nop_p (insn
))
7446 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7448 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7450 else if (nullify
&& ! negated
)
7452 if (branch_needs_nop_p (insn
))
7453 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7455 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7457 else if (! nullify
&& negated
)
7458 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7459 else if (! nullify
&& ! negated
)
7460 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7463 /* All long conditionals. Note a short backward branch with an
7464 unfilled delay slot is treated just like a long backward branch
7465 with an unfilled delay slot. */
7467 /* Handle weird backwards branch with a filled delay slot
7468 which is nullified. */
7469 if (dbr_sequence_length () != 0
7470 && ! forward_branch_p (insn
)
7473 strcpy (buf
, "{bvb,|bb,}");
7474 if (GET_MODE (operands
[0]) == DImode
)
7476 if ((which
== 0 && negated
)
7477 || (which
== 1 && ! negated
))
7482 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7484 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7486 /* Handle short backwards branch with an unfilled delay slot.
7487 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7488 taken and untaken branches. */
7489 else if (dbr_sequence_length () == 0
7490 && ! forward_branch_p (insn
)
7491 && INSN_ADDRESSES_SET_P ()
7492 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7493 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7495 strcpy (buf
, "{bvb,|bb,}");
7496 if (GET_MODE (operands
[0]) == DImode
)
7498 if ((which
== 0 && negated
)
7499 || (which
== 1 && ! negated
))
7504 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7506 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7510 strcpy (buf
, "{vextrs,|extrw,s,}");
7511 if (GET_MODE (operands
[0]) == DImode
)
7512 strcpy (buf
, "extrd,s,*");
7513 if ((which
== 0 && negated
)
7514 || (which
== 1 && ! negated
))
7518 if (nullify
&& negated
)
7519 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7520 else if (nullify
&& ! negated
)
7521 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7523 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7525 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7530 /* The reversed conditional branch must branch over one additional
7531 instruction if the delay slot is filled and needs to be extracted
7532 by pa_output_lbranch. If the delay slot is empty or this is a
7533 nullified forward branch, the instruction after the reversed
7534 condition branch must be nullified. */
7535 if (dbr_sequence_length () == 0
7536 || (nullify
&& forward_branch_p (insn
)))
7540 operands
[4] = GEN_INT (length
);
7545 operands
[4] = GEN_INT (length
+ 4);
7548 if (GET_MODE (operands
[0]) == DImode
)
7549 strcpy (buf
, "bb,*");
7551 strcpy (buf
, "{bvb,|bb,}");
7552 if ((which
== 0 && negated
)
7553 || (which
== 1 && !negated
))
7558 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7560 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7561 output_asm_insn (buf
, operands
);
7562 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7568 /* Return the output template for emitting a dbra type insn.
7570 Note it may perform some output operations on its own before
7571 returning the final output string. */
7573 pa_output_dbra (rtx
*operands
, rtx_insn
*insn
, int which_alternative
)
7575 int length
= get_attr_length (insn
);
7577 /* A conditional branch to the following instruction (e.g. the delay slot) is
7578 asking for a disaster. Be prepared! */
7580 if (branch_to_delay_slot_p (insn
))
7582 if (which_alternative
== 0)
7583 return "ldo %1(%0),%0";
7584 else if (which_alternative
== 1)
7586 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
7587 output_asm_insn ("ldw -16(%%r30),%4", operands
);
7588 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7589 return "{fldws|fldw} -16(%%r30),%0";
7593 output_asm_insn ("ldw %0,%4", operands
);
7594 return "ldo %1(%4),%4\n\tstw %4,%0";
7598 if (which_alternative
== 0)
7600 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7603 /* If this is a long branch with its delay slot unfilled, set `nullify'
7604 as it can nullify the delay slot and save a nop. */
7605 if (length
== 8 && dbr_sequence_length () == 0)
7608 /* If this is a short forward conditional branch which did not get
7609 its delay slot filled, the delay slot can still be nullified. */
7610 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7611 nullify
= forward_branch_p (insn
);
7618 if (branch_needs_nop_p (insn
))
7619 return "addib,%C2,n %1,%0,%3%#";
7621 return "addib,%C2,n %1,%0,%3";
7624 return "addib,%C2 %1,%0,%3";
7627 /* Handle weird backwards branch with a fulled delay slot
7628 which is nullified. */
7629 if (dbr_sequence_length () != 0
7630 && ! forward_branch_p (insn
)
7632 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7633 /* Handle short backwards branch with an unfilled delay slot.
7634 Using a addb;nop rather than addi;bl saves 1 cycle for both
7635 taken and untaken branches. */
7636 else if (dbr_sequence_length () == 0
7637 && ! forward_branch_p (insn
)
7638 && INSN_ADDRESSES_SET_P ()
7639 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7640 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7641 return "addib,%C2 %1,%0,%3%#";
7643 /* Handle normal cases. */
7645 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7647 return "addi,%N2 %1,%0,%0\n\tb %3";
7650 /* The reversed conditional branch must branch over one additional
7651 instruction if the delay slot is filled and needs to be extracted
7652 by pa_output_lbranch. If the delay slot is empty or this is a
7653 nullified forward branch, the instruction after the reversed
7654 condition branch must be nullified. */
7655 if (dbr_sequence_length () == 0
7656 || (nullify
&& forward_branch_p (insn
)))
7660 operands
[4] = GEN_INT (length
);
7665 operands
[4] = GEN_INT (length
+ 4);
7669 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7671 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7673 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7677 /* Deal with gross reload from FP register case. */
7678 else if (which_alternative
== 1)
7680 /* Move loop counter from FP register to MEM then into a GR,
7681 increment the GR, store the GR into MEM, and finally reload
7682 the FP register from MEM from within the branch's delay slot. */
7683 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7685 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7687 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7688 else if (length
== 28)
7689 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7692 operands
[5] = GEN_INT (length
- 16);
7693 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7694 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7695 return pa_output_lbranch (operands
[3], insn
, 0);
7698 /* Deal with gross reload from memory case. */
7701 /* Reload loop counter from memory, the store back to memory
7702 happens in the branch's delay slot. */
7703 output_asm_insn ("ldw %0,%4", operands
);
7705 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7706 else if (length
== 16)
7707 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7710 operands
[5] = GEN_INT (length
- 4);
7711 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7712 return pa_output_lbranch (operands
[3], insn
, 0);
7717 /* Return the output template for emitting a movb type insn.
7719 Note it may perform some output operations on its own before
7720 returning the final output string. */
7722 pa_output_movb (rtx
*operands
, rtx_insn
*insn
, int which_alternative
,
7723 int reverse_comparison
)
7725 int length
= get_attr_length (insn
);
7727 /* A conditional branch to the following instruction (e.g. the delay slot) is
7728 asking for a disaster. Be prepared! */
7730 if (branch_to_delay_slot_p (insn
))
7732 if (which_alternative
== 0)
7733 return "copy %1,%0";
7734 else if (which_alternative
== 1)
7736 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7737 return "{fldws|fldw} -16(%%r30),%0";
7739 else if (which_alternative
== 2)
7745 /* Support the second variant. */
7746 if (reverse_comparison
)
7747 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7749 if (which_alternative
== 0)
7751 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7754 /* If this is a long branch with its delay slot unfilled, set `nullify'
7755 as it can nullify the delay slot and save a nop. */
7756 if (length
== 8 && dbr_sequence_length () == 0)
7759 /* If this is a short forward conditional branch which did not get
7760 its delay slot filled, the delay slot can still be nullified. */
7761 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7762 nullify
= forward_branch_p (insn
);
7769 if (branch_needs_nop_p (insn
))
7770 return "movb,%C2,n %1,%0,%3%#";
7772 return "movb,%C2,n %1,%0,%3";
7775 return "movb,%C2 %1,%0,%3";
7778 /* Handle weird backwards branch with a filled delay slot
7779 which is nullified. */
7780 if (dbr_sequence_length () != 0
7781 && ! forward_branch_p (insn
)
7783 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7785 /* Handle short backwards branch with an unfilled delay slot.
7786 Using a movb;nop rather than or;bl saves 1 cycle for both
7787 taken and untaken branches. */
7788 else if (dbr_sequence_length () == 0
7789 && ! forward_branch_p (insn
)
7790 && INSN_ADDRESSES_SET_P ()
7791 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7792 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7793 return "movb,%C2 %1,%0,%3%#";
7794 /* Handle normal cases. */
7796 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7798 return "or,%N2 %1,%%r0,%0\n\tb %3";
7801 /* The reversed conditional branch must branch over one additional
7802 instruction if the delay slot is filled and needs to be extracted
7803 by pa_output_lbranch. If the delay slot is empty or this is a
7804 nullified forward branch, the instruction after the reversed
7805 condition branch must be nullified. */
7806 if (dbr_sequence_length () == 0
7807 || (nullify
&& forward_branch_p (insn
)))
7811 operands
[4] = GEN_INT (length
);
7816 operands
[4] = GEN_INT (length
+ 4);
7820 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7822 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7824 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7827 /* Deal with gross reload for FP destination register case. */
7828 else if (which_alternative
== 1)
7830 /* Move source register to MEM, perform the branch test, then
7831 finally load the FP register from MEM from within the branch's
7833 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7835 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7836 else if (length
== 16)
7837 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7840 operands
[4] = GEN_INT (length
- 4);
7841 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7842 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7843 return pa_output_lbranch (operands
[3], insn
, 0);
7846 /* Deal with gross reload from memory case. */
7847 else if (which_alternative
== 2)
7849 /* Reload loop counter from memory, the store back to memory
7850 happens in the branch's delay slot. */
7852 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7853 else if (length
== 12)
7854 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7857 operands
[4] = GEN_INT (length
);
7858 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7860 return pa_output_lbranch (operands
[3], insn
, 0);
7863 /* Handle SAR as a destination. */
7867 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7868 else if (length
== 12)
7869 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7872 operands
[4] = GEN_INT (length
);
7873 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7875 return pa_output_lbranch (operands
[3], insn
, 0);
7880 /* Copy any FP arguments in INSN into integer registers. */
7882 copy_fp_args (rtx_insn
*insn
)
7887 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7889 int arg_mode
, regno
;
7890 rtx use
= XEXP (link
, 0);
7892 if (! (GET_CODE (use
) == USE
7893 && GET_CODE (XEXP (use
, 0)) == REG
7894 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7897 arg_mode
= GET_MODE (XEXP (use
, 0));
7898 regno
= REGNO (XEXP (use
, 0));
7900 /* Is it a floating point register? */
7901 if (regno
>= 32 && regno
<= 39)
7903 /* Copy the FP register into an integer register via memory. */
7904 if (arg_mode
== SFmode
)
7906 xoperands
[0] = XEXP (use
, 0);
7907 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7908 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7909 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7913 xoperands
[0] = XEXP (use
, 0);
7914 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7915 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7916 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7917 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7923 /* Compute length of the FP argument copy sequence for INSN. */
7925 length_fp_args (rtx_insn
*insn
)
7930 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7932 int arg_mode
, regno
;
7933 rtx use
= XEXP (link
, 0);
7935 if (! (GET_CODE (use
) == USE
7936 && GET_CODE (XEXP (use
, 0)) == REG
7937 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7940 arg_mode
= GET_MODE (XEXP (use
, 0));
7941 regno
= REGNO (XEXP (use
, 0));
7943 /* Is it a floating point register? */
7944 if (regno
>= 32 && regno
<= 39)
7946 if (arg_mode
== SFmode
)
7956 /* Return the attribute length for the millicode call instruction INSN.
7957 The length must match the code generated by pa_output_millicode_call.
7958 We include the delay slot in the returned length as it is better to
7959 over estimate the length than to under estimate it. */
7962 pa_attr_length_millicode_call (rtx_insn
*insn
)
7964 unsigned long distance
= -1;
7965 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7967 if (INSN_ADDRESSES_SET_P ())
7969 distance
= (total
+ insn_current_reference_address (insn
));
7970 if (distance
< total
)
7976 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7981 else if (TARGET_PORTABLE_RUNTIME
)
7985 if (!TARGET_LONG_CALLS
&& distance
< MAX_PCREL17F_OFFSET
)
7995 /* INSN is a function call.
7997 CALL_DEST is the routine we are calling. */
8000 pa_output_millicode_call (rtx_insn
*insn
, rtx call_dest
)
8002 int attr_length
= get_attr_length (insn
);
8003 int seq_length
= dbr_sequence_length ();
8006 xoperands
[0] = call_dest
;
8008 /* Handle the common case where we are sure that the branch will
8009 reach the beginning of the $CODE$ subspace. The within reach
8010 form of the $$sh_func_adrs call has a length of 28. Because it
8011 has an attribute type of sh_func_adrs, it never has a nonzero
8012 sequence length (i.e., the delay slot is never filled). */
8013 if (!TARGET_LONG_CALLS
8014 && (attr_length
== 8
8015 || (attr_length
== 28
8016 && get_attr_type (insn
) == TYPE_SH_FUNC_ADRS
)))
8018 xoperands
[1] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
8019 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
8025 /* It might seem that one insn could be saved by accessing
8026 the millicode function using the linkage table. However,
8027 this doesn't work in shared libraries and other dynamically
8028 loaded objects. Using a pc-relative sequence also avoids
8029 problems related to the implicit use of the gp register. */
8030 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
8031 xoperands
[2] = xoperands
[1];
8032 pa_output_pic_pcrel_sequence (xoperands
);
8033 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8035 else if (TARGET_PORTABLE_RUNTIME
)
8037 /* Pure portable runtime doesn't allow be/ble; we also don't
8038 have PIC support in the assembler/linker, so this sequence
8041 /* Get the address of our target into %r1. */
8042 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8043 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
8045 /* Get our return address into %r31. */
8046 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
8047 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
8049 /* Jump to our target address in %r1. */
8050 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8054 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8056 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
8058 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
8062 xoperands
[1] = gen_rtx_REG (Pmode
, 31);
8063 xoperands
[2] = gen_rtx_REG (Pmode
, 1);
8064 pa_output_pic_pcrel_sequence (xoperands
);
8066 /* Adjust return address. */
8067 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands
);
8069 /* Jump to our target address in %r1. */
8070 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8074 if (seq_length
== 0)
8075 output_asm_insn ("nop", xoperands
);
8080 /* Return the attribute length of the call instruction INSN. The SIBCALL
8081 flag indicates whether INSN is a regular call or a sibling call. The
8082 length returned must be longer than the code actually generated by
8083 pa_output_call. Since branch shortening is done before delay branch
8084 sequencing, there is no way to determine whether or not the delay
8085 slot will be filled during branch shortening. Even when the delay
8086 slot is filled, we may have to add a nop if the delay slot contains
8087 a branch that can't reach its target. Thus, we always have to include
8088 the delay slot in the length estimate. This used to be done in
8089 pa_adjust_insn_length but we do it here now as some sequences always
8090 fill the delay slot and we can save four bytes in the estimate for
8094 pa_attr_length_call (rtx_insn
*insn
, int sibcall
)
8097 rtx call
, call_dest
;
8100 rtx pat
= PATTERN (insn
);
8101 unsigned long distance
= -1;
8103 gcc_assert (CALL_P (insn
));
8105 if (INSN_ADDRESSES_SET_P ())
8107 unsigned long total
;
8109 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8110 distance
= (total
+ insn_current_reference_address (insn
));
8111 if (distance
< total
)
8115 gcc_assert (GET_CODE (pat
) == PARALLEL
);
8117 /* Get the call rtx. */
8118 call
= XVECEXP (pat
, 0, 0);
8119 if (GET_CODE (call
) == SET
)
8120 call
= SET_SRC (call
);
8122 gcc_assert (GET_CODE (call
) == CALL
);
8124 /* Determine if this is a local call. */
8125 call_dest
= XEXP (XEXP (call
, 0), 0);
8126 call_decl
= SYMBOL_REF_DECL (call_dest
);
8127 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
8129 /* pc-relative branch. */
8130 if (!TARGET_LONG_CALLS
8131 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
8132 || distance
< MAX_PCREL17F_OFFSET
))
8135 /* 64-bit plabel sequence. */
8136 else if (TARGET_64BIT
&& !local_call
)
8139 /* non-pic long absolute branch sequence. */
8140 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
8143 /* long pc-relative branch sequence. */
8144 else if (TARGET_LONG_PIC_SDIFF_CALL
8145 || (TARGET_GAS
&& !TARGET_SOM
&& local_call
))
8149 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8153 /* 32-bit plabel sequence. */
8159 length
+= length_fp_args (insn
);
8169 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8177 /* INSN is a function call.
8179 CALL_DEST is the routine we are calling. */
8182 pa_output_call (rtx_insn
*insn
, rtx call_dest
, int sibcall
)
8184 int seq_length
= dbr_sequence_length ();
8185 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
8186 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
8189 xoperands
[0] = call_dest
;
8191 /* Handle the common case where we're sure that the branch will reach
8192 the beginning of the "$CODE$" subspace. This is the beginning of
8193 the current function if we are in a named section. */
8194 if (!TARGET_LONG_CALLS
&& pa_attr_length_call (insn
, sibcall
) == 8)
8196 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
8197 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
8201 if (TARGET_64BIT
&& !local_call
)
8203 /* ??? As far as I can tell, the HP linker doesn't support the
8204 long pc-relative sequence described in the 64-bit runtime
8205 architecture. So, we use a slightly longer indirect call. */
8206 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
8207 xoperands
[1] = gen_label_rtx ();
8209 /* Put the load of %r27 into the delay slot. We don't need to
8210 do anything when generating fast indirect calls. */
8211 if (seq_length
!= 0)
8213 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
8216 /* Now delete the delay insn. */
8217 SET_INSN_DELETED (NEXT_INSN (insn
));
8220 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
8221 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
8222 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
8223 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
8224 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
8225 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
8230 int indirect_call
= 0;
8232 /* Emit a long call. There are several different sequences
8233 of increasing length and complexity. In most cases,
8234 they don't allow an instruction in the delay slot. */
8235 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
8236 && !TARGET_LONG_PIC_SDIFF_CALL
8237 && !(TARGET_GAS
&& !TARGET_SOM
&& local_call
)
8245 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
8247 /* A non-jump insn in the delay slot. By definition we can
8248 emit this insn before the call (and in fact before argument
8250 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
8253 /* Now delete the delay insn. */
8254 SET_INSN_DELETED (NEXT_INSN (insn
));
8258 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
8260 /* This is the best sequence for making long calls in
8261 non-pic code. Unfortunately, GNU ld doesn't provide
8262 the stub needed for external calls, and GAS's support
8263 for this with the SOM linker is buggy. It is safe
8264 to use this for local calls. */
8265 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8267 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
8271 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8274 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
8276 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8282 /* The HP assembler and linker can handle relocations for
8283 the difference of two symbols. The HP assembler
8284 recognizes the sequence as a pc-relative call and
8285 the linker provides stubs when needed. */
8287 /* GAS currently can't generate the relocations that
8288 are needed for the SOM linker under HP-UX using this
8289 sequence. The GNU linker doesn't generate the stubs
8290 that are needed for external calls on TARGET_ELF32
8291 with this sequence. For now, we have to use a longer
8292 plabel sequence when using GAS for non local calls. */
8293 if (TARGET_LONG_PIC_SDIFF_CALL
8294 || (TARGET_GAS
&& !TARGET_SOM
&& local_call
))
8296 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
8297 xoperands
[2] = xoperands
[1];
8298 pa_output_pic_pcrel_sequence (xoperands
);
8302 /* Emit a long plabel-based call sequence. This is
8303 essentially an inline implementation of $$dyncall.
8304 We don't actually try to call $$dyncall as this is
8305 as difficult as calling the function itself. */
8306 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
8307 xoperands
[1] = gen_label_rtx ();
8309 /* Since the call is indirect, FP arguments in registers
8310 need to be copied to the general registers. Then, the
8311 argument relocation stub will copy them back. */
8313 copy_fp_args (insn
);
8317 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
8318 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
8319 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands
);
8323 output_asm_insn ("addil LR'%0-$global$,%%r27",
8325 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8329 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8330 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8331 /* Should this be an ordered load to ensure the target
8332 address is loaded before the global pointer? */
8333 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands
);
8334 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands
);
8336 if (!sibcall
&& !TARGET_PA_20
)
8338 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8339 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8340 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
8342 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
8349 output_asm_insn ("bve (%%r1)", xoperands
);
8354 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8355 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
8359 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8364 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8365 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8370 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8371 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
8373 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
8377 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8378 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
8380 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
8383 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
8385 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8393 if (seq_length
== 0)
8394 output_asm_insn ("nop", xoperands
);
8399 /* Return the attribute length of the indirect call instruction INSN.
8400 The length must match the code generated by output_indirect call.
8401 The returned length includes the delay slot. Currently, the delay
8402 slot of an indirect call sequence is not exposed and it is used by
8403 the sequence itself. */
8406 pa_attr_length_indirect_call (rtx_insn
*insn
)
8408 unsigned long distance
= -1;
8409 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8411 if (INSN_ADDRESSES_SET_P ())
8413 distance
= (total
+ insn_current_reference_address (insn
));
8414 if (distance
< total
)
8421 if (TARGET_FAST_INDIRECT_CALLS
)
8424 if (TARGET_PORTABLE_RUNTIME
)
8427 if (!TARGET_LONG_CALLS
8428 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
8429 || distance
< MAX_PCREL17F_OFFSET
))
8432 /* Out of reach, can use ble. */
8436 /* Inline versions of $$dyncall. */
8439 if (TARGET_NO_SPACE_REGS
)
8446 /* Long PIC pc-relative call. */
8451 pa_output_indirect_call (rtx_insn
*insn
, rtx call_dest
)
8458 xoperands
[0] = call_dest
;
8459 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8460 "bve,l (%%r2),%%r2\n\t"
8461 "ldd 24(%0),%%r27", xoperands
);
8465 /* First the special case for kernels, level 0 systems, etc. */
8466 if (TARGET_FAST_INDIRECT_CALLS
)
8468 pa_output_arg_descriptor (insn
);
8470 return "bve,l,n (%%r22),%%r2\n\tnop";
8471 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8474 if (TARGET_PORTABLE_RUNTIME
)
8476 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8477 "ldo R'$$dyncall(%%r31),%%r31", xoperands
);
8478 pa_output_arg_descriptor (insn
);
8479 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8482 /* Now the normal case -- we can reach $$dyncall directly or
8483 we're sure that we can get there via a long-branch stub.
8485 No need to check target flags as the length uniquely identifies
8486 the remaining cases. */
8487 length
= pa_attr_length_indirect_call (insn
);
8490 pa_output_arg_descriptor (insn
);
8492 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8493 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8494 variant of the B,L instruction can't be used on the SOM target. */
8495 if (TARGET_PA_20
&& !TARGET_SOM
)
8496 return "b,l,n $$dyncall,%%r2\n\tnop";
8498 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8501 /* Long millicode call, but we are not generating PIC or portable runtime
8505 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands
);
8506 pa_output_arg_descriptor (insn
);
8507 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8510 /* The long PIC pc-relative call sequence is five instructions. So,
8511 let's use an inline version of $$dyncall when the calling sequence
8512 has a roughly similar number of instructions and we are not optimizing
8513 for size. We need two instructions to load the return pointer plus
8514 the $$dyncall implementation. */
8517 if (TARGET_NO_SPACE_REGS
)
8519 pa_output_arg_descriptor (insn
);
8520 output_asm_insn ("bl .+8,%%r2\n\t"
8521 "ldo 20(%%r2),%%r2\n\t"
8522 "extru,<> %%r22,30,1,%%r0\n\t"
8523 "bv,n %%r0(%%r22)\n\t"
8524 "ldw -2(%%r22),%%r21\n\t"
8525 "bv %%r0(%%r21)\n\t"
8526 "ldw 2(%%r22),%%r19", xoperands
);
8531 pa_output_arg_descriptor (insn
);
8532 output_asm_insn ("bl .+8,%%r2\n\t"
8533 "ldo 24(%%r2),%%r2\n\t"
8534 "stw %%r2,-24(%%sp)\n\t"
8535 "extru,<> %r22,30,1,%%r0\n\t"
8537 "ldw -2(%%r22),%%r21\n\t"
8539 "ldw 2(%%r22),%%r19", xoperands
);
8544 /* We need a long PIC call to $$dyncall. */
8545 xoperands
[0] = gen_rtx_SYMBOL_REF (Pmode
, "$$dyncall");
8546 xoperands
[1] = gen_rtx_REG (Pmode
, 2);
8547 xoperands
[2] = gen_rtx_REG (Pmode
, 1);
8548 pa_output_pic_pcrel_sequence (xoperands
);
8549 pa_output_arg_descriptor (insn
);
8550 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8553 /* In HPUX 8.0's shared library scheme, special relocations are needed
8554 for function labels if they might be passed to a function
8555 in a shared library (because shared libraries don't live in code
8556 space), and special magic is needed to construct their address. */
8559 pa_encode_label (rtx sym
)
8561 const char *str
= XSTR (sym
, 0);
8562 int len
= strlen (str
) + 1;
8565 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8569 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8573 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8575 int old_referenced
= 0;
8577 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8579 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8581 default_encode_section_info (decl
, rtl
, first
);
8583 if (first
&& TEXT_SPACE_P (decl
))
8585 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8586 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8587 pa_encode_label (XEXP (rtl
, 0));
8589 else if (old_referenced
)
8590 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
/* This is sort of inverse to pa_encode_section_info: strip the leading
   '@' (HPUX shared-library function-label marker) and '*' (no-prefix
   marker) from STR, in that order, each at most once.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
8603 /* Returns 1 if OP is a function label involved in a simple addition
8604 with a constant. Used to keep certain patterns from matching
8605 during instruction combination. */
8607 pa_is_function_label_plus_const (rtx op
)
8609 /* Strip off any CONST. */
8610 if (GET_CODE (op
) == CONST
)
8613 return (GET_CODE (op
) == PLUS
8614 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8615 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8618 /* Output the assembler code for a thunk function. THUNK_DECL is the
8619 declaration for the thunk function itself, FUNCTION is the decl for
8620 the target function. DELTA is an immediate constant offset to be
8621 added to THIS. If VCALL_OFFSET is nonzero, the word at
8622 *(*this + vcall_offset) should be added to THIS. */
8625 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8626 HOST_WIDE_INT vcall_offset
, tree function
)
8628 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
8629 static unsigned int current_thunk_number
;
8630 int val_14
= VAL_14_BITS_P (delta
);
8631 unsigned int old_last_address
= last_address
, nbytes
= 0;
8635 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8636 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8637 xoperands
[2] = GEN_INT (delta
);
8639 assemble_start_function (thunk_fndecl
, fnname
);
8640 final_start_function (emit_barrier (), file
, 1);
8644 /* Output the thunk. We know that the function is in the same
8645 translation unit (i.e., the same space) as the thunk, and that
8646 thunks are output after their method. Thus, we don't need an
8647 external branch to reach the function. With SOM and GAS,
8648 functions and thunks are effectively in different sections.
8649 Thus, we can always use a IA-relative branch and the linker
8650 will add a long branch stub if necessary.
8652 However, we have to be careful when generating PIC code on the
8653 SOM port to ensure that the sequence does not transfer to an
8654 import stub for the target function as this could clobber the
8655 return value saved at SP-24. This would also apply to the
8656 32-bit linux port if the multi-space model is implemented. */
8657 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8658 && !(flag_pic
&& TREE_PUBLIC (function
))
8659 && (TARGET_GAS
|| last_address
< 262132))
8660 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8661 && ((targetm_common
.have_named_sections
8662 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8663 /* The GNU 64-bit linker has rather poor stub management.
8664 So, we use a long branch from thunks that aren't in
8665 the same section as the target function. */
8667 && (DECL_SECTION_NAME (thunk_fndecl
)
8668 != DECL_SECTION_NAME (function
)))
8669 || ((DECL_SECTION_NAME (thunk_fndecl
)
8670 == DECL_SECTION_NAME (function
))
8671 && last_address
< 262132)))
8672 /* In this case, we need to be able to reach the start of
8673 the stub table even though the function is likely closer
8674 and can be jumped to directly. */
8675 || (targetm_common
.have_named_sections
8676 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8677 && DECL_SECTION_NAME (function
) == NULL
8678 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8680 || (!targetm_common
.have_named_sections
8681 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8684 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8686 output_asm_insn ("b %0", xoperands
);
8690 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8695 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8699 else if (TARGET_64BIT
)
8703 /* We only have one call-clobbered scratch register, so we can't
8704 make use of the delay slot if delta doesn't fit in 14 bits. */
8707 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8708 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8711 /* Load function address into %r1. */
8712 xop
[0] = xoperands
[0];
8713 xop
[1] = gen_rtx_REG (Pmode
, 1);
8715 pa_output_pic_pcrel_sequence (xop
);
8719 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8720 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8725 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8729 else if (TARGET_PORTABLE_RUNTIME
)
8731 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8732 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8735 output_asm_insn ("ldil L'%2,%%r26", xoperands
);
8737 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8741 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8746 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands
);
8750 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8752 /* The function is accessible from outside this module. The only
8753 way to avoid an import stub between the thunk and function is to
8754 call the function directly with an indirect sequence similar to
8755 that used by $$dyncall. This is possible because $$dyncall acts
8756 as the import stub in an indirect call. */
8757 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8758 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8759 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8760 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8761 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8762 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8763 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8764 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8765 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8769 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8775 output_asm_insn ("bve (%%r22)", xoperands
);
8778 else if (TARGET_NO_SPACE_REGS
)
8780 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8785 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8786 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8787 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8792 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8794 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8800 /* Load function address into %r22. */
8801 xop
[0] = xoperands
[0];
8802 xop
[1] = gen_rtx_REG (Pmode
, 1);
8803 xop
[2] = gen_rtx_REG (Pmode
, 22);
8804 pa_output_pic_pcrel_sequence (xop
);
8807 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8809 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8813 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8818 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8825 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8827 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8828 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8832 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8837 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8846 /* Add DELTA to THIS. */
8849 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8854 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8855 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8861 /* Load *(THIS + DELTA) to %r1. */
8862 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands
);
8864 val_14
= VAL_14_BITS_P (vcall_offset
);
8865 xoperands
[2] = GEN_INT (vcall_offset
);
8867 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8870 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands
);
8875 output_asm_insn ("addil L'%2,%%r1", xoperands
);
8876 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands
);
8882 /* Load *(THIS + DELTA) to %r1. */
8883 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands
);
8885 val_14
= VAL_14_BITS_P (vcall_offset
);
8886 xoperands
[2] = GEN_INT (vcall_offset
);
8888 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8891 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands
);
8896 output_asm_insn ("addil L'%2,%%r1", xoperands
);
8897 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands
);
8902 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8903 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8904 && !(flag_pic
&& TREE_PUBLIC (function
))
8905 && (TARGET_GAS
|| last_address
< 262132))
8906 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8907 && ((targetm_common
.have_named_sections
8908 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8909 /* The GNU 64-bit linker has rather poor stub management.
8910 So, we use a long branch from thunks that aren't in
8911 the same section as the target function. */
8913 && (DECL_SECTION_NAME (thunk_fndecl
)
8914 != DECL_SECTION_NAME (function
)))
8915 || ((DECL_SECTION_NAME (thunk_fndecl
)
8916 == DECL_SECTION_NAME (function
))
8917 && last_address
< 262132)))
8918 /* In this case, we need to be able to reach the start of
8919 the stub table even though the function is likely closer
8920 and can be jumped to directly. */
8921 || (targetm_common
.have_named_sections
8922 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8923 && DECL_SECTION_NAME (function
) == NULL
8924 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8926 || (!targetm_common
.have_named_sections
8927 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8930 output_asm_insn ("b %0", xoperands
);
8932 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8933 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
8935 else if (TARGET_64BIT
)
8937 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8938 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
8940 /* Load function address into %r1. */
8942 xop
[0] = xoperands
[0];
8943 xop
[1] = gen_rtx_REG (Pmode
, 1);
8945 pa_output_pic_pcrel_sequence (xop
);
8947 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8949 else if (TARGET_PORTABLE_RUNTIME
)
8951 /* Load function address into %r22. */
8953 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8954 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands
);
8956 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8958 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8959 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
8961 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8963 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8964 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
8966 /* The function is accessible from outside this module. The only
8967 way to avoid an import stub between the thunk and function is to
8968 call the function directly with an indirect sequence similar to
8969 that used by $$dyncall. This is possible because $$dyncall acts
8970 as the import stub in an indirect call. */
8971 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8972 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8973 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8974 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8975 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8976 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8977 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8978 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8979 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8983 output_asm_insn ("bve,n (%%r22)", xoperands
);
8986 else if (TARGET_NO_SPACE_REGS
)
8988 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands
);
8993 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8994 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8995 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands
);
9001 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9002 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
9004 /* Load function address into %r1. */
9006 xop
[0] = xoperands
[0];
9007 xop
[1] = gen_rtx_REG (Pmode
, 1);
9009 pa_output_pic_pcrel_sequence (xop
);
9011 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
9015 /* Load function address into %r22. */
9017 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
9018 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
9020 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9021 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands
);
9025 final_end_function ();
9027 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
9029 switch_to_section (data_section
);
9030 output_asm_insn (".align 4", xoperands
);
9031 ASM_OUTPUT_LABEL (file
, label
);
9032 output_asm_insn (".word P'%0", xoperands
);
9035 current_thunk_number
++;
9036 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
9037 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
9038 last_address
+= nbytes
;
9039 if (old_last_address
> last_address
)
9040 last_address
= UINT_MAX
;
9041 update_total_code_bytes (nbytes
);
9042 assemble_end_function (thunk_fndecl
, fnname
);
9045 /* Only direct calls to static functions are allowed to be sibling (tail)
9048 This restriction is necessary because some linker generated stubs will
9049 store return pointers into rp' in some cases which might clobber a
9050 live value already in rp'.
9052 In a sibcall the current function and the target function share stack
9053 space. Thus if the path to the current function and the path to the
9054 target function save a value in rp', they save the value into the
9055 same stack slot, which has undesirable consequences.
9057 Because of the deferred binding nature of shared libraries any function
9058 with external scope could be in a different load module and thus require
9059 rp' to be saved when calling that function. So sibcall optimizations
9060 can only be safe for static function.
9062 Note that GCC never needs return value relocations, so we don't have to
9063 worry about static calls with return value relocations (which require
9066 It is safe to perform a sibcall optimization when the target function
9067 will never return. */
9069 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
9071 /* Sibcalls are not ok because the arg pointer register is not a fixed
9072 register. This prevents the sibcall optimization from occurring. In
9073 addition, there are problems with stub placement using GNU ld. This
9074 is because a normal sibcall branch uses a 17-bit relocation while
9075 a regular call branch uses a 22-bit relocation. As a result, more
9076 care needs to be taken in the placement of long-branch stubs. */
9080 if (TARGET_PORTABLE_RUNTIME
)
9083 /* Sibcalls are only ok within a translation unit. */
9084 return decl
&& targetm
.binds_local_p (decl
);
9087 /* ??? Addition is not commutative on the PA due to the weird implicit
9088 space register selection rules for memory addresses. Therefore, we
9089 don't consider a + b == b + a, as this might be inside a MEM. */
9091 pa_commutative_p (const_rtx x
, int outer_code
)
9093 return (COMMUTATIVE_P (x
)
9094 && (TARGET_NO_SPACE_REGS
9095 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
9096 || GET_CODE (x
) != PLUS
));
9099 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9100 use in fmpyadd instructions. */
9102 pa_fmpyaddoperands (rtx
*operands
)
9104 machine_mode mode
= GET_MODE (operands
[0]);
9106 /* Must be a floating point mode. */
9107 if (mode
!= SFmode
&& mode
!= DFmode
)
9110 /* All modes must be the same. */
9111 if (! (mode
== GET_MODE (operands
[1])
9112 && mode
== GET_MODE (operands
[2])
9113 && mode
== GET_MODE (operands
[3])
9114 && mode
== GET_MODE (operands
[4])
9115 && mode
== GET_MODE (operands
[5])))
9118 /* All operands must be registers. */
9119 if (! (GET_CODE (operands
[1]) == REG
9120 && GET_CODE (operands
[2]) == REG
9121 && GET_CODE (operands
[3]) == REG
9122 && GET_CODE (operands
[4]) == REG
9123 && GET_CODE (operands
[5]) == REG
))
9126 /* Only 2 real operands to the addition. One of the input operands must
9127 be the same as the output operand. */
9128 if (! rtx_equal_p (operands
[3], operands
[4])
9129 && ! rtx_equal_p (operands
[3], operands
[5]))
9132 /* Inout operand of add cannot conflict with any operands from multiply. */
9133 if (rtx_equal_p (operands
[3], operands
[0])
9134 || rtx_equal_p (operands
[3], operands
[1])
9135 || rtx_equal_p (operands
[3], operands
[2]))
9138 /* multiply cannot feed into addition operands. */
9139 if (rtx_equal_p (operands
[4], operands
[0])
9140 || rtx_equal_p (operands
[5], operands
[0]))
9143 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9145 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
9146 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
9147 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
9148 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
9149 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
9150 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
9153 /* Passed. Operands are suitable for fmpyadd. */
9157 #if !defined(USE_COLLECT2)
9159 pa_asm_out_constructor (rtx symbol
, int priority
)
9161 if (!function_label_operand (symbol
, VOIDmode
))
9162 pa_encode_label (symbol
);
9164 #ifdef CTORS_SECTION_ASM_OP
9165 default_ctor_section_asm_out_constructor (symbol
, priority
);
9167 # ifdef TARGET_ASM_NAMED_SECTION
9168 default_named_section_asm_out_constructor (symbol
, priority
);
9170 default_stabs_asm_out_constructor (symbol
, priority
);
9176 pa_asm_out_destructor (rtx symbol
, int priority
)
9178 if (!function_label_operand (symbol
, VOIDmode
))
9179 pa_encode_label (symbol
);
9181 #ifdef DTORS_SECTION_ASM_OP
9182 default_dtor_section_asm_out_destructor (symbol
, priority
);
9184 # ifdef TARGET_ASM_NAMED_SECTION
9185 default_named_section_asm_out_destructor (symbol
, priority
);
9187 default_stabs_asm_out_destructor (symbol
, priority
);
9193 /* This function places uninitialized global data in the bss section.
9194 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9195 function on the SOM port to prevent uninitialized global data from
9196 being placed in the data section. */
9199 pa_asm_output_aligned_bss (FILE *stream
,
9201 unsigned HOST_WIDE_INT size
,
9204 switch_to_section (bss_section
);
9206 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9207 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
9210 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9211 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
9214 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
9215 ASM_OUTPUT_LABEL (stream
, name
);
9216 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
9219 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9220 that doesn't allow the alignment of global common storage to be directly
9221 specified. The SOM linker aligns common storage based on the rounded
9222 value of the NUM_BYTES parameter in the .comm directive. It's not
9223 possible to use the .align directive as it doesn't affect the alignment
9224 of the label associated with a .comm directive. */
9227 pa_asm_output_aligned_common (FILE *stream
,
9229 unsigned HOST_WIDE_INT size
,
9232 unsigned int max_common_align
;
9234 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
9235 if (align
> max_common_align
)
9237 /* Alignment exceeds maximum alignment for global common data. */
9238 align
= max_common_align
;
9241 switch_to_section (bss_section
);
9243 assemble_name (stream
, name
);
9244 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
9245 MAX (size
, align
/ BITS_PER_UNIT
));
9248 /* We can't use .comm for local common storage as the SOM linker effectively
9249 treats the symbol as universal and uses the same storage for local symbols
9250 with the same name in different object files. The .block directive
9251 reserves an uninitialized block of storage. However, it's not common
9252 storage. Fortunately, GCC never requests common storage with the same
9253 name in any given translation unit. */
9256 pa_asm_output_aligned_local (FILE *stream
,
9258 unsigned HOST_WIDE_INT size
,
9261 switch_to_section (bss_section
);
9262 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
9265 fprintf (stream
, "%s", LOCAL_ASM_OP
);
9266 assemble_name (stream
, name
);
9267 fprintf (stream
, "\n");
9270 ASM_OUTPUT_LABEL (stream
, name
);
9271 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
9274 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9275 use in fmpysub instructions. */
9277 pa_fmpysuboperands (rtx
*operands
)
9279 machine_mode mode
= GET_MODE (operands
[0]);
9281 /* Must be a floating point mode. */
9282 if (mode
!= SFmode
&& mode
!= DFmode
)
9285 /* All modes must be the same. */
9286 if (! (mode
== GET_MODE (operands
[1])
9287 && mode
== GET_MODE (operands
[2])
9288 && mode
== GET_MODE (operands
[3])
9289 && mode
== GET_MODE (operands
[4])
9290 && mode
== GET_MODE (operands
[5])))
9293 /* All operands must be registers. */
9294 if (! (GET_CODE (operands
[1]) == REG
9295 && GET_CODE (operands
[2]) == REG
9296 && GET_CODE (operands
[3]) == REG
9297 && GET_CODE (operands
[4]) == REG
9298 && GET_CODE (operands
[5]) == REG
))
9301 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9302 operation, so operands[4] must be the same as operand[3]. */
9303 if (! rtx_equal_p (operands
[3], operands
[4]))
9306 /* multiply cannot feed into subtraction. */
9307 if (rtx_equal_p (operands
[5], operands
[0]))
9310 /* Inout operand of sub cannot conflict with any operands from multiply. */
9311 if (rtx_equal_p (operands
[3], operands
[0])
9312 || rtx_equal_p (operands
[3], operands
[1])
9313 || rtx_equal_p (operands
[3], operands
[2]))
9316 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9318 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
9319 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
9320 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
9321 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
9322 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
9323 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
9326 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   scale factors for a MULT embedded inside a memory address.  */

int
pa_mem_shadd_constant_p (int val)
{
  return (val == 2 || val == 4 || val == 8) ? 1 : 0;
}
/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   shift counts for shadd instructions.  */

int
pa_shadd_constant_p (int val)
{
  return (val == 1 || val == 2 || val == 3) ? 1 : 0;
}
9352 /* Return TRUE if INSN branches forward. */
9355 forward_branch_p (rtx_insn
*insn
)
9357 rtx lab
= JUMP_LABEL (insn
);
9359 /* The INSN must have a jump label. */
9360 gcc_assert (lab
!= NULL_RTX
);
9362 if (INSN_ADDRESSES_SET_P ())
9363 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
9370 insn
= NEXT_INSN (insn
);
9376 /* Output an unconditional move and branch insn. */
9379 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
9381 int length
= get_attr_length (insn
);
9383 /* These are the cases in which we win. */
9385 return "mov%I1b,tr %1,%0,%2";
9387 /* None of the following cases win, but they don't lose either. */
9390 if (dbr_sequence_length () == 0)
9392 /* Nothing in the delay slot, fake it by putting the combined
9393 insn (the copy or add) in the delay slot of a bl. */
9394 if (GET_CODE (operands
[1]) == CONST_INT
)
9395 return "b %2\n\tldi %1,%0";
9397 return "b %2\n\tcopy %1,%0";
9401 /* Something in the delay slot, but we've got a long branch. */
9402 if (GET_CODE (operands
[1]) == CONST_INT
)
9403 return "ldi %1,%0\n\tb %2";
9405 return "copy %1,%0\n\tb %2";
9409 if (GET_CODE (operands
[1]) == CONST_INT
)
9410 output_asm_insn ("ldi %1,%0", operands
);
9412 output_asm_insn ("copy %1,%0", operands
);
9413 return pa_output_lbranch (operands
[2], insn
, 1);
9416 /* Output an unconditional add and branch insn. */
9419 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
9421 int length
= get_attr_length (insn
);
9423 /* To make life easy we want operand0 to be the shared input/output
9424 operand and operand1 to be the readonly operand. */
9425 if (operands
[0] == operands
[1])
9426 operands
[1] = operands
[2];
9428 /* These are the cases in which we win. */
9430 return "add%I1b,tr %1,%0,%3";
9432 /* None of the following cases win, but they don't lose either. */
9435 if (dbr_sequence_length () == 0)
9436 /* Nothing in the delay slot, fake it by putting the combined
9437 insn (the copy or add) in the delay slot of a bl. */
9438 return "b %3\n\tadd%I1 %1,%0,%0";
9440 /* Something in the delay slot, but we've got a long branch. */
9441 return "add%I1 %1,%0,%0\n\tb %3";
9444 output_asm_insn ("add%I1 %1,%0,%0", operands
);
9445 return pa_output_lbranch (operands
[3], insn
, 1);
9448 /* We use this hook to perform a PA specific optimization which is difficult
9449 to do in earlier passes. */
9454 remove_useless_addtr_insns (1);
9456 if (pa_cpu
< PROCESSOR_8000
)
9457 pa_combine_instructions ();
9460 /* The PA has a number of odd instructions which can perform multiple
9461 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9462 it may be profitable to combine two instructions into one instruction
9463 with two outputs. It's not profitable PA2.0 machines because the
9464 two outputs would take two slots in the reorder buffers.
9466 This routine finds instructions which can be combined and combines
9467 them. We only support some of the potential combinations, and we
9468 only try common ways to find suitable instructions.
9470 * addb can add two registers or a register and a small integer
9471 and jump to a nearby (+-8k) location. Normally the jump to the
9472 nearby location is conditional on the result of the add, but by
9473 using the "true" condition we can make the jump unconditional.
9474 Thus addb can perform two independent operations in one insn.
9476 * movb is similar to addb in that it can perform a reg->reg
9477 or small immediate->reg copy and jump to a nearby (+-8k location).
9479 * fmpyadd and fmpysub can perform a FP multiply and either an
9480 FP add or FP sub if the operands of the multiply and add/sub are
9481 independent (there are other minor restrictions). Note both
9482 the fmpy and fadd/fsub can in theory move to better spots according
9483 to data dependencies, but for now we require the fmpy stay at a
9486 * Many of the memory operations can perform pre & post updates
9487 of index registers. GCC's pre/post increment/decrement addressing
9488 is far too simple to take advantage of all the possibilities. This
9489 pass may not be suitable since those insns may not be independent.
9491 * comclr can compare two ints or an int and a register, nullify
9492 the following instruction and zero some other register. This
9493 is more difficult to use as it's harder to find an insn which
9494 will generate a comclr than finding something like an unconditional
9495 branch. (conditional moves & long branches create comclr insns).
9497 * Most arithmetic operations can conditionally skip the next
9498 instruction. They can be viewed as "perform this operation
9499 and conditionally jump to this nearby location" (where nearby
9500 is an insns away). These are difficult to use due to the
9501 branch length restrictions. */
9504 pa_combine_instructions (void)
9508 /* This can get expensive since the basic algorithm is on the
9509 order of O(n^2) (or worse). Only do it for -O2 or higher
9510 levels of optimization. */
9514 /* Walk down the list of insns looking for "anchor" insns which
9515 may be combined with "floating" insns. As the name implies,
9516 "anchor" instructions don't move, while "floating" insns may
9518 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
9519 rtx_insn
*new_rtx
= make_insn_raw (par
);
9521 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
9523 enum attr_pa_combine_type anchor_attr
;
9524 enum attr_pa_combine_type floater_attr
;
9526 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9527 Also ignore any special USE insns. */
9528 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
9529 || GET_CODE (PATTERN (anchor
)) == USE
9530 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
9533 anchor_attr
= get_attr_pa_combine_type (anchor
);
9534 /* See if anchor is an insn suitable for combination. */
9535 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
9536 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9537 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9538 && ! forward_branch_p (anchor
)))
9542 for (floater
= PREV_INSN (anchor
);
9544 floater
= PREV_INSN (floater
))
9546 if (NOTE_P (floater
)
9547 || (NONJUMP_INSN_P (floater
)
9548 && (GET_CODE (PATTERN (floater
)) == USE
9549 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9552 /* Anything except a regular INSN will stop our search. */
9553 if (! NONJUMP_INSN_P (floater
))
9559 /* See if FLOATER is suitable for combination with the
9561 floater_attr
= get_attr_pa_combine_type (floater
);
9562 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9563 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9564 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9565 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9567 /* If ANCHOR and FLOATER can be combined, then we're
9568 done with this pass. */
9569 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9570 SET_DEST (PATTERN (floater
)),
9571 XEXP (SET_SRC (PATTERN (floater
)), 0),
9572 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9576 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9577 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9579 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9581 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9582 SET_DEST (PATTERN (floater
)),
9583 XEXP (SET_SRC (PATTERN (floater
)), 0),
9584 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9589 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9590 SET_DEST (PATTERN (floater
)),
9591 SET_SRC (PATTERN (floater
)),
9592 SET_SRC (PATTERN (floater
))))
9598 /* If we didn't find anything on the backwards scan try forwards. */
9600 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9601 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9603 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9605 if (NOTE_P (floater
)
9606 || (NONJUMP_INSN_P (floater
)
9607 && (GET_CODE (PATTERN (floater
)) == USE
9608 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9612 /* Anything except a regular INSN will stop our search. */
9613 if (! NONJUMP_INSN_P (floater
))
9619 /* See if FLOATER is suitable for combination with the
9621 floater_attr
= get_attr_pa_combine_type (floater
);
9622 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9623 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9624 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9625 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9627 /* If ANCHOR and FLOATER can be combined, then we're
9628 done with this pass. */
9629 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9630 SET_DEST (PATTERN (floater
)),
9631 XEXP (SET_SRC (PATTERN (floater
)),
9633 XEXP (SET_SRC (PATTERN (floater
)),
9640 /* FLOATER will be nonzero if we found a suitable floating
9641 insn for combination with ANCHOR. */
9643 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9644 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9646 /* Emit the new instruction and delete the old anchor. */
9647 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9648 copy_rtx (PATTERN (floater
)));
9649 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9650 emit_insn_before (temp
, anchor
);
9652 SET_INSN_DELETED (anchor
);
9654 /* Emit a special USE insn for FLOATER, then delete
9655 the floating insn. */
9656 temp
= copy_rtx (PATTERN (floater
));
9657 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9658 delete_insn (floater
);
9663 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9665 /* Emit the new_jump instruction and delete the old anchor. */
9666 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9667 copy_rtx (PATTERN (floater
)));
9668 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9669 temp
= emit_jump_insn_before (temp
, anchor
);
9671 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9672 SET_INSN_DELETED (anchor
);
9674 /* Emit a special USE insn for FLOATER, then delete
9675 the floating insn. */
9676 temp
= copy_rtx (PATTERN (floater
));
9677 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9678 delete_insn (floater
);
9686 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9687 int reversed
, rtx dest
,
9690 int insn_code_number
;
9691 rtx_insn
*start
, *end
;
9693 /* Create a PARALLEL with the patterns of ANCHOR and
9694 FLOATER, try to recognize it, then test constraints
9695 for the resulting pattern.
9697 If the pattern doesn't match or the constraints
9698 aren't met keep searching for a suitable floater
9700 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9701 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9702 INSN_CODE (new_rtx
) = -1;
9703 insn_code_number
= recog_memoized (new_rtx
);
9704 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9705 if (insn_code_number
< 0
9706 || (extract_insn (new_rtx
),
9707 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9721 /* There's up to three operands to consider. One
9722 output and two inputs.
9724 The output must not be used between FLOATER & ANCHOR
9725 exclusive. The inputs must not be set between
9726 FLOATER and ANCHOR exclusive. */
9728 if (reg_used_between_p (dest
, start
, end
))
9731 if (reg_set_between_p (src1
, start
, end
))
9734 if (reg_set_between_p (src2
, start
, end
))
9737 /* If we get here, then everything is good. */
9741 /* Return nonzero if references for INSN are delayed.
9743 Millicode insns are actually function calls with some special
9744 constraints on arguments and register usage.
9746 Millicode calls always expect their arguments in the integer argument
9747 registers, and always return their result in %r29 (ret1). They
9748 are expected to clobber their arguments, %r1, %r29, and the return
9749 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9751 This function tells reorg that the references to arguments and
9752 millicode calls do not appear to happen until after the millicode call.
9753 This allows reorg to put insns which set the argument registers into the
9754 delay slot of the millicode call -- thus they act more like traditional
9757 Note we cannot consider side effects of the insn to be delayed because
9758 the branch and link insn will clobber the return pointer. If we happened
9759 to use the return pointer in the delay slot of the call, then we lose.
9761 get_attr_type will try to recognize the given insn, so make sure to
9762 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9765 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9767 return ((NONJUMP_INSN_P (insn
)
9768 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9769 && GET_CODE (PATTERN (insn
)) != USE
9770 && GET_CODE (PATTERN (insn
)) != CLOBBER
9771 && get_attr_type (insn
) == TYPE_MILLI
));
9774 /* Promote the return value, but not the arguments. */
9777 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9779 int *punsignedp ATTRIBUTE_UNUSED
,
9780 const_tree fntype ATTRIBUTE_UNUSED
,
9783 if (for_return
== 0)
9785 return promote_mode (type
, mode
, punsignedp
);
9788 /* On the HP-PA the value is found in register(s) 28(-29), unless
9789 the mode is SF or DF. Then the value is returned in fr4 (32).
9791 This must perform the same promotions as PROMOTE_MODE, else promoting
9792 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9794 Small structures must be returned in a PARALLEL on PA64 in order
9795 to match the HP Compiler ABI. */
9798 pa_function_value (const_tree valtype
,
9799 const_tree func ATTRIBUTE_UNUSED
,
9800 bool outgoing ATTRIBUTE_UNUSED
)
9802 machine_mode valmode
;
9804 if (AGGREGATE_TYPE_P (valtype
)
9805 || TREE_CODE (valtype
) == COMPLEX_TYPE
9806 || VECTOR_TYPE_P (valtype
))
9808 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9810 /* Handle aggregates that fit exactly in a word or double word. */
9811 if (valsize
== UNITS_PER_WORD
|| valsize
== 2 * UNITS_PER_WORD
)
9812 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9816 /* Aggregates with a size less than or equal to 128 bits are
9817 returned in GR 28(-29). They are left justified. The pad
9818 bits are undefined. Larger aggregates are returned in
9822 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9824 for (i
= 0; i
< ub
; i
++)
9826 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9827 gen_rtx_REG (DImode
, 28 + i
),
9832 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9834 else if (valsize
> UNITS_PER_WORD
)
9836 /* Aggregates 5 to 8 bytes in size are returned in general
9837 registers r28-r29 in the same manner as other non
9838 floating-point objects. The data is right-justified and
9839 zero-extended to 64 bits. This is opposite to the normal
9840 justification used on big endian targets and requires
9841 special treatment. */
9842 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9843 gen_rtx_REG (DImode
, 28), const0_rtx
);
9844 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9848 if ((INTEGRAL_TYPE_P (valtype
)
9849 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9850 || POINTER_TYPE_P (valtype
))
9851 valmode
= word_mode
;
9853 valmode
= TYPE_MODE (valtype
);
9855 if (SCALAR_FLOAT_TYPE_P (valtype
)
9856 && !AGGREGATE_TYPE_P (valtype
)
9857 && TYPE_MODE (valtype
) != TFmode
9858 && !TARGET_SOFT_FLOAT
)
9859 return gen_rtx_REG (valmode
, 32);
9861 return gen_rtx_REG (valmode
, 28);
9864 /* Implement the TARGET_LIBCALL_VALUE hook. */
9867 pa_libcall_value (machine_mode mode
,
9868 const_rtx fun ATTRIBUTE_UNUSED
)
9870 if (! TARGET_SOFT_FLOAT
9871 && (mode
== SFmode
|| mode
== DFmode
))
9872 return gen_rtx_REG (mode
, 32);
9874 return gen_rtx_REG (mode
, 28);
9877 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9880 pa_function_value_regno_p (const unsigned int regno
)
9883 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9889 /* Update the data in CUM to advance over argument ARG. */
9892 pa_function_arg_advance (cumulative_args_t cum_v
,
9893 const function_arg_info
&arg
)
9895 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9896 int arg_size
= pa_function_arg_size (arg
.mode
, arg
.type
);
9898 cum
->nargs_prototype
--;
9899 cum
->words
+= (arg_size
9900 + ((cum
->words
& 01)
9901 && arg
.type
!= NULL_TREE
9905 /* Return the location of a parameter that is passed in a register or NULL
9906 if the parameter has any component that is passed in memory.
9908 This is new code and will be pushed to into the net sources after
9911 ??? We might want to restructure this so that it looks more like other
9914 pa_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
9916 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9917 tree type
= arg
.type
;
9918 machine_mode mode
= arg
.mode
;
9919 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9926 if (arg
.end_marker_p ())
9929 arg_size
= pa_function_arg_size (mode
, type
);
9933 /* If this arg would be passed partially or totally on the stack, then
9934 this routine should return zero. pa_arg_partial_bytes will
9935 handle arguments which are split between regs and stack slots if
9936 the ABI mandates split arguments. */
9939 /* The 32-bit ABI does not split arguments. */
9940 if (cum
->words
+ arg_size
> max_arg_words
)
9946 alignment
= cum
->words
& 1;
9947 if (cum
->words
+ alignment
>= max_arg_words
)
9951 /* The 32bit ABIs and the 64bit ABIs are rather different,
9952 particularly in their handling of FP registers. We might
9953 be able to cleverly share code between them, but I'm not
9954 going to bother in the hope that splitting them up results
9955 in code that is more easily understood. */
9959 /* Advance the base registers to their current locations.
9961 Remember, gprs grow towards smaller register numbers while
9962 fprs grow to higher register numbers. Also remember that
9963 although FP regs are 32-bit addressable, we pretend that
9964 the registers are 64-bits wide. */
9965 gpr_reg_base
= 26 - cum
->words
;
9966 fpr_reg_base
= 32 + cum
->words
;
9968 /* Arguments wider than one word and small aggregates need special
9972 || (type
&& (AGGREGATE_TYPE_P (type
)
9973 || TREE_CODE (type
) == COMPLEX_TYPE
9974 || VECTOR_TYPE_P (type
))))
9976 /* Double-extended precision (80-bit), quad-precision (128-bit)
9977 and aggregates including complex numbers are aligned on
9978 128-bit boundaries. The first eight 64-bit argument slots
9979 are associated one-to-one, with general registers r26
9980 through r19, and also with floating-point registers fr4
9981 through fr11. Arguments larger than one word are always
9982 passed in general registers.
9984 Using a PARALLEL with a word mode register results in left
9985 justified data on a big-endian target. */
9988 int i
, offset
= 0, ub
= arg_size
;
9990 /* Align the base register. */
9991 gpr_reg_base
-= alignment
;
9993 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9994 for (i
= 0; i
< ub
; i
++)
9996 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9997 gen_rtx_REG (DImode
, gpr_reg_base
),
10003 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
10008 /* If the argument is larger than a word, then we know precisely
10009 which registers we must use. */
10023 /* Structures 5 to 8 bytes in size are passed in the general
10024 registers in the same manner as other non floating-point
10025 objects. The data is right-justified and zero-extended
10026 to 64 bits. This is opposite to the normal justification
10027 used on big endian targets and requires special treatment.
10028 We now define BLOCK_REG_PADDING to pad these objects.
10029 Aggregates, complex and vector types are passed in the same
10030 manner as structures. */
10031 if (mode
== BLKmode
10032 || (type
&& (AGGREGATE_TYPE_P (type
)
10033 || TREE_CODE (type
) == COMPLEX_TYPE
10034 || VECTOR_TYPE_P (type
))))
10036 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
10037 gen_rtx_REG (DImode
, gpr_reg_base
),
10039 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
10044 /* We have a single word (32 bits). A simple computation
10045 will get us the register #s we need. */
10046 gpr_reg_base
= 26 - cum
->words
;
10047 fpr_reg_base
= 32 + 2 * cum
->words
;
10051 /* Determine if the argument needs to be passed in both general and
10052 floating point registers. */
10053 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
10054 /* If we are doing soft-float with portable runtime, then there
10055 is no need to worry about FP regs. */
10056 && !TARGET_SOFT_FLOAT
10057 /* The parameter must be some kind of scalar float, else we just
10058 pass it in integer registers. */
10059 && GET_MODE_CLASS (mode
) == MODE_FLOAT
10060 /* The target function must not have a prototype. */
10061 && cum
->nargs_prototype
<= 0
10062 /* libcalls do not need to pass items in both FP and general
10064 && type
!= NULL_TREE
10065 /* All this hair applies to "outgoing" args only. This includes
10066 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
10068 /* Also pass outgoing floating arguments in both registers in indirect
10069 calls with the 32 bit ABI and the HP assembler since there is no
10070 way to the specify argument locations in static functions. */
10075 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
10081 gen_rtx_EXPR_LIST (VOIDmode
,
10082 gen_rtx_REG (mode
, fpr_reg_base
),
10084 gen_rtx_EXPR_LIST (VOIDmode
,
10085 gen_rtx_REG (mode
, gpr_reg_base
),
10090 /* See if we should pass this parameter in a general register. */
10091 if (TARGET_SOFT_FLOAT
10092 /* Indirect calls in the normal 32bit ABI require all arguments
10093 to be passed in general registers. */
10094 || (!TARGET_PORTABLE_RUNTIME
10098 /* If the parameter is not a scalar floating-point parameter,
10099 then it belongs in GPRs. */
10100 || GET_MODE_CLASS (mode
) != MODE_FLOAT
10101 /* Structure with single SFmode field belongs in GPR. */
10102 || (type
&& AGGREGATE_TYPE_P (type
)))
10103 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
10105 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
10110 /* Arguments larger than one word are double word aligned. */
10112 static unsigned int
10113 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
10115 bool singleword
= (type
10116 ? (integer_zerop (TYPE_SIZE (type
))
10117 || !TREE_CONSTANT (TYPE_SIZE (type
))
10118 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
10119 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
10121 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
10124 /* If this arg would be passed totally in registers or totally on the stack,
10125 then this routine should return zero. */
10128 pa_arg_partial_bytes (cumulative_args_t cum_v
, const function_arg_info
&arg
)
10130 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10131 unsigned int max_arg_words
= 8;
10132 unsigned int offset
= 0;
10138 arg_size
= pa_function_arg_size (arg
.mode
, arg
.type
);
10139 if (arg_size
> 1 && (cum
->words
& 1))
10142 if (cum
->words
+ offset
+ arg_size
<= max_arg_words
)
10143 /* Arg fits fully into registers. */
10145 else if (cum
->words
+ offset
>= max_arg_words
)
10146 /* Arg fully on the stack. */
10149 /* Arg is split. */
10150 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
10154 /* A get_unnamed_section callback for switching to the text section.
10156 This function is only used with SOM. Because we don't support
10157 named subspaces, we can only create a new subspace or switch back
10158 to the default text subspace. */
10161 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
10163 gcc_assert (TARGET_SOM
);
10166 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
10168 /* We only want to emit a .nsubspa directive once at the
10169 start of the function. */
10170 cfun
->machine
->in_nsubspa
= 1;
10172 /* Create a new subspace for the text. This provides
10173 better stub placement and one-only functions. */
10175 && DECL_ONE_ONLY (cfun
->decl
)
10176 && !DECL_WEAK (cfun
->decl
))
10178 output_section_asm_op ("\t.SPACE $TEXT$\n"
10179 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10180 "ACCESS=44,SORT=24,COMDAT");
10186 /* There isn't a current function or the body of the current
10187 function has been completed. So, we are changing to the
10188 text section to output debugging information. Thus, we
10189 need to forget that we are in the text section so that
10190 varasm.cc will call us when text_section is selected again. */
10191 gcc_assert (!cfun
|| !cfun
->machine
10192 || cfun
->machine
->in_nsubspa
== 2);
10195 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10198 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10201 /* A get_unnamed_section callback for switching to comdat data
10202 sections. This function is only used with SOM. */
10205 som_output_comdat_data_section_asm_op (const char *data
)
10208 output_section_asm_op (data
);
10211 /* Implement TARGET_ASM_INIT_SECTIONS. */
10214 pa_som_asm_init_sections (void)
10217 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
10219 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10220 is not being generated. */
10221 som_readonly_data_section
10222 = get_unnamed_section (0, output_section_asm_op
,
10223 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10225 /* When secondary definitions are not supported, SOM makes readonly
10226 data one-only by creating a new $LIT$ subspace in $TEXT$ with
10227 the comdat flag. */
10228 som_one_only_readonly_data_section
10229 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
10230 "\t.SPACE $TEXT$\n"
10231 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10232 "ACCESS=0x2c,SORT=16,COMDAT");
10235 /* When secondary definitions are not supported, SOM makes data one-only
10236 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
10237 som_one_only_data_section
10238 = get_unnamed_section (SECTION_WRITE
,
10239 som_output_comdat_data_section_asm_op
,
10240 "\t.SPACE $PRIVATE$\n"
10241 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10242 "ACCESS=31,SORT=24,COMDAT");
10245 som_tm_clone_table_section
10246 = get_unnamed_section (0, output_section_asm_op
,
10247 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10249 /* HPUX ld generates incorrect GOT entries for "T" fixups which
10250 reference data within the $TEXT$ space (for example constant
10251 strings in the $LIT$ subspace).
10253 The assemblers (GAS and HP as) both have problems with handling
10254 the difference of two symbols. This is the other correct way to
10255 reference constant data during PIC code generation.
10257 Thus, we can't put constant data needing relocation in the $TEXT$
10258 space during PIC generation.
10260 Previously, we placed all constant data into the $DATA$ subspace
10261 when generating PIC code. This reduces sharing, but it works
10262 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
10263 This puts constant data not needing relocation into the $TEXT$ space. */
10264 readonly_data_section
= som_readonly_data_section
;
10266 /* We must not have a reference to an external symbol defined in a
10267 shared library in a readonly section, else the SOM linker will
10270 So, we force exception information into the data section. */
10271 exception_section
= data_section
;
10274 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
10277 pa_som_tm_clone_table_section (void)
10279 return som_tm_clone_table_section
;
10282 /* On hpux10, the linker will give an error if we have a reference
10283 in the read-only data section to a symbol defined in a shared
10284 library. Therefore, expressions that might require a reloc
10285 cannot be placed in the read-only data section. */
10288 pa_select_section (tree exp
, int reloc
,
10289 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
10291 if (TREE_CODE (exp
) == VAR_DECL
10292 && TREE_READONLY (exp
)
10293 && !TREE_THIS_VOLATILE (exp
)
10294 && DECL_INITIAL (exp
)
10295 && (DECL_INITIAL (exp
) == error_mark_node
10296 || TREE_CONSTANT (DECL_INITIAL (exp
)))
10297 && !(reloc
& pa_reloc_rw_mask ()))
10300 && DECL_ONE_ONLY (exp
)
10301 && !DECL_WEAK (exp
))
10302 return som_one_only_readonly_data_section
;
10304 return readonly_data_section
;
10306 else if (CONSTANT_CLASS_P (exp
)
10307 && !(reloc
& pa_reloc_rw_mask ()))
10308 return readonly_data_section
;
10309 else if (TARGET_SOM
10310 && TREE_CODE (exp
) == VAR_DECL
10311 && DECL_ONE_ONLY (exp
)
10312 && !DECL_WEAK (exp
))
10313 return som_one_only_data_section
;
10315 return data_section
;
10318 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10319 and the function is in a COMDAT group, place the plabel reference in the
10320 .data.rel.ro.local section. The linker ignores references to symbols in
10321 discarded sections from this section. */
10324 pa_elf_select_rtx_section (machine_mode mode
, rtx x
,
10325 unsigned HOST_WIDE_INT align
)
10327 if (function_label_operand (x
, VOIDmode
))
10329 tree decl
= SYMBOL_REF_DECL (x
);
10331 if (!decl
|| (DECL_P (decl
) && DECL_COMDAT_GROUP (decl
)))
10332 return get_named_section (NULL
, ".data.rel.ro.local", 1);
10335 return default_elf_select_rtx_section (mode
, x
, align
);
10338 /* Implement pa_reloc_rw_mask. */
10341 pa_reloc_rw_mask (void)
10343 if (flag_pic
|| (TARGET_SOM
&& !TARGET_HPUX_11
))
10346 /* HP linker does not support global relocs in readonly memory. */
10347 return TARGET_SOM
? 2 : 0;
10351 pa_globalize_label (FILE *stream
, const char *name
)
10353 /* We only handle DATA objects here, functions are globalized in
10354 ASM_DECLARE_FUNCTION_NAME. */
10355 if (! FUNCTION_NAME_P (name
))
10357 fputs ("\t.EXPORT ", stream
);
10358 assemble_name (stream
, name
);
10359 fputs (",DATA\n", stream
);
10363 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10366 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
10367 int incoming ATTRIBUTE_UNUSED
)
10369 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
10372 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10375 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
10377 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10378 PA64 ABI says that objects larger than 128 bits are returned in memory.
10379 Note, int_size_in_bytes can return -1 if the size of the object is
10380 variable or larger than the maximum value that can be expressed as
10381 a HOST_WIDE_INT. It can also return zero for an empty type. The
10382 simplest way to handle variable and empty types is to pass them in
10383 memory. This avoids problems in defining the boundaries of argument
10384 slots, allocating registers, etc. */
10385 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
10386 || int_size_in_bytes (type
) <= 0);
10389 /* Structure to hold declaration and name of external symbols that are
10390 emitted by GCC. We generate a vector of these symbols and output them
10391 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10392 This avoids putting out names that are never really used. */
10394 typedef struct GTY(()) extern_symbol
10400 /* Define gc'd vector type for extern_symbol. */
10402 /* Vector of extern_symbol pointers. */
10403 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
10405 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10406 /* Mark DECL (name NAME) as an external reference (assembler output
10407 file FILE). This saves the names to output at the end of the file
10408 if actually referenced. */
10411 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
10413 gcc_assert (file
== asm_out_file
);
10414 extern_symbol p
= {decl
, name
};
10415 vec_safe_push (extern_symbols
, p
);
10419 /* Output text required at the end of an assembler file.
10420 This includes deferred plabels and .import directives for
10421 all external symbols that were actually referenced. */
10426 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10430 if (!NO_DEFERRED_PROFILE_COUNTERS
)
10431 output_deferred_profile_counters ();
10434 output_deferred_plabels ();
10436 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10437 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
10439 tree decl
= p
->decl
;
10441 if (!TREE_ASM_WRITTEN (decl
)
10442 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
10443 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
10446 vec_free (extern_symbols
);
10449 if (NEED_INDICATE_EXEC_STACK
)
10450 file_end_indicate_exec_stack ();
10453 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10456 pa_can_change_mode_class (machine_mode from
, machine_mode to
,
10457 reg_class_t rclass
)
10462 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
10465 /* Reject changes to/from modes with zero size. */
10466 if (!GET_MODE_SIZE (from
) || !GET_MODE_SIZE (to
))
10469 /* Reject changes to/from complex and vector modes. */
10470 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
10471 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
10474 /* There is no way to load QImode or HImode values directly from memory
10475 to a FP register. SImode loads to the FP registers are not zero
10476 extended. On the 64-bit target, this conflicts with the definition
10477 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10478 except for DImode to SImode on the 64-bit target. It is handled by
10479 register renaming in pa_print_operand. */
10480 if (MAYBE_FP_REG_CLASS_P (rclass
))
10481 return TARGET_64BIT
&& from
== DImode
&& to
== SImode
;
10483 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10484 in specific sets of registers. Thus, we cannot allow changing
10485 to a larger mode when it's larger than a word. */
10486 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
10487 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
10493 /* Implement TARGET_MODES_TIEABLE_P.
10495 We should return FALSE for QImode and HImode because these modes
10496 are not ok in the floating-point registers. However, this prevents
10497 tieing these modes to SImode and DImode in the general registers.
10498 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10499 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10500 in the floating-point registers. */
10503 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10505 /* Don't tie modes in different classes. */
10506 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
10513 /* Length in units of the trampoline instruction code. */
10515 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10518 /* Output assembler code for a block containing the constant parts
10519 of a trampoline, leaving space for the variable parts.\
10521 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10522 and then branches to the specified routine.
10524 This code template is copied from text segment to stack location
10525 and then patched with pa_trampoline_init to contain valid values,
10526 and then entered as a subroutine.
10528 It is best to keep this as small as possible to avoid having to
10529 flush multiple lines in the cache. */
10532 pa_asm_trampoline_template (FILE *f
)
10538 fputs ("\tmfia %r20\n", f
);
10539 fputs ("\tldw 48(%r20),%r22\n", f
);
10540 fputs ("\tcopy %r22,%r21\n", f
);
10541 fputs ("\tbb,>=,n %r22,30,.+16\n", f
);
10542 fputs ("\tdepwi 0,31,2,%r22\n", f
);
10543 fputs ("\tldw 0(%r22),%r21\n", f
);
10544 fputs ("\tldw 4(%r22),%r19\n", f
);
10545 fputs ("\tbve (%r21)\n", f
);
10546 fputs ("\tldw 52(%r20),%r29\n", f
);
10547 fputs ("\t.word 0\n", f
);
10548 fputs ("\t.word 0\n", f
);
10549 fputs ("\t.word 0\n", f
);
10553 if (ASSEMBLER_DIALECT
== 0)
10555 fputs ("\tbl .+8,%r20\n", f
);
10556 fputs ("\tdepi 0,31,2,%r20\n", f
);
10560 fputs ("\tb,l .+8,%r20\n", f
);
10561 fputs ("\tdepwi 0,31,2,%r20\n", f
);
10563 fputs ("\tldw 40(%r20),%r22\n", f
);
10564 fputs ("\tcopy %r22,%r21\n", f
);
10565 fputs ("\tbb,>=,n %r22,30,.+16\n", f
);
10566 if (ASSEMBLER_DIALECT
== 0)
10567 fputs ("\tdepi 0,31,2,%r22\n", f
);
10569 fputs ("\tdepwi 0,31,2,%r22\n", f
);
10570 fputs ("\tldw 0(%r22),%r21\n", f
);
10571 fputs ("\tldw 4(%r22),%r19\n", f
);
10572 fputs ("\tldsid (%r21),%r1\n", f
);
10573 fputs ("\tmtsp %r1,%sr0\n", f
);
10574 fputs ("\tbe 0(%sr0,%r21)\n", f
);
10575 fputs ("\tldw 44(%r20),%r29\n", f
);
10577 fputs ("\t.word 0\n", f
);
10578 fputs ("\t.word 0\n", f
);
10579 fputs ("\t.word 0\n", f
);
10580 fputs ("\t.word 0\n", f
);
10584 fputs ("\t.dword 0\n", f
);
10585 fputs ("\t.dword 0\n", f
);
10586 fputs ("\t.dword 0\n", f
);
10587 fputs ("\t.dword 0\n", f
);
10588 fputs ("\tmfia %r31\n", f
);
10589 fputs ("\tldd 24(%r31),%r27\n", f
);
10590 fputs ("\tldd 32(%r31),%r31\n", f
);
10591 fputs ("\tldd 16(%r27),%r1\n", f
);
10592 fputs ("\tbve (%r1)\n", f
);
10593 fputs ("\tldd 24(%r27),%r27\n", f
);
10594 fputs ("\t.dword 0 ; fptr\n", f
);
10595 fputs ("\t.dword 0 ; static link\n", f
);
10599 /* Emit RTL insns to initialize the variable parts of a trampoline.
10600 FNADDR is an RTX for the address of the function's pure code.
10601 CXT is an RTX for the static chain value for the function.
10603 Move the function address to the trampoline template at offset 48.
10604 Move the static chain value to trampoline template at offset 52.
10605 Move the trampoline address to trampoline template at offset 56.
10606 Move r19 to trampoline template at offset 60. The latter two
10607 words create a plabel for the indirect call to the trampoline.
10609 A similar sequence is used for the 64-bit port but the plabel is
10610 at the beginning of the trampoline.
10612 Finally, the cache entries for the trampoline code are flushed.
10613 This is necessary to ensure that the trampoline instruction sequence
10614 is written to memory prior to any attempts at prefetching the code
10618 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10620 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10621 rtx start_addr
= gen_reg_rtx (Pmode
);
10622 rtx end_addr
= gen_reg_rtx (Pmode
);
10623 rtx line_length
= gen_reg_rtx (Pmode
);
10626 emit_block_move (m_tramp
, assemble_trampoline_template (),
10627 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10628 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10632 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10633 emit_move_insn (tmp
, fnaddr
);
10634 tmp
= adjust_address (m_tramp
, Pmode
, 52);
10635 emit_move_insn (tmp
, chain_value
);
10637 /* Create a fat pointer for the trampoline. */
10638 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10639 emit_move_insn (tmp
, r_tramp
);
10640 tmp
= adjust_address (m_tramp
, Pmode
, 60);
10641 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10643 /* fdc and fic only use registers for the address to flush,
10644 they do not accept integer displacements. We align the
10645 start and end addresses to the beginning of their respective
10646 cache lines to minimize the number of lines flushed. */
10647 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10648 GEN_INT (-MIN_CACHELINE_SIZE
)));
10649 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10650 TRAMPOLINE_CODE_SIZE
-1));
10651 emit_insn (gen_andsi3 (end_addr
, tmp
,
10652 GEN_INT (-MIN_CACHELINE_SIZE
)));
10653 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10654 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10655 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10656 gen_reg_rtx (Pmode
),
10657 gen_reg_rtx (Pmode
)));
10661 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10662 emit_move_insn (tmp
, fnaddr
);
10663 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10664 emit_move_insn (tmp
, chain_value
);
10666 /* Create a fat pointer for the trampoline. */
10667 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10668 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10670 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10671 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10673 /* fdc and fic only use registers for the address to flush,
10674 they do not accept integer displacements. We align the
10675 start and end addresses to the beginning of their respective
10676 cache lines to minimize the number of lines flushed. */
10677 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10678 emit_insn (gen_anddi3 (start_addr
, tmp
,
10679 GEN_INT (-MIN_CACHELINE_SIZE
)));
10680 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10681 TRAMPOLINE_CODE_SIZE
- 1));
10682 emit_insn (gen_anddi3 (end_addr
, tmp
,
10683 GEN_INT (-MIN_CACHELINE_SIZE
)));
10684 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10685 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10686 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10687 gen_reg_rtx (Pmode
),
10688 gen_reg_rtx (Pmode
)));
10691 #ifdef HAVE_ENABLE_EXECUTE_STACK
10692 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10693 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
10697 /* Perform any machine-specific adjustment in the address of the trampoline.
10698 ADDR contains the address that was passed to pa_trampoline_init.
10699 Adjust the trampoline address to point to the plabel at offset 56. */
10702 pa_trampoline_adjust_address (rtx addr
)
10705 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 58));
10710 pa_delegitimize_address (rtx orig_x
)
10714 if (GET_CODE (orig_x
) == UNSPEC
10715 && XINT (orig_x
, 1) == UNSPEC_TP
)
10716 orig_x
= XVECEXP (orig_x
, 0, 0);
10718 x
= delegitimize_mem_from_attrs (orig_x
);
10720 if (GET_CODE (x
) == LO_SUM
10721 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10722 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10723 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10728 pa_internal_arg_pointer (void)
10730 /* The argument pointer and the hard frame pointer are the same in
10731 the 32-bit runtime, so we don't need a copy. */
10733 return copy_to_reg (virtual_incoming_args_rtx
);
10735 return virtual_incoming_args_rtx
;
10738 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10739 Frame pointer elimination is automatically handled. */
10742 pa_can_eliminate (const int from
, const int to
)
10744 /* The argument cannot be eliminated in the 64-bit runtime. */
10745 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10748 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10749 ? ! frame_pointer_needed
10753 /* Define the offset between two registers, FROM to be eliminated and its
10754 replacement TO, at the start of a routine. */
10756 pa_initial_elimination_offset (int from
, int to
)
10758 HOST_WIDE_INT offset
;
10760 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10761 && to
== STACK_POINTER_REGNUM
)
10762 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10763 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10766 gcc_unreachable ();
10772 pa_conditional_register_usage (void)
10776 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10778 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10779 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10780 for (i
= 33; i
< 56; i
+= 2)
10781 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10783 if (TARGET_SOFT_FLOAT
)
10785 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10786 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10789 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10792 /* Target hook for c_mode_for_suffix. */
10794 static machine_mode
10795 pa_c_mode_for_suffix (char suffix
)
10797 if (HPUX_LONG_DOUBLE_LIBRARY
)
10806 /* Target hook for function_section. */
10809 pa_function_section (tree decl
, enum node_frequency freq
,
10810 bool startup
, bool exit
)
10812 /* Put functions in text section if target doesn't have named sections. */
10813 if (!targetm_common
.have_named_sections
)
10814 return text_section
;
10816 /* Force nested functions into the same section as the containing
10819 && DECL_SECTION_NAME (decl
) == NULL
10820 && DECL_CONTEXT (decl
) != NULL_TREE
10821 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10822 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10823 return function_section (DECL_CONTEXT (decl
));
10825 /* Otherwise, use the default function section. */
10826 return default_function_section (decl
, freq
, startup
, exit
);
10829 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10831 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10832 that need more than three instructions to load prior to reload. This
10833 limit is somewhat arbitrary. It takes three instructions to load a
10834 CONST_INT from memory but two are memory accesses. It may be better
10835 to increase the allowed range for CONST_INTS. We may also be able
10836 to handle CONST_DOUBLES. */
10839 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10841 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10844 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10847 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10848 legitimate constants. The other variants can't be handled by
10849 the move patterns after reload starts. */
10850 if (tls_referenced_p (x
))
10853 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10857 && HOST_BITS_PER_WIDE_INT
> 32
10858 && GET_CODE (x
) == CONST_INT
10859 && !reload_in_progress
10860 && !reload_completed
10861 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10862 && !pa_cint_ok_for_move (UINTVAL (x
)))
10865 if (function_label_operand (x
, mode
))
10871 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10873 static unsigned int
10874 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10876 unsigned int flags
;
10878 flags
= default_section_type_flags (decl
, name
, reloc
);
10880 /* Function labels are placed in the constant pool. This can
10881 cause a section conflict if decls are put in ".data.rel.ro"
10882 or ".data.rel.ro.local" using the __attribute__ construct. */
10883 if (strcmp (name
, ".data.rel.ro") == 0
10884 || strcmp (name
, ".data.rel.ro.local") == 0)
10885 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10890 /* pa_legitimate_address_p recognizes an RTL expression that is a
10891 valid memory address for an instruction. The MODE argument is the
10892 machine mode for the MEM expression that wants to use this address.
10894 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10895 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10896 available with floating point loads and stores, and integer loads.
10897 We get better code by allowing indexed addresses in the initial
10900 The acceptance of indexed addresses as legitimate implies that we
10901 must provide patterns for doing indexed integer stores, or the move
10902 expanders must force the address of an indexed store to a register.
10903 We have adopted the latter approach.
10905 Another function of pa_legitimate_address_p is to ensure that
10906 the base register is a valid pointer for indexed instructions.
10907 On targets that have non-equivalent space registers, we have to
10908 know at the time of assembler output which register in a REG+REG
10909 pair is the base register. The REG_POINTER flag is sometimes lost
10910 in reload and the following passes, so it can't be relied on during
10911 code generation. Thus, we either have to canonicalize the order
10912 of the registers in REG+REG indexed addresses, or treat REG+REG
10913 addresses separately and provide patterns for both permutations.
10915 The latter approach requires several hundred additional lines of
10916 code in pa.md. The downside to canonicalizing is that a PLUS
10917 in the wrong order can't combine to form to make a scaled indexed
10918 memory operand. As we won't need to canonicalize the operands if
10919 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10921 We initially break out scaled indexed addresses in canonical order
10922 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10923 scaled indexed addresses during RTL generation. However, fold_rtx
10924 has its own opinion on how the operands of a PLUS should be ordered.
10925 If one of the operands is equivalent to a constant, it will make
10926 that operand the second operand. As the base register is likely to
10927 be equivalent to a SYMBOL_REF, we have made it the second operand.
10929 pa_legitimate_address_p accepts REG+REG as legitimate when the
10930 operands are in the order INDEX+BASE on targets with non-equivalent
10931 space registers, and in any order on targets with equivalent space
10932 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10934 We treat a SYMBOL_REF as legitimate if it is part of the current
10935 function's constant-pool, because such addresses can actually be
10936 output as REG+SMALLINT. */
10939 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
, code_helper
)
10942 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10943 : REG_OK_FOR_BASE_P (x
)))
10944 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10945 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10946 && REG_P (XEXP (x
, 0))
10947 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10948 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10951 if (GET_CODE (x
) == PLUS
)
10955 /* For REG+REG, the base register should be in XEXP (x, 1),
10956 so check it first. */
10957 if (REG_P (XEXP (x
, 1))
10958 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10959 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10960 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10961 else if (REG_P (XEXP (x
, 0))
10962 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10963 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10964 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10968 if (GET_CODE (index
) == CONST_INT
)
10970 /* Short 5-bit displacements always okay. */
10971 if (INT_5_BITS (index
))
10974 if (!base14_operand (index
, mode
))
10977 /* Long 14-bit displacements always okay for these cases. */
10978 if (INT14_OK_STRICT
10979 || reload_completed
10984 /* We have to limit displacements to those supported by
10985 both floating-point and integer accesses as reload can't
10986 fix invalid displacements. See PR114288. */
10990 if (!TARGET_DISABLE_INDEXING
10991 /* Only accept the "canonical" INDEX+BASE operand order
10992 on targets with non-equivalent space registers. */
10993 && (TARGET_NO_SPACE_REGS
10995 : (base
== XEXP (x
, 1) && REG_P (index
)
10996 && (reload_completed
10997 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10998 || REG_POINTER (base
))
10999 && (reload_completed
11000 || (reload_in_progress
&& HARD_REGISTER_P (index
))
11001 || !REG_POINTER (index
))))
11002 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
11003 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
11004 : REG_OK_FOR_INDEX_P (index
))
11005 && borx_reg_operand (base
, Pmode
)
11006 && borx_reg_operand (index
, Pmode
))
11009 if (!TARGET_DISABLE_INDEXING
11010 && GET_CODE (index
) == MULT
11011 /* Only accept base operands with the REG_POINTER flag prior to
11012 reload on targets with non-equivalent space registers. */
11013 && (TARGET_NO_SPACE_REGS
11014 || (base
== XEXP (x
, 1)
11015 && (reload_completed
11016 || (reload_in_progress
&& HARD_REGISTER_P (base
))
11017 || REG_POINTER (base
))))
11018 && REG_P (XEXP (index
, 0))
11019 && GET_MODE (XEXP (index
, 0)) == Pmode
11020 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
11021 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
11022 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
11023 && GET_CODE (XEXP (index
, 1)) == CONST_INT
11024 && INTVAL (XEXP (index
, 1))
11025 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
11026 && borx_reg_operand (base
, Pmode
))
11032 if (GET_CODE (x
) == LO_SUM
)
11034 rtx y
= XEXP (x
, 0);
11036 if (GET_CODE (y
) == SUBREG
)
11037 y
= SUBREG_REG (y
);
11040 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
11041 : REG_OK_FOR_BASE_P (y
)))
11045 /* Needed for -fPIC */
11047 && GET_CODE (y
) == UNSPEC
)
11050 /* Before reload, we need support for 14-bit floating
11051 point loads and stores, and associated relocations. */
11052 if ((TARGET_ELF32
|| !INT14_OK_STRICT
)
11053 && !reload_completed
11058 if (CONSTANT_P (y
))
11064 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
11070 /* Look for machine dependent ways to make the invalid address AD a
11073 For the PA, transform:
11075 memory(X + <large int>)
11079 if (<large int> & mask) >= 16
11080 Y = (<large int> & ~mask) + mask + 1 Round up.
11082 Y = (<large int> & ~mask) Round down.
11084 memory (Z + (<large int> - Y));
11086 This makes reload inheritance and reload_cse work better since Z
11089 There may be more opportunities to improve code with this hook. */
11092 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
11093 int opnum
, int type
,
11094 int ind_levels ATTRIBUTE_UNUSED
)
11096 long offset
, newoffset
, mask
;
11097 rtx new_rtx
, temp
= NULL_RTX
;
11099 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
11100 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
11102 if (optimize
&& GET_CODE (ad
) == PLUS
)
11103 temp
= simplify_binary_operation (PLUS
, Pmode
,
11104 XEXP (ad
, 0), XEXP (ad
, 1));
11106 new_rtx
= temp
? temp
: ad
;
11109 && GET_CODE (new_rtx
) == PLUS
11110 && GET_CODE (XEXP (new_rtx
, 0)) == REG
11111 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
11113 offset
= INTVAL (XEXP ((new_rtx
), 1));
11115 /* Choose rounding direction. Round up if we are >= halfway. */
11116 if ((offset
& mask
) >= ((mask
+ 1) / 2))
11117 newoffset
= (offset
& ~mask
) + mask
+ 1;
11119 newoffset
= offset
& ~mask
;
11121 /* Ensure that long displacements are aligned. */
11123 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
11124 || (TARGET_64BIT
&& (mode
) == DImode
)))
11125 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
11127 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
11129 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
11130 GEN_INT (newoffset
));
11131 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
11132 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
11133 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
11134 opnum
, (enum reload_type
) type
);
11142 /* Output address vector. */
11145 pa_output_addr_vec (rtx lab
, rtx body
)
11147 int idx
, vlen
= XVECLEN (body
, 0);
11150 fputs ("\t.align 4\n", asm_out_file
);
11151 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
11153 fputs ("\t.begin_brtab\n", asm_out_file
);
11154 for (idx
= 0; idx
< vlen
; idx
++)
11156 ASM_OUTPUT_ADDR_VEC_ELT
11157 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
11160 fputs ("\t.end_brtab\n", asm_out_file
);
11163 /* Output address difference vector. */
11166 pa_output_addr_diff_vec (rtx lab
, rtx body
)
11168 rtx base
= XEXP (XEXP (body
, 0), 0);
11169 int idx
, vlen
= XVECLEN (body
, 1);
11171 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
11173 fputs ("\t.begin_brtab\n", asm_out_file
);
11174 for (idx
= 0; idx
< vlen
; idx
++)
11176 ASM_OUTPUT_ADDR_DIFF_ELT
11179 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
11180 CODE_LABEL_NUMBER (base
));
11183 fputs ("\t.end_brtab\n", asm_out_file
);
11186 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
11187 arguments passed by hidden reference in the 32-bit HP runtime. Users
11188 can override this behavior for better compatibility with openmp at the
11189 risk of library incompatibilities. Arguments are always passed by value
11190 in the 64-bit HP runtime. */
11193 pa_callee_copies (cumulative_args_t
, const function_arg_info
&)
11195 return !TARGET_CALLER_COPIES
;
11198 /* Implement TARGET_HARD_REGNO_NREGS. */
11200 static unsigned int
11201 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED
, machine_mode mode
)
11203 return PA_HARD_REGNO_NREGS (regno
, mode
);
11206 /* Implement TARGET_HARD_REGNO_MODE_OK. */
11209 pa_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
11211 return PA_HARD_REGNO_MODE_OK (regno
, mode
);
11214 /* Implement TARGET_STARTING_FRAME_OFFSET.
11216 On the 32-bit ports, we reserve one slot for the previous frame
11217 pointer and one fill slot. The fill slot is for compatibility
11218 with HP compiled programs. On the 64-bit ports, we reserve one
11219 slot for the previous frame pointer. */
11221 static HOST_WIDE_INT
11222 pa_starting_frame_offset (void)
11227 /* Figure out the size in words of the function argument. */
11230 pa_function_arg_size (machine_mode mode
, const_tree type
)
11232 HOST_WIDE_INT size
;
11234 size
= mode
!= BLKmode
? GET_MODE_SIZE (mode
) : int_size_in_bytes (type
);
11236 /* The 64-bit runtime does not restrict the size of stack frames,
11237 but the gcc calling conventions limit argument sizes to 1G. Our
11238 prologue/epilogue code limits frame sizes to just under 32 bits.
11239 1G is also the maximum frame size that can be handled by the HPUX
11240 unwind descriptor. Since very large TYPE_SIZE_UNIT values can
11241 occur for (parallel:BLK []), we need to ignore large arguments
11242 passed by value. */
11243 if (size
>= (1 << (HOST_BITS_PER_INT
- 2)))
11245 return (int) CEIL (size
, UNITS_PER_WORD
);
11248 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
11251 pa_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
11253 const unsigned PA_FE_INEXACT
= 1;
11254 const unsigned PA_FE_UNDERFLOW
= 2;
11255 const unsigned PA_FE_OVERFLOW
= 4;
11256 const unsigned PA_FE_DIVBYZERO
= 8;
11257 const unsigned PA_FE_INVALID
= 16;
11258 const unsigned HOST_WIDE_INT PA_FE_ALL_EXCEPT
= (PA_FE_INVALID
11263 const unsigned HOST_WIDE_INT PA_FE_EXCEPT_SHIFT
= 27;
11264 tree fenv_var
, get_fpsr
, set_fpsr
, mask
, ld_fenv
, masked_fenv
;
11265 tree hold_all
, new_fenv_var
, reload_fenv
, restore_fnenv
;
11266 tree get_fpsr_call
, set_fpsr_call
, update_call
, atomic_feraiseexcept
;
11268 if (TARGET_SOFT_FLOAT
)
11271 /* Generate the equivalent of :
11272 unsigned int fenv_var;
11273 fenv_var = __builtin_get_fpsr ();
11275 unsigned int masked_fenv;
11276 masked_fenv = fenv_var & mask;
11278 __builtin_set_fpsr (masked_fenv); */
11280 fenv_var
= create_tmp_var_raw (unsigned_type_node
);
11281 get_fpsr
= pa_builtins
[PA_BUILTIN_GET_FPSR
];
11282 set_fpsr
= pa_builtins
[PA_BUILTIN_SET_FPSR
];
11283 mask
= build_int_cst (unsigned_type_node
,
11284 ~((PA_FE_ALL_EXCEPT
<< PA_FE_EXCEPT_SHIFT
)
11285 | PA_FE_ALL_EXCEPT
));
11287 get_fpsr_call
= build_call_expr (get_fpsr
, 0);
11288 ld_fenv
= build4 (TARGET_EXPR
, unsigned_type_node
,
11289 fenv_var
, get_fpsr_call
,
11290 NULL_TREE
, NULL_TREE
);
11291 masked_fenv
= build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
, mask
);
11292 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, masked_fenv
, ld_fenv
);
11293 set_fpsr_call
= build_call_expr (set_fpsr
, 1, masked_fenv
);
11294 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
, set_fpsr_call
);
11296 /* Store the value of masked_fenv to clear the exceptions:
11297 __builtin_set_fpsr (masked_fenv); */
11299 *clear
= set_fpsr_call
;
11301 /* Generate the equivalent of :
11302 unsigned int new_fenv_var;
11303 new_fenv_var = __builtin_get_fpsr ();
11305 __builtin_set_fpsr (fenv_var);
11307 __atomic_feraiseexcept (new_fenv_var); */
11309 new_fenv_var
= create_tmp_var_raw (unsigned_type_node
);
11310 reload_fenv
= build4 (TARGET_EXPR
, unsigned_type_node
, new_fenv_var
,
11311 get_fpsr_call
, NULL_TREE
, NULL_TREE
);
11312 restore_fnenv
= build_call_expr (set_fpsr
, 1, fenv_var
);
11313 atomic_feraiseexcept
= builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
11314 update_call
= build_call_expr (atomic_feraiseexcept
, 1,
11315 fold_convert (integer_type_node
,
11317 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
11318 build2 (COMPOUND_EXPR
, void_type_node
,
11319 reload_fenv
, restore_fnenv
), update_call
);