1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
31 #include "stringpool.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
49 #include "common/common-target.h"
50 #include "langhooks.h"
55 /* This file should be included last. */
56 #include "target-def.h"
58 /* Return nonzero if there is a bypass for the output of
59 OUT_INSN and the fp store IN_INSN. */
61 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
63 machine_mode store_mode
;
64 machine_mode other_mode
;
67 if (recog_memoized (in_insn
) < 0
68 || (get_attr_type (in_insn
) != TYPE_FPSTORE
69 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
70 || recog_memoized (out_insn
) < 0)
73 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
75 set
= single_set (out_insn
);
79 other_mode
= GET_MODE (SET_SRC (set
));
81 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
85 #ifndef DO_FRAME_NOTES
86 #ifdef INCOMING_RETURN_ADDR_RTX
87 #define DO_FRAME_NOTES 1
89 #define DO_FRAME_NOTES 0
93 static void pa_option_override (void);
94 static void copy_reg_pointer (rtx
, rtx
);
95 static void fix_range (const char *);
96 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
98 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
99 static bool hppa_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
100 static inline rtx
force_mode (machine_mode
, rtx
);
101 static void pa_reorg (void);
102 static void pa_combine_instructions (void);
103 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
105 static bool forward_branch_p (rtx_insn
*);
106 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
107 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
108 static int compute_movmem_length (rtx_insn
*);
109 static int compute_clrmem_length (rtx_insn
*);
110 static bool pa_assemble_integer (rtx
, unsigned int, int);
111 static void remove_useless_addtr_insns (int);
112 static void store_reg (int, HOST_WIDE_INT
, int);
113 static void store_reg_modify (int, int, HOST_WIDE_INT
);
114 static void load_reg (int, HOST_WIDE_INT
, int);
115 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
116 static rtx
pa_function_value (const_tree
, const_tree
, bool);
117 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
118 static bool pa_function_value_regno_p (const unsigned int);
119 static void pa_output_function_prologue (FILE *);
120 static void update_total_code_bytes (unsigned int);
121 static void pa_output_function_epilogue (FILE *);
122 static int pa_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
123 static int pa_adjust_priority (rtx_insn
*, int);
124 static int pa_issue_rate (void);
125 static int pa_reloc_rw_mask (void);
126 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
127 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
128 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
130 static void pa_encode_section_info (tree
, rtx
, int);
131 static const char *pa_strip_name_encoding (const char *);
132 static bool pa_function_ok_for_sibcall (tree
, tree
);
133 static void pa_globalize_label (FILE *, const char *)
135 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
136 HOST_WIDE_INT
, tree
);
137 #if !defined(USE_COLLECT2)
138 static void pa_asm_out_constructor (rtx
, int);
139 static void pa_asm_out_destructor (rtx
, int);
141 static void pa_init_builtins (void);
142 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
143 static rtx
hppa_builtin_saveregs (void);
144 static void hppa_va_start (tree
, rtx
);
145 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
146 static bool pa_scalar_mode_supported_p (scalar_mode
);
147 static bool pa_commutative_p (const_rtx x
, int outer_code
);
148 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
149 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
150 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
151 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
152 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
153 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
154 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
155 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
156 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
157 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
158 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
159 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
160 static void output_deferred_plabels (void);
161 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
162 static void pa_file_end (void);
163 static void pa_init_libfuncs (void);
164 static rtx
pa_struct_value_rtx (tree
, int);
165 static bool pa_pass_by_reference (cumulative_args_t
, machine_mode
,
167 static int pa_arg_partial_bytes (cumulative_args_t
, machine_mode
,
169 static void pa_function_arg_advance (cumulative_args_t
, machine_mode
,
171 static rtx
pa_function_arg (cumulative_args_t
, machine_mode
,
173 static pad_direction
pa_function_arg_padding (machine_mode
, const_tree
);
174 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
175 static struct machine_function
* pa_init_machine_status (void);
176 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
178 secondary_reload_info
*);
179 static bool pa_secondary_memory_needed (machine_mode
,
180 reg_class_t
, reg_class_t
);
181 static void pa_extra_live_on_entry (bitmap
);
182 static machine_mode
pa_promote_function_mode (const_tree
,
186 static void pa_asm_trampoline_template (FILE *);
187 static void pa_trampoline_init (rtx
, tree
, rtx
);
188 static rtx
pa_trampoline_adjust_address (rtx
);
189 static rtx
pa_delegitimize_address (rtx
);
190 static bool pa_print_operand_punct_valid_p (unsigned char);
191 static rtx
pa_internal_arg_pointer (void);
192 static bool pa_can_eliminate (const int, const int);
193 static void pa_conditional_register_usage (void);
194 static machine_mode
pa_c_mode_for_suffix (char);
195 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
196 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
197 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
198 static unsigned int pa_section_type_flags (tree
, const char *, int);
199 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool);
200 static bool pa_callee_copies (cumulative_args_t
, machine_mode
,
202 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode
);
203 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode
);
204 static bool pa_modes_tieable_p (machine_mode
, machine_mode
);
205 static bool pa_can_change_mode_class (machine_mode
, machine_mode
, reg_class_t
);
207 /* The following extra sections are only used for SOM. */
208 static GTY(()) section
*som_readonly_data_section
;
209 static GTY(()) section
*som_one_only_readonly_data_section
;
210 static GTY(()) section
*som_one_only_data_section
;
211 static GTY(()) section
*som_tm_clone_table_section
;
213 /* Counts for the number of callee-saved general and floating point
214 registers which were saved by the current function's prologue. */
215 static int gr_saved
, fr_saved
;
217 /* Boolean indicating whether the return pointer was saved by the
218 current function's prologue. */
219 static bool rp_saved
;
221 static rtx
find_addr_reg (rtx
);
223 /* Keep track of the number of bytes we have output in the CODE subspace
224 during this compilation so we'll know when to emit inline long-calls. */
225 unsigned long total_code_bytes
;
227 /* The last address of the previous function plus the number of bytes in
228 associated thunks that have been output. This is used to determine if
229 a thunk can use an IA-relative branch to reach its target function. */
230 static unsigned int last_address
;
232 /* Variables to handle plabels that we discover are necessary at assembly
233 output time. They are output after the current function. */
234 struct GTY(()) deferred_plabel
239 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
241 static size_t n_deferred_plabels
= 0;
243 /* Initialize the GCC target structure. */
245 #undef TARGET_OPTION_OVERRIDE
246 #define TARGET_OPTION_OVERRIDE pa_option_override
248 #undef TARGET_ASM_ALIGNED_HI_OP
249 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
250 #undef TARGET_ASM_ALIGNED_SI_OP
251 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
252 #undef TARGET_ASM_ALIGNED_DI_OP
253 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
254 #undef TARGET_ASM_UNALIGNED_HI_OP
255 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
256 #undef TARGET_ASM_UNALIGNED_SI_OP
257 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
258 #undef TARGET_ASM_UNALIGNED_DI_OP
259 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
260 #undef TARGET_ASM_INTEGER
261 #define TARGET_ASM_INTEGER pa_assemble_integer
263 #undef TARGET_ASM_FUNCTION_PROLOGUE
264 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
265 #undef TARGET_ASM_FUNCTION_EPILOGUE
266 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
268 #undef TARGET_FUNCTION_VALUE
269 #define TARGET_FUNCTION_VALUE pa_function_value
270 #undef TARGET_LIBCALL_VALUE
271 #define TARGET_LIBCALL_VALUE pa_libcall_value
272 #undef TARGET_FUNCTION_VALUE_REGNO_P
273 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
275 #undef TARGET_LEGITIMIZE_ADDRESS
276 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
278 #undef TARGET_SCHED_ADJUST_COST
279 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
280 #undef TARGET_SCHED_ADJUST_PRIORITY
281 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
282 #undef TARGET_SCHED_ISSUE_RATE
283 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
285 #undef TARGET_ENCODE_SECTION_INFO
286 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
287 #undef TARGET_STRIP_NAME_ENCODING
288 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
290 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
291 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
293 #undef TARGET_COMMUTATIVE_P
294 #define TARGET_COMMUTATIVE_P pa_commutative_p
296 #undef TARGET_ASM_OUTPUT_MI_THUNK
297 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
301 #undef TARGET_ASM_FILE_END
302 #define TARGET_ASM_FILE_END pa_file_end
304 #undef TARGET_ASM_RELOC_RW_MASK
305 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
307 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
308 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
310 #if !defined(USE_COLLECT2)
311 #undef TARGET_ASM_CONSTRUCTOR
312 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
313 #undef TARGET_ASM_DESTRUCTOR
314 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
317 #undef TARGET_INIT_BUILTINS
318 #define TARGET_INIT_BUILTINS pa_init_builtins
320 #undef TARGET_EXPAND_BUILTIN
321 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
323 #undef TARGET_REGISTER_MOVE_COST
324 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
325 #undef TARGET_RTX_COSTS
326 #define TARGET_RTX_COSTS hppa_rtx_costs
327 #undef TARGET_ADDRESS_COST
328 #define TARGET_ADDRESS_COST hppa_address_cost
330 #undef TARGET_MACHINE_DEPENDENT_REORG
331 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
333 #undef TARGET_INIT_LIBFUNCS
334 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
336 #undef TARGET_PROMOTE_FUNCTION_MODE
337 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
338 #undef TARGET_PROMOTE_PROTOTYPES
339 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
341 #undef TARGET_STRUCT_VALUE_RTX
342 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
343 #undef TARGET_RETURN_IN_MEMORY
344 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
345 #undef TARGET_MUST_PASS_IN_STACK
346 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
347 #undef TARGET_PASS_BY_REFERENCE
348 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
349 #undef TARGET_CALLEE_COPIES
350 #define TARGET_CALLEE_COPIES pa_callee_copies
351 #undef TARGET_ARG_PARTIAL_BYTES
352 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
353 #undef TARGET_FUNCTION_ARG
354 #define TARGET_FUNCTION_ARG pa_function_arg
355 #undef TARGET_FUNCTION_ARG_ADVANCE
356 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
357 #undef TARGET_FUNCTION_ARG_PADDING
358 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
359 #undef TARGET_FUNCTION_ARG_BOUNDARY
360 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
362 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
363 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
364 #undef TARGET_EXPAND_BUILTIN_VA_START
365 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
366 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
367 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
369 #undef TARGET_SCALAR_MODE_SUPPORTED_P
370 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
372 #undef TARGET_CANNOT_FORCE_CONST_MEM
373 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
375 #undef TARGET_SECONDARY_RELOAD
376 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
377 #undef TARGET_SECONDARY_MEMORY_NEEDED
378 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
380 #undef TARGET_EXTRA_LIVE_ON_ENTRY
381 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
383 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
384 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
385 #undef TARGET_TRAMPOLINE_INIT
386 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
387 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
388 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
389 #undef TARGET_DELEGITIMIZE_ADDRESS
390 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
391 #undef TARGET_INTERNAL_ARG_POINTER
392 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
393 #undef TARGET_CAN_ELIMINATE
394 #define TARGET_CAN_ELIMINATE pa_can_eliminate
395 #undef TARGET_CONDITIONAL_REGISTER_USAGE
396 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
397 #undef TARGET_C_MODE_FOR_SUFFIX
398 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
399 #undef TARGET_ASM_FUNCTION_SECTION
400 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
402 #undef TARGET_LEGITIMATE_CONSTANT_P
403 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
404 #undef TARGET_SECTION_TYPE_FLAGS
405 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
406 #undef TARGET_LEGITIMATE_ADDRESS_P
407 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
410 #define TARGET_LRA_P hook_bool_void_false
412 #undef TARGET_HARD_REGNO_NREGS
413 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
414 #undef TARGET_HARD_REGNO_MODE_OK
415 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
416 #undef TARGET_MODES_TIEABLE_P
417 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
419 #undef TARGET_CAN_CHANGE_MODE_CLASS
420 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
422 #undef TARGET_CONSTANT_ALIGNMENT
423 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
425 struct gcc_target targetm
= TARGET_INITIALIZER
;
427 /* Parse the -mfixed-range= option string. */
430 fix_range (const char *const_str
)
433 char *str
, *dash
, *comma
;
435 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
436 REG2 are either register names or register numbers. The effect
437 of this option is to mark the registers in the range from REG1 to
438 REG2 as ``fixed'' so they won't be used by the compiler. This is
439 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
441 i
= strlen (const_str
);
442 str
= (char *) alloca (i
+ 1);
443 memcpy (str
, const_str
, i
+ 1);
447 dash
= strchr (str
, '-');
450 warning (0, "value of -mfixed-range must have form REG1-REG2");
455 comma
= strchr (dash
+ 1, ',');
459 first
= decode_reg_name (str
);
462 warning (0, "unknown register name: %s", str
);
466 last
= decode_reg_name (dash
+ 1);
469 warning (0, "unknown register name: %s", dash
+ 1);
477 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
481 for (i
= first
; i
<= last
; ++i
)
482 fixed_regs
[i
] = call_used_regs
[i
] = 1;
491 /* Check if all floating point registers have been fixed. */
492 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
497 target_flags
|= MASK_DISABLE_FPREGS
;
500 /* Implement the TARGET_OPTION_OVERRIDE hook. */
503 pa_option_override (void)
506 cl_deferred_option
*opt
;
507 vec
<cl_deferred_option
> *v
508 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
511 FOR_EACH_VEC_ELT (*v
, i
, opt
)
513 switch (opt
->opt_index
)
515 case OPT_mfixed_range_
:
516 fix_range (opt
->arg
);
524 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
526 warning (0, "PIC code generation is not supported in the portable runtime model");
529 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
531 warning (0, "PIC code generation is not compatible with fast indirect calls");
534 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
536 warning (0, "-g is only supported when using GAS on this processor,");
537 warning (0, "-g option disabled");
538 write_symbols
= NO_DEBUG
;
541 /* We only support the "big PIC" model now. And we always generate PIC
542 code when in 64bit mode. */
543 if (flag_pic
== 1 || TARGET_64BIT
)
546 /* Disable -freorder-blocks-and-partition as we don't support hot and
547 cold partitioning. */
548 if (flag_reorder_blocks_and_partition
)
550 inform (input_location
,
551 "-freorder-blocks-and-partition does not work "
552 "on this architecture");
553 flag_reorder_blocks_and_partition
= 0;
554 flag_reorder_blocks
= 1;
557 /* We can't guarantee that .dword is available for 32-bit targets. */
558 if (UNITS_PER_WORD
== 4)
559 targetm
.asm_out
.aligned_op
.di
= NULL
;
561 /* The unaligned ops are only available when using GAS. */
564 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
565 targetm
.asm_out
.unaligned_op
.si
= NULL
;
566 targetm
.asm_out
.unaligned_op
.di
= NULL
;
569 init_machine_status
= pa_init_machine_status
;
574 PA_BUILTIN_COPYSIGNQ
,
577 PA_BUILTIN_HUGE_VALQ
,
581 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
584 pa_init_builtins (void)
586 #ifdef DONT_HAVE_FPUTC_UNLOCKED
588 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
589 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
590 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
597 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
598 set_user_assembler_name (decl
, "_Isfinite");
599 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
600 set_user_assembler_name (decl
, "_Isfinitef");
604 if (HPUX_LONG_DOUBLE_LIBRARY
)
608 /* Under HPUX, the __float128 type is a synonym for "long double". */
609 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
612 /* TFmode support builtins. */
613 ftype
= build_function_type_list (long_double_type_node
,
614 long_double_type_node
,
616 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
617 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
618 "_U_Qfabs", NULL_TREE
);
619 TREE_READONLY (decl
) = 1;
620 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
622 ftype
= build_function_type_list (long_double_type_node
,
623 long_double_type_node
,
624 long_double_type_node
,
626 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
627 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
628 "_U_Qfcopysign", NULL_TREE
);
629 TREE_READONLY (decl
) = 1;
630 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
632 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
633 decl
= add_builtin_function ("__builtin_infq", ftype
,
634 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
636 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
638 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
639 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
641 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
646 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
647 machine_mode mode ATTRIBUTE_UNUSED
,
648 int ignore ATTRIBUTE_UNUSED
)
650 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
651 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
655 case PA_BUILTIN_FABSQ
:
656 case PA_BUILTIN_COPYSIGNQ
:
657 return expand_call (exp
, target
, ignore
);
659 case PA_BUILTIN_INFQ
:
660 case PA_BUILTIN_HUGE_VALQ
:
662 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
667 tmp
= const_double_from_real_value (inf
, target_mode
);
669 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
672 target
= gen_reg_rtx (target_mode
);
674 emit_move_insn (target
, tmp
);
685 /* Function to init struct machine_function.
686 This will be called, via a pointer variable,
687 from push_function_context. */
689 static struct machine_function
*
690 pa_init_machine_status (void)
692 return ggc_cleared_alloc
<machine_function
> ();
695 /* If FROM is a probable pointer register, mark TO as a probable
696 pointer register with the same pointer alignment as FROM. */
699 copy_reg_pointer (rtx to
, rtx from
)
701 if (REG_POINTER (from
))
702 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
705 /* Return 1 if X contains a symbolic expression. We know these
706 expressions will have one of a few well defined forms, so
707 we need only check those forms. */
709 pa_symbolic_expression_p (rtx x
)
712 /* Strip off any HIGH. */
713 if (GET_CODE (x
) == HIGH
)
716 return symbolic_operand (x
, VOIDmode
);
719 /* Accept any constant that can be moved in one instruction into a
722 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival
)
724 /* OK if ldo, ldil, or zdepi, can be used. */
725 return (VAL_14_BITS_P (ival
)
726 || pa_ldil_cint_p (ival
)
727 || pa_zdepi_cint_p (ival
));
730 /* True iff ldil can be used to load this CONST_INT. The least
731 significant 11 bits of the value must be zero and the value must
732 not change sign when extended from 32 to 64 bits. */
734 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival
)
736 unsigned HOST_WIDE_INT x
;
738 x
= ival
& (((unsigned HOST_WIDE_INT
) -1 << 31) | 0x7ff);
739 return x
== 0 || x
== ((unsigned HOST_WIDE_INT
) -1 << 31);
742 /* True iff zdepi can be used to generate this CONST_INT.
743 zdepi first sign extends a 5-bit signed number to a given field
744 length, then places this field anywhere in a zero. */
746 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
748 unsigned HOST_WIDE_INT lsb_mask
, t
;
750 /* This might not be obvious, but it's at least fast.
751 This function is critical; we don't have the time loops would take. */
753 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
754 /* Return true iff t is a power of two. */
755 return ((t
& (t
- 1)) == 0);
758 /* True iff depi or extru can be used to compute (reg & mask).
759 Accept bit pattern like these:
764 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
767 mask
+= mask
& -mask
;
768 return (mask
& (mask
- 1)) == 0;
771 /* True iff depi can be used to compute (reg | MASK). */
773 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
775 mask
+= mask
& -mask
;
776 return (mask
& (mask
- 1)) == 0;
779 /* Legitimize PIC addresses. If the address is already
780 position-independent, we return ORIG. Newly generated
781 position-independent addresses go to REG. If we need more
782 than one register, we lose. */
785 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
789 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
791 /* Labels need special handling. */
792 if (pic_label_operand (orig
, mode
))
796 /* We do not want to go through the movXX expanders here since that
797 would create recursion.
799 Nor do we really want to call a generator for a named pattern
800 since that requires multiple patterns if we want to support
803 So instead we just emit the raw set, which avoids the movXX
804 expanders completely. */
805 mark_reg_pointer (reg
, BITS_PER_UNIT
);
806 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
808 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
809 add_reg_note (insn
, REG_EQUAL
, orig
);
811 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
812 and update LABEL_NUSES because this is not done automatically. */
813 if (reload_in_progress
|| reload_completed
)
815 /* Extract LABEL_REF. */
816 if (GET_CODE (orig
) == CONST
)
817 orig
= XEXP (XEXP (orig
, 0), 0);
818 /* Extract CODE_LABEL. */
819 orig
= XEXP (orig
, 0);
820 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
821 /* Make sure we have label and not a note. */
823 LABEL_NUSES (orig
)++;
825 crtl
->uses_pic_offset_table
= 1;
828 if (GET_CODE (orig
) == SYMBOL_REF
)
835 /* Before reload, allocate a temporary register for the intermediate
836 result. This allows the sequence to be deleted when the final
837 result is unused and the insns are trivially dead. */
838 tmp_reg
= ((reload_in_progress
|| reload_completed
)
839 ? reg
: gen_reg_rtx (Pmode
));
841 if (function_label_operand (orig
, VOIDmode
))
843 /* Force function label into memory in word mode. */
844 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
845 /* Load plabel address from DLT. */
846 emit_move_insn (tmp_reg
,
847 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
848 gen_rtx_HIGH (word_mode
, orig
)));
850 = gen_const_mem (Pmode
,
851 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
852 gen_rtx_UNSPEC (Pmode
,
855 emit_move_insn (reg
, pic_ref
);
856 /* Now load address of function descriptor. */
857 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
861 /* Load symbol reference from DLT. */
862 emit_move_insn (tmp_reg
,
863 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
864 gen_rtx_HIGH (word_mode
, orig
)));
866 = gen_const_mem (Pmode
,
867 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
868 gen_rtx_UNSPEC (Pmode
,
873 crtl
->uses_pic_offset_table
= 1;
874 mark_reg_pointer (reg
, BITS_PER_UNIT
);
875 insn
= emit_move_insn (reg
, pic_ref
);
877 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
878 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
882 else if (GET_CODE (orig
) == CONST
)
886 if (GET_CODE (XEXP (orig
, 0)) == PLUS
887 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
891 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
893 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
894 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
895 base
== reg
? 0 : reg
);
897 if (GET_CODE (orig
) == CONST_INT
)
899 if (INT_14_BITS (orig
))
900 return plus_constant (Pmode
, base
, INTVAL (orig
));
901 orig
= force_reg (Pmode
, orig
);
903 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
904 /* Likewise, should we set special REG_NOTEs here? */
910 static GTY(()) rtx gen_tls_tga
;
913 gen_tls_get_addr (void)
916 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
921 hppa_tls_call (rtx arg
)
925 ret
= gen_reg_rtx (Pmode
);
926 emit_library_call_value (gen_tls_get_addr (), ret
,
927 LCT_CONST
, Pmode
, arg
, Pmode
);
933 legitimize_tls_address (rtx addr
)
935 rtx ret
, tmp
, t1
, t2
, tp
;
938 /* Currently, we can't handle anything but a SYMBOL_REF. */
939 if (GET_CODE (addr
) != SYMBOL_REF
)
942 switch (SYMBOL_REF_TLS_MODEL (addr
))
944 case TLS_MODEL_GLOBAL_DYNAMIC
:
945 tmp
= gen_reg_rtx (Pmode
);
947 emit_insn (gen_tgd_load_pic (tmp
, addr
));
949 emit_insn (gen_tgd_load (tmp
, addr
));
950 ret
= hppa_tls_call (tmp
);
953 case TLS_MODEL_LOCAL_DYNAMIC
:
954 ret
= gen_reg_rtx (Pmode
);
955 tmp
= gen_reg_rtx (Pmode
);
958 emit_insn (gen_tld_load_pic (tmp
, addr
));
960 emit_insn (gen_tld_load (tmp
, addr
));
961 t1
= hppa_tls_call (tmp
);
964 t2
= gen_reg_rtx (Pmode
);
965 emit_libcall_block (insn
, t2
, t1
,
966 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
968 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
971 case TLS_MODEL_INITIAL_EXEC
:
972 tp
= gen_reg_rtx (Pmode
);
973 tmp
= gen_reg_rtx (Pmode
);
974 ret
= gen_reg_rtx (Pmode
);
975 emit_insn (gen_tp_load (tp
));
977 emit_insn (gen_tie_load_pic (tmp
, addr
));
979 emit_insn (gen_tie_load (tmp
, addr
));
980 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
983 case TLS_MODEL_LOCAL_EXEC
:
984 tp
= gen_reg_rtx (Pmode
);
985 ret
= gen_reg_rtx (Pmode
);
986 emit_insn (gen_tp_load (tp
));
987 emit_insn (gen_tle_load (ret
, addr
, tp
));
997 /* Helper for hppa_legitimize_address. Given X, return true if it
998 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1000 This respectively represent canonical shift-add rtxs or scaled
1001 memory addresses. */
1003 mem_shadd_or_shadd_rtx_p (rtx x
)
1005 return ((GET_CODE (x
) == ASHIFT
1006 || GET_CODE (x
) == MULT
)
1007 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1008 && ((GET_CODE (x
) == ASHIFT
1009 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
1010 || (GET_CODE (x
) == MULT
1011 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
1014 /* Try machine-dependent ways of modifying an illegitimate address
1015 to be legitimate. If we find one, return the new, valid address.
1016 This macro is used in only one place: `memory_address' in explow.c.
1018 OLDX is the address as it was before break_out_memory_refs was called.
1019 In some cases it is useful to look at this to decide what needs to be done.
1021 It is always safe for this macro to do nothing. It exists to recognize
1022 opportunities to optimize the output.
1024 For the PA, transform:
1026 memory(X + <large int>)
1030 if (<large int> & mask) >= 16
1031 Y = (<large int> & ~mask) + mask + 1 Round up.
1033 Y = (<large int> & ~mask) Round down.
1035 memory (Z + (<large int> - Y));
1037 This is for CSE to find several similar references, and only use one Z.
1039 X can either be a SYMBOL_REF or REG, but because combine cannot
1040 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1041 D will not fit in 14 bits.
1043 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1046 MODE_INT references allow displacements which fit in 14 bits, so use
1049 This relies on the fact that most mode MODE_FLOAT references will use FP
1050 registers and most mode MODE_INT references will use integer registers.
1051 (In the rare case of an FP register used in an integer MODE, we depend
1052 on secondary reloads to clean things up.)
1055 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1056 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1057 addressing modes to be used).
1059 Note that the addresses passed into hppa_legitimize_address always
1060 come from a MEM, so we only have to match the MULT form on incoming
1061 addresses. But to be future proof we also match the ASHIFT form.
1063 However, this routine always places those shift-add sequences into
1064 registers, so we have to generate the ASHIFT form as our output.
1066 Put X and Z into registers. Then put the entire expression into
1070 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1075 /* We need to canonicalize the order of operands in unscaled indexed
1076 addresses since the code that checks if an address is valid doesn't
1077 always try both orders. */
1078 if (!TARGET_NO_SPACE_REGS
1079 && GET_CODE (x
) == PLUS
1080 && GET_MODE (x
) == Pmode
1081 && REG_P (XEXP (x
, 0))
1082 && REG_P (XEXP (x
, 1))
1083 && REG_POINTER (XEXP (x
, 0))
1084 && !REG_POINTER (XEXP (x
, 1)))
1085 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1087 if (tls_referenced_p (x
))
1088 return legitimize_tls_address (x
);
1090 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1092 /* Strip off CONST. */
1093 if (GET_CODE (x
) == CONST
)
1096 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1097 That should always be safe. */
1098 if (GET_CODE (x
) == PLUS
1099 && GET_CODE (XEXP (x
, 0)) == REG
1100 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1102 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1103 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1106 /* Note we must reject symbols which represent function addresses
1107 since the assembler/linker can't handle arithmetic on plabels. */
1108 if (GET_CODE (x
) == PLUS
1109 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1110 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1111 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1112 || GET_CODE (XEXP (x
, 0)) == REG
))
1114 rtx int_part
, ptr_reg
;
1116 int offset
= INTVAL (XEXP (x
, 1));
1119 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1120 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1122 /* Choose which way to round the offset. Round up if we
1123 are >= halfway to the next boundary. */
1124 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1125 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1127 newoffset
= (offset
& ~ mask
);
1129 /* If the newoffset will not fit in 14 bits (ldo), then
1130 handling this would take 4 or 5 instructions (2 to load
1131 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1132 add the new offset and the SYMBOL_REF.) Combine can
1133 not handle 4->2 or 5->2 combinations, so do not create
1135 if (! VAL_14_BITS_P (newoffset
)
1136 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1138 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1141 gen_rtx_HIGH (Pmode
, const_part
));
1144 gen_rtx_LO_SUM (Pmode
,
1145 tmp_reg
, const_part
));
1149 if (! VAL_14_BITS_P (newoffset
))
1150 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1152 int_part
= GEN_INT (newoffset
);
1154 ptr_reg
= force_reg (Pmode
,
1155 gen_rtx_PLUS (Pmode
,
1156 force_reg (Pmode
, XEXP (x
, 0)),
1159 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1162 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1164 if (GET_CODE (x
) == PLUS
1165 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1166 && (OBJECT_P (XEXP (x
, 1))
1167 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1168 && GET_CODE (XEXP (x
, 1)) != CONST
)
1170 /* If we were given a MULT, we must fix the constant
1171 as we're going to create the ASHIFT form. */
1172 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1173 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1174 shift_val
= exact_log2 (shift_val
);
1178 if (GET_CODE (reg1
) != REG
)
1179 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1181 reg2
= XEXP (XEXP (x
, 0), 0);
1182 if (GET_CODE (reg2
) != REG
)
1183 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1185 return force_reg (Pmode
,
1186 gen_rtx_PLUS (Pmode
,
1187 gen_rtx_ASHIFT (Pmode
, reg2
,
1188 GEN_INT (shift_val
)),
1192 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1194 Only do so for floating point modes since this is more speculative
1195 and we lose if it's an integer store. */
1196 if (GET_CODE (x
) == PLUS
1197 && GET_CODE (XEXP (x
, 0)) == PLUS
1198 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1199 && (mode
== SFmode
|| mode
== DFmode
))
1201 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1203 /* If we were given a MULT, we must fix the constant
1204 as we're going to create the ASHIFT form. */
1205 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1206 shift_val
= exact_log2 (shift_val
);
1208 /* Try and figure out what to use as a base register. */
1209 rtx reg1
, reg2
, base
, idx
;
1211 reg1
= XEXP (XEXP (x
, 0), 1);
1216 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1217 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1218 it's a base register below. */
1219 if (GET_CODE (reg1
) != REG
)
1220 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1222 if (GET_CODE (reg2
) != REG
)
1223 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1225 /* Figure out what the base and index are. */
1227 if (GET_CODE (reg1
) == REG
1228 && REG_POINTER (reg1
))
1231 idx
= gen_rtx_PLUS (Pmode
,
1232 gen_rtx_ASHIFT (Pmode
,
1233 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1234 GEN_INT (shift_val
)),
1237 else if (GET_CODE (reg2
) == REG
1238 && REG_POINTER (reg2
))
1247 /* If the index adds a large constant, try to scale the
1248 constant so that it can be loaded with only one insn. */
1249 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1250 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1251 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1252 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1254 /* Divide the CONST_INT by the scale factor, then add it to A. */
1255 int val
= INTVAL (XEXP (idx
, 1));
1256 val
/= (1 << shift_val
);
1258 reg1
= XEXP (XEXP (idx
, 0), 0);
1259 if (GET_CODE (reg1
) != REG
)
1260 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1262 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1264 /* We can now generate a simple scaled indexed address. */
1267 (Pmode
, gen_rtx_PLUS (Pmode
,
1268 gen_rtx_ASHIFT (Pmode
, reg1
,
1269 GEN_INT (shift_val
)),
1273 /* If B + C is still a valid base register, then add them. */
1274 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1275 && INTVAL (XEXP (idx
, 1)) <= 4096
1276 && INTVAL (XEXP (idx
, 1)) >= -4096)
1280 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1282 reg2
= XEXP (XEXP (idx
, 0), 0);
1283 if (GET_CODE (reg2
) != CONST_INT
)
1284 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1286 return force_reg (Pmode
,
1287 gen_rtx_PLUS (Pmode
,
1288 gen_rtx_ASHIFT (Pmode
, reg2
,
1289 GEN_INT (shift_val
)),
1293 /* Get the index into a register, then add the base + index and
1294 return a register holding the result. */
1296 /* First get A into a register. */
1297 reg1
= XEXP (XEXP (idx
, 0), 0);
1298 if (GET_CODE (reg1
) != REG
)
1299 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1301 /* And get B into a register. */
1302 reg2
= XEXP (idx
, 1);
1303 if (GET_CODE (reg2
) != REG
)
1304 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1306 reg1
= force_reg (Pmode
,
1307 gen_rtx_PLUS (Pmode
,
1308 gen_rtx_ASHIFT (Pmode
, reg1
,
1309 GEN_INT (shift_val
)),
1312 /* Add the result to our base register and return. */
1313 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1317 /* Uh-oh. We might have an address for x[n-100000]. This needs
1318 special handling to avoid creating an indexed memory address
1319 with x-100000 as the base.
1321 If the constant part is small enough, then it's still safe because
1322 there is a guard page at the beginning and end of the data segment.
1324 Scaled references are common enough that we want to try and rearrange the
1325 terms so that we can use indexing for these addresses too. Only
1326 do the optimization for floatint point modes. */
1328 if (GET_CODE (x
) == PLUS
1329 && pa_symbolic_expression_p (XEXP (x
, 1)))
1331 /* Ugly. We modify things here so that the address offset specified
1332 by the index expression is computed first, then added to x to form
1333 the entire address. */
1335 rtx regx1
, regx2
, regy1
, regy2
, y
;
1337 /* Strip off any CONST. */
1339 if (GET_CODE (y
) == CONST
)
1342 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1344 /* See if this looks like
1345 (plus (mult (reg) (mem_shadd_const))
1346 (const (plus (symbol_ref) (const_int))))
1348 Where const_int is small. In that case the const
1349 expression is a valid pointer for indexing.
1351 If const_int is big, but can be divided evenly by shadd_const
1352 and added to (reg). This allows more scaled indexed addresses. */
1353 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1354 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1355 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1356 && INTVAL (XEXP (y
, 1)) >= -4096
1357 && INTVAL (XEXP (y
, 1)) <= 4095)
1359 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1361 /* If we were given a MULT, we must fix the constant
1362 as we're going to create the ASHIFT form. */
1363 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1364 shift_val
= exact_log2 (shift_val
);
1369 if (GET_CODE (reg1
) != REG
)
1370 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1372 reg2
= XEXP (XEXP (x
, 0), 0);
1373 if (GET_CODE (reg2
) != REG
)
1374 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1378 gen_rtx_PLUS (Pmode
,
1379 gen_rtx_ASHIFT (Pmode
,
1381 GEN_INT (shift_val
)),
1384 else if ((mode
== DFmode
|| mode
== SFmode
)
1385 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1386 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1387 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1388 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1390 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1392 /* If we were given a MULT, we must fix the constant
1393 as we're going to create the ASHIFT form. */
1394 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1395 shift_val
= exact_log2 (shift_val
);
1398 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1399 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1400 regx2
= XEXP (XEXP (x
, 0), 0);
1401 if (GET_CODE (regx2
) != REG
)
1402 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1403 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1407 gen_rtx_PLUS (Pmode
,
1408 gen_rtx_ASHIFT (Pmode
, regx2
,
1409 GEN_INT (shift_val
)),
1410 force_reg (Pmode
, XEXP (y
, 0))));
1412 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1413 && INTVAL (XEXP (y
, 1)) >= -4096
1414 && INTVAL (XEXP (y
, 1)) <= 4095)
1416 /* This is safe because of the guard page at the
1417 beginning and end of the data space. Just
1418 return the original address. */
1423 /* Doesn't look like one we can optimize. */
1424 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1425 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1426 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1427 regx1
= force_reg (Pmode
,
1428 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1430 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1438 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1440 Compute extra cost of moving data between one register class
1443 Make moves from SAR so expensive they should never happen. We used to
1444 have 0xffff here, but that generates overflow in rare cases.
1446 Copies involving a FP register and a non-FP register are relatively
1447 expensive because they must go through memory.
1449 Other copies are reasonably cheap. */
1452 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1453 reg_class_t from
, reg_class_t to
)
1455 if (from
== SHIFT_REGS
)
1457 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1459 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1460 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1466 /* For the HPPA, REG and REG+CONST is cost 0
1467 and addresses involving symbolic constants are cost 2.
1469 PIC addresses are very expensive.
1471 It is no coincidence that this has the same structure
1472 as pa_legitimate_address_p. */
1475 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1476 addr_space_t as ATTRIBUTE_UNUSED
,
1477 bool speed ATTRIBUTE_UNUSED
)
1479 switch (GET_CODE (X
))
1492 /* Compute a (partial) cost for rtx X. Return true if the complete
1493 cost has been computed, and false if subexpressions should be
1494 scanned. In either case, *TOTAL contains the cost result. */
1497 hppa_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
1498 int opno ATTRIBUTE_UNUSED
,
1499 int *total
, bool speed ATTRIBUTE_UNUSED
)
1502 int code
= GET_CODE (x
);
1507 if (INTVAL (x
) == 0)
1509 else if (INT_14_BITS (x
))
1526 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1527 && outer_code
!= SET
)
1534 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1536 *total
= COSTS_N_INSNS (3);
1540 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1541 factor
= GET_MODE_SIZE (mode
) / 4;
1545 if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1546 *total
= factor
* factor
* COSTS_N_INSNS (8);
1548 *total
= factor
* factor
* COSTS_N_INSNS (20);
1552 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1554 *total
= COSTS_N_INSNS (14);
1562 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1563 factor
= GET_MODE_SIZE (mode
) / 4;
1567 *total
= factor
* factor
* COSTS_N_INSNS (60);
1570 case PLUS
: /* this includes shNadd insns */
1572 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1574 *total
= COSTS_N_INSNS (3);
1578 /* A size N times larger than UNITS_PER_WORD needs N times as
1579 many insns, taking N times as long. */
1580 factor
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
1583 *total
= factor
* COSTS_N_INSNS (1);
1589 *total
= COSTS_N_INSNS (1);
1597 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1598 new rtx with the correct mode. */
1600 force_mode (machine_mode mode
, rtx orig
)
1602 if (mode
== GET_MODE (orig
))
1605 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1607 return gen_rtx_REG (mode
, REGNO (orig
));
1610 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1613 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1615 return tls_referenced_p (x
);
1618 /* Emit insns to move operands[1] into operands[0].
1620 Return 1 if we have written out everything that needs to be done to
1621 do the move. Otherwise, return 0 and the caller will emit the move
1624 Note SCRATCH_REG may not be in the proper mode depending on how it
1625 will be used. This routine is responsible for creating a new copy
1626 of SCRATCH_REG in the proper mode. */
1629 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1631 register rtx operand0
= operands
[0];
1632 register rtx operand1
= operands
[1];
1635 /* We can only handle indexed addresses in the destination operand
1636 of floating point stores. Thus, we need to break out indexed
1637 addresses from the destination operand. */
1638 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1640 gcc_assert (can_create_pseudo_p ());
1642 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1643 operand0
= replace_equiv_address (operand0
, tem
);
1646 /* On targets with non-equivalent space registers, break out unscaled
1647 indexed addresses from the source operand before the final CSE.
1648 We have to do this because the REG_POINTER flag is not correctly
1649 carried through various optimization passes and CSE may substitute
1650 a pseudo without the pointer set for one with the pointer set. As
1651 a result, we loose various opportunities to create insns with
1652 unscaled indexed addresses. */
1653 if (!TARGET_NO_SPACE_REGS
1654 && !cse_not_expected
1655 && GET_CODE (operand1
) == MEM
1656 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1657 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1658 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1660 = replace_equiv_address (operand1
,
1661 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1664 && reload_in_progress
&& GET_CODE (operand0
) == REG
1665 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1666 operand0
= reg_equiv_mem (REGNO (operand0
));
1667 else if (scratch_reg
1668 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1669 && GET_CODE (SUBREG_REG (operand0
)) == REG
1670 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1672 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1673 the code which tracks sets/uses for delete_output_reload. */
1674 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1675 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1676 SUBREG_BYTE (operand0
));
1677 operand0
= alter_subreg (&temp
, true);
1681 && reload_in_progress
&& GET_CODE (operand1
) == REG
1682 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1683 operand1
= reg_equiv_mem (REGNO (operand1
));
1684 else if (scratch_reg
1685 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1686 && GET_CODE (SUBREG_REG (operand1
)) == REG
1687 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1689 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1690 the code which tracks sets/uses for delete_output_reload. */
1691 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1692 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1693 SUBREG_BYTE (operand1
));
1694 operand1
= alter_subreg (&temp
, true);
1697 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1698 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1699 != XEXP (operand0
, 0)))
1700 operand0
= replace_equiv_address (operand0
, tem
);
1702 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1703 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1704 != XEXP (operand1
, 0)))
1705 operand1
= replace_equiv_address (operand1
, tem
);
1707 /* Handle secondary reloads for loads/stores of FP registers from
1708 REG+D addresses where D does not fit in 5 or 14 bits, including
1709 (subreg (mem (addr))) cases, and reloads for other unsupported
1712 && FP_REG_P (operand0
)
1713 && (MEM_P (operand1
)
1714 || (GET_CODE (operand1
) == SUBREG
1715 && MEM_P (XEXP (operand1
, 0)))))
1719 if (GET_CODE (op1
) == SUBREG
)
1720 op1
= XEXP (op1
, 0);
1722 if (reg_plus_base_memory_operand (op1
, GET_MODE (op1
)))
1726 && INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1727 && !INT_5_BITS (XEXP (XEXP (op1
, 0), 1)))
1729 /* SCRATCH_REG will hold an address and maybe the actual data.
1730 We want it in WORD_MODE regardless of what mode it was
1731 originally given to us. */
1732 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1734 /* D might not fit in 14 bits either; for such cases load D
1735 into scratch reg. */
1736 if (!INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1738 emit_move_insn (scratch_reg
, XEXP (XEXP (op1
, 0), 1));
1739 emit_move_insn (scratch_reg
,
1740 gen_rtx_fmt_ee (GET_CODE (XEXP (op1
, 0)),
1742 XEXP (XEXP (op1
, 0), 0),
1746 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
1747 emit_insn (gen_rtx_SET (operand0
,
1748 replace_equiv_address (op1
, scratch_reg
)));
1752 else if ((!INT14_OK_STRICT
&& symbolic_memory_operand (op1
, VOIDmode
))
1753 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1
, 0))
1754 || IS_INDEX_ADDR_P (XEXP (op1
, 0)))
1756 /* Load memory address into SCRATCH_REG. */
1757 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1758 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
1759 emit_insn (gen_rtx_SET (operand0
,
1760 replace_equiv_address (op1
, scratch_reg
)));
1764 else if (scratch_reg
1765 && FP_REG_P (operand1
)
1766 && (MEM_P (operand0
)
1767 || (GET_CODE (operand0
) == SUBREG
1768 && MEM_P (XEXP (operand0
, 0)))))
1772 if (GET_CODE (op0
) == SUBREG
)
1773 op0
= XEXP (op0
, 0);
1775 if (reg_plus_base_memory_operand (op0
, GET_MODE (op0
)))
1779 && INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1780 && !INT_5_BITS (XEXP (XEXP (op0
, 0), 1)))
1782 /* SCRATCH_REG will hold an address and maybe the actual data.
1783 We want it in WORD_MODE regardless of what mode it was
1784 originally given to us. */
1785 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1787 /* D might not fit in 14 bits either; for such cases load D
1788 into scratch reg. */
1789 if (!INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1791 emit_move_insn (scratch_reg
, XEXP (XEXP (op0
, 0), 1));
1792 emit_move_insn (scratch_reg
,
1793 gen_rtx_fmt_ee (GET_CODE (XEXP (op0
, 0)),
1795 XEXP (XEXP (op0
, 0), 0),
1799 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
1800 emit_insn (gen_rtx_SET (replace_equiv_address (op0
, scratch_reg
),
1805 else if ((!INT14_OK_STRICT
&& symbolic_memory_operand (op0
, VOIDmode
))
1806 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0
, 0))
1807 || IS_INDEX_ADDR_P (XEXP (op0
, 0)))
1809 /* Load memory address into SCRATCH_REG. */
1810 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1811 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
1812 emit_insn (gen_rtx_SET (replace_equiv_address (op0
, scratch_reg
),
1817 /* Handle secondary reloads for loads of FP registers from constant
1818 expressions by forcing the constant into memory. For the most part,
1819 this is only necessary for SImode and DImode.
1821 Use scratch_reg to hold the address of the memory location. */
1822 else if (scratch_reg
1823 && CONSTANT_P (operand1
)
1824 && FP_REG_P (operand0
))
1826 rtx const_mem
, xoperands
[2];
1828 if (operand1
== CONST0_RTX (mode
))
1830 emit_insn (gen_rtx_SET (operand0
, operand1
));
1834 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1835 it in WORD_MODE regardless of what mode it was originally given
1837 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1839 /* Force the constant into memory and put the address of the
1840 memory location into scratch_reg. */
1841 const_mem
= force_const_mem (mode
, operand1
);
1842 xoperands
[0] = scratch_reg
;
1843 xoperands
[1] = XEXP (const_mem
, 0);
1844 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1846 /* Now load the destination register. */
1847 emit_insn (gen_rtx_SET (operand0
,
1848 replace_equiv_address (const_mem
, scratch_reg
)));
1851 /* Handle secondary reloads for SAR. These occur when trying to load
1852 the SAR from memory or a constant. */
1853 else if (scratch_reg
1854 && GET_CODE (operand0
) == REG
1855 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1856 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1857 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1859 /* D might not fit in 14 bits either; for such cases load D into
1861 if (GET_CODE (operand1
) == MEM
1862 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1864 /* We are reloading the address into the scratch register, so we
1865 want to make sure the scratch register is a full register. */
1866 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1868 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1869 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1872 XEXP (XEXP (operand1
, 0),
1876 /* Now we are going to load the scratch register from memory,
1877 we want to load it in the same width as the original MEM,
1878 which must be the same as the width of the ultimate destination,
1880 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1882 emit_move_insn (scratch_reg
,
1883 replace_equiv_address (operand1
, scratch_reg
));
1887 /* We want to load the scratch register using the same mode as
1888 the ultimate destination. */
1889 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1891 emit_move_insn (scratch_reg
, operand1
);
1894 /* And emit the insn to set the ultimate destination. We know that
1895 the scratch register has the same mode as the destination at this
1897 emit_move_insn (operand0
, scratch_reg
);
1901 /* Handle the most common case: storing into a register. */
1902 if (register_operand (operand0
, mode
))
1904 /* Legitimize TLS symbol references. This happens for references
1905 that aren't a legitimate constant. */
1906 if (PA_SYMBOL_REF_TLS_P (operand1
))
1907 operand1
= legitimize_tls_address (operand1
);
1909 if (register_operand (operand1
, mode
)
1910 || (GET_CODE (operand1
) == CONST_INT
1911 && pa_cint_ok_for_move (UINTVAL (operand1
)))
1912 || (operand1
== CONST0_RTX (mode
))
1913 || (GET_CODE (operand1
) == HIGH
1914 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1915 /* Only `general_operands' can come here, so MEM is ok. */
1916 || GET_CODE (operand1
) == MEM
)
1918 /* Various sets are created during RTL generation which don't
1919 have the REG_POINTER flag correctly set. After the CSE pass,
1920 instruction recognition can fail if we don't consistently
1921 set this flag when performing register copies. This should
1922 also improve the opportunities for creating insns that use
1923 unscaled indexing. */
1924 if (REG_P (operand0
) && REG_P (operand1
))
1926 if (REG_POINTER (operand1
)
1927 && !REG_POINTER (operand0
)
1928 && !HARD_REGISTER_P (operand0
))
1929 copy_reg_pointer (operand0
, operand1
);
1932 /* When MEMs are broken out, the REG_POINTER flag doesn't
1933 get set. In some cases, we can set the REG_POINTER flag
1934 from the declaration for the MEM. */
1935 if (REG_P (operand0
)
1936 && GET_CODE (operand1
) == MEM
1937 && !REG_POINTER (operand0
))
1939 tree decl
= MEM_EXPR (operand1
);
1941 /* Set the register pointer flag and register alignment
1942 if the declaration for this memory reference is a
1948 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1950 if (TREE_CODE (decl
) == COMPONENT_REF
)
1951 decl
= TREE_OPERAND (decl
, 1);
1953 type
= TREE_TYPE (decl
);
1954 type
= strip_array_types (type
);
1956 if (POINTER_TYPE_P (type
))
1957 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
1961 emit_insn (gen_rtx_SET (operand0
, operand1
));
1965 else if (GET_CODE (operand0
) == MEM
)
1967 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1968 && !(reload_in_progress
|| reload_completed
))
1970 rtx temp
= gen_reg_rtx (DFmode
);
1972 emit_insn (gen_rtx_SET (temp
, operand1
));
1973 emit_insn (gen_rtx_SET (operand0
, temp
));
1976 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1978 /* Run this case quickly. */
1979 emit_insn (gen_rtx_SET (operand0
, operand1
));
1982 if (! (reload_in_progress
|| reload_completed
))
1984 operands
[0] = validize_mem (operand0
);
1985 operands
[1] = operand1
= force_reg (mode
, operand1
);
1989 /* Simplify the source if we need to.
1990 Note we do have to handle function labels here, even though we do
1991 not consider them legitimate constants. Loop optimizations can
1992 call the emit_move_xxx with one as a source. */
1993 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1994 || (GET_CODE (operand1
) == HIGH
1995 && symbolic_operand (XEXP (operand1
, 0), mode
))
1996 || function_label_operand (operand1
, VOIDmode
)
1997 || tls_referenced_p (operand1
))
2001 if (GET_CODE (operand1
) == HIGH
)
2004 operand1
= XEXP (operand1
, 0);
2006 if (symbolic_operand (operand1
, mode
))
2008 /* Argh. The assembler and linker can't handle arithmetic
2011 So we force the plabel into memory, load operand0 from
2012 the memory location, then add in the constant part. */
2013 if ((GET_CODE (operand1
) == CONST
2014 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2015 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
2017 || function_label_operand (operand1
, VOIDmode
))
2019 rtx temp
, const_part
;
2021 /* Figure out what (if any) scratch register to use. */
2022 if (reload_in_progress
|| reload_completed
)
2024 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
2025 /* SCRATCH_REG will hold an address and maybe the actual
2026 data. We want it in WORD_MODE regardless of what mode it
2027 was originally given to us. */
2028 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2031 scratch_reg
= gen_reg_rtx (Pmode
);
2033 if (GET_CODE (operand1
) == CONST
)
2035 /* Save away the constant part of the expression. */
2036 const_part
= XEXP (XEXP (operand1
, 0), 1);
2037 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
2039 /* Force the function label into memory. */
2040 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2044 /* No constant part. */
2045 const_part
= NULL_RTX
;
2047 /* Force the function label into memory. */
2048 temp
= force_const_mem (mode
, operand1
);
2052 /* Get the address of the memory location. PIC-ify it if
2054 temp
= XEXP (temp
, 0);
2056 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2058 /* Put the address of the memory location into our destination
2061 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2063 /* Now load from the memory location into our destination
2065 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2066 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2068 /* And add back in the constant part. */
2069 if (const_part
!= NULL_RTX
)
2070 expand_inc (operand0
, const_part
);
2080 if (reload_in_progress
|| reload_completed
)
2082 temp
= scratch_reg
? scratch_reg
: operand0
;
2083 /* TEMP will hold an address and maybe the actual
2084 data. We want it in WORD_MODE regardless of what mode it
2085 was originally given to us. */
2086 temp
= force_mode (word_mode
, temp
);
2089 temp
= gen_reg_rtx (Pmode
);
2091 /* Force (const (plus (symbol) (const_int))) to memory
2092 if the const_int will not fit in 14 bits. Although
2093 this requires a relocation, the instruction sequence
2094 needed to load the value is shorter. */
2095 if (GET_CODE (operand1
) == CONST
2096 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2097 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2098 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2100 rtx x
, m
= force_const_mem (mode
, operand1
);
2102 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2103 x
= replace_equiv_address (m
, x
);
2104 insn
= emit_move_insn (operand0
, x
);
2108 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2109 if (REG_P (operand0
) && REG_P (operands
[1]))
2110 copy_reg_pointer (operand0
, operands
[1]);
2111 insn
= emit_move_insn (operand0
, operands
[1]);
2114 /* Put a REG_EQUAL note on this insn. */
2115 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2117 /* On the HPPA, references to data space are supposed to use dp,
2118 register 27, but showing it in the RTL inhibits various cse
2119 and loop optimizations. */
2124 if (reload_in_progress
|| reload_completed
)
2126 temp
= scratch_reg
? scratch_reg
: operand0
;
2127 /* TEMP will hold an address and maybe the actual
2128 data. We want it in WORD_MODE regardless of what mode it
2129 was originally given to us. */
2130 temp
= force_mode (word_mode
, temp
);
2133 temp
= gen_reg_rtx (mode
);
2135 /* Loading a SYMBOL_REF into a register makes that register
2136 safe to be used as the base in an indexed address.
2138 Don't mark hard registers though. That loses. */
2139 if (GET_CODE (operand0
) == REG
2140 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2141 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2142 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2143 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2146 set
= gen_rtx_SET (operand0
, temp
);
2148 set
= gen_rtx_SET (operand0
,
2149 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2151 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2157 else if (tls_referenced_p (operand1
))
2162 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2164 addend
= XEXP (XEXP (tmp
, 0), 1);
2165 tmp
= XEXP (XEXP (tmp
, 0), 0);
2168 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2169 tmp
= legitimize_tls_address (tmp
);
2172 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2173 tmp
= force_operand (tmp
, operands
[0]);
2177 else if (GET_CODE (operand1
) != CONST_INT
2178 || !pa_cint_ok_for_move (UINTVAL (operand1
)))
2183 HOST_WIDE_INT value
= 0;
2184 HOST_WIDE_INT insv
= 0;
2187 if (GET_CODE (operand1
) == CONST_INT
)
2188 value
= INTVAL (operand1
);
2191 && GET_CODE (operand1
) == CONST_INT
2192 && HOST_BITS_PER_WIDE_INT
> 32
2193 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2197 /* Extract the low order 32 bits of the value and sign extend.
2198 If the new value is the same as the original value, we can
2199 can use the original value as-is. If the new value is
2200 different, we use it and insert the most-significant 32-bits
2201 of the original value into the final result. */
2202 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2203 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2206 #if HOST_BITS_PER_WIDE_INT > 32
2207 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2211 operand1
= GEN_INT (nval
);
2215 if (reload_in_progress
|| reload_completed
)
2216 temp
= scratch_reg
? scratch_reg
: operand0
;
2218 temp
= gen_reg_rtx (mode
);
2220 /* We don't directly split DImode constants on 32-bit targets
2221 because PLUS uses an 11-bit immediate and the insn sequence
2222 generated is not as efficient as the one using HIGH/LO_SUM. */
2223 if (GET_CODE (operand1
) == CONST_INT
2224 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2225 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2228 /* Directly break constant into high and low parts. This
2229 provides better optimization opportunities because various
2230 passes recognize constants split with PLUS but not LO_SUM.
2231 We use a 14-bit signed low part except when the addition
2232 of 0x4000 to the high part might change the sign of the
2234 HOST_WIDE_INT low
= value
& 0x3fff;
2235 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2239 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2247 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2248 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2252 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2253 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2256 insn
= emit_move_insn (operands
[0], operands
[1]);
2258 /* Now insert the most significant 32 bits of the value
2259 into the register. When we don't have a second register
2260 available, it could take up to nine instructions to load
2261 a 64-bit integer constant. Prior to reload, we force
2262 constants that would take more than three instructions
2263 to load to the constant pool. During and after reload,
2264 we have to handle all possible values. */
2267 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2268 register and the value to be inserted is outside the
2269 range that can be loaded with three depdi instructions. */
2270 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2272 operand1
= GEN_INT (insv
);
2274 emit_insn (gen_rtx_SET (temp
,
2275 gen_rtx_HIGH (mode
, operand1
)));
2276 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2278 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2281 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2286 int len
= 5, pos
= 27;
2288 /* Insert the bits using the depdi instruction. */
2291 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2292 HOST_WIDE_INT sign
= v5
< 0;
2294 /* Left extend the insertion. */
2295 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2296 while (pos
> 0 && (insv
& 1) == sign
)
2298 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2304 insn
= emit_insn (gen_insvdi (operand0
,
2309 insn
= emit_insn (gen_insvsi (operand0
,
2314 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2320 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2325 /* Now have insn-emit do whatever it normally does. */
2329 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2330 it will need a link/runtime reloc). */
2333 pa_reloc_needed (tree exp
)
2337 switch (TREE_CODE (exp
))
2342 case POINTER_PLUS_EXPR
:
2345 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2346 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2350 case NON_LVALUE_EXPR
:
2351 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2357 unsigned HOST_WIDE_INT ix
;
2359 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2361 reloc
|= pa_reloc_needed (value
);
2375 /* Return the best assembler insn template
2376 for moving operands[1] into operands[0] as a fullword. */
2378 pa_singlemove_string (rtx
*operands
)
2380 HOST_WIDE_INT intval
;
2382 if (GET_CODE (operands
[0]) == MEM
)
2383 return "stw %r1,%0";
2384 if (GET_CODE (operands
[1]) == MEM
)
2386 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2390 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2392 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2394 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands
[1]), i
);
2396 operands
[1] = GEN_INT (i
);
2397 /* Fall through to CONST_INT case. */
2399 if (GET_CODE (operands
[1]) == CONST_INT
)
2401 intval
= INTVAL (operands
[1]);
2403 if (VAL_14_BITS_P (intval
))
2405 else if ((intval
& 0x7ff) == 0)
2406 return "ldil L'%1,%0";
2407 else if (pa_zdepi_cint_p (intval
))
2408 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2410 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2412 return "copy %1,%0";
2416 /* Compute position (in OP[1]) and width (in OP[2])
2417 useful for copying IMM to a register using the zdepi
2418 instructions. Store the immediate value to insert in OP[0]. */
2420 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2424 /* Find the least significant set bit in IMM. */
2425 for (lsb
= 0; lsb
< 32; lsb
++)
2432 /* Choose variants based on *sign* of the 5-bit field. */
2433 if ((imm
& 0x10) == 0)
2434 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2437 /* Find the width of the bitstring in IMM. */
2438 for (len
= 5; len
< 32 - lsb
; len
++)
2440 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2444 /* Sign extend IMM as a 5-bit value. */
2445 imm
= (imm
& 0xf) - 0x10;
2453 /* Compute position (in OP[1]) and width (in OP[2])
2454 useful for copying IMM to a register using the depdi,z
2455 instructions. Store the immediate value to insert in OP[0]. */
2458 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2460 int lsb
, len
, maxlen
;
2462 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2464 /* Find the least significant set bit in IMM. */
2465 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2472 /* Choose variants based on *sign* of the 5-bit field. */
2473 if ((imm
& 0x10) == 0)
2474 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2477 /* Find the width of the bitstring in IMM. */
2478 for (len
= 5; len
< maxlen
- lsb
; len
++)
2480 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2484 /* Extend length if host is narrow and IMM is negative. */
2485 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2488 /* Sign extend IMM as a 5-bit value. */
2489 imm
= (imm
& 0xf) - 0x10;
2497 /* Output assembler code to perform a doubleword move insn
2498 with operands OPERANDS. */
2501 pa_output_move_double (rtx
*operands
)
2503 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2505 rtx addreg0
= 0, addreg1
= 0;
2508 /* First classify both operands. */
2510 if (REG_P (operands
[0]))
2512 else if (offsettable_memref_p (operands
[0]))
2514 else if (GET_CODE (operands
[0]) == MEM
)
2519 if (REG_P (operands
[1]))
2521 else if (CONSTANT_P (operands
[1]))
2523 else if (offsettable_memref_p (operands
[1]))
2525 else if (GET_CODE (operands
[1]) == MEM
)
2530 /* Check for the cases that the operand constraints are not
2531 supposed to allow to happen. */
2532 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2534 /* Handle copies between general and floating registers. */
2536 if (optype0
== REGOP
&& optype1
== REGOP
2537 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2539 if (FP_REG_P (operands
[0]))
2541 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2542 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2543 return "{fldds|fldd} -16(%%sp),%0";
2547 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2548 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2549 return "{ldws|ldw} -12(%%sp),%R0";
2553 /* Handle auto decrementing and incrementing loads and stores
2554 specifically, since the structure of the function doesn't work
2555 for them without major modification. Do it better when we learn
2556 this port about the general inc/dec addressing of PA.
2557 (This was written by tege. Chide him if it doesn't work.) */
2559 if (optype0
== MEMOP
)
2561 /* We have to output the address syntax ourselves, since print_operand
2562 doesn't deal with the addresses we want to use. Fix this later. */
2564 rtx addr
= XEXP (operands
[0], 0);
2565 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2567 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2569 operands
[0] = XEXP (addr
, 0);
2570 gcc_assert (GET_CODE (operands
[1]) == REG
2571 && GET_CODE (operands
[0]) == REG
);
2573 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2575 /* No overlap between high target register and address
2576 register. (We do this in a non-obvious way to
2577 save a register file writeback) */
2578 if (GET_CODE (addr
) == POST_INC
)
2579 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2580 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2582 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2584 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2586 operands
[0] = XEXP (addr
, 0);
2587 gcc_assert (GET_CODE (operands
[1]) == REG
2588 && GET_CODE (operands
[0]) == REG
);
2590 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2591 /* No overlap between high target register and address
2592 register. (We do this in a non-obvious way to save a
2593 register file writeback) */
2594 if (GET_CODE (addr
) == PRE_INC
)
2595 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2596 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2599 if (optype1
== MEMOP
)
2601 /* We have to output the address syntax ourselves, since print_operand
2602 doesn't deal with the addresses we want to use. Fix this later. */
2604 rtx addr
= XEXP (operands
[1], 0);
2605 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2607 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2609 operands
[1] = XEXP (addr
, 0);
2610 gcc_assert (GET_CODE (operands
[0]) == REG
2611 && GET_CODE (operands
[1]) == REG
);
2613 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2615 /* No overlap between high target register and address
2616 register. (We do this in a non-obvious way to
2617 save a register file writeback) */
2618 if (GET_CODE (addr
) == POST_INC
)
2619 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2620 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2624 /* This is an undefined situation. We should load into the
2625 address register *and* update that register. Probably
2626 we don't need to handle this at all. */
2627 if (GET_CODE (addr
) == POST_INC
)
2628 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2629 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2632 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2634 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2636 operands
[1] = XEXP (addr
, 0);
2637 gcc_assert (GET_CODE (operands
[0]) == REG
2638 && GET_CODE (operands
[1]) == REG
);
2640 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2642 /* No overlap between high target register and address
2643 register. (We do this in a non-obvious way to
2644 save a register file writeback) */
2645 if (GET_CODE (addr
) == PRE_INC
)
2646 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2647 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2651 /* This is an undefined situation. We should load into the
2652 address register *and* update that register. Probably
2653 we don't need to handle this at all. */
2654 if (GET_CODE (addr
) == PRE_INC
)
2655 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2656 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2659 else if (GET_CODE (addr
) == PLUS
2660 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2664 /* Load address into left half of destination register. */
2665 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2666 xoperands
[1] = XEXP (addr
, 1);
2667 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2668 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2669 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2671 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2673 else if (GET_CODE (addr
) == PLUS
2674 && REG_P (XEXP (addr
, 0))
2675 && REG_P (XEXP (addr
, 1)))
2679 /* Load address into left half of destination register. */
2680 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2681 xoperands
[1] = XEXP (addr
, 0);
2682 xoperands
[2] = XEXP (addr
, 1);
2683 output_asm_insn ("{addl|add,l} %1,%2,%0",
2685 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2689 /* If an operand is an unoffsettable memory ref, find a register
2690 we can increment temporarily to make it refer to the second word. */
2692 if (optype0
== MEMOP
)
2693 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2695 if (optype1
== MEMOP
)
2696 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2698 /* Ok, we can do one word at a time.
2699 Normally we do the low-numbered word first.
2701 In either case, set up in LATEHALF the operands to use
2702 for the high-numbered word and in some cases alter the
2703 operands in OPERANDS to be suitable for the low-numbered word. */
2705 if (optype0
== REGOP
)
2706 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2707 else if (optype0
== OFFSOP
)
2708 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2710 latehalf
[0] = operands
[0];
2712 if (optype1
== REGOP
)
2713 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2714 else if (optype1
== OFFSOP
)
2715 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
2716 else if (optype1
== CNSTOP
)
2718 if (GET_CODE (operands
[1]) == HIGH
)
2720 operands
[1] = XEXP (operands
[1], 0);
2723 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2726 latehalf
[1] = operands
[1];
2728 /* If the first move would clobber the source of the second one,
2729 do them in the other order.
2731 This can happen in two cases:
2733 mem -> register where the first half of the destination register
2734 is the same register used in the memory's address. Reload
2735 can create such insns.
2737 mem in this case will be either register indirect or register
2738 indirect plus a valid offset.
2740 register -> register move where REGNO(dst) == REGNO(src + 1)
2741 someone (Tim/Tege?) claimed this can happen for parameter loads.
2743 Handle mem -> register case first. */
2744 if (optype0
== REGOP
2745 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2746 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
2748 /* Do the late half first. */
2750 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2751 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2755 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2756 return pa_singlemove_string (operands
);
2759 /* Now handle register -> register case. */
2760 if (optype0
== REGOP
&& optype1
== REGOP
2761 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2763 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2764 return pa_singlemove_string (operands
);
2767 /* Normal case: do the two words, low-numbered first. */
2769 output_asm_insn (pa_singlemove_string (operands
), operands
);
2771 /* Make any unoffsettable addresses point at high-numbered word. */
2773 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2775 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2777 /* Do high-numbered word. */
2779 output_asm_insn ("ldil L'%1,%0", latehalf
);
2781 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2783 /* Undo the adds we just did. */
2785 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2787 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2793 pa_output_fp_move_double (rtx
*operands
)
2795 if (FP_REG_P (operands
[0]))
2797 if (FP_REG_P (operands
[1])
2798 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2799 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2801 output_asm_insn ("fldd%F1 %1,%0", operands
);
2803 else if (FP_REG_P (operands
[1]))
2805 output_asm_insn ("fstd%F0 %1,%0", operands
);
2811 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2813 /* This is a pain. You have to be prepared to deal with an
2814 arbitrary address here including pre/post increment/decrement.
2816 so avoid this in the MD. */
2817 gcc_assert (GET_CODE (operands
[0]) == REG
);
2819 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2820 xoperands
[0] = operands
[0];
2821 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2826 /* Return a REG that occurs in ADDR with coefficient 1.
2827 ADDR can be effectively incremented by incrementing REG. */
2830 find_addr_reg (rtx addr
)
2832 while (GET_CODE (addr
) == PLUS
)
2834 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2835 addr
= XEXP (addr
, 0);
2836 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2837 addr
= XEXP (addr
, 1);
2838 else if (CONSTANT_P (XEXP (addr
, 0)))
2839 addr
= XEXP (addr
, 1);
2840 else if (CONSTANT_P (XEXP (addr
, 1)))
2841 addr
= XEXP (addr
, 0);
2845 gcc_assert (GET_CODE (addr
) == REG
);
2849 /* Emit code to perform a block move.
2851 OPERANDS[0] is the destination pointer as a REG, clobbered.
2852 OPERANDS[1] is the source pointer as a REG, clobbered.
2853 OPERANDS[2] is a register for temporary storage.
2854 OPERANDS[3] is a register for temporary storage.
2855 OPERANDS[4] is the size as a CONST_INT
2856 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2857 OPERANDS[6] is another temporary register. */
2860 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2862 int align
= INTVAL (operands
[5]);
2863 unsigned long n_bytes
= INTVAL (operands
[4]);
2865 /* We can't move more than a word at a time because the PA
2866 has no longer integer move insns. (Could use fp mem ops?) */
2867 if (align
> (TARGET_64BIT
? 8 : 4))
2868 align
= (TARGET_64BIT
? 8 : 4);
2870 /* Note that we know each loop below will execute at least twice
2871 (else we would have open-coded the copy). */
2875 /* Pre-adjust the loop counter. */
2876 operands
[4] = GEN_INT (n_bytes
- 16);
2877 output_asm_insn ("ldi %4,%2", operands
);
2880 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2881 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2882 output_asm_insn ("std,ma %3,8(%0)", operands
);
2883 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2884 output_asm_insn ("std,ma %6,8(%0)", operands
);
2886 /* Handle the residual. There could be up to 7 bytes of
2887 residual to copy! */
2888 if (n_bytes
% 16 != 0)
2890 operands
[4] = GEN_INT (n_bytes
% 8);
2891 if (n_bytes
% 16 >= 8)
2892 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2893 if (n_bytes
% 8 != 0)
2894 output_asm_insn ("ldd 0(%1),%6", operands
);
2895 if (n_bytes
% 16 >= 8)
2896 output_asm_insn ("std,ma %3,8(%0)", operands
);
2897 if (n_bytes
% 8 != 0)
2898 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2903 /* Pre-adjust the loop counter. */
2904 operands
[4] = GEN_INT (n_bytes
- 8);
2905 output_asm_insn ("ldi %4,%2", operands
);
2908 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2909 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2910 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2911 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2912 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2914 /* Handle the residual. There could be up to 7 bytes of
2915 residual to copy! */
2916 if (n_bytes
% 8 != 0)
2918 operands
[4] = GEN_INT (n_bytes
% 4);
2919 if (n_bytes
% 8 >= 4)
2920 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2921 if (n_bytes
% 4 != 0)
2922 output_asm_insn ("ldw 0(%1),%6", operands
);
2923 if (n_bytes
% 8 >= 4)
2924 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2925 if (n_bytes
% 4 != 0)
2926 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2931 /* Pre-adjust the loop counter. */
2932 operands
[4] = GEN_INT (n_bytes
- 4);
2933 output_asm_insn ("ldi %4,%2", operands
);
2936 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2937 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2938 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2939 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2940 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2942 /* Handle the residual. */
2943 if (n_bytes
% 4 != 0)
2945 if (n_bytes
% 4 >= 2)
2946 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2947 if (n_bytes
% 2 != 0)
2948 output_asm_insn ("ldb 0(%1),%6", operands
);
2949 if (n_bytes
% 4 >= 2)
2950 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2951 if (n_bytes
% 2 != 0)
2952 output_asm_insn ("stb %6,0(%0)", operands
);
2957 /* Pre-adjust the loop counter. */
2958 operands
[4] = GEN_INT (n_bytes
- 2);
2959 output_asm_insn ("ldi %4,%2", operands
);
2962 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2963 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2964 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2965 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2966 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2968 /* Handle the residual. */
2969 if (n_bytes
% 2 != 0)
2971 output_asm_insn ("ldb 0(%1),%3", operands
);
2972 output_asm_insn ("stb %3,0(%0)", operands
);
2981 /* Count the number of insns necessary to handle this block move.
2983 Basic structure is the same as emit_block_move, except that we
2984 count insns rather than emit them. */
2987 compute_movmem_length (rtx_insn
*insn
)
2989 rtx pat
= PATTERN (insn
);
2990 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2991 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2992 unsigned int n_insns
= 0;
2994 /* We can't move more than four bytes at a time because the PA
2995 has no longer integer move insns. (Could use fp mem ops?) */
2996 if (align
> (TARGET_64BIT
? 8 : 4))
2997 align
= (TARGET_64BIT
? 8 : 4);
2999 /* The basic copying loop. */
3003 if (n_bytes
% (2 * align
) != 0)
3005 if ((n_bytes
% (2 * align
)) >= align
)
3008 if ((n_bytes
% align
) != 0)
3012 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3016 /* Emit code to perform a block clear.
3018 OPERANDS[0] is the destination pointer as a REG, clobbered.
3019 OPERANDS[1] is a register for temporary storage.
3020 OPERANDS[2] is the size as a CONST_INT
3021 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3024 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
3026 int align
= INTVAL (operands
[3]);
3027 unsigned long n_bytes
= INTVAL (operands
[2]);
3029 /* We can't clear more than a word at a time because the PA
3030 has no longer integer move insns. */
3031 if (align
> (TARGET_64BIT
? 8 : 4))
3032 align
= (TARGET_64BIT
? 8 : 4);
3034 /* Note that we know each loop below will execute at least twice
3035 (else we would have open-coded the copy). */
3039 /* Pre-adjust the loop counter. */
3040 operands
[2] = GEN_INT (n_bytes
- 16);
3041 output_asm_insn ("ldi %2,%1", operands
);
3044 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3045 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
3046 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3048 /* Handle the residual. There could be up to 7 bytes of
3049 residual to copy! */
3050 if (n_bytes
% 16 != 0)
3052 operands
[2] = GEN_INT (n_bytes
% 8);
3053 if (n_bytes
% 16 >= 8)
3054 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3055 if (n_bytes
% 8 != 0)
3056 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3061 /* Pre-adjust the loop counter. */
3062 operands
[2] = GEN_INT (n_bytes
- 8);
3063 output_asm_insn ("ldi %2,%1", operands
);
3066 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3067 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3068 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3070 /* Handle the residual. There could be up to 7 bytes of
3071 residual to copy! */
3072 if (n_bytes
% 8 != 0)
3074 operands
[2] = GEN_INT (n_bytes
% 4);
3075 if (n_bytes
% 8 >= 4)
3076 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3077 if (n_bytes
% 4 != 0)
3078 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3083 /* Pre-adjust the loop counter. */
3084 operands
[2] = GEN_INT (n_bytes
- 4);
3085 output_asm_insn ("ldi %2,%1", operands
);
3088 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3089 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3090 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3092 /* Handle the residual. */
3093 if (n_bytes
% 4 != 0)
3095 if (n_bytes
% 4 >= 2)
3096 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3097 if (n_bytes
% 2 != 0)
3098 output_asm_insn ("stb %%r0,0(%0)", operands
);
3103 /* Pre-adjust the loop counter. */
3104 operands
[2] = GEN_INT (n_bytes
- 2);
3105 output_asm_insn ("ldi %2,%1", operands
);
3108 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3109 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3110 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3112 /* Handle the residual. */
3113 if (n_bytes
% 2 != 0)
3114 output_asm_insn ("stb %%r0,0(%0)", operands
);
3123 /* Count the number of insns necessary to handle this block move.
3125 Basic structure is the same as emit_block_move, except that we
3126 count insns rather than emit them. */
3129 compute_clrmem_length (rtx_insn
*insn
)
3131 rtx pat
= PATTERN (insn
);
3132 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3133 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3134 unsigned int n_insns
= 0;
3136 /* We can't clear more than a word at a time because the PA
3137 has no longer integer move insns. */
3138 if (align
> (TARGET_64BIT
? 8 : 4))
3139 align
= (TARGET_64BIT
? 8 : 4);
3141 /* The basic loop. */
3145 if (n_bytes
% (2 * align
) != 0)
3147 if ((n_bytes
% (2 * align
)) >= align
)
3150 if ((n_bytes
% align
) != 0)
3154 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3160 pa_output_and (rtx
*operands
)
3162 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3164 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3165 int ls0
, ls1
, ms0
, p
, len
;
3167 for (ls0
= 0; ls0
< 32; ls0
++)
3168 if ((mask
& (1 << ls0
)) == 0)
3171 for (ls1
= ls0
; ls1
< 32; ls1
++)
3172 if ((mask
& (1 << ls1
)) != 0)
3175 for (ms0
= ls1
; ms0
< 32; ms0
++)
3176 if ((mask
& (1 << ms0
)) == 0)
3179 gcc_assert (ms0
== 32);
3187 operands
[2] = GEN_INT (len
);
3188 return "{extru|extrw,u} %1,31,%2,%0";
3192 /* We could use this `depi' for the case above as well, but `depi'
3193 requires one more register file access than an `extru'. */
3198 operands
[2] = GEN_INT (p
);
3199 operands
[3] = GEN_INT (len
);
3200 return "{depi|depwi} 0,%2,%3,%0";
3204 return "and %1,%2,%0";
3207 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3208 storing the result in operands[0]. */
3210 pa_output_64bit_and (rtx
*operands
)
3212 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3214 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3215 int ls0
, ls1
, ms0
, p
, len
;
3217 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3218 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3221 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3222 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3225 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3226 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3229 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3231 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3237 operands
[2] = GEN_INT (len
);
3238 return "extrd,u %1,63,%2,%0";
3242 /* We could use this `depi' for the case above as well, but `depi'
3243 requires one more register file access than an `extru'. */
3248 operands
[2] = GEN_INT (p
);
3249 operands
[3] = GEN_INT (len
);
3250 return "depdi 0,%2,%3,%0";
3254 return "and %1,%2,%0";
3258 pa_output_ior (rtx
*operands
)
3260 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3261 int bs0
, bs1
, p
, len
;
3263 if (INTVAL (operands
[2]) == 0)
3264 return "copy %1,%0";
3266 for (bs0
= 0; bs0
< 32; bs0
++)
3267 if ((mask
& (1 << bs0
)) != 0)
3270 for (bs1
= bs0
; bs1
< 32; bs1
++)
3271 if ((mask
& (1 << bs1
)) == 0)
3274 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3279 operands
[2] = GEN_INT (p
);
3280 operands
[3] = GEN_INT (len
);
3281 return "{depi|depwi} -1,%2,%3,%0";
3284 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3285 storing the result in operands[0]. */
3287 pa_output_64bit_ior (rtx
*operands
)
3289 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3290 int bs0
, bs1
, p
, len
;
3292 if (INTVAL (operands
[2]) == 0)
3293 return "copy %1,%0";
3295 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3296 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3299 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3300 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3303 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3304 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3309 operands
[2] = GEN_INT (p
);
3310 operands
[3] = GEN_INT (len
);
3311 return "depdi -1,%2,%3,%0";
3314 /* Target hook for assembling integer objects. This code handles
3315 aligned SI and DI integers specially since function references
3316 must be preceded by P%. */
3319 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3324 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3325 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3326 calling output_addr_const. Otherwise, it may call assemble_external
3327 in the midst of outputing the assembler code for the SYMBOL_REF.
3328 We restore the SYMBOL_REF_DECL after the output is done. */
3329 if (GET_CODE (x
) == SYMBOL_REF
)
3331 decl
= SYMBOL_REF_DECL (x
);
3334 assemble_external (decl
);
3335 SET_SYMBOL_REF_DECL (x
, NULL
);
3339 if (size
== UNITS_PER_WORD
3341 && function_label_operand (x
, VOIDmode
))
3343 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3345 /* We don't want an OPD when generating fast indirect calls. */
3346 if (!TARGET_FAST_INDIRECT_CALLS
)
3347 fputs ("P%", asm_out_file
);
3349 output_addr_const (asm_out_file
, x
);
3350 fputc ('\n', asm_out_file
);
3354 result
= default_assemble_integer (x
, size
, aligned_p
);
3357 SET_SYMBOL_REF_DECL (x
, decl
);
/* Output an ascii string.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      /* We can't directly output 8 bytes at a time.  Do it 4 bytes at a
	 time, storing the escaped representation in PARTIAL_OUTPUT.  */
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd =  c  / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd =  c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Start a new .STRING directive before the assembler's input
	 line limit (conservatively 243 escaped characters) is hit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3418 /* Try to rewrite floating point comparisons & branches to avoid
3419 useless add,tr insns.
3421 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3422 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3423 first attempt to remove useless add,tr insns. It is zero
3424 for the second pass as reorg sometimes leaves bogus REG_DEAD
3427 When CHECK_NOTES is zero we can only eliminate add,tr insns
3428 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3431 remove_useless_addtr_insns (int check_notes
)
3434 static int pass
= 0;
3436 /* This is fairly cheap, so always run it when optimizing. */
3440 int fbranch_count
= 0;
3442 /* Walk all the insns in this function looking for fcmp & fbranch
3443 instructions. Keep track of how many of each we find. */
3444 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3448 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3449 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3452 tmp
= PATTERN (insn
);
3454 /* It must be a set. */
3455 if (GET_CODE (tmp
) != SET
)
3458 /* If the destination is CCFP, then we've found an fcmp insn. */
3459 tmp
= SET_DEST (tmp
);
3460 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3466 tmp
= PATTERN (insn
);
3467 /* If this is an fbranch instruction, bump the fbranch counter. */
3468 if (GET_CODE (tmp
) == SET
3469 && SET_DEST (tmp
) == pc_rtx
3470 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3471 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3472 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3473 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3481 /* Find all floating point compare + branch insns. If possible,
3482 reverse the comparison & the branch to avoid add,tr insns. */
3483 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3488 /* Ignore anything that isn't an INSN. */
3489 if (! NONJUMP_INSN_P (insn
))
3492 tmp
= PATTERN (insn
);
3494 /* It must be a set. */
3495 if (GET_CODE (tmp
) != SET
)
3498 /* The destination must be CCFP, which is register zero. */
3499 tmp
= SET_DEST (tmp
);
3500 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3503 /* INSN should be a set of CCFP.
3505 See if the result of this insn is used in a reversed FP
3506 conditional branch. If so, reverse our condition and
3507 the branch. Doing so avoids useless add,tr insns. */
3508 next
= next_insn (insn
);
3511 /* Jumps, calls and labels stop our search. */
3512 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3515 /* As does another fcmp insn. */
3516 if (NONJUMP_INSN_P (next
)
3517 && GET_CODE (PATTERN (next
)) == SET
3518 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3519 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3522 next
= next_insn (next
);
3525 /* Is NEXT_INSN a branch? */
3526 if (next
&& JUMP_P (next
))
3528 rtx pattern
= PATTERN (next
);
3530 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3531 and CCFP dies, then reverse our conditional and the branch
3532 to avoid the add,tr. */
3533 if (GET_CODE (pattern
) == SET
3534 && SET_DEST (pattern
) == pc_rtx
3535 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3536 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3537 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3538 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3539 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3540 && (fcmp_count
== fbranch_count
3542 && find_regno_note (next
, REG_DEAD
, 0))))
3544 /* Reverse the branch. */
3545 tmp
= XEXP (SET_SRC (pattern
), 1);
3546 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3547 XEXP (SET_SRC (pattern
), 2) = tmp
;
3548 INSN_CODE (next
) = -1;
3550 /* Reverse our condition. */
3551 tmp
= PATTERN (insn
);
3552 PUT_CODE (XEXP (tmp
, 1),
3553 (reverse_condition_maybe_unordered
3554 (GET_CODE (XEXP (tmp
, 1)))));
3564 /* You may have trouble believing this, but this is the 32 bit HP-PA
3569 Variable arguments (optional; any number may be allocated)
3571 SP-(4*(N+9)) arg word N
3576 Fixed arguments (must be allocated; may remain unused)
3585 SP-32 External Data Pointer (DP)
3587 SP-24 External/stub RP (RP')
3591 SP-8 Calling Stub RP (RP'')
3596 SP-0 Stack Pointer (points to next available address)
3600 /* This function saves registers as follows. Registers marked with ' are
3601 this function's registers (as opposed to the previous function's).
3602 If a frame_pointer isn't needed, r4 is saved as a general register;
3603 the space for the frame pointer is still allocated, though, to keep
3609 SP (FP') Previous FP
3610 SP + 4 Alignment filler (sigh)
3611 SP + 8 Space for locals reserved here.
3615 SP + n All call saved register used.
3619 SP + o All call saved fp registers used.
3623 SP + p (SP') points to next available address.
3627 /* Global variables set by output_function_prologue(). */
3628 /* Size of frame. Need to know this to emit return insns from
3630 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3631 static int save_fregs
;
3633 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3634 Handle case where DISP > 8k by using the add_high_const patterns.
3636 Note in DISP > 8k case, we will leave the high part of the address
3637 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3640 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3642 rtx dest
, src
, basereg
;
3645 src
= gen_rtx_REG (word_mode
, reg
);
3646 basereg
= gen_rtx_REG (Pmode
, base
);
3647 if (VAL_14_BITS_P (disp
))
3649 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3650 insn
= emit_move_insn (dest
, src
);
3652 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3654 rtx delta
= GEN_INT (disp
);
3655 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3657 emit_move_insn (tmpreg
, delta
);
3658 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3661 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3662 gen_rtx_SET (tmpreg
,
3663 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3664 RTX_FRAME_RELATED_P (insn
) = 1;
3666 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3667 insn
= emit_move_insn (dest
, src
);
3671 rtx delta
= GEN_INT (disp
);
3672 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3673 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3675 emit_move_insn (tmpreg
, high
);
3676 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3677 insn
= emit_move_insn (dest
, src
);
3679 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3680 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3681 gen_rtx_PLUS (word_mode
,
3688 RTX_FRAME_RELATED_P (insn
) = 1;
3691 /* Emit RTL to store REG at the memory location specified by BASE and then
3692 add MOD to BASE. MOD must be <= 8k. */
3695 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3697 rtx basereg
, srcreg
, delta
;
3700 gcc_assert (VAL_14_BITS_P (mod
));
3702 basereg
= gen_rtx_REG (Pmode
, base
);
3703 srcreg
= gen_rtx_REG (word_mode
, reg
);
3704 delta
= GEN_INT (mod
);
3706 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3709 RTX_FRAME_RELATED_P (insn
) = 1;
3711 /* RTX_FRAME_RELATED_P must be set on each frame related set
3712 in a parallel with more than one element. */
3713 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3714 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3718 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3719 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3720 whether to add a frame note or not.
3722 In the DISP > 8k case, we leave the high part of the address in %r1.
3723 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3726 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
3730 if (VAL_14_BITS_P (disp
))
3732 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3733 plus_constant (Pmode
,
3734 gen_rtx_REG (Pmode
, base
), disp
));
3736 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3738 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3739 rtx delta
= GEN_INT (disp
);
3740 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3742 emit_move_insn (tmpreg
, delta
);
3743 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3744 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3746 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3747 gen_rtx_SET (tmpreg
,
3748 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3752 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3753 rtx delta
= GEN_INT (disp
);
3754 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3756 emit_move_insn (tmpreg
,
3757 gen_rtx_PLUS (Pmode
, basereg
,
3758 gen_rtx_HIGH (Pmode
, delta
)));
3759 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3760 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3763 if (DO_FRAME_NOTES
&& note
)
3764 RTX_FRAME_RELATED_P (insn
) = 1;
3768 pa_compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3773 /* The code in pa_expand_prologue and pa_expand_epilogue must
3774 be consistent with the rounding and size calculation done here.
3775 Change them at the same time. */
3777 /* We do our own stack alignment. First, round the size of the
3778 stack locals up to a word boundary. */
3779 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3781 /* Space for previous frame pointer + filler. If any frame is
3782 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3783 waste some space here for the sake of HP compatibility. The
3784 first slot is only used when the frame pointer is needed. */
3785 if (size
|| frame_pointer_needed
)
3786 size
+= STARTING_FRAME_OFFSET
;
3788 /* If the current function calls __builtin_eh_return, then we need
3789 to allocate stack space for registers that will hold data for
3790 the exception handler. */
3791 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3795 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3797 size
+= i
* UNITS_PER_WORD
;
3800 /* Account for space used by the callee general register saves. */
3801 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3802 if (df_regs_ever_live_p (i
))
3803 size
+= UNITS_PER_WORD
;
3805 /* Account for space used by the callee floating point register saves. */
3806 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3807 if (df_regs_ever_live_p (i
)
3808 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3812 /* We always save both halves of the FP register, so always
3813 increment the frame size by 8 bytes. */
3817 /* If any of the floating registers are saved, account for the
3818 alignment needed for the floating point register save block. */
3821 size
= (size
+ 7) & ~7;
3826 /* The various ABIs include space for the outgoing parameters in the
3827 size of the current function's stack frame. We don't need to align
3828 for the outgoing arguments as their alignment is set by the final
3829 rounding for the frame as a whole. */
3830 size
+= crtl
->outgoing_args_size
;
3832 /* Allocate space for the fixed frame marker. This space must be
3833 allocated for any function that makes calls or allocates
3835 if (!crtl
->is_leaf
|| size
)
3836 size
+= TARGET_64BIT
? 48 : 32;
3838 /* Finally, round to the preferred stack boundary. */
3839 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3840 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3843 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3844 of memory. If any fpu reg is used in the function, we allocate
3845 such a block here, at the bottom of the frame, just in case it's needed.
3847 If this function is a leaf procedure, then we may choose not
3848 to do a "save" insn. The decision about whether or not
3849 to do this is made in regclass.c. */
3852 pa_output_function_prologue (FILE *file
)
3854 /* The function's label and associated .PROC must never be
3855 separated and must be output *after* any profiling declarations
3856 to avoid changing spaces/subspaces within a procedure. */
3857 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3858 fputs ("\t.PROC\n", file
);
3860 /* pa_expand_prologue does the dirty work now. We just need
3861 to output the assembler directives which denote the start
3863 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3865 fputs (",NO_CALLS", file
);
3867 fputs (",CALLS", file
);
3869 fputs (",SAVE_RP", file
);
3871 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3872 at the beginning of the frame and that it is used as the frame
3873 pointer for the frame. We do this because our current frame
3874 layout doesn't conform to that specified in the HP runtime
3875 documentation and we need a way to indicate to programs such as
3876 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3877 isn't used by HP compilers but is supported by the assembler.
3878 However, SAVE_SP is supposed to indicate that the previous stack
3879 pointer has been saved in the frame marker. */
3880 if (frame_pointer_needed
)
3881 fputs (",SAVE_SP", file
);
3883 /* Pass on information about the number of callee register saves
3884 performed in the prologue.
3886 The compiler is supposed to pass the highest register number
3887 saved, the assembler then has to adjust that number before
3888 entering it into the unwind descriptor (to account for any
3889 caller saved registers with lower register numbers than the
3890 first callee saved register). */
3892 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3895 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3897 fputs ("\n\t.ENTRY\n", file
);
3899 remove_useless_addtr_insns (0);
3903 pa_expand_prologue (void)
3905 int merge_sp_adjust_with_store
= 0;
3906 HOST_WIDE_INT size
= get_frame_size ();
3907 HOST_WIDE_INT offset
;
3916 /* Compute total size for frame pointer, filler, locals and rounding to
3917 the next word boundary. Similar code appears in pa_compute_frame_size
3918 and must be changed in tandem with this code. */
3919 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3920 if (local_fsize
|| frame_pointer_needed
)
3921 local_fsize
+= STARTING_FRAME_OFFSET
;
3923 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3924 if (flag_stack_usage_info
)
3925 current_function_static_stack_size
= actual_fsize
;
3927 /* Compute a few things we will use often. */
3928 tmpreg
= gen_rtx_REG (word_mode
, 1);
3930 /* Save RP first. The calling conventions manual states RP will
3931 always be stored into the caller's frame at sp - 20 or sp - 16
3932 depending on which ABI is in use. */
3933 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3935 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3941 /* Allocate the local frame and set up the frame pointer if needed. */
3942 if (actual_fsize
!= 0)
3944 if (frame_pointer_needed
)
3946 /* Copy the old frame pointer temporarily into %r1. Set up the
3947 new stack pointer, then store away the saved old frame pointer
3948 into the stack at sp and at the same time update the stack
3949 pointer by actual_fsize bytes. Two versions, first
3950 handles small (<8k) frames. The second handles large (>=8k)
3952 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3954 RTX_FRAME_RELATED_P (insn
) = 1;
3956 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3958 RTX_FRAME_RELATED_P (insn
) = 1;
3960 if (VAL_14_BITS_P (actual_fsize
))
3961 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3964 /* It is incorrect to store the saved frame pointer at *sp,
3965 then increment sp (writes beyond the current stack boundary).
3967 So instead use stwm to store at *sp and post-increment the
3968 stack pointer as an atomic operation. Then increment sp to
3969 finish allocating the new frame. */
3970 HOST_WIDE_INT adjust1
= 8192 - 64;
3971 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3973 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3974 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3978 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3979 we need to store the previous stack pointer (frame pointer)
3980 into the frame marker on targets that use the HP unwind
3981 library. This allows the HP unwind library to be used to
3982 unwind GCC frames. However, we are not fully compatible
3983 with the HP library because our frame layout differs from
3984 that specified in the HP runtime specification.
3986 We don't want a frame note on this instruction as the frame
3987 marker moves during dynamic stack allocation.
3989 This instruction also serves as a blockage to prevent
3990 register spills from being scheduled before the stack
3991 pointer is raised. This is necessary as we store
3992 registers using the frame pointer as a base register,
3993 and the frame pointer is set before sp is raised. */
3994 if (TARGET_HPUX_UNWIND_LIBRARY
)
3996 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3997 GEN_INT (TARGET_64BIT
? -8 : -4));
3999 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
4000 hard_frame_pointer_rtx
);
4003 emit_insn (gen_blockage ());
4005 /* no frame pointer needed. */
4008 /* In some cases we can perform the first callee register save
4009 and allocating the stack frame at the same time. If so, just
4010 make a note of it and defer allocating the frame until saving
4011 the callee registers. */
4012 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
4013 merge_sp_adjust_with_store
= 1;
4014 /* Can not optimize. Adjust the stack frame by actual_fsize
4017 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4022 /* Normal register save.
4024 Do not save the frame pointer in the frame_pointer_needed case. It
4025 was done earlier. */
4026 if (frame_pointer_needed
)
4028 offset
= local_fsize
;
4030 /* Saving the EH return data registers in the frame is the simplest
4031 way to get the frame unwind information emitted. We put them
4032 just before the general registers. */
4033 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4035 unsigned int i
, regno
;
4039 regno
= EH_RETURN_DATA_REGNO (i
);
4040 if (regno
== INVALID_REGNUM
)
4043 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4044 offset
+= UNITS_PER_WORD
;
4048 for (i
= 18; i
>= 4; i
--)
4049 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4051 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4052 offset
+= UNITS_PER_WORD
;
4055 /* Account for %r3 which is saved in a special place. */
4058 /* No frame pointer needed. */
4061 offset
= local_fsize
- actual_fsize
;
4063 /* Saving the EH return data registers in the frame is the simplest
4064 way to get the frame unwind information emitted. */
4065 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4067 unsigned int i
, regno
;
4071 regno
= EH_RETURN_DATA_REGNO (i
);
4072 if (regno
== INVALID_REGNUM
)
4075 /* If merge_sp_adjust_with_store is nonzero, then we can
4076 optimize the first save. */
4077 if (merge_sp_adjust_with_store
)
4079 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4080 merge_sp_adjust_with_store
= 0;
4083 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4084 offset
+= UNITS_PER_WORD
;
4088 for (i
= 18; i
>= 3; i
--)
4089 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4091 /* If merge_sp_adjust_with_store is nonzero, then we can
4092 optimize the first GR save. */
4093 if (merge_sp_adjust_with_store
)
4095 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4096 merge_sp_adjust_with_store
= 0;
4099 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4100 offset
+= UNITS_PER_WORD
;
4104 /* If we wanted to merge the SP adjustment with a GR save, but we never
4105 did any GR saves, then just emit the adjustment here. */
4106 if (merge_sp_adjust_with_store
)
4107 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4111 /* The hppa calling conventions say that %r19, the pic offset
4112 register, is saved at sp - 32 (in this function's frame)
4113 when generating PIC code. FIXME: What is the correct thing
4114 to do for functions which make no calls and allocate no
4115 frame? Do we need to allocate a frame, or can we just omit
4116 the save? For now we'll just omit the save.
4118 We don't want a note on this insn as the frame marker can
4119 move if there is a dynamic stack allocation. */
4120 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4122 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4124 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4128 /* Align pointer properly (doubleword boundary). */
4129 offset
= (offset
+ 7) & ~7;
4131 /* Floating point register store. */
4136 /* First get the frame or stack pointer to the start of the FP register
4138 if (frame_pointer_needed
)
4140 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4141 base
= hard_frame_pointer_rtx
;
4145 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4146 base
= stack_pointer_rtx
;
4149 /* Now actually save the FP registers. */
4150 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4152 if (df_regs_ever_live_p (i
)
4153 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4157 addr
= gen_rtx_MEM (DFmode
,
4158 gen_rtx_POST_INC (word_mode
, tmpreg
));
4159 reg
= gen_rtx_REG (DFmode
, i
);
4160 insn
= emit_move_insn (addr
, reg
);
4163 RTX_FRAME_RELATED_P (insn
) = 1;
4166 rtx mem
= gen_rtx_MEM (DFmode
,
4167 plus_constant (Pmode
, base
,
4169 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4170 gen_rtx_SET (mem
, reg
));
4174 rtx meml
= gen_rtx_MEM (SFmode
,
4175 plus_constant (Pmode
, base
,
4177 rtx memr
= gen_rtx_MEM (SFmode
,
4178 plus_constant (Pmode
, base
,
4180 rtx regl
= gen_rtx_REG (SFmode
, i
);
4181 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4182 rtx setl
= gen_rtx_SET (meml
, regl
);
4183 rtx setr
= gen_rtx_SET (memr
, regr
);
4186 RTX_FRAME_RELATED_P (setl
) = 1;
4187 RTX_FRAME_RELATED_P (setr
) = 1;
4188 vec
= gen_rtvec (2, setl
, setr
);
4189 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4190 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4193 offset
+= GET_MODE_SIZE (DFmode
);
4200 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4201 Handle case where DISP > 8k by using the add_high_const patterns. */
4204 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4206 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4207 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4210 if (VAL_14_BITS_P (disp
))
4211 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
4212 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4214 rtx delta
= GEN_INT (disp
);
4215 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4217 emit_move_insn (tmpreg
, delta
);
4218 if (TARGET_DISABLE_INDEXING
)
4220 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4221 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4224 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4228 rtx delta
= GEN_INT (disp
);
4229 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4230 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4232 emit_move_insn (tmpreg
, high
);
4233 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4236 emit_move_insn (dest
, src
);
4239 /* Update the total code bytes output to the text section. */
4242 update_total_code_bytes (unsigned int nbytes
)
4244 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4245 && !IN_NAMED_SECTION_P (cfun
->decl
))
4247 unsigned int old_total
= total_code_bytes
;
4249 total_code_bytes
+= nbytes
;
4251 /* Be prepared to handle overflows. */
4252 if (old_total
> total_code_bytes
)
4253 total_code_bytes
= UINT_MAX
;
4257 /* This function generates the assembly code for function exit.
4258 Args are as for output_function_prologue ().
4260 The function epilogue should not depend on the current stack
4261 pointer! It should use the frame pointer only. This is mandatory
4262 because of alloca; we also take advantage of it to omit stack
4263 adjustments before returning. */
4266 pa_output_function_epilogue (FILE *file
)
4268 rtx_insn
*insn
= get_last_insn ();
4271 /* pa_expand_epilogue does the dirty work now. We just need
4272 to output the assembler directives which denote the end
4275 To make debuggers happy, emit a nop if the epilogue was completely
4276 eliminated due to a volatile call as the last insn in the
4277 current function. That way the return address (in %r2) will
4278 always point to a valid instruction in the current function. */
4280 /* Get the last real insn. */
4282 insn
= prev_real_insn (insn
);
4284 /* If it is a sequence, then look inside. */
4285 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4286 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4288 /* If insn is a CALL_INSN, then it must be a call to a volatile
4289 function (otherwise there would be epilogue insns). */
4290 if (insn
&& CALL_P (insn
))
4292 fputs ("\tnop\n", file
);
4298 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4300 if (TARGET_SOM
&& TARGET_GAS
)
4302 /* We are done with this subspace except possibly for some additional
4303 debug information. Forget that we are in this subspace to ensure
4304 that the next function is output in its own subspace. */
4306 cfun
->machine
->in_nsubspa
= 2;
4309 /* Thunks do their own insn accounting. */
4313 if (INSN_ADDRESSES_SET_P ())
4315 last_address
= extra_nop
? 4 : 0;
4316 insn
= get_last_nonnote_insn ();
4319 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4321 last_address
+= insn_default_length (insn
);
4323 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4324 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4327 last_address
= UINT_MAX
;
4329 /* Finally, update the total number of code bytes output so far. */
4330 update_total_code_bytes (last_address
);
4334 pa_expand_epilogue (void)
4337 HOST_WIDE_INT offset
;
4338 HOST_WIDE_INT ret_off
= 0;
4340 int merge_sp_adjust_with_load
= 0;
4342 /* We will use this often. */
4343 tmpreg
= gen_rtx_REG (word_mode
, 1);
4345 /* Try to restore RP early to avoid load/use interlocks when
4346 RP gets used in the return (bv) instruction. This appears to still
4347 be necessary even when we schedule the prologue and epilogue. */
4350 ret_off
= TARGET_64BIT
? -16 : -20;
4351 if (frame_pointer_needed
)
4353 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4358 /* No frame pointer, and stack is smaller than 8k. */
4359 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4361 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4367 /* General register restores. */
4368 if (frame_pointer_needed
)
4370 offset
= local_fsize
;
4372 /* If the current function calls __builtin_eh_return, then we need
4373 to restore the saved EH data registers. */
4374 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4376 unsigned int i
, regno
;
4380 regno
= EH_RETURN_DATA_REGNO (i
);
4381 if (regno
== INVALID_REGNUM
)
4384 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4385 offset
+= UNITS_PER_WORD
;
4389 for (i
= 18; i
>= 4; i
--)
4390 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4392 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4393 offset
+= UNITS_PER_WORD
;
4398 offset
= local_fsize
- actual_fsize
;
4400 /* If the current function calls __builtin_eh_return, then we need
4401 to restore the saved EH data registers. */
4402 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4404 unsigned int i
, regno
;
4408 regno
= EH_RETURN_DATA_REGNO (i
);
4409 if (regno
== INVALID_REGNUM
)
4412 /* Only for the first load.
4413 merge_sp_adjust_with_load holds the register load
4414 with which we will merge the sp adjustment. */
4415 if (merge_sp_adjust_with_load
== 0
4417 && VAL_14_BITS_P (-actual_fsize
))
4418 merge_sp_adjust_with_load
= regno
;
4420 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4421 offset
+= UNITS_PER_WORD
;
4425 for (i
= 18; i
>= 3; i
--)
4427 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4429 /* Only for the first load.
4430 merge_sp_adjust_with_load holds the register load
4431 with which we will merge the sp adjustment. */
4432 if (merge_sp_adjust_with_load
== 0
4434 && VAL_14_BITS_P (-actual_fsize
))
4435 merge_sp_adjust_with_load
= i
;
4437 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4438 offset
+= UNITS_PER_WORD
;
4443 /* Align pointer properly (doubleword boundary). */
4444 offset
= (offset
+ 7) & ~7;
4446 /* FP register restores. */
4449 /* Adjust the register to index off of. */
4450 if (frame_pointer_needed
)
4451 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4453 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4455 /* Actually do the restores now. */
4456 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4457 if (df_regs_ever_live_p (i
)
4458 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4460 rtx src
= gen_rtx_MEM (DFmode
,
4461 gen_rtx_POST_INC (word_mode
, tmpreg
));
4462 rtx dest
= gen_rtx_REG (DFmode
, i
);
4463 emit_move_insn (dest
, src
);
4467 /* Emit a blockage insn here to keep these insns from being moved to
4468 an earlier spot in the epilogue, or into the main instruction stream.
4470 This is necessary as we must not cut the stack back before all the
4471 restores are finished. */
4472 emit_insn (gen_blockage ());
4474 /* Reset stack pointer (and possibly frame pointer). The stack
4475 pointer is initially set to fp + 64 to avoid a race condition. */
4476 if (frame_pointer_needed
)
4478 rtx delta
= GEN_INT (-64);
4480 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
4481 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4482 stack_pointer_rtx
, delta
));
4484 /* If we were deferring a callee register restore, do it now. */
4485 else if (merge_sp_adjust_with_load
)
4487 rtx delta
= GEN_INT (-actual_fsize
);
4488 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4490 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4492 else if (actual_fsize
!= 0)
4493 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4496 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4497 frame greater than 8k), do so now. */
4499 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4501 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4503 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4505 emit_insn (gen_blockage ());
4506 emit_insn (TARGET_64BIT
4507 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4508 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4513 pa_can_use_return_insn (void)
4515 if (!reload_completed
)
4518 if (frame_pointer_needed
)
4521 if (df_regs_ever_live_p (2))
4527 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4531 hppa_pic_save_rtx (void)
4533 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4536 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4537 #define NO_DEFERRED_PROFILE_COUNTERS 0
4541 /* Vector of funcdef numbers. */
4542 static vec
<int> funcdef_nos
;
4544 /* Output deferred profile counters. */
4546 output_deferred_profile_counters (void)
4551 if (funcdef_nos
.is_empty ())
4554 switch_to_section (data_section
);
4555 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4556 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4558 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4560 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4561 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4564 funcdef_nos
.release ();
4568 hppa_profile_hook (int label_no
)
4570 /* We use SImode for the address of the function in both 32 and
4571 64-bit code to avoid having to provide DImode versions of the
4572 lcla2 and load_offset_label_address insn patterns. */
4573 rtx reg
= gen_reg_rtx (SImode
);
4574 rtx_code_label
*label_rtx
= gen_label_rtx ();
4575 rtx mcount
= gen_rtx_MEM (Pmode
, gen_rtx_SYMBOL_REF (Pmode
, "_mcount"));
4576 int reg_parm_stack_space
= REG_PARM_STACK_SPACE (NULL_TREE
);
4577 rtx arg_bytes
, begin_label_rtx
;
4578 rtx_insn
*call_insn
;
4579 char begin_label_name
[16];
4580 bool use_mcount_pcrel_call
;
4582 /* If we can reach _mcount with a pc-relative call, we can optimize
4583 loading the address of the current function. This requires linker
4584 long branch stub support. */
4585 if (!TARGET_PORTABLE_RUNTIME
4586 && !TARGET_LONG_CALLS
4587 && (TARGET_SOM
|| flag_function_sections
))
4588 use_mcount_pcrel_call
= TRUE
;
4590 use_mcount_pcrel_call
= FALSE
;
4592 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4594 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4596 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4598 if (!use_mcount_pcrel_call
)
4600 /* The address of the function is loaded into %r25 with an instruction-
4601 relative sequence that avoids the use of relocations. The sequence
4602 is split so that the load_offset_label_address instruction can
4603 occupy the delay slot of the call to _mcount. */
4605 emit_insn (gen_lcla2 (reg
, label_rtx
));
4607 emit_insn (gen_lcla1 (reg
, label_rtx
));
4609 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4615 if (!NO_DEFERRED_PROFILE_COUNTERS
)
4617 rtx count_label_rtx
, addr
, r24
;
4618 char count_label_name
[16];
4620 funcdef_nos
.safe_push (label_no
);
4621 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4622 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
,
4623 ggc_strdup (count_label_name
));
4625 addr
= force_reg (Pmode
, count_label_rtx
);
4626 r24
= gen_rtx_REG (Pmode
, 24);
4627 emit_move_insn (r24
, addr
);
4629 arg_bytes
= GEN_INT (TARGET_64BIT
? 24 : 12);
4630 if (use_mcount_pcrel_call
)
4631 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4634 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4636 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4640 arg_bytes
= GEN_INT (TARGET_64BIT
? 16 : 8);
4641 if (use_mcount_pcrel_call
)
4642 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4645 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4648 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4649 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4651 /* Indicate the _mcount call cannot throw, nor will it execute a
4653 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4655 /* Allocate space for fixed arguments. */
4656 if (reg_parm_stack_space
> crtl
->outgoing_args_size
)
4657 crtl
->outgoing_args_size
= reg_parm_stack_space
;
4660 /* Fetch the return address for the frame COUNT steps up from
4661 the current frame, after the prologue. FRAMEADDR is the
4662 frame pointer of the COUNT frame.
4664 We want to ignore any export stub remnants here. To handle this,
4665 we examine the code at the return address, and if it is an export
4666 stub, we return a memory rtx for the stub return address stored
4669 The value returned is used in two different ways:
4671 1. To find a function's caller.
4673 2. To change the return address for a function.
4675 This function handles most instances of case 1; however, it will
4676 fail if there are two levels of stubs to execute on the return
4677 path. The only way I believe that can happen is if the return value
4678 needs a parameter relocation, which never happens for C code.
4680 This function handles most instances of case 2; however, it will
4681 fail if we did not originally have stub code on the return path
4682 but will need stub code on the new return path. This can happen if
4683 the caller & callee are both in the main program, but the new
4684 return location is in a shared library. */
4687 pa_return_addr_rtx (int count
, rtx frameaddr
)
4694 /* The instruction stream at the return address of a PA1.X export stub is:
4696 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4697 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4698 0x00011820 | stub+16: mtsp r1,sr0
4699 0xe0400002 | stub+20: be,n 0(sr0,rp)
4701 0xe0400002 must be specified as -532676606 so that it won't be
4702 rejected as an invalid immediate operand on 64-bit hosts.
4704 The instruction stream at the return address of a PA2.0 export stub is:
4706 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4707 0xe840d002 | stub+12: bve,n (rp)
4710 HOST_WIDE_INT insns
[4];
4716 rp
= get_hard_reg_initial_val (Pmode
, 2);
4718 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4721 /* If there is no export stub then just use the value saved from
4722 the return pointer register. */
4724 saved_rp
= gen_reg_rtx (Pmode
);
4725 emit_move_insn (saved_rp
, rp
);
4727 /* Get pointer to the instruction stream. We have to mask out the
4728 privilege level from the two low order bits of the return address
4729 pointer here so that ins will point to the start of the first
4730 instruction that would have been executed if we returned. */
4731 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4732 label
= gen_label_rtx ();
4736 insns
[0] = 0x4bc23fd1;
4737 insns
[1] = -398405630;
4742 insns
[0] = 0x4bc23fd1;
4743 insns
[1] = 0x004010a1;
4744 insns
[2] = 0x00011820;
4745 insns
[3] = -532676606;
4749 /* Check the instruction stream at the normal return address for the
4750 export stub. If it is an export stub, than our return address is
4751 really in -24[frameaddr]. */
4753 for (i
= 0; i
< len
; i
++)
4755 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
4756 rtx op1
= GEN_INT (insns
[i
]);
4757 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4760 /* Here we know that our return address points to an export
4761 stub. We don't want to return the address of the export stub,
4762 but rather the return address of the export stub. That return
4763 address is stored at -24[frameaddr]. */
4765 emit_move_insn (saved_rp
,
4767 memory_address (Pmode
,
4768 plus_constant (Pmode
, frameaddr
,
4777 pa_emit_bcond_fp (rtx operands
[])
4779 enum rtx_code code
= GET_CODE (operands
[0]);
4780 rtx operand0
= operands
[1];
4781 rtx operand1
= operands
[2];
4782 rtx label
= operands
[3];
4784 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
4785 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4787 emit_jump_insn (gen_rtx_SET (pc_rtx
,
4788 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4791 gen_rtx_REG (CCFPmode
, 0),
4793 gen_rtx_LABEL_REF (VOIDmode
, label
),
4798 /* Adjust the cost of a scheduling dependency. Return the new cost of
4799 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4802 pa_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
4805 enum attr_type attr_type
;
4807 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4808 true dependencies as they are described with bypasses now. */
4809 if (pa_cpu
>= PROCESSOR_8000
|| dep_type
== 0)
4812 if (! recog_memoized (insn
))
4815 attr_type
= get_attr_type (insn
);
4820 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4823 if (attr_type
== TYPE_FPLOAD
)
4825 rtx pat
= PATTERN (insn
);
4826 rtx dep_pat
= PATTERN (dep_insn
);
4827 if (GET_CODE (pat
) == PARALLEL
)
4829 /* This happens for the fldXs,mb patterns. */
4830 pat
= XVECEXP (pat
, 0, 0);
4832 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4833 /* If this happens, we have to extend this to schedule
4834 optimally. Return 0 for now. */
4837 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4839 if (! recog_memoized (dep_insn
))
4841 switch (get_attr_type (dep_insn
))
4848 case TYPE_FPSQRTSGL
:
4849 case TYPE_FPSQRTDBL
:
4850 /* A fpload can't be issued until one cycle before a
4851 preceding arithmetic operation has finished if
4852 the target of the fpload is any of the sources
4853 (or destination) of the arithmetic operation. */
4854 return insn_default_latency (dep_insn
) - 1;
4861 else if (attr_type
== TYPE_FPALU
)
4863 rtx pat
= PATTERN (insn
);
4864 rtx dep_pat
= PATTERN (dep_insn
);
4865 if (GET_CODE (pat
) == PARALLEL
)
4867 /* This happens for the fldXs,mb patterns. */
4868 pat
= XVECEXP (pat
, 0, 0);
4870 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4871 /* If this happens, we have to extend this to schedule
4872 optimally. Return 0 for now. */
4875 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4877 if (! recog_memoized (dep_insn
))
4879 switch (get_attr_type (dep_insn
))
4883 case TYPE_FPSQRTSGL
:
4884 case TYPE_FPSQRTDBL
:
4885 /* An ALU flop can't be issued until two cycles before a
4886 preceding divide or sqrt operation has finished if
4887 the target of the ALU flop is any of the sources
4888 (or destination) of the divide or sqrt operation. */
4889 return insn_default_latency (dep_insn
) - 2;
4897 /* For other anti dependencies, the cost is 0. */
4900 case REG_DEP_OUTPUT
:
4901 /* Output dependency; DEP_INSN writes a register that INSN writes some
4903 if (attr_type
== TYPE_FPLOAD
)
4905 rtx pat
= PATTERN (insn
);
4906 rtx dep_pat
= PATTERN (dep_insn
);
4907 if (GET_CODE (pat
) == PARALLEL
)
4909 /* This happens for the fldXs,mb patterns. */
4910 pat
= XVECEXP (pat
, 0, 0);
4912 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4913 /* If this happens, we have to extend this to schedule
4914 optimally. Return 0 for now. */
4917 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4919 if (! recog_memoized (dep_insn
))
4921 switch (get_attr_type (dep_insn
))
4928 case TYPE_FPSQRTSGL
:
4929 case TYPE_FPSQRTDBL
:
4930 /* A fpload can't be issued until one cycle before a
4931 preceding arithmetic operation has finished if
4932 the target of the fpload is the destination of the
4933 arithmetic operation.
4935 Exception: For PA7100LC, PA7200 and PA7300, the cost
4936 is 3 cycles, unless they bundle together. We also
4937 pay the penalty if the second insn is a fpload. */
4938 return insn_default_latency (dep_insn
) - 1;
4945 else if (attr_type
== TYPE_FPALU
)
4947 rtx pat
= PATTERN (insn
);
4948 rtx dep_pat
= PATTERN (dep_insn
);
4949 if (GET_CODE (pat
) == PARALLEL
)
4951 /* This happens for the fldXs,mb patterns. */
4952 pat
= XVECEXP (pat
, 0, 0);
4954 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4955 /* If this happens, we have to extend this to schedule
4956 optimally. Return 0 for now. */
4959 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4961 if (! recog_memoized (dep_insn
))
4963 switch (get_attr_type (dep_insn
))
4967 case TYPE_FPSQRTSGL
:
4968 case TYPE_FPSQRTDBL
:
4969 /* An ALU flop can't be issued until two cycles before a
4970 preceding divide or sqrt operation has finished if
4971 the target of the ALU flop is also the target of
4972 the divide or sqrt operation. */
4973 return insn_default_latency (dep_insn
) - 2;
4981 /* For other output dependencies, the cost is 0. */
4989 /* Adjust scheduling priorities. We use this to try and keep addil
4990 and the next use of %r1 close together. */
4992 pa_adjust_priority (rtx_insn
*insn
, int priority
)
4994 rtx set
= single_set (insn
);
4998 src
= SET_SRC (set
);
4999 dest
= SET_DEST (set
);
5000 if (GET_CODE (src
) == LO_SUM
5001 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
5002 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
5005 else if (GET_CODE (src
) == MEM
5006 && GET_CODE (XEXP (src
, 0)) == LO_SUM
5007 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
5008 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
5011 else if (GET_CODE (dest
) == MEM
5012 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
5013 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
5014 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
5020 /* The 700 can only issue a single insn at a time.
5021 The 7XXX processors can issue two insns at a time.
5022 The 8000 can issue 4 insns at a time. */
5024 pa_issue_rate (void)
5028 case PROCESSOR_700
: return 1;
5029 case PROCESSOR_7100
: return 2;
5030 case PROCESSOR_7100LC
: return 2;
5031 case PROCESSOR_7200
: return 2;
5032 case PROCESSOR_7300
: return 2;
5033 case PROCESSOR_8000
: return 4;
5042 /* Return any length plus adjustment needed by INSN which already has
5043 its length computed as LENGTH. Return LENGTH if no adjustment is
5046 Also compute the length of an inline block move here as it is too
5047 complicated to express as a length attribute in pa.md. */
5049 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
5051 rtx pat
= PATTERN (insn
);
5053 /* If length is negative or undefined, provide initial length. */
5054 if ((unsigned int) length
>= INT_MAX
)
5056 if (GET_CODE (pat
) == SEQUENCE
)
5057 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
5059 switch (get_attr_type (insn
))
5062 length
= pa_attr_length_millicode_call (insn
);
5065 length
= pa_attr_length_call (insn
, 0);
5068 length
= pa_attr_length_call (insn
, 1);
5071 length
= pa_attr_length_indirect_call (insn
);
5073 case TYPE_SH_FUNC_ADRS
:
5074 length
= pa_attr_length_millicode_call (insn
) + 20;
5081 /* Block move pattern. */
5082 if (NONJUMP_INSN_P (insn
)
5083 && GET_CODE (pat
) == PARALLEL
5084 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5085 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5086 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
5087 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5088 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5089 length
+= compute_movmem_length (insn
) - 4;
5090 /* Block clear pattern. */
5091 else if (NONJUMP_INSN_P (insn
)
5092 && GET_CODE (pat
) == PARALLEL
5093 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5094 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5095 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5096 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5097 length
+= compute_clrmem_length (insn
) - 4;
5098 /* Conditional branch with an unfilled delay slot. */
5099 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5101 /* Adjust a short backwards conditional with an unfilled delay slot. */
5102 if (GET_CODE (pat
) == SET
5104 && JUMP_LABEL (insn
) != NULL_RTX
5105 && ! forward_branch_p (insn
))
5107 else if (GET_CODE (pat
) == PARALLEL
5108 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5111 /* Adjust dbra insn with short backwards conditional branch with
5112 unfilled delay slot -- only for case where counter is in a
5113 general register register. */
5114 else if (GET_CODE (pat
) == PARALLEL
5115 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5116 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5117 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5119 && ! forward_branch_p (insn
))
5125 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5128 pa_print_operand_punct_valid_p (unsigned char code
)
5139 /* Print operand X (an rtx) in assembler syntax to file FILE.
5140 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5141 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5144 pa_print_operand (FILE *file
, rtx x
, int code
)
5149 /* Output a 'nop' if there's nothing for the delay slot. */
5150 if (dbr_sequence_length () == 0)
5151 fputs ("\n\tnop", file
);
5154 /* Output a nullification completer if there's nothing for the */
5155 /* delay slot or nullification is requested. */
5156 if (dbr_sequence_length () == 0 ||
5158 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5162 /* Print out the second register name of a register pair.
5163 I.e., R (6) => 7. */
5164 fputs (reg_names
[REGNO (x
) + 1], file
);
5167 /* A register or zero. */
5169 || (x
== CONST0_RTX (DFmode
))
5170 || (x
== CONST0_RTX (SFmode
)))
5172 fputs ("%r0", file
);
5178 /* A register or zero (floating point). */
5180 || (x
== CONST0_RTX (DFmode
))
5181 || (x
== CONST0_RTX (SFmode
)))
5183 fputs ("%fr0", file
);
5192 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5193 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5194 pa_output_global_address (file
, xoperands
[1], 0);
5195 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5199 case 'C': /* Plain (C)ondition */
5201 switch (GET_CODE (x
))
5204 fputs ("=", file
); break;
5206 fputs ("<>", file
); break;
5208 fputs (">", file
); break;
5210 fputs (">=", file
); break;
5212 fputs (">>=", file
); break;
5214 fputs (">>", file
); break;
5216 fputs ("<", file
); break;
5218 fputs ("<=", file
); break;
5220 fputs ("<<=", file
); break;
5222 fputs ("<<", file
); break;
5227 case 'N': /* Condition, (N)egated */
5228 switch (GET_CODE (x
))
5231 fputs ("<>", file
); break;
5233 fputs ("=", file
); break;
5235 fputs ("<=", file
); break;
5237 fputs ("<", file
); break;
5239 fputs ("<<", file
); break;
5241 fputs ("<<=", file
); break;
5243 fputs (">=", file
); break;
5245 fputs (">", file
); break;
5247 fputs (">>", file
); break;
5249 fputs (">>=", file
); break;
5254 /* For floating point comparisons. Note that the output
5255 predicates are the complement of the desired mode. The
5256 conditions for GT, GE, LT, LE and LTGT cause an invalid
5257 operation exception if the result is unordered and this
5258 exception is enabled in the floating-point status register. */
5260 switch (GET_CODE (x
))
5263 fputs ("!=", file
); break;
5265 fputs ("=", file
); break;
5267 fputs ("!>", file
); break;
5269 fputs ("!>=", file
); break;
5271 fputs ("!<", file
); break;
5273 fputs ("!<=", file
); break;
5275 fputs ("!<>", file
); break;
5277 fputs ("!?<=", file
); break;
5279 fputs ("!?<", file
); break;
5281 fputs ("!?>=", file
); break;
5283 fputs ("!?>", file
); break;
5285 fputs ("!?=", file
); break;
5287 fputs ("!?", file
); break;
5289 fputs ("?", file
); break;
5294 case 'S': /* Condition, operands are (S)wapped. */
5295 switch (GET_CODE (x
))
5298 fputs ("=", file
); break;
5300 fputs ("<>", file
); break;
5302 fputs ("<", file
); break;
5304 fputs ("<=", file
); break;
5306 fputs ("<<=", file
); break;
5308 fputs ("<<", file
); break;
5310 fputs (">", file
); break;
5312 fputs (">=", file
); break;
5314 fputs (">>=", file
); break;
5316 fputs (">>", file
); break;
5321 case 'B': /* Condition, (B)oth swapped and negate. */
5322 switch (GET_CODE (x
))
5325 fputs ("<>", file
); break;
5327 fputs ("=", file
); break;
5329 fputs (">=", file
); break;
5331 fputs (">", file
); break;
5333 fputs (">>", file
); break;
5335 fputs (">>=", file
); break;
5337 fputs ("<=", file
); break;
5339 fputs ("<", file
); break;
5341 fputs ("<<", file
); break;
5343 fputs ("<<=", file
); break;
5349 gcc_assert (GET_CODE (x
) == CONST_INT
);
5350 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5353 gcc_assert (GET_CODE (x
) == CONST_INT
);
5354 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5357 gcc_assert (GET_CODE (x
) == CONST_INT
);
5358 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5361 gcc_assert (GET_CODE (x
) == CONST_INT
5362 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5363 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5366 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5367 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5370 gcc_assert (GET_CODE (x
) == CONST_INT
);
5371 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5374 gcc_assert (GET_CODE (x
) == CONST_INT
);
5375 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5378 if (GET_CODE (x
) == CONST_INT
)
5383 switch (GET_CODE (XEXP (x
, 0)))
5387 if (ASSEMBLER_DIALECT
== 0)
5388 fputs ("s,mb", file
);
5390 fputs (",mb", file
);
5394 if (ASSEMBLER_DIALECT
== 0)
5395 fputs ("s,ma", file
);
5397 fputs (",ma", file
);
5400 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5401 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5403 if (ASSEMBLER_DIALECT
== 0)
5406 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5407 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5409 if (ASSEMBLER_DIALECT
== 0)
5410 fputs ("x,s", file
);
5414 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5418 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5424 pa_output_global_address (file
, x
, 0);
5427 pa_output_global_address (file
, x
, 1);
5429 case 0: /* Don't do anything special */
5434 compute_zdepwi_operands (INTVAL (x
), op
);
5435 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5441 compute_zdepdi_operands (INTVAL (x
), op
);
5442 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5446 /* We can get here from a .vtable_inherit due to our
5447 CONSTANT_ADDRESS_P rejecting perfectly good constant
5453 if (GET_CODE (x
) == REG
)
5455 fputs (reg_names
[REGNO (x
)], file
);
5456 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5462 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5463 && (REGNO (x
) & 1) == 0)
5466 else if (GET_CODE (x
) == MEM
)
5468 int size
= GET_MODE_SIZE (GET_MODE (x
));
5469 rtx base
= NULL_RTX
;
5470 switch (GET_CODE (XEXP (x
, 0)))
5474 base
= XEXP (XEXP (x
, 0), 0);
5475 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5479 base
= XEXP (XEXP (x
, 0), 0);
5480 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5483 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5484 fprintf (file
, "%s(%s)",
5485 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5486 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5487 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5488 fprintf (file
, "%s(%s)",
5489 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5490 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5491 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5492 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5494 /* Because the REG_POINTER flag can get lost during reload,
5495 pa_legitimate_address_p canonicalizes the order of the
5496 index and base registers in the combined move patterns. */
5497 rtx base
= XEXP (XEXP (x
, 0), 1);
5498 rtx index
= XEXP (XEXP (x
, 0), 0);
5500 fprintf (file
, "%s(%s)",
5501 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5504 output_address (GET_MODE (x
), XEXP (x
, 0));
5507 output_address (GET_MODE (x
), XEXP (x
, 0));
5512 output_addr_const (file
, x
);
5515 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5518 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5521 /* Imagine (high (const (plus ...))). */
5522 if (GET_CODE (x
) == HIGH
)
5525 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5526 output_addr_const (file
, x
);
5527 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5529 output_addr_const (file
, x
);
5530 fputs ("-$global$", file
);
5532 else if (GET_CODE (x
) == CONST
)
5534 const char *sep
= "";
5535 int offset
= 0; /* assembler wants -$global$ at end */
5536 rtx base
= NULL_RTX
;
5538 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5542 base
= XEXP (XEXP (x
, 0), 0);
5543 output_addr_const (file
, base
);
5546 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5552 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5556 base
= XEXP (XEXP (x
, 0), 1);
5557 output_addr_const (file
, base
);
5560 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5566 /* How bogus. The compiler is apparently responsible for
5567 rounding the constant if it uses an LR field selector.
5569 The linker and/or assembler seem a better place since
5570 they have to do this kind of thing already.
5572 If we fail to do this, HP's optimizing linker may eliminate
5573 an addil, but not update the ldw/stw/ldo instruction that
5574 uses the result of the addil. */
5576 offset
= ((offset
+ 0x1000) & ~0x1fff);
5578 switch (GET_CODE (XEXP (x
, 0)))
5591 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5599 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5600 fputs ("-$global$", file
);
5602 fprintf (file
, "%s%d", sep
, offset
);
5605 output_addr_const (file
, x
);
5608 /* Output boilerplate text to appear at the beginning of the file.
5609 There are several possible versions. */
5610 #define aputs(x) fputs(x, asm_out_file)
5612 pa_file_start_level (void)
5615 aputs ("\t.LEVEL 2.0w\n");
5616 else if (TARGET_PA_20
)
5617 aputs ("\t.LEVEL 2.0\n");
5618 else if (TARGET_PA_11
)
5619 aputs ("\t.LEVEL 1.1\n");
5621 aputs ("\t.LEVEL 1.0\n");
5625 pa_file_start_space (int sortspace
)
5627 aputs ("\t.SPACE $PRIVATE$");
5630 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5632 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5633 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5634 "\n\t.SPACE $TEXT$");
5637 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5638 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5642 pa_file_start_file (int want_version
)
5644 if (write_symbols
!= NO_DEBUG
)
5646 output_file_directive (asm_out_file
, main_input_filename
);
5648 aputs ("\t.version\t\"01.01\"\n");
5653 pa_file_start_mcount (const char *aswhat
)
5656 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
5660 pa_elf_file_start (void)
5662 pa_file_start_level ();
5663 pa_file_start_mcount ("ENTRY");
5664 pa_file_start_file (0);
5668 pa_som_file_start (void)
5670 pa_file_start_level ();
5671 pa_file_start_space (0);
5672 aputs ("\t.IMPORT $global$,DATA\n"
5673 "\t.IMPORT $$dyncall,MILLICODE\n");
5674 pa_file_start_mcount ("CODE");
5675 pa_file_start_file (0);
5679 pa_linux_file_start (void)
5681 pa_file_start_file (1);
5682 pa_file_start_level ();
5683 pa_file_start_mcount ("CODE");
5687 pa_hpux64_gas_file_start (void)
5689 pa_file_start_level ();
5690 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5692 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file
, "_mcount", "function");
5694 pa_file_start_file (1);
5698 pa_hpux64_hpas_file_start (void)
5700 pa_file_start_level ();
5701 pa_file_start_space (1);
5702 pa_file_start_mcount ("CODE");
5703 pa_file_start_file (0);
5707 /* Search the deferred plabel list for SYMBOL and return its internal
5708 label. If an entry for SYMBOL is not found, a new entry is created. */
5711 pa_get_deferred_plabel (rtx symbol
)
5713 const char *fname
= XSTR (symbol
, 0);
5716 /* See if we have already put this function on the list of deferred
5717 plabels. This list is generally small, so a liner search is not
5718 too ugly. If it proves too slow replace it with something faster. */
5719 for (i
= 0; i
< n_deferred_plabels
; i
++)
5720 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5723 /* If the deferred plabel list is empty, or this entry was not found
5724 on the list, create a new entry on the list. */
5725 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5729 if (deferred_plabels
== 0)
5730 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5732 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5734 n_deferred_plabels
+ 1);
5736 i
= n_deferred_plabels
++;
5737 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5738 deferred_plabels
[i
].symbol
= symbol
;
5740 /* Gross. We have just implicitly taken the address of this
5741 function. Mark it in the same manner as assemble_name. */
5742 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5744 mark_referenced (id
);
5747 return deferred_plabels
[i
].internal_label
;
5751 output_deferred_plabels (void)
5755 /* If we have some deferred plabels, then we need to switch into the
5756 data or readonly data section, and align it to a 4 byte boundary
5757 before outputting the deferred plabels. */
5758 if (n_deferred_plabels
)
5760 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5761 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5764 /* Now output the deferred plabels. */
5765 for (i
= 0; i
< n_deferred_plabels
; i
++)
5767 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5768 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5769 assemble_integer (deferred_plabels
[i
].symbol
,
5770 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5774 /* Initialize optabs to point to emulation routines. */
5777 pa_init_libfuncs (void)
5779 if (HPUX_LONG_DOUBLE_LIBRARY
)
5781 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5782 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5783 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5784 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5785 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5786 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5787 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5788 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5789 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5791 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5792 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5793 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5794 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5795 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5796 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5797 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5799 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5800 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5801 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5802 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5804 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
5805 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
5806 : "_U_Qfcnvfxt_quad_to_sgl");
5807 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
5808 "_U_Qfcnvfxt_quad_to_dbl");
5809 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
5810 "_U_Qfcnvfxt_quad_to_usgl");
5811 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
5812 "_U_Qfcnvfxt_quad_to_udbl");
5814 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
5815 "_U_Qfcnvxf_sgl_to_quad");
5816 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
5817 "_U_Qfcnvxf_dbl_to_quad");
5818 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
5819 "_U_Qfcnvxf_usgl_to_quad");
5820 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
5821 "_U_Qfcnvxf_udbl_to_quad");
5824 if (TARGET_SYNC_LIBCALL
)
5825 init_sync_libfuncs (8);
5828 /* HP's millicode routines mean something special to the assembler.
5829 Keep track of which ones we have used. */
5831 enum millicodes
{ remI
, remU
, divI
, divU
, mulI
, end1000
};
5832 static void import_milli (enum millicodes
);
5833 static char imported
[(int) end1000
];
5834 static const char * const milli_names
[] = {"remI", "remU", "divI", "divU", "mulI"};
5835 static const char import_string
[] = ".IMPORT $$....,MILLICODE";
5836 #define MILLI_START 10
5839 import_milli (enum millicodes code
)
5841 char str
[sizeof (import_string
)];
5843 if (!imported
[(int) code
])
5845 imported
[(int) code
] = 1;
5846 strcpy (str
, import_string
);
5847 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5848 output_asm_insn (str
, 0);
5852 /* The register constraints have put the operands and return value in
5853 the proper registers. */
5856 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
5858 import_milli (mulI
);
5859 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
5862 /* Emit the rtl for doing a division by a constant. */
5864 /* Do magic division millicodes exist for this value? */
5865 const int pa_magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5867 /* We'll use an array to keep track of the magic millicodes and
5868 whether or not we've used them already. [n][0] is signed, [n][1] is
5871 static int div_milli
[16][2];
5874 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5876 if (GET_CODE (operands
[2]) == CONST_INT
5877 && INTVAL (operands
[2]) > 0
5878 && INTVAL (operands
[2]) < 16
5879 && pa_magic_milli
[INTVAL (operands
[2])])
5881 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5883 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5887 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
5888 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5890 gen_rtx_REG (SImode
, 26),
5892 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5893 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5894 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5895 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5896 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5897 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5904 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
5908 /* If the divisor is a constant, try to use one of the special
5910 if (GET_CODE (operands
[0]) == CONST_INT
)
5912 static char buf
[100];
5913 divisor
= INTVAL (operands
[0]);
5914 if (!div_milli
[divisor
][unsignedp
])
5916 div_milli
[divisor
][unsignedp
] = 1;
5918 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5920 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5924 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5925 INTVAL (operands
[0]));
5926 return pa_output_millicode_call (insn
,
5927 gen_rtx_SYMBOL_REF (SImode
, buf
));
5931 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5932 INTVAL (operands
[0]));
5933 return pa_output_millicode_call (insn
,
5934 gen_rtx_SYMBOL_REF (SImode
, buf
));
5937 /* Divisor isn't a special constant. */
5942 import_milli (divU
);
5943 return pa_output_millicode_call (insn
,
5944 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5948 import_milli (divI
);
5949 return pa_output_millicode_call (insn
,
5950 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5955 /* Output a $$rem millicode to do mod. */
5958 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
5962 import_milli (remU
);
5963 return pa_output_millicode_call (insn
,
5964 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5968 import_milli (remI
);
5969 return pa_output_millicode_call (insn
,
5970 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5975 pa_output_arg_descriptor (rtx_insn
*call_insn
)
5977 const char *arg_regs
[4];
5978 machine_mode arg_mode
;
5980 int i
, output_flag
= 0;
5983 /* We neither need nor want argument location descriptors for the
5984 64bit runtime environment or the ELF32 environment. */
5985 if (TARGET_64BIT
|| TARGET_ELF32
)
5988 for (i
= 0; i
< 4; i
++)
5991 /* Specify explicitly that no argument relocations should take place
5992 if using the portable runtime calling conventions. */
5993 if (TARGET_PORTABLE_RUNTIME
)
5995 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6000 gcc_assert (CALL_P (call_insn
));
6001 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
6002 link
; link
= XEXP (link
, 1))
6004 rtx use
= XEXP (link
, 0);
6006 if (! (GET_CODE (use
) == USE
6007 && GET_CODE (XEXP (use
, 0)) == REG
6008 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
6011 arg_mode
= GET_MODE (XEXP (use
, 0));
6012 regno
= REGNO (XEXP (use
, 0));
6013 if (regno
>= 23 && regno
<= 26)
6015 arg_regs
[26 - regno
] = "GR";
6016 if (arg_mode
== DImode
)
6017 arg_regs
[25 - regno
] = "GR";
6019 else if (regno
>= 32 && regno
<= 39)
6021 if (arg_mode
== SFmode
)
6022 arg_regs
[(regno
- 32) / 2] = "FR";
6025 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6026 arg_regs
[(regno
- 34) / 2] = "FR";
6027 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
6029 arg_regs
[(regno
- 34) / 2] = "FU";
6030 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
6035 fputs ("\t.CALL ", asm_out_file
);
6036 for (i
= 0; i
< 4; i
++)
6041 fputc (',', asm_out_file
);
6042 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
6045 fputc ('\n', asm_out_file
);
6048 /* Inform reload about cases where moving X with a mode MODE to or from
6049 a register in RCLASS requires an extra scratch or immediate register.
6050 Return the class needed for the immediate register. */
6053 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
6054 machine_mode mode
, secondary_reload_info
*sri
)
6057 enum reg_class rclass
= (enum reg_class
) rclass_i
;
6059 /* Handle the easy stuff first. */
6060 if (rclass
== R1_REGS
)
6066 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
6072 /* If we have something like (mem (mem (...)), we can safely assume the
6073 inner MEM will end up in a general register after reloading, so there's
6074 no need for a secondary reload. */
6075 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
6078 /* Trying to load a constant into a FP register during PIC code
6079 generation requires %r1 as a scratch register. For float modes,
6080 the only legitimate constant is CONST0_RTX. However, there are
6081 a few patterns that accept constant double operands. */
6083 && FP_REG_CLASS_P (rclass
)
6084 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
6089 sri
->icode
= CODE_FOR_reload_insi_r1
;
6093 sri
->icode
= CODE_FOR_reload_indi_r1
;
6097 sri
->icode
= CODE_FOR_reload_insf_r1
;
6101 sri
->icode
= CODE_FOR_reload_indf_r1
;
6110 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6111 register when we're generating PIC code or when the operand isn't
6113 if (pa_symbolic_expression_p (x
))
6115 if (GET_CODE (x
) == HIGH
)
6118 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
6123 sri
->icode
= CODE_FOR_reload_insi_r1
;
6127 sri
->icode
= CODE_FOR_reload_indi_r1
;
6137 /* Profiling showed the PA port spends about 1.3% of its compilation
6138 time in true_regnum from calls inside pa_secondary_reload_class. */
6139 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6140 regno
= true_regnum (x
);
6142 /* Handle reloads for floating point loads and stores. */
6143 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6144 && FP_REG_CLASS_P (rclass
))
6150 /* We don't need a secondary reload for indexed memory addresses.
6152 When INT14_OK_STRICT is true, it might appear that we could
6153 directly allow register indirect memory addresses. However,
6154 this doesn't work because we don't support SUBREGs in
6155 floating-point register copies and reload doesn't tell us
6156 when it's going to use a SUBREG. */
6157 if (IS_INDEX_ADDR_P (x
))
6161 /* Request a secondary reload with a general scratch register
6162 for everything else. ??? Could symbolic operands be handled
6163 directly when generating non-pic PA 2.0 code? */
6165 ? direct_optab_handler (reload_in_optab
, mode
)
6166 : direct_optab_handler (reload_out_optab
, mode
));
6170 /* A SAR<->FP register copy requires an intermediate general register
6171 and secondary memory. We need a secondary reload with a general
6172 scratch register for spills. */
6173 if (rclass
== SHIFT_REGS
)
6176 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6179 ? direct_optab_handler (reload_in_optab
, mode
)
6180 : direct_optab_handler (reload_out_optab
, mode
));
6184 /* Handle FP copy. */
6185 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6186 return GENERAL_REGS
;
6189 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6190 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6191 && FP_REG_CLASS_P (rclass
))
6192 return GENERAL_REGS
;
6197 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6200 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED
,
6201 reg_class_t class1 ATTRIBUTE_UNUSED
,
6202 reg_class_t class2 ATTRIBUTE_UNUSED
)
6204 #ifdef PA_SECONDARY_MEMORY_NEEDED
6205 return PA_SECONDARY_MEMORY_NEEDED (mode
, class1
, class2
);
6211 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6212 is only marked as live on entry by df-scan when it is a fixed
6213 register. It isn't a fixed register in the 64-bit runtime,
6214 so we need to mark it here. */
6217 pa_extra_live_on_entry (bitmap regs
)
6220 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6223 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6224 to prevent it from being deleted. */
6227 pa_eh_return_handler_rtx (void)
6231 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6232 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6233 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6238 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6239 by invisible reference. As a GCC extension, we also pass anything
6240 with a zero or variable size by reference.
6242 The 64-bit runtime does not describe passing any types by invisible
6243 reference. The internals of GCC can't currently handle passing
6244 empty structures, and zero or variable length arrays when they are
6245 not passed entirely on the stack or by reference. Thus, as a GCC
6246 extension, we pass these types by reference. The HP compiler doesn't
6247 support these types, so hopefully there shouldn't be any compatibility
6248 issues. This may have to be revisited when HP releases a C99 compiler
6249 or updates the ABI. */
6252 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
6253 machine_mode mode
, const_tree type
,
6254 bool named ATTRIBUTE_UNUSED
)
6259 size
= int_size_in_bytes (type
);
6261 size
= GET_MODE_SIZE (mode
);
6266 return size
<= 0 || size
> 8;
6269 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6271 static pad_direction
6272 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6277 && (AGGREGATE_TYPE_P (type
)
6278 || TREE_CODE (type
) == COMPLEX_TYPE
6279 || TREE_CODE (type
) == VECTOR_TYPE
)))
6281 /* Return PAD_NONE if justification is not required. */
6283 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6284 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6287 /* The directions set here are ignored when a BLKmode argument larger
6288 than a word is placed in a register. Different code is used for
6289 the stack and registers. This makes it difficult to have a
6290 consistent data representation for both the stack and registers.
6291 For both runtimes, the justification and padding for arguments on
6292 the stack and in registers should be identical. */
6294 /* The 64-bit runtime specifies left justification for aggregates. */
6297 /* The 32-bit runtime architecture specifies right justification.
6298 When the argument is passed on the stack, the argument is padded
6299 with garbage on the left. The HP compiler pads with zeros. */
6300 return PAD_DOWNWARD
;
6303 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6304 return PAD_DOWNWARD
;
6310 /* Do what is necessary for `va_start'. We look at the current function
6311 to determine if stdargs or varargs is used and fill in an initial
6312 va_list. A pointer to this constructor is returned. */
6315 hppa_builtin_saveregs (void)
6318 tree fntype
= TREE_TYPE (current_function_decl
);
6319 int argadj
= ((!stdarg_p (fntype
))
6320 ? UNITS_PER_WORD
: 0);
6323 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6325 offset
= crtl
->args
.arg_offset_rtx
;
6331 /* Adjust for varargs/stdarg differences. */
6333 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6335 offset
= crtl
->args
.arg_offset_rtx
;
6337 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6338 from the incoming arg pointer and growing to larger addresses. */
6339 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6340 emit_move_insn (gen_rtx_MEM (word_mode
,
6341 plus_constant (Pmode
,
6342 arg_pointer_rtx
, off
)),
6343 gen_rtx_REG (word_mode
, i
));
6345 /* The incoming args pointer points just beyond the flushback area;
6346 normally this is not a serious concern. However, when we are doing
6347 varargs/stdargs we want to make the arg pointer point to the start
6348 of the incoming argument area. */
6349 emit_move_insn (virtual_incoming_args_rtx
,
6350 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6352 /* Now return a pointer to the first anonymous argument. */
6353 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6354 virtual_incoming_args_rtx
,
6355 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6358 /* Store general registers on the stack. */
6359 dest
= gen_rtx_MEM (BLKmode
,
6360 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6362 set_mem_alias_set (dest
, get_varargs_alias_set ());
6363 set_mem_align (dest
, BITS_PER_WORD
);
6364 move_block_from_reg (23, dest
, 4);
6366 /* move_block_from_reg will emit code to store the argument registers
6367 individually as scalar stores.
6369 However, other insns may later load from the same addresses for
6370 a structure load (passing a struct to a varargs routine).
6372 The alias code assumes that such aliasing can never happen, so we
6373 have to keep memory referencing insns from moving up beyond the
6374 last argument register store. So we emit a blockage insn here. */
6375 emit_insn (gen_blockage ());
6377 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6378 crtl
->args
.internal_arg_pointer
,
6379 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6383 hppa_va_start (tree valist
, rtx nextarg
)
6385 nextarg
= expand_builtin_saveregs ();
6386 std_expand_builtin_va_start (valist
, nextarg
);
6390 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6395 /* Args grow upward. We can use the generic routines. */
6396 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6398 else /* !TARGET_64BIT */
6400 tree ptr
= build_pointer_type (type
);
6403 unsigned int size
, ofs
;
6406 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6410 ptr
= build_pointer_type (type
);
6412 size
= int_size_in_bytes (type
);
6413 valist_type
= TREE_TYPE (valist
);
6415 /* Args grow down. Not handled by generic routines. */
6417 u
= fold_convert (sizetype
, size_in_bytes (type
));
6418 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6419 t
= fold_build_pointer_plus (valist
, u
);
6421 /* Align to 4 or 8 byte boundary depending on argument size. */
6423 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6424 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6425 t
= fold_convert (valist_type
, t
);
6427 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6429 ofs
= (8 - size
) % 4;
6431 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6433 t
= fold_convert (ptr
, t
);
6434 t
= build_va_arg_indirect_ref (t
);
6437 t
= build_va_arg_indirect_ref (t
);
6443 /* True if MODE is valid for the target. By "valid", we mean able to
6444 be manipulated in non-trivial ways. In particular, this means all
6445 the arithmetic is supported.
6447 Currently, TImode is not valid as the HP 64-bit runtime documentation
6448 doesn't document the alignment and calling conventions for this type.
6449 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6450 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6453 pa_scalar_mode_supported_p (scalar_mode mode
)
6455 int precision
= GET_MODE_PRECISION (mode
);
6457 switch (GET_MODE_CLASS (mode
))
6459 case MODE_PARTIAL_INT
:
6461 if (precision
== CHAR_TYPE_SIZE
)
6463 if (precision
== SHORT_TYPE_SIZE
)
6465 if (precision
== INT_TYPE_SIZE
)
6467 if (precision
== LONG_TYPE_SIZE
)
6469 if (precision
== LONG_LONG_TYPE_SIZE
)
6474 if (precision
== FLOAT_TYPE_SIZE
)
6476 if (precision
== DOUBLE_TYPE_SIZE
)
6478 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6482 case MODE_DECIMAL_FLOAT
:
6490 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6491 it branches into the delay slot. Otherwise, return FALSE. */
6494 branch_to_delay_slot_p (rtx_insn
*insn
)
6496 rtx_insn
*jump_insn
;
6498 if (dbr_sequence_length ())
6501 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6504 insn
= next_active_insn (insn
);
6505 if (jump_insn
== insn
)
6508 /* We can't rely on the length of asms. So, we return FALSE when
6509 the branch is followed by an asm. */
6511 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6512 || asm_noperands (PATTERN (insn
)) >= 0
6513 || get_attr_length (insn
) > 0)
6520 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6522 This occurs when INSN has an unfilled delay slot and is followed
6523 by an asm. Disaster can occur if the asm is empty and the jump
6524 branches into the delay slot. So, we add a nop in the delay slot
6525 when this occurs. */
6528 branch_needs_nop_p (rtx_insn
*insn
)
6530 rtx_insn
*jump_insn
;
6532 if (dbr_sequence_length ())
6535 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6538 insn
= next_active_insn (insn
);
6539 if (!insn
|| jump_insn
== insn
)
6542 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6543 || asm_noperands (PATTERN (insn
)) >= 0)
6544 && get_attr_length (insn
) > 0)
6551 /* Return TRUE if INSN, a forward jump insn, can use nullification
6552 to skip the following instruction. This avoids an extra cycle due
6553 to a mis-predicted branch when we fall through. */
6556 use_skip_p (rtx_insn
*insn
)
6558 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6562 insn
= next_active_insn (insn
);
6564 /* We can't rely on the length of asms, so we can't skip asms. */
6566 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6567 || asm_noperands (PATTERN (insn
)) >= 0)
6569 if (get_attr_length (insn
) == 4
6570 && jump_insn
== next_active_insn (insn
))
6572 if (get_attr_length (insn
) > 0)
6579 /* This routine handles all the normal conditional branch sequences we
6580 might need to generate. It handles compare immediate vs compare
6581 register, nullification of delay slots, varying length branches,
6582 negated branches, and all combinations of the above. It returns the
6583 output appropriate to emit the branch corresponding to all given
6587 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6589 static char buf
[100];
6591 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6592 int length
= get_attr_length (insn
);
6595 /* A conditional branch to the following instruction (e.g. the delay slot)
6596 is asking for a disaster. This can happen when not optimizing and
6597 when jump optimization fails.
6599 While it is usually safe to emit nothing, this can fail if the
6600 preceding instruction is a nullified branch with an empty delay
6601 slot and the same branch target as this branch. We could check
6602 for this but jump optimization should eliminate nop jumps. It
6603 is always safe to emit a nop. */
6604 if (branch_to_delay_slot_p (insn
))
6607 /* The doubleword form of the cmpib instruction doesn't have the LEU
6608 and GTU conditions while the cmpb instruction does. Since we accept
6609 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6610 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6611 operands
[2] = gen_rtx_REG (DImode
, 0);
6612 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6613 operands
[1] = gen_rtx_REG (DImode
, 0);
6615 /* If this is a long branch with its delay slot unfilled, set `nullify'
6616 as it can nullify the delay slot and save a nop. */
6617 if (length
== 8 && dbr_sequence_length () == 0)
6620 /* If this is a short forward conditional branch which did not get
6621 its delay slot filled, the delay slot can still be nullified. */
6622 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6623 nullify
= forward_branch_p (insn
);
6625 /* A forward branch over a single nullified insn can be done with a
6626 comclr instruction. This avoids a single cycle penalty due to
6627 mis-predicted branch if we fall through (branch not taken). */
6628 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6632 /* All short conditional branches except backwards with an unfilled
6636 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6638 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6639 if (GET_MODE (operands
[1]) == DImode
)
6642 strcat (buf
, "%B3");
6644 strcat (buf
, "%S3");
6646 strcat (buf
, " %2,%r1,%%r0");
6649 if (branch_needs_nop_p (insn
))
6650 strcat (buf
, ",n %2,%r1,%0%#");
6652 strcat (buf
, ",n %2,%r1,%0");
6655 strcat (buf
, " %2,%r1,%0");
6658 /* All long conditionals. Note a short backward branch with an
6659 unfilled delay slot is treated just like a long backward branch
6660 with an unfilled delay slot. */
6662 /* Handle weird backwards branch with a filled delay slot
6663 which is nullified. */
6664 if (dbr_sequence_length () != 0
6665 && ! forward_branch_p (insn
)
6668 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6669 if (GET_MODE (operands
[1]) == DImode
)
6672 strcat (buf
, "%S3");
6674 strcat (buf
, "%B3");
6675 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6677 /* Handle short backwards branch with an unfilled delay slot.
6678 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6679 taken and untaken branches. */
6680 else if (dbr_sequence_length () == 0
6681 && ! forward_branch_p (insn
)
6682 && INSN_ADDRESSES_SET_P ()
6683 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6684 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6686 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6687 if (GET_MODE (operands
[1]) == DImode
)
6690 strcat (buf
, "%B3 %2,%r1,%0%#");
6692 strcat (buf
, "%S3 %2,%r1,%0%#");
6696 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6697 if (GET_MODE (operands
[1]) == DImode
)
6700 strcat (buf
, "%S3");
6702 strcat (buf
, "%B3");
6704 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6706 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6711 /* The reversed conditional branch must branch over one additional
6712 instruction if the delay slot is filled and needs to be extracted
6713 by pa_output_lbranch. If the delay slot is empty or this is a
6714 nullified forward branch, the instruction after the reversed
6715 condition branch must be nullified. */
6716 if (dbr_sequence_length () == 0
6717 || (nullify
&& forward_branch_p (insn
)))
6721 operands
[4] = GEN_INT (length
);
6726 operands
[4] = GEN_INT (length
+ 4);
6729 /* Create a reversed conditional branch which branches around
6730 the following insns. */
6731 if (GET_MODE (operands
[1]) != DImode
)
6737 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6740 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6746 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6749 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6758 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6761 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6767 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6770 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6774 output_asm_insn (buf
, operands
);
6775 return pa_output_lbranch (operands
[0], insn
, xdelay
);
6780 /* Output a PIC pc-relative instruction sequence to load the address of
6781 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6782 or a code label. OPERANDS[1] specifies the register to use to load
6783 the program counter. OPERANDS[3] may be used for label generation
6784 The sequence is always three instructions in length. The program
6785 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6786 Register %r1 is clobbered. */
6789 pa_output_pic_pcrel_sequence (rtx
*operands
)
6791 gcc_assert (SYMBOL_REF_P (operands
[0]) || LABEL_P (operands
[0]));
6794 /* We can use mfia to determine the current program counter. */
6795 if (TARGET_SOM
|| !TARGET_GAS
)
6797 operands
[3] = gen_label_rtx ();
6798 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6799 CODE_LABEL_NUMBER (operands
[3]));
6800 output_asm_insn ("mfia %1", operands
);
6801 output_asm_insn ("addil L'%0-%l3,%1", operands
);
6802 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
6806 output_asm_insn ("mfia %1", operands
);
6807 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands
);
6808 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands
);
6813 /* We need to use a branch to determine the current program counter. */
6814 output_asm_insn ("{bl|b,l} .+8,%1", operands
);
6815 if (TARGET_SOM
|| !TARGET_GAS
)
6817 operands
[3] = gen_label_rtx ();
6818 output_asm_insn ("addil L'%0-%l3,%1", operands
);
6819 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6820 CODE_LABEL_NUMBER (operands
[3]));
6821 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
6825 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands
);
6826 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands
);
6831 /* This routine handles output of long unconditional branches that
6832 exceed the maximum range of a simple branch instruction. Since
6833 we don't have a register available for the branch, we save register
6834 %r1 in the frame marker, load the branch destination DEST into %r1,
6835 execute the branch, and restore %r1 in the delay slot of the branch.
6837 Since long branches may have an insn in the delay slot and the
6838 delay slot is used to restore %r1, we in general need to extract
6839 this insn and execute it before the branch. However, to facilitate
6840 use of this function by conditional branches, we also provide an
6841 option to not extract the delay insn so that it will be emitted
6842 after the long branch. So, if there is an insn in the delay slot,
6843 it is extracted if XDELAY is nonzero.
6845 The lengths of the various long-branch sequences are 20, 16 and 24
6846 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6849 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
6853 xoperands
[0] = dest
;
6855 /* First, free up the delay slot. */
6856 if (xdelay
&& dbr_sequence_length () != 0)
6858 /* We can't handle a jump in the delay slot. */
6859 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
6861 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6864 /* Now delete the delay insn. */
6865 SET_INSN_DELETED (NEXT_INSN (insn
));
6868 /* Output an insn to save %r1. The runtime documentation doesn't
6869 specify whether the "Clean Up" slot in the callers frame can
6870 be clobbered by the callee. It isn't copied by HP's builtin
6871 alloca, so this suggests that it can be clobbered if necessary.
6872 The "Static Link" location is copied by HP builtin alloca, so
6873 we avoid using it. Using the cleanup slot might be a problem
6874 if we have to interoperate with languages that pass cleanup
6875 information. However, it should be possible to handle these
6876 situations with GCC's asm feature.
6878 The "Current RP" slot is reserved for the called procedure, so
6879 we try to use it when we don't have a frame of our own. It's
6880 rather unlikely that we won't have a frame when we need to emit
6883 Really the way to go long term is a register scavenger; goto
6884 the target of the jump and find a register which we can use
6885 as a scratch to hold the value in %r1. Then, we wouldn't have
6886 to free up the delay slot or clobber a slot that may be needed
6887 for other purposes. */
6890 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6891 /* Use the return pointer slot in the frame marker. */
6892 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6894 /* Use the slot at -40 in the frame marker since HP builtin
6895 alloca doesn't copy it. */
6896 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6900 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6901 /* Use the return pointer slot in the frame marker. */
6902 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6904 /* Use the "Clean Up" slot in the frame marker. In GCC,
6905 the only other use of this location is for copying a
6906 floating point double argument from a floating-point
6907 register to two general registers. The copy is done
6908 as an "atomic" operation when outputting a call, so it
6909 won't interfere with our using the location here. */
6910 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6913 if (TARGET_PORTABLE_RUNTIME
)
6915 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6916 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6917 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6921 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
6922 xoperands
[2] = xoperands
[1];
6923 pa_output_pic_pcrel_sequence (xoperands
);
6924 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6927 /* Now output a very long branch to the original target. */
6928 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6930 /* Now restore the value of %r1 in the delay slot. */
6933 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6934 return "ldd -16(%%r30),%%r1";
6936 return "ldd -40(%%r30),%%r1";
6940 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6941 return "ldw -20(%%r30),%%r1";
6943 return "ldw -12(%%r30),%%r1";
6947 /* This routine handles all the branch-on-bit conditional branch sequences we
6948 might need to generate. It handles nullification of delay slots,
6949 varying length branches, negated branches and all combinations of the
6950 above. it returns the appropriate output template to emit the branch. */
6953 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
6955 static char buf
[100];
6957 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6958 int length
= get_attr_length (insn
);
6961 /* A conditional branch to the following instruction (e.g. the delay slot) is
6962 asking for a disaster. I do not think this can happen as this pattern
6963 is only used when optimizing; jump optimization should eliminate the
6964 jump. But be prepared just in case. */
6966 if (branch_to_delay_slot_p (insn
))
6969 /* If this is a long branch with its delay slot unfilled, set `nullify'
6970 as it can nullify the delay slot and save a nop. */
6971 if (length
== 8 && dbr_sequence_length () == 0)
6974 /* If this is a short forward conditional branch which did not get
6975 its delay slot filled, the delay slot can still be nullified. */
6976 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6977 nullify
= forward_branch_p (insn
);
6979 /* A forward branch over a single nullified insn can be done with a
6980 extrs instruction. This avoids a single cycle penalty due to
6981 mis-predicted branch if we fall through (branch not taken). */
6982 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6987 /* All short conditional branches except backwards with an unfilled
6991 strcpy (buf
, "{extrs,|extrw,s,}");
6993 strcpy (buf
, "bb,");
6994 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6995 strcpy (buf
, "extrd,s,*");
6996 else if (GET_MODE (operands
[0]) == DImode
)
6997 strcpy (buf
, "bb,*");
6998 if ((which
== 0 && negated
)
6999 || (which
== 1 && ! negated
))
7004 strcat (buf
, " %0,%1,1,%%r0");
7005 else if (nullify
&& negated
)
7007 if (branch_needs_nop_p (insn
))
7008 strcat (buf
, ",n %0,%1,%3%#");
7010 strcat (buf
, ",n %0,%1,%3");
7012 else if (nullify
&& ! negated
)
7014 if (branch_needs_nop_p (insn
))
7015 strcat (buf
, ",n %0,%1,%2%#");
7017 strcat (buf
, ",n %0,%1,%2");
7019 else if (! nullify
&& negated
)
7020 strcat (buf
, " %0,%1,%3");
7021 else if (! nullify
&& ! negated
)
7022 strcat (buf
, " %0,%1,%2");
7025 /* All long conditionals. Note a short backward branch with an
7026 unfilled delay slot is treated just like a long backward branch
7027 with an unfilled delay slot. */
7029 /* Handle weird backwards branch with a filled delay slot
7030 which is nullified. */
7031 if (dbr_sequence_length () != 0
7032 && ! forward_branch_p (insn
)
7035 strcpy (buf
, "bb,");
7036 if (GET_MODE (operands
[0]) == DImode
)
7038 if ((which
== 0 && negated
)
7039 || (which
== 1 && ! negated
))
7044 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
7046 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
7048 /* Handle short backwards branch with an unfilled delay slot.
7049 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7050 taken and untaken branches. */
7051 else if (dbr_sequence_length () == 0
7052 && ! forward_branch_p (insn
)
7053 && INSN_ADDRESSES_SET_P ()
7054 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7055 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7057 strcpy (buf
, "bb,");
7058 if (GET_MODE (operands
[0]) == DImode
)
7060 if ((which
== 0 && negated
)
7061 || (which
== 1 && ! negated
))
7066 strcat (buf
, " %0,%1,%3%#");
7068 strcat (buf
, " %0,%1,%2%#");
7072 if (GET_MODE (operands
[0]) == DImode
)
7073 strcpy (buf
, "extrd,s,*");
7075 strcpy (buf
, "{extrs,|extrw,s,}");
7076 if ((which
== 0 && negated
)
7077 || (which
== 1 && ! negated
))
7081 if (nullify
&& negated
)
7082 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
7083 else if (nullify
&& ! negated
)
7084 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
7086 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
7088 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
7093 /* The reversed conditional branch must branch over one additional
7094 instruction if the delay slot is filled and needs to be extracted
7095 by pa_output_lbranch. If the delay slot is empty or this is a
7096 nullified forward branch, the instruction after the reversed
7097 condition branch must be nullified. */
7098 if (dbr_sequence_length () == 0
7099 || (nullify
&& forward_branch_p (insn
)))
7103 operands
[4] = GEN_INT (length
);
7108 operands
[4] = GEN_INT (length
+ 4);
7111 if (GET_MODE (operands
[0]) == DImode
)
7112 strcpy (buf
, "bb,*");
7114 strcpy (buf
, "bb,");
7115 if ((which
== 0 && negated
)
7116 || (which
== 1 && !negated
))
7121 strcat (buf
, ",n %0,%1,.+%4");
7123 strcat (buf
, " %0,%1,.+%4");
7124 output_asm_insn (buf
, operands
);
7125 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7131 /* This routine handles all the branch-on-variable-bit conditional branch
7132 sequences we might need to generate. It handles nullification of delay
7133 slots, varying length branches, negated branches and all combinations
7134 of the above. it returns the appropriate output template to emit the
7138 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
7141 static char buf
[100];
7143 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7144 int length
= get_attr_length (insn
);
7147 /* A conditional branch to the following instruction (e.g. the delay slot) is
7148 asking for a disaster. I do not think this can happen as this pattern
7149 is only used when optimizing; jump optimization should eliminate the
7150 jump. But be prepared just in case. */
7152 if (branch_to_delay_slot_p (insn
))
7155 /* If this is a long branch with its delay slot unfilled, set `nullify'
7156 as it can nullify the delay slot and save a nop. */
7157 if (length
== 8 && dbr_sequence_length () == 0)
7160 /* If this is a short forward conditional branch which did not get
7161 its delay slot filled, the delay slot can still be nullified. */
7162 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7163 nullify
= forward_branch_p (insn
);
7165 /* A forward branch over a single nullified insn can be done with a
7166 extrs instruction. This avoids a single cycle penalty due to
7167 mis-predicted branch if we fall through (branch not taken). */
7168 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7173 /* All short conditional branches except backwards with an unfilled
7177 strcpy (buf
, "{vextrs,|extrw,s,}");
7179 strcpy (buf
, "{bvb,|bb,}");
7180 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7181 strcpy (buf
, "extrd,s,*");
7182 else if (GET_MODE (operands
[0]) == DImode
)
7183 strcpy (buf
, "bb,*");
7184 if ((which
== 0 && negated
)
7185 || (which
== 1 && ! negated
))
7190 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7191 else if (nullify
&& negated
)
7193 if (branch_needs_nop_p (insn
))
7194 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7196 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7198 else if (nullify
&& ! negated
)
7200 if (branch_needs_nop_p (insn
))
7201 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7203 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7205 else if (! nullify
&& negated
)
7206 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7207 else if (! nullify
&& ! negated
)
7208 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7211 /* All long conditionals. Note a short backward branch with an
7212 unfilled delay slot is treated just like a long backward branch
7213 with an unfilled delay slot. */
7215 /* Handle weird backwards branch with a filled delay slot
7216 which is nullified. */
7217 if (dbr_sequence_length () != 0
7218 && ! forward_branch_p (insn
)
7221 strcpy (buf
, "{bvb,|bb,}");
7222 if (GET_MODE (operands
[0]) == DImode
)
7224 if ((which
== 0 && negated
)
7225 || (which
== 1 && ! negated
))
7230 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7232 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7234 /* Handle short backwards branch with an unfilled delay slot.
7235 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7236 taken and untaken branches. */
7237 else if (dbr_sequence_length () == 0
7238 && ! forward_branch_p (insn
)
7239 && INSN_ADDRESSES_SET_P ()
7240 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7241 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7243 strcpy (buf
, "{bvb,|bb,}");
7244 if (GET_MODE (operands
[0]) == DImode
)
7246 if ((which
== 0 && negated
)
7247 || (which
== 1 && ! negated
))
7252 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7254 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7258 strcpy (buf
, "{vextrs,|extrw,s,}");
7259 if (GET_MODE (operands
[0]) == DImode
)
7260 strcpy (buf
, "extrd,s,*");
7261 if ((which
== 0 && negated
)
7262 || (which
== 1 && ! negated
))
7266 if (nullify
&& negated
)
7267 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7268 else if (nullify
&& ! negated
)
7269 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7271 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7273 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7278 /* The reversed conditional branch must branch over one additional
7279 instruction if the delay slot is filled and needs to be extracted
7280 by pa_output_lbranch. If the delay slot is empty or this is a
7281 nullified forward branch, the instruction after the reversed
7282 condition branch must be nullified. */
7283 if (dbr_sequence_length () == 0
7284 || (nullify
&& forward_branch_p (insn
)))
7288 operands
[4] = GEN_INT (length
);
7293 operands
[4] = GEN_INT (length
+ 4);
7296 if (GET_MODE (operands
[0]) == DImode
)
7297 strcpy (buf
, "bb,*");
7299 strcpy (buf
, "{bvb,|bb,}");
7300 if ((which
== 0 && negated
)
7301 || (which
== 1 && !negated
))
7306 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7308 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7309 output_asm_insn (buf
, operands
);
7310 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7316 /* Return the output template for emitting a dbra type insn.
7318 Note it may perform some output operations on its own before
7319 returning the final output string. */
7321 pa_output_dbra (rtx
*operands
, rtx_insn
*insn
, int which_alternative
)
7323 int length
= get_attr_length (insn
);
7325 /* A conditional branch to the following instruction (e.g. the delay slot) is
7326 asking for a disaster. Be prepared! */
7328 if (branch_to_delay_slot_p (insn
))
7330 if (which_alternative
== 0)
7331 return "ldo %1(%0),%0";
7332 else if (which_alternative
== 1)
7334 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
7335 output_asm_insn ("ldw -16(%%r30),%4", operands
);
7336 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7337 return "{fldws|fldw} -16(%%r30),%0";
7341 output_asm_insn ("ldw %0,%4", operands
);
7342 return "ldo %1(%4),%4\n\tstw %4,%0";
7346 if (which_alternative
== 0)
7348 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7351 /* If this is a long branch with its delay slot unfilled, set `nullify'
7352 as it can nullify the delay slot and save a nop. */
7353 if (length
== 8 && dbr_sequence_length () == 0)
7356 /* If this is a short forward conditional branch which did not get
7357 its delay slot filled, the delay slot can still be nullified. */
7358 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7359 nullify
= forward_branch_p (insn
);
7366 if (branch_needs_nop_p (insn
))
7367 return "addib,%C2,n %1,%0,%3%#";
7369 return "addib,%C2,n %1,%0,%3";
7372 return "addib,%C2 %1,%0,%3";
7375 /* Handle weird backwards branch with a fulled delay slot
7376 which is nullified. */
7377 if (dbr_sequence_length () != 0
7378 && ! forward_branch_p (insn
)
7380 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7381 /* Handle short backwards branch with an unfilled delay slot.
7382 Using a addb;nop rather than addi;bl saves 1 cycle for both
7383 taken and untaken branches. */
7384 else if (dbr_sequence_length () == 0
7385 && ! forward_branch_p (insn
)
7386 && INSN_ADDRESSES_SET_P ()
7387 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7388 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7389 return "addib,%C2 %1,%0,%3%#";
7391 /* Handle normal cases. */
7393 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7395 return "addi,%N2 %1,%0,%0\n\tb %3";
7398 /* The reversed conditional branch must branch over one additional
7399 instruction if the delay slot is filled and needs to be extracted
7400 by pa_output_lbranch. If the delay slot is empty or this is a
7401 nullified forward branch, the instruction after the reversed
7402 condition branch must be nullified. */
7403 if (dbr_sequence_length () == 0
7404 || (nullify
&& forward_branch_p (insn
)))
7408 operands
[4] = GEN_INT (length
);
7413 operands
[4] = GEN_INT (length
+ 4);
7417 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7419 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7421 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7425 /* Deal with gross reload from FP register case. */
7426 else if (which_alternative
== 1)
7428 /* Move loop counter from FP register to MEM then into a GR,
7429 increment the GR, store the GR into MEM, and finally reload
7430 the FP register from MEM from within the branch's delay slot. */
7431 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7433 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7435 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7436 else if (length
== 28)
7437 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7440 operands
[5] = GEN_INT (length
- 16);
7441 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7442 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7443 return pa_output_lbranch (operands
[3], insn
, 0);
7446 /* Deal with gross reload from memory case. */
7449 /* Reload loop counter from memory, the store back to memory
7450 happens in the branch's delay slot. */
7451 output_asm_insn ("ldw %0,%4", operands
);
7453 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7454 else if (length
== 16)
7455 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7458 operands
[5] = GEN_INT (length
- 4);
7459 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7460 return pa_output_lbranch (operands
[3], insn
, 0);
7465 /* Return the output template for emitting a movb type insn.
7467 Note it may perform some output operations on its own before
7468 returning the final output string. */
7470 pa_output_movb (rtx
*operands
, rtx_insn
*insn
, int which_alternative
,
7471 int reverse_comparison
)
7473 int length
= get_attr_length (insn
);
7475 /* A conditional branch to the following instruction (e.g. the delay slot) is
7476 asking for a disaster. Be prepared! */
7478 if (branch_to_delay_slot_p (insn
))
7480 if (which_alternative
== 0)
7481 return "copy %1,%0";
7482 else if (which_alternative
== 1)
7484 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7485 return "{fldws|fldw} -16(%%r30),%0";
7487 else if (which_alternative
== 2)
7493 /* Support the second variant. */
7494 if (reverse_comparison
)
7495 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7497 if (which_alternative
== 0)
7499 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7502 /* If this is a long branch with its delay slot unfilled, set `nullify'
7503 as it can nullify the delay slot and save a nop. */
7504 if (length
== 8 && dbr_sequence_length () == 0)
7507 /* If this is a short forward conditional branch which did not get
7508 its delay slot filled, the delay slot can still be nullified. */
7509 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7510 nullify
= forward_branch_p (insn
);
7517 if (branch_needs_nop_p (insn
))
7518 return "movb,%C2,n %1,%0,%3%#";
7520 return "movb,%C2,n %1,%0,%3";
7523 return "movb,%C2 %1,%0,%3";
7526 /* Handle weird backwards branch with a filled delay slot
7527 which is nullified. */
7528 if (dbr_sequence_length () != 0
7529 && ! forward_branch_p (insn
)
7531 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7533 /* Handle short backwards branch with an unfilled delay slot.
7534 Using a movb;nop rather than or;bl saves 1 cycle for both
7535 taken and untaken branches. */
7536 else if (dbr_sequence_length () == 0
7537 && ! forward_branch_p (insn
)
7538 && INSN_ADDRESSES_SET_P ()
7539 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7540 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7541 return "movb,%C2 %1,%0,%3%#";
7542 /* Handle normal cases. */
7544 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7546 return "or,%N2 %1,%%r0,%0\n\tb %3";
7549 /* The reversed conditional branch must branch over one additional
7550 instruction if the delay slot is filled and needs to be extracted
7551 by pa_output_lbranch. If the delay slot is empty or this is a
7552 nullified forward branch, the instruction after the reversed
7553 condition branch must be nullified. */
7554 if (dbr_sequence_length () == 0
7555 || (nullify
&& forward_branch_p (insn
)))
7559 operands
[4] = GEN_INT (length
);
7564 operands
[4] = GEN_INT (length
+ 4);
7568 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7570 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7572 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7575 /* Deal with gross reload for FP destination register case. */
7576 else if (which_alternative
== 1)
7578 /* Move source register to MEM, perform the branch test, then
7579 finally load the FP register from MEM from within the branch's
7581 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7583 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7584 else if (length
== 16)
7585 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7588 operands
[4] = GEN_INT (length
- 4);
7589 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7590 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7591 return pa_output_lbranch (operands
[3], insn
, 0);
7594 /* Deal with gross reload from memory case. */
7595 else if (which_alternative
== 2)
7597 /* Reload loop counter from memory, the store back to memory
7598 happens in the branch's delay slot. */
7600 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7601 else if (length
== 12)
7602 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7605 operands
[4] = GEN_INT (length
);
7606 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7608 return pa_output_lbranch (operands
[3], insn
, 0);
7611 /* Handle SAR as a destination. */
7615 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7616 else if (length
== 12)
7617 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7620 operands
[4] = GEN_INT (length
);
7621 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7623 return pa_output_lbranch (operands
[3], insn
, 0);
7628 /* Copy any FP arguments in INSN into integer registers. */
7630 copy_fp_args (rtx_insn
*insn
)
7635 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7637 int arg_mode
, regno
;
7638 rtx use
= XEXP (link
, 0);
7640 if (! (GET_CODE (use
) == USE
7641 && GET_CODE (XEXP (use
, 0)) == REG
7642 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7645 arg_mode
= GET_MODE (XEXP (use
, 0));
7646 regno
= REGNO (XEXP (use
, 0));
7648 /* Is it a floating point register? */
7649 if (regno
>= 32 && regno
<= 39)
7651 /* Copy the FP register into an integer register via memory. */
7652 if (arg_mode
== SFmode
)
7654 xoperands
[0] = XEXP (use
, 0);
7655 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7656 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7657 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7661 xoperands
[0] = XEXP (use
, 0);
7662 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7663 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7664 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7665 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7671 /* Compute length of the FP argument copy sequence for INSN. */
7673 length_fp_args (rtx_insn
*insn
)
7678 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7680 int arg_mode
, regno
;
7681 rtx use
= XEXP (link
, 0);
7683 if (! (GET_CODE (use
) == USE
7684 && GET_CODE (XEXP (use
, 0)) == REG
7685 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7688 arg_mode
= GET_MODE (XEXP (use
, 0));
7689 regno
= REGNO (XEXP (use
, 0));
7691 /* Is it a floating point register? */
7692 if (regno
>= 32 && regno
<= 39)
7694 if (arg_mode
== SFmode
)
7704 /* Return the attribute length for the millicode call instruction INSN.
7705 The length must match the code generated by pa_output_millicode_call.
7706 We include the delay slot in the returned length as it is better to
7707 over estimate the length than to under estimate it. */
7710 pa_attr_length_millicode_call (rtx_insn
*insn
)
7712 unsigned long distance
= -1;
7713 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7715 if (INSN_ADDRESSES_SET_P ())
7717 distance
= (total
+ insn_current_reference_address (insn
));
7718 if (distance
< total
)
7724 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7729 else if (TARGET_PORTABLE_RUNTIME
)
7733 if (!TARGET_LONG_CALLS
&& distance
< MAX_PCREL17F_OFFSET
)
7743 /* INSN is a function call.
7745 CALL_DEST is the routine we are calling. */
7748 pa_output_millicode_call (rtx_insn
*insn
, rtx call_dest
)
7750 int attr_length
= get_attr_length (insn
);
7751 int seq_length
= dbr_sequence_length ();
7754 xoperands
[0] = call_dest
;
7756 /* Handle the common case where we are sure that the branch will
7757 reach the beginning of the $CODE$ subspace. The within reach
7758 form of the $$sh_func_adrs call has a length of 28. Because it
7759 has an attribute type of sh_func_adrs, it never has a nonzero
7760 sequence length (i.e., the delay slot is never filled). */
7761 if (!TARGET_LONG_CALLS
7762 && (attr_length
== 8
7763 || (attr_length
== 28
7764 && get_attr_type (insn
) == TYPE_SH_FUNC_ADRS
)))
7766 xoperands
[1] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7767 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7773 /* It might seem that one insn could be saved by accessing
7774 the millicode function using the linkage table. However,
7775 this doesn't work in shared libraries and other dynamically
7776 loaded objects. Using a pc-relative sequence also avoids
7777 problems related to the implicit use of the gp register. */
7778 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
7779 xoperands
[2] = xoperands
[1];
7780 pa_output_pic_pcrel_sequence (xoperands
);
7781 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7783 else if (TARGET_PORTABLE_RUNTIME
)
7785 /* Pure portable runtime doesn't allow be/ble; we also don't
7786 have PIC support in the assembler/linker, so this sequence
7789 /* Get the address of our target into %r1. */
7790 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7791 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7793 /* Get our return address into %r31. */
7794 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7795 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7797 /* Jump to our target address in %r1. */
7798 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7802 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7804 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7806 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7810 xoperands
[1] = gen_rtx_REG (Pmode
, 31);
7811 xoperands
[2] = gen_rtx_REG (Pmode
, 1);
7812 pa_output_pic_pcrel_sequence (xoperands
);
7814 /* Adjust return address. */
7815 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands
);
7817 /* Jump to our target address in %r1. */
7818 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7822 if (seq_length
== 0)
7823 output_asm_insn ("nop", xoperands
);
7828 /* Return the attribute length of the call instruction INSN. The SIBCALL
7829 flag indicates whether INSN is a regular call or a sibling call. The
7830 length returned must be longer than the code actually generated by
7831 pa_output_call. Since branch shortening is done before delay branch
7832 sequencing, there is no way to determine whether or not the delay
7833 slot will be filled during branch shortening. Even when the delay
7834 slot is filled, we may have to add a nop if the delay slot contains
7835 a branch that can't reach its target. Thus, we always have to include
7836 the delay slot in the length estimate. This used to be done in
7837 pa_adjust_insn_length but we do it here now as some sequences always
7838 fill the delay slot and we can save four bytes in the estimate for
7842 pa_attr_length_call (rtx_insn
*insn
, int sibcall
)
7845 rtx call
, call_dest
;
7848 rtx pat
= PATTERN (insn
);
7849 unsigned long distance
= -1;
7851 gcc_assert (CALL_P (insn
));
7853 if (INSN_ADDRESSES_SET_P ())
7855 unsigned long total
;
7857 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7858 distance
= (total
+ insn_current_reference_address (insn
));
7859 if (distance
< total
)
7863 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7865 /* Get the call rtx. */
7866 call
= XVECEXP (pat
, 0, 0);
7867 if (GET_CODE (call
) == SET
)
7868 call
= SET_SRC (call
);
7870 gcc_assert (GET_CODE (call
) == CALL
);
7872 /* Determine if this is a local call. */
7873 call_dest
= XEXP (XEXP (call
, 0), 0);
7874 call_decl
= SYMBOL_REF_DECL (call_dest
);
7875 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7877 /* pc-relative branch. */
7878 if (!TARGET_LONG_CALLS
7879 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7880 || distance
< MAX_PCREL17F_OFFSET
))
7883 /* 64-bit plabel sequence. */
7884 else if (TARGET_64BIT
&& !local_call
)
7885 length
+= sibcall
? 28 : 24;
7887 /* non-pic long absolute branch sequence. */
7888 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7891 /* long pc-relative branch sequence. */
7892 else if (TARGET_LONG_PIC_SDIFF_CALL
7893 || (TARGET_GAS
&& !TARGET_SOM
&& local_call
))
7897 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7901 /* 32-bit plabel sequence. */
7907 length
+= length_fp_args (insn
);
7917 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7925 /* INSN is a function call.
7927 CALL_DEST is the routine we are calling. */
7930 pa_output_call (rtx_insn
*insn
, rtx call_dest
, int sibcall
)
7932 int seq_length
= dbr_sequence_length ();
7933 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7934 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7937 xoperands
[0] = call_dest
;
7939 /* Handle the common case where we're sure that the branch will reach
7940 the beginning of the "$CODE$" subspace. This is the beginning of
7941 the current function if we are in a named section. */
7942 if (!TARGET_LONG_CALLS
&& pa_attr_length_call (insn
, sibcall
) == 8)
7944 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7945 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7949 if (TARGET_64BIT
&& !local_call
)
7951 /* ??? As far as I can tell, the HP linker doesn't support the
7952 long pc-relative sequence described in the 64-bit runtime
7953 architecture. So, we use a slightly longer indirect call. */
7954 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7955 xoperands
[1] = gen_label_rtx ();
7957 /* If this isn't a sibcall, we put the load of %r27 into the
7958 delay slot. We can't do this in a sibcall as we don't
7959 have a second call-clobbered scratch register available.
7960 We don't need to do anything when generating fast indirect
7962 if (seq_length
!= 0 && !sibcall
)
7964 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7967 /* Now delete the delay insn. */
7968 SET_INSN_DELETED (NEXT_INSN (insn
));
7972 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7973 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7974 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7978 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7979 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7980 output_asm_insn ("bve (%%r1)", xoperands
);
7984 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7985 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7986 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7992 int indirect_call
= 0;
7994 /* Emit a long call. There are several different sequences
7995 of increasing length and complexity. In most cases,
7996 they don't allow an instruction in the delay slot. */
7997 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7998 && !TARGET_LONG_PIC_SDIFF_CALL
7999 && !(TARGET_GAS
&& !TARGET_SOM
&& local_call
)
8007 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
8009 /* A non-jump insn in the delay slot. By definition we can
8010 emit this insn before the call (and in fact before argument
8012 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
8015 /* Now delete the delay insn. */
8016 SET_INSN_DELETED (NEXT_INSN (insn
));
8020 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
8022 /* This is the best sequence for making long calls in
8023 non-pic code. Unfortunately, GNU ld doesn't provide
8024 the stub needed for external calls, and GAS's support
8025 for this with the SOM linker is buggy. It is safe
8026 to use this for local calls. */
8027 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8029 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
8033 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8036 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
8038 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8044 /* The HP assembler and linker can handle relocations for
8045 the difference of two symbols. The HP assembler
8046 recognizes the sequence as a pc-relative call and
8047 the linker provides stubs when needed. */
8049 /* GAS currently can't generate the relocations that
8050 are needed for the SOM linker under HP-UX using this
8051 sequence. The GNU linker doesn't generate the stubs
8052 that are needed for external calls on TARGET_ELF32
8053 with this sequence. For now, we have to use a longer
8054 plabel sequence when using GAS for non local calls. */
8055 if (TARGET_LONG_PIC_SDIFF_CALL
8056 || (TARGET_GAS
&& !TARGET_SOM
&& local_call
))
8058 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
8059 xoperands
[2] = xoperands
[1];
8060 pa_output_pic_pcrel_sequence (xoperands
);
8064 /* Emit a long plabel-based call sequence. This is
8065 essentially an inline implementation of $$dyncall.
8066 We don't actually try to call $$dyncall as this is
8067 as difficult as calling the function itself. */
8068 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
8069 xoperands
[1] = gen_label_rtx ();
8071 /* Since the call is indirect, FP arguments in registers
8072 need to be copied to the general registers. Then, the
8073 argument relocation stub will copy them back. */
8075 copy_fp_args (insn
);
8079 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
8080 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
8081 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
8085 output_asm_insn ("addil LR'%0-$global$,%%r27",
8087 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8091 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
8092 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
8093 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
8094 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
8096 if (!sibcall
&& !TARGET_PA_20
)
8098 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8099 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8100 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
8102 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
8109 output_asm_insn ("bve (%%r1)", xoperands
);
8114 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8115 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
8119 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8124 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8125 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8130 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8131 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
8133 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
8137 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8138 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
8140 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
8143 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
8145 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8153 if (seq_length
== 0)
8154 output_asm_insn ("nop", xoperands
);
8159 /* Return the attribute length of the indirect call instruction INSN.
8160 The length must match the code generated by output_indirect call.
8161 The returned length includes the delay slot. Currently, the delay
8162 slot of an indirect call sequence is not exposed and it is used by
8163 the sequence itself. */
8166 pa_attr_length_indirect_call (rtx_insn
*insn
)
8168 unsigned long distance
= -1;
8169 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8171 if (INSN_ADDRESSES_SET_P ())
8173 distance
= (total
+ insn_current_reference_address (insn
));
8174 if (distance
< total
)
8181 if (TARGET_FAST_INDIRECT_CALLS
)
8184 if (TARGET_PORTABLE_RUNTIME
)
8187 /* Inline version of $$dyncall. */
8188 if ((TARGET_NO_SPACE_REGS
|| TARGET_PA_20
) && !optimize_size
)
8191 if (!TARGET_LONG_CALLS
8192 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
8193 || distance
< MAX_PCREL17F_OFFSET
))
8196 /* Out of reach, can use ble. */
8200 /* Inline version of $$dyncall. */
8201 if (TARGET_NO_SPACE_REGS
|| TARGET_PA_20
)
8207 /* Long PIC pc-relative call. */
8212 pa_output_indirect_call (rtx_insn
*insn
, rtx call_dest
)
8219 xoperands
[0] = call_dest
;
8220 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8221 "bve,l (%%r2),%%r2\n\t"
8222 "ldd 24(%0),%%r27", xoperands
);
8226 /* First the special case for kernels, level 0 systems, etc. */
8227 if (TARGET_FAST_INDIRECT_CALLS
)
8229 pa_output_arg_descriptor (insn
);
8231 return "bve,l,n (%%r22),%%r2\n\tnop";
8232 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8235 if (TARGET_PORTABLE_RUNTIME
)
8237 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8238 "ldo R'$$dyncall(%%r31),%%r31", xoperands
);
8239 pa_output_arg_descriptor (insn
);
8240 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8243 /* Maybe emit a fast inline version of $$dyncall. */
8244 if ((TARGET_NO_SPACE_REGS
|| TARGET_PA_20
) && !optimize_size
)
8246 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8247 "ldw 2(%%r22),%%r19\n\t"
8248 "ldw -2(%%r22),%%r22", xoperands
);
8249 pa_output_arg_descriptor (insn
);
8250 if (TARGET_NO_SPACE_REGS
)
8253 return "bve,l,n (%%r22),%%r2\n\tnop";
8254 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8256 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8259 /* Now the normal case -- we can reach $$dyncall directly or
8260 we're sure that we can get there via a long-branch stub.
8262 No need to check target flags as the length uniquely identifies
8263 the remaining cases. */
8264 length
= pa_attr_length_indirect_call (insn
);
8267 pa_output_arg_descriptor (insn
);
8269 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8270 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8271 variant of the B,L instruction can't be used on the SOM target. */
8272 if (TARGET_PA_20
&& !TARGET_SOM
)
8273 return "b,l,n $$dyncall,%%r2\n\tnop";
8275 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8278 /* Long millicode call, but we are not generating PIC or portable runtime
8282 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands
);
8283 pa_output_arg_descriptor (insn
);
8284 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8287 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8288 pc-relative call sequence is five instructions. The inline PA 2.0
8289 version of $$dyncall is also five instructions. The PA 1.X versions
8290 are longer but still an overall win. */
8291 if (TARGET_NO_SPACE_REGS
|| TARGET_PA_20
|| !optimize_size
)
8293 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8294 "ldw 2(%%r22),%%r19\n\t"
8295 "ldw -2(%%r22),%%r22", xoperands
);
8296 if (TARGET_NO_SPACE_REGS
)
8298 pa_output_arg_descriptor (insn
);
8300 return "bve,l,n (%%r22),%%r2\n\tnop";
8301 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8305 pa_output_arg_descriptor (insn
);
8306 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8308 output_asm_insn ("bl .+8,%%r2\n\t"
8309 "ldo 16(%%r2),%%r2\n\t"
8310 "ldsid (%%r22),%%r1\n\t"
8311 "mtsp %%r1,%%sr0", xoperands
);
8312 pa_output_arg_descriptor (insn
);
8313 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8316 /* We need a long PIC call to $$dyncall. */
8317 xoperands
[0] = gen_rtx_SYMBOL_REF (Pmode
, "$$dyncall");
8318 xoperands
[1] = gen_rtx_REG (Pmode
, 2);
8319 xoperands
[2] = gen_rtx_REG (Pmode
, 1);
8320 pa_output_pic_pcrel_sequence (xoperands
);
8321 pa_output_arg_descriptor (insn
);
8322 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8325 /* In HPUX 8.0's shared library scheme, special relocations are needed
8326 for function labels if they might be passed to a function
8327 in a shared library (because shared libraries don't live in code
8328 space), and special magic is needed to construct their address. */
/* pa_encode_label: per the preceding file comment, marks function label
   SYMBOL_REFs so SOM shared-library address magic can be applied --
   TODO confirm against upstream gcc/config/pa/pa.c.
   NOTE(review): lossy extraction -- statements are split across lines and
   upstream line numbers (e.g. "8331") are fused into the text; declarations
   of p/newstr and the copy loop are missing.  Code kept byte-identical.  */
8331 pa_encode_label (rtx sym
)
8333 const char *str
= XSTR (sym
, 0);
8334 int len
= strlen (str
) + 1;
8337 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8341 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
/* pa_encode_section_info: TARGET_ENCODE_SECTION_INFO hook.  Visible logic:
   save SYMBOL_FLAG_REFERENCED on re-encoding, delegate to
   default_encode_section_info, set SYMBOL_REF_FLAG for text-space decls,
   and route FUNCTION_DECLs through pa_encode_label.
   NOTE(review): lossy extraction -- statements split across lines, upstream
   line numbers fused in, assignment target on line "8351" missing.
   Code kept byte-identical; see gcc/config/pa/pa.c for the real body.  */
8345 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8347 int old_referenced
= 0;
8349 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8351 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8353 default_encode_section_info (decl
, rtl
, first
);
8355 if (first
&& TEXT_SPACE_P (decl
))
8357 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8358 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8359 pa_encode_label (XEXP (rtl
, 0));
8361 else if (old_referenced
)
8362 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
8365 /* This is sort of inverse to pa_encode_section_info. */
/* pa_strip_name_encoding: inverse of pa_encode_section_info per the
   preceding comment -- skips a leading '@' and a leading '*'.
   NOTE(review): lossy extraction; the trailing "return str;" and braces
   are missing from this capture.  Code kept byte-identical.  */
8368 pa_strip_name_encoding (const char *str
)
8370 str
+= (*str
== '@');
8371 str
+= (*str
== '*');
8375 /* Returns 1 if OP is a function label involved in a simple addition
8376 with a constant. Used to keep certain patterns from matching
8377 during instruction combination. */
/* pa_is_function_label_plus_const: nonzero when OP is
   (plus (function label) (const_int)), used to block certain combiner
   patterns (see preceding comment).
   NOTE(review): lossy extraction -- the CONST-stripping statement after
   the "if" is missing.  Code kept byte-identical.  */
8379 pa_is_function_label_plus_const (rtx op
)
8381 /* Strip off any CONST.  */
8382 if (GET_CODE (op
) == CONST
)
8385 return (GET_CODE (op
) == PLUS
8386 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8387 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8390 /* Output assembly code for a thunk to FUNCTION. */
/* pa_asm_output_mi_thunk: TARGET_ASM_OUTPUT_MI_THUNK hook -- emits the
   assembly for a this-adjusting thunk to FUNCTION (delta added to %r26),
   choosing among short-branch, 64-bit, portable-runtime, SOM-PIC and
   long-call sequences, then updates last_address/total_code_bytes.
   NOTE(review): lossy extraction -- braces, if-conditions and several
   statements are missing; upstream line numbers are fused into the code.
   Code kept byte-identical; see gcc/config/pa/pa.c for the real body.  */
8393 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8394 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8397 static unsigned int current_thunk_number
;
8398 int val_14
= VAL_14_BITS_P (delta
);
8399 unsigned int old_last_address
= last_address
, nbytes
= 0;
8403 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8404 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8405 xoperands
[2] = GEN_INT (delta
);
8407 final_start_function (emit_barrier (), file
, 1);
8409 /* Output the thunk. We know that the function is in the same
8410 translation unit (i.e., the same space) as the thunk, and that
8411 thunks are output after their method. Thus, we don't need an
8412 external branch to reach the function. With SOM and GAS,
8413 functions and thunks are effectively in different sections.
8414 Thus, we can always use a IA-relative branch and the linker
8415 will add a long branch stub if necessary.
8417 However, we have to be careful when generating PIC code on the
8418 SOM port to ensure that the sequence does not transfer to an
8419 import stub for the target function as this could clobber the
8420 return value saved at SP-24. This would also apply to the
8421 32-bit linux port if the multi-space model is implemented. */
8422 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8423 && !(flag_pic
&& TREE_PUBLIC (function
))
8424 && (TARGET_GAS
|| last_address
< 262132))
8425 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8426 && ((targetm_common
.have_named_sections
8427 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8428 /* The GNU 64-bit linker has rather poor stub management.
8429 So, we use a long branch from thunks that aren't in
8430 the same section as the target function. */
8432 && (DECL_SECTION_NAME (thunk_fndecl
)
8433 != DECL_SECTION_NAME (function
)))
8434 || ((DECL_SECTION_NAME (thunk_fndecl
)
8435 == DECL_SECTION_NAME (function
))
8436 && last_address
< 262132)))
8437 /* In this case, we need to be able to reach the start of
8438 the stub table even though the function is likely closer
8439 and can be jumped to directly. */
8440 || (targetm_common
.have_named_sections
8441 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8442 && DECL_SECTION_NAME (function
) == NULL
8443 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8445 || (!targetm_common
.have_named_sections
8446 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8449 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8451 output_asm_insn ("b %0", xoperands
);
8455 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8460 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8464 else if (TARGET_64BIT
)
8468 /* We only have one call-clobbered scratch register, so we can't
8469 make use of the delay slot if delta doesn't fit in 14 bits. */
8472 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8473 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8476 /* Load function address into %r1. */
8477 xop
[0] = xoperands
[0];
8478 xop
[1] = gen_rtx_REG (Pmode
, 1);
8480 pa_output_pic_pcrel_sequence (xop
);
8484 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8485 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8490 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8494 else if (TARGET_PORTABLE_RUNTIME
)
8496 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8497 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8500 output_asm_insn ("ldil L'%2,%%r26", xoperands
);
8502 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8506 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8511 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands
);
8515 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8517 /* The function is accessible from outside this module. The only
8518 way to avoid an import stub between the thunk and function is to
8519 call the function directly with an indirect sequence similar to
8520 that used by $$dyncall. This is possible because $$dyncall acts
8521 as the import stub in an indirect call. */
8522 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8523 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8524 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8525 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8526 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8527 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8528 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8529 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8530 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8534 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8540 output_asm_insn ("bve (%%r22)", xoperands
);
8543 else if (TARGET_NO_SPACE_REGS
)
8545 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8550 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8551 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8552 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8557 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8559 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8565 /* Load function address into %r22. */
8566 xop
[0] = xoperands
[0];
8567 xop
[1] = gen_rtx_REG (Pmode
, 1);
8568 xop
[2] = gen_rtx_REG (Pmode
, 22);
8569 pa_output_pic_pcrel_sequence (xop
);
8572 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8574 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8578 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8583 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8590 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8592 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8593 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8597 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8602 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8607 final_end_function ();
8609 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8611 switch_to_section (data_section
);
8612 output_asm_insn (".align 4", xoperands
);
8613 ASM_OUTPUT_LABEL (file
, label
);
8614 output_asm_insn (".word P'%0", xoperands
);
8617 current_thunk_number
++;
8618 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8619 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8620 last_address
+= nbytes
;
8621 if (old_last_address
> last_address
)
8622 last_address
= UINT_MAX
;
8623 update_total_code_bytes (nbytes
);
8626 /* Only direct calls to static functions are allowed to be sibling (tail)
8629 This restriction is necessary because some linker generated stubs will
8630 store return pointers into rp' in some cases which might clobber a
8631 live value already in rp'.
8633 In a sibcall the current function and the target function share stack
8634 space. Thus if the path to the current function and the path to the
8635 target function save a value in rp', they save the value into the
8636 same stack slot, which has undesirable consequences.
8638 Because of the deferred binding nature of shared libraries any function
8639 with external scope could be in a different load module and thus require
8640 rp' to be saved when calling that function. So sibcall optimizations
8641 can only be safe for static function.
8643 Note that GCC never needs return value relocations, so we don't have to
8644 worry about static calls with return value relocations (which require
8647 It is safe to perform a sibcall optimization when the target function
8648 will never return. */
/* pa_function_ok_for_sibcall: TARGET_FUNCTION_OK_FOR_SIBCALL hook.
   Visible logic: reject under TARGET_PORTABLE_RUNTIME and allow only
   non-public (translation-unit-local) decls -- rationale is in the long
   comment above about linker stubs clobbering %rp.
   NOTE(review): lossy extraction -- the portable-runtime "return false"
   and other lines are missing.  Code kept byte-identical.  */
8650 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8652 if (TARGET_PORTABLE_RUNTIME
)
8655 /* Sibcalls are not ok because the arg pointer register is not a fixed
8656 register. This prevents the sibcall optimization from occurring. In
8657 addition, there are problems with stub placement using GNU ld. This
8658 is because a normal sibcall branch uses a 17-bit relocation while
8659 a regular call branch uses a 22-bit relocation. As a result, more
8660 care needs to be taken in the placement of long-branch stubs. */
8664 /* Sibcalls are only ok within a translation unit. */
8665 return (decl
&& !TREE_PUBLIC (decl
));
8668 /* ??? Addition is not commutative on the PA due to the weird implicit
8669 space register selection rules for memory addresses. Therefore, we
8670 don't consider a + b == b + a, as this might be inside a MEM. */
/* pa_commutative_p: TARGET_COMMUTATIVE_P hook.  Treats PLUS inside a
   possible MEM as non-commutative because of PA space-register selection
   (see the comment above).
   NOTE(review): lossy extraction; code kept byte-identical.  */
8672 pa_commutative_p (const_rtx x
, int outer_code
)
8674 return (COMMUTATIVE_P (x
)
8675 && (TARGET_NO_SPACE_REGS
8676 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8677 || GET_CODE (x
) != PLUS
));
8680 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8681 use in fmpyadd instructions. */
/* pa_fmpyaddoperands: returns 1 when the six OPERANDS are suitable for an
   fmpyadd instruction (FP mode, all same mode, all registers, no conflicts
   between multiply and add operands; SFmode restricted to FPUPPER_REGS).
   NOTE(review): lossy extraction -- the early "return 0;" statements and
   the final "return 1;" are missing from this capture.
   Code kept byte-identical.  */
8683 pa_fmpyaddoperands (rtx
*operands
)
8685 machine_mode mode
= GET_MODE (operands
[0]);
8687 /* Must be a floating point mode. */
8688 if (mode
!= SFmode
&& mode
!= DFmode
)
8691 /* All modes must be the same. */
8692 if (! (mode
== GET_MODE (operands
[1])
8693 && mode
== GET_MODE (operands
[2])
8694 && mode
== GET_MODE (operands
[3])
8695 && mode
== GET_MODE (operands
[4])
8696 && mode
== GET_MODE (operands
[5])))
8699 /* All operands must be registers. */
8700 if (! (GET_CODE (operands
[1]) == REG
8701 && GET_CODE (operands
[2]) == REG
8702 && GET_CODE (operands
[3]) == REG
8703 && GET_CODE (operands
[4]) == REG
8704 && GET_CODE (operands
[5]) == REG
))
8707 /* Only 2 real operands to the addition. One of the input operands must
8708 be the same as the output operand. */
8709 if (! rtx_equal_p (operands
[3], operands
[4])
8710 && ! rtx_equal_p (operands
[3], operands
[5]))
8713 /* Inout operand of add cannot conflict with any operands from multiply. */
8714 if (rtx_equal_p (operands
[3], operands
[0])
8715 || rtx_equal_p (operands
[3], operands
[1])
8716 || rtx_equal_p (operands
[3], operands
[2]))
8719 /* multiply cannot feed into addition operands. */
8720 if (rtx_equal_p (operands
[4], operands
[0])
8721 || rtx_equal_p (operands
[5], operands
[0]))
8724 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8726 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8727 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8728 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8729 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8730 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8731 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8734 /* Passed. Operands are suitable for fmpyadd. */
/* pa_asm_out_constructor (only when collect2 is not used): encode the
   symbol as a function label if needed, then dispatch to the ctors
   section / named-section / stabs default emitters per configuration.
   NOTE(review): lossy extraction -- #else/#endif lines are missing.
   Code kept byte-identical.  */
8738 #if !defined(USE_COLLECT2)
8740 pa_asm_out_constructor (rtx symbol
, int priority
)
8742 if (!function_label_operand (symbol
, VOIDmode
))
8743 pa_encode_label (symbol
);
8745 #ifdef CTORS_SECTION_ASM_OP
8746 default_ctor_section_asm_out_constructor (symbol
, priority
);
8748 # ifdef TARGET_ASM_NAMED_SECTION
8749 default_named_section_asm_out_constructor (symbol
, priority
);
8751 default_stabs_asm_out_constructor (symbol
, priority
);
/* pa_asm_out_destructor: mirror of pa_asm_out_constructor for the dtors
   section.
   NOTE(review): lossy extraction -- #else/#endif lines are missing.
   Code kept byte-identical.  */
8757 pa_asm_out_destructor (rtx symbol
, int priority
)
8759 if (!function_label_operand (symbol
, VOIDmode
))
8760 pa_encode_label (symbol
);
8762 #ifdef DTORS_SECTION_ASM_OP
8763 default_dtor_section_asm_out_destructor (symbol
, priority
);
8765 # ifdef TARGET_ASM_NAMED_SECTION
8766 default_named_section_asm_out_destructor (symbol
, priority
);
8768 default_stabs_asm_out_destructor (symbol
, priority
);
8774 /* This function places uninitialized global data in the bss section.
8775 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8776 function on the SOM port to prevent uninitialized global data from
8777 being placed in the data section. */
/* pa_asm_output_aligned_bss: emits NAME as aligned uninitialized data in
   the bss section via .align/.block (see the comment above about keeping
   SOM uninitialized globals out of .data).
   NOTE(review): lossy extraction -- parameter lines (name, align) and
   #endif lines are missing.  Code kept byte-identical.  */
8780 pa_asm_output_aligned_bss (FILE *stream
,
8782 unsigned HOST_WIDE_INT size
,
8785 switch_to_section (bss_section
);
8786 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8788 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8789 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8792 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8793 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8796 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8797 ASM_OUTPUT_LABEL (stream
, name
);
8798 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8801 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8802 that doesn't allow the alignment of global common storage to be directly
8803 specified. The SOM linker aligns common storage based on the rounded
8804 value of the NUM_BYTES parameter in the .comm directive. It's not
8805 possible to use the .align directive as it doesn't affect the alignment
8806 of the label associated with a .comm directive. */
/* pa_asm_output_aligned_common: emits a .comm directive, clamping the
   requested alignment to the SOM linker's maximum (see comment above:
   the linker aligns common storage from the rounded size).
   NOTE(review): lossy extraction -- parameter lines and some statements
   are missing.  Code kept byte-identical.  */
8809 pa_asm_output_aligned_common (FILE *stream
,
8811 unsigned HOST_WIDE_INT size
,
8814 unsigned int max_common_align
;
8816 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8817 if (align
> max_common_align
)
8819 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8820 "for global common data. Using %u",
8821 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8822 align
= max_common_align
;
8825 switch_to_section (bss_section
);
8827 assemble_name (stream
, name
);
8828 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8829 MAX (size
, align
/ BITS_PER_UNIT
));
8832 /* We can't use .comm for local common storage as the SOM linker effectively
8833 treats the symbol as universal and uses the same storage for local symbols
8834 with the same name in different object files. The .block directive
8835 reserves an uninitialized block of storage. However, it's not common
8836 storage. Fortunately, GCC never requests common storage with the same
8837 name in any given translation unit. */
/* pa_asm_output_aligned_local: emits local (non-common) storage with
   .block in bss, because SOM .comm merges same-named locals across
   objects (see comment above).
   NOTE(review): lossy extraction -- parameter lines and conditional
   compilation markers are missing.  Code kept byte-identical.  */
8840 pa_asm_output_aligned_local (FILE *stream
,
8842 unsigned HOST_WIDE_INT size
,
8845 switch_to_section (bss_section
);
8846 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8849 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8850 assemble_name (stream
, name
);
8851 fprintf (stream
, "\n");
8854 ASM_OUTPUT_LABEL (stream
, name
);
8855 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8858 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8859 use in fmpysub instructions. */
/* pa_fmpysuboperands: returns 1 when the six OPERANDS are suitable for an
   fmpysub instruction; like pa_fmpyaddoperands but subtraction is not
   commutative, so operands[4] must equal operands[3].
   NOTE(review): lossy extraction -- the "return 0;"/"return 1;" lines are
   missing.  Code kept byte-identical.  */
8861 pa_fmpysuboperands (rtx
*operands
)
8863 machine_mode mode
= GET_MODE (operands
[0]);
8865 /* Must be a floating point mode. */
8866 if (mode
!= SFmode
&& mode
!= DFmode
)
8869 /* All modes must be the same. */
8870 if (! (mode
== GET_MODE (operands
[1])
8871 && mode
== GET_MODE (operands
[2])
8872 && mode
== GET_MODE (operands
[3])
8873 && mode
== GET_MODE (operands
[4])
8874 && mode
== GET_MODE (operands
[5])))
8877 /* All operands must be registers. */
8878 if (! (GET_CODE (operands
[1]) == REG
8879 && GET_CODE (operands
[2]) == REG
8880 && GET_CODE (operands
[3]) == REG
8881 && GET_CODE (operands
[4]) == REG
8882 && GET_CODE (operands
[5]) == REG
))
8885 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8886 operation, so operands[4] must be the same as operand[3]. */
8887 if (! rtx_equal_p (operands
[3], operands
[4]))
8890 /* multiply cannot feed into subtraction. */
8891 if (rtx_equal_p (operands
[5], operands
[0]))
8894 /* Inout operand of sub cannot conflict with any operands from multiply. */
8895 if (rtx_equal_p (operands
[3], operands
[0])
8896 || rtx_equal_p (operands
[3], operands
[1])
8897 || rtx_equal_p (operands
[3], operands
[2]))
8900 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8902 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8903 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8904 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8905 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8906 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8907 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8910 /* Passed. Operands are suitable for fmpysub. */
8914 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8915 constants for a MULT embedded inside a memory address. */
/* pa_mem_shadd_constant_p: 1 when VAL is 2, 4 or 8 -- the multipliers a
   MULT inside a memory address may use (see comment above).
   NOTE(review): lossy extraction -- the return statements are missing.
   Code kept byte-identical.  */
8917 pa_mem_shadd_constant_p (int val
)
8919 if (val
== 2 || val
== 4 || val
== 8)
8925 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8926 constants for shadd instructions. */
/* pa_shadd_constant_p: 1 when VAL is 1, 2 or 3 -- valid shift counts for
   shadd instructions (see comment above).
   NOTE(review): lossy extraction -- the return statements are missing.
   Code kept byte-identical.  */
8928 pa_shadd_constant_p (int val
)
8930 if (val
== 1 || val
== 2 || val
== 3)
8936 /* Return TRUE if INSN branches forward. */
/* forward_branch_p: TRUE if INSN branches forward, using insn addresses
   when available, otherwise by scanning forward from INSN for its label.
   NOTE(review): lossy extraction -- the fallback scan loop's body and
   returns are missing.  Code kept byte-identical.  */
8939 forward_branch_p (rtx_insn
*insn
)
8941 rtx lab
= JUMP_LABEL (insn
);
8943 /* The INSN must have a jump label. */
8944 gcc_assert (lab
!= NULL_RTX
);
8946 if (INSN_ADDRESSES_SET_P ())
8947 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8954 insn
= NEXT_INSN (insn
);
8960 /* Output an unconditional move and branch insn. */
8963 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
8965 int length
= get_attr_length (insn
);
8967 /* These are the cases in which we win. */
8969 return "mov%I1b,tr %1,%0,%2";
8971 /* None of the following cases win, but they don't lose either. */
8974 if (dbr_sequence_length () == 0)
8976 /* Nothing in the delay slot, fake it by putting the combined
8977 insn (the copy or add) in the delay slot of a bl. */
8978 if (GET_CODE (operands
[1]) == CONST_INT
)
8979 return "b %2\n\tldi %1,%0";
8981 return "b %2\n\tcopy %1,%0";
8985 /* Something in the delay slot, but we've got a long branch. */
8986 if (GET_CODE (operands
[1]) == CONST_INT
)
8987 return "ldi %1,%0\n\tb %2";
8989 return "copy %1,%0\n\tb %2";
8993 if (GET_CODE (operands
[1]) == CONST_INT
)
8994 output_asm_insn ("ldi %1,%0", operands
);
8996 output_asm_insn ("copy %1,%0", operands
);
8997 return pa_output_lbranch (operands
[2], insn
, 1);
9000 /* Output an unconditional add and branch insn. */
/* pa_output_parallel_addb: emits a combined add+branch ("addb,tr") when
   short, else separate add and branch / long-branch sequence; normalizes
   operands so operand0 is the in/out operand first.
   NOTE(review): lossy extraction -- the length checks guarding each case
   are missing.  Code kept byte-identical.  */
9003 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
9005 int length
= get_attr_length (insn
);
9007 /* To make life easy we want operand0 to be the shared input/output
9008 operand and operand1 to be the readonly operand. */
9009 if (operands
[0] == operands
[1])
9010 operands
[1] = operands
[2];
9012 /* These are the cases in which we win. */
9014 return "add%I1b,tr %1,%0,%3";
9016 /* None of the following cases win, but they don't lose either. */
9019 if (dbr_sequence_length () == 0)
9020 /* Nothing in the delay slot, fake it by putting the combined
9021 insn (the copy or add) in the delay slot of a bl. */
9022 return "b %3\n\tadd%I1 %1,%0,%0";
9024 /* Something in the delay slot, but we've got a long branch. */
9025 return "add%I1 %1,%0,%0\n\tb %3";
9028 output_asm_insn ("add%I1 %1,%0,%0", operands
);
9029 return pa_output_lbranch (operands
[3], insn
, 1);
/* NOTE(review): fragment of the machine-reorg hook (presumably pa_reorg --
   the function header is missing from this capture): runs
   remove_useless_addtr_insns and, on pre-PA8000 CPUs, the insn-combining
   pass below.  Code kept byte-identical.  */
9032 /* We use this hook to perform a PA specific optimization which is difficult
9033 to do in earlier passes. */
9038 remove_useless_addtr_insns (1);
9040 if (pa_cpu
< PROCESSOR_8000
)
9041 pa_combine_instructions ();
9044 /* The PA has a number of odd instructions which can perform multiple
9045 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9046 it may be profitable to combine two instructions into one instruction
9047 with two outputs. It's not profitable PA2.0 machines because the
9048 two outputs would take two slots in the reorder buffers.
9050 This routine finds instructions which can be combined and combines
9051 them. We only support some of the potential combinations, and we
9052 only try common ways to find suitable instructions.
9054 * addb can add two registers or a register and a small integer
9055 and jump to a nearby (+-8k) location. Normally the jump to the
9056 nearby location is conditional on the result of the add, but by
9057 using the "true" condition we can make the jump unconditional.
9058 Thus addb can perform two independent operations in one insn.
9060 * movb is similar to addb in that it can perform a reg->reg
9061 or small immediate->reg copy and jump to a nearby (+-8k location).
9063 * fmpyadd and fmpysub can perform a FP multiply and either an
9064 FP add or FP sub if the operands of the multiply and add/sub are
9065 independent (there are other minor restrictions). Note both
9066 the fmpy and fadd/fsub can in theory move to better spots according
9067 to data dependencies, but for now we require the fmpy stay at a
9070 * Many of the memory operations can perform pre & post updates
9071 of index registers. GCC's pre/post increment/decrement addressing
9072 is far too simple to take advantage of all the possibilities. This
9073 pass may not be suitable since those insns may not be independent.
9075 * comclr can compare two ints or an int and a register, nullify
9076 the following instruction and zero some other register. This
9077 is more difficult to use as it's harder to find an insn which
9078 will generate a comclr than finding something like an unconditional
9079 branch. (conditional moves & long branches create comclr insns).
9081 * Most arithmetic operations can conditionally skip the next
9082 instruction. They can be viewed as "perform this operation
9083 and conditionally jump to this nearby location" (where nearby
9084 is an insns away). These are difficult to use due to the
9085 branch length restrictions. */
/* pa_combine_instructions: scans the insn stream for "anchor" insns
   (fmpy/faddsub/backward unconditional branches) and "floating" insns that
   can merge with them into a single two-output PARALLEL (fmpyadd/fmpysub,
   addb/movb), per the long comment above.  O(n^2)-ish; only run at higher
   optimization levels.
   NOTE(review): lossy extraction -- loop conditions, braces and several
   statements are missing; upstream line numbers are fused into the code.
   Code kept byte-identical; see gcc/config/pa/pa.c for the real body.  */
9088 pa_combine_instructions (void)
9092 /* This can get expensive since the basic algorithm is on the
9093 order of O(n^2) (or worse). Only do it for -O2 or higher
9094 levels of optimization. */
9098 /* Walk down the list of insns looking for "anchor" insns which
9099 may be combined with "floating" insns. As the name implies,
9100 "anchor" instructions don't move, while "floating" insns may
9102 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
9103 rtx_insn
*new_rtx
= make_insn_raw (par
);
9105 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
9107 enum attr_pa_combine_type anchor_attr
;
9108 enum attr_pa_combine_type floater_attr
;
9110 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9111 Also ignore any special USE insns. */
9112 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
9113 || GET_CODE (PATTERN (anchor
)) == USE
9114 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
9117 anchor_attr
= get_attr_pa_combine_type (anchor
);
9118 /* See if anchor is an insn suitable for combination. */
9119 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
9120 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9121 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9122 && ! forward_branch_p (anchor
)))
9126 for (floater
= PREV_INSN (anchor
);
9128 floater
= PREV_INSN (floater
))
9130 if (NOTE_P (floater
)
9131 || (NONJUMP_INSN_P (floater
)
9132 && (GET_CODE (PATTERN (floater
)) == USE
9133 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9136 /* Anything except a regular INSN will stop our search. */
9137 if (! NONJUMP_INSN_P (floater
))
9143 /* See if FLOATER is suitable for combination with the
9145 floater_attr
= get_attr_pa_combine_type (floater
);
9146 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9147 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9148 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9149 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9151 /* If ANCHOR and FLOATER can be combined, then we're
9152 done with this pass. */
9153 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9154 SET_DEST (PATTERN (floater
)),
9155 XEXP (SET_SRC (PATTERN (floater
)), 0),
9156 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9160 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9161 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9163 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9165 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9166 SET_DEST (PATTERN (floater
)),
9167 XEXP (SET_SRC (PATTERN (floater
)), 0),
9168 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9173 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9174 SET_DEST (PATTERN (floater
)),
9175 SET_SRC (PATTERN (floater
)),
9176 SET_SRC (PATTERN (floater
))))
9182 /* If we didn't find anything on the backwards scan try forwards. */
9184 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9185 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9187 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9189 if (NOTE_P (floater
)
9190 || (NONJUMP_INSN_P (floater
)
9191 && (GET_CODE (PATTERN (floater
)) == USE
9192 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9196 /* Anything except a regular INSN will stop our search. */
9197 if (! NONJUMP_INSN_P (floater
))
9203 /* See if FLOATER is suitable for combination with the
9205 floater_attr
= get_attr_pa_combine_type (floater
);
9206 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9207 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9208 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9209 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9211 /* If ANCHOR and FLOATER can be combined, then we're
9212 done with this pass. */
9213 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9214 SET_DEST (PATTERN (floater
)),
9215 XEXP (SET_SRC (PATTERN (floater
)),
9217 XEXP (SET_SRC (PATTERN (floater
)),
9224 /* FLOATER will be nonzero if we found a suitable floating
9225 insn for combination with ANCHOR. */
9227 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9228 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9230 /* Emit the new instruction and delete the old anchor. */
9231 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9232 copy_rtx (PATTERN (floater
)));
9233 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9234 emit_insn_before (temp
, anchor
);
9236 SET_INSN_DELETED (anchor
);
9238 /* Emit a special USE insn for FLOATER, then delete
9239 the floating insn. */
9240 temp
= copy_rtx (PATTERN (floater
));
9241 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9242 delete_insn (floater
);
9247 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9249 /* Emit the new_jump instruction and delete the old anchor. */
9250 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9251 copy_rtx (PATTERN (floater
)));
9252 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9253 temp
= emit_jump_insn_before (temp
, anchor
);
9255 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9256 SET_INSN_DELETED (anchor
);
9258 /* Emit a special USE insn for FLOATER, then delete
9259 the floating insn. */
9260 temp
= copy_rtx (PATTERN (floater
));
9261 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9262 delete_insn (floater
);
/* pa_can_combine_p: builds a PARALLEL of ANCHOR's and FLOATER's patterns
   in NEW_RTX, recognizes it and checks constraints, then verifies DEST is
   not used and SRC1/SRC2 are not set in the range between the two insns.
   NOTE(review): lossy extraction -- the start/end assignments, the src1/
   src2 parameters and the return statements are missing.
   Code kept byte-identical.  */
9270 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9271 int reversed
, rtx dest
,
9274 int insn_code_number
;
9275 rtx_insn
*start
, *end
;
9277 /* Create a PARALLEL with the patterns of ANCHOR and
9278 FLOATER, try to recognize it, then test constraints
9279 for the resulting pattern.
9281 If the pattern doesn't match or the constraints
9282 aren't met keep searching for a suitable floater
9284 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9285 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9286 INSN_CODE (new_rtx
) = -1;
9287 insn_code_number
= recog_memoized (new_rtx
);
9288 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9289 if (insn_code_number
< 0
9290 || (extract_insn (new_rtx
),
9291 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9305 /* There's up to three operands to consider. One
9306 output and two inputs.
9308 The output must not be used between FLOATER & ANCHOR
9309 exclusive. The inputs must not be set between
9310 FLOATER and ANCHOR exclusive. */
9312 if (reg_used_between_p (dest
, start
, end
))
9315 if (reg_set_between_p (src1
, start
, end
))
9318 if (reg_set_between_p (src2
, start
, end
))
9321 /* If we get here, then everything is good. */
9325 /* Return nonzero if references for INSN are delayed.
9327 Millicode insns are actually function calls with some special
9328 constraints on arguments and register usage.
9330 Millicode calls always expect their arguments in the integer argument
9331 registers, and always return their result in %r29 (ret1). They
9332 are expected to clobber their arguments, %r1, %r29, and the return
9333 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9335 This function tells reorg that the references to arguments and
9336 millicode calls do not appear to happen until after the millicode call.
9337 This allows reorg to put insns which set the argument registers into the
9338 delay slot of the millicode call -- thus they act more like traditional
9341 Note we cannot consider side effects of the insn to be delayed because
9342 the branch and link insn will clobber the return pointer. If we happened
9343 to use the return pointer in the delay slot of the call, then we lose.
9345 get_attr_type will try to recognize the given insn, so make sure to
9346 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
/* pa_insn_refs_are_delayed: nonzero for millicode call insns, whose
   argument references reorg should treat as happening after the call
   (see the long comment above); filters out SEQUENCE/USE/CLOBBER before
   calling get_attr_type.
   NOTE(review): lossy extraction; code kept byte-identical.  */
9349 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9351 return ((NONJUMP_INSN_P (insn
)
9352 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9353 && GET_CODE (PATTERN (insn
)) != USE
9354 && GET_CODE (PATTERN (insn
)) != CLOBBER
9355 && get_attr_type (insn
) == TYPE_MILLI
));
9358 /* Promote the return value, but not the arguments. */
/* pa_promote_function_mode: TARGET_PROMOTE_FUNCTION_MODE hook -- promote
   only return values (for_return != 0), per the comment above.
   NOTE(review): lossy extraction -- the mode/for_return parameter lines
   and the "return mode;" for the argument case are missing.
   Code kept byte-identical.  */
9361 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9363 int *punsignedp ATTRIBUTE_UNUSED
,
9364 const_tree fntype ATTRIBUTE_UNUSED
,
9367 if (for_return
== 0)
9369 return promote_mode (type
, mode
, punsignedp
);
9372 /* On the HP-PA the value is found in register(s) 28(-29), unless
9373 the mode is SF or DF. Then the value is returned in fr4 (32).
9375 This must perform the same promotions as PROMOTE_MODE, else promoting
9376 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9378 Small structures must be returned in a PARALLEL on PA64 in order
9379 to match the HP Compiler ABI. */
/* pa_function_value: TARGET_FUNCTION_VALUE hook -- values return in
   r28(-29) except SF/DF in fr4 (reg 32); PA64 small aggregates go in a
   PARALLEL to match the HP compiler ABI (see comment above).
   NOTE(review): lossy extraction -- branch guards (e.g. the TARGET_64BIT
   tests), loop bodies and some declarations are missing.
   Code kept byte-identical; see gcc/config/pa/pa.c for the real body.  */
9382 pa_function_value (const_tree valtype
,
9383 const_tree func ATTRIBUTE_UNUSED
,
9384 bool outgoing ATTRIBUTE_UNUSED
)
9386 machine_mode valmode
;
9388 if (AGGREGATE_TYPE_P (valtype
)
9389 || TREE_CODE (valtype
) == COMPLEX_TYPE
9390 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9392 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9394 /* Handle aggregates that fit exactly in a word or double word. */
9395 if ((valsize
& (UNITS_PER_WORD
- 1)) == 0)
9396 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9400 /* Aggregates with a size less than or equal to 128 bits are
9401 returned in GR 28(-29). They are left justified. The pad
9402 bits are undefined. Larger aggregates are returned in
9406 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9408 for (i
= 0; i
< ub
; i
++)
9410 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9411 gen_rtx_REG (DImode
, 28 + i
),
9416 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9418 else if (valsize
> UNITS_PER_WORD
)
9420 /* Aggregates 5 to 8 bytes in size are returned in general
9421 registers r28-r29 in the same manner as other non
9422 floating-point objects. The data is right-justified and
9423 zero-extended to 64 bits. This is opposite to the normal
9424 justification used on big endian targets and requires
9425 special treatment. */
9426 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9427 gen_rtx_REG (DImode
, 28), const0_rtx
);
9428 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9432 if ((INTEGRAL_TYPE_P (valtype
)
9433 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9434 || POINTER_TYPE_P (valtype
))
9435 valmode
= word_mode
;
9437 valmode
= TYPE_MODE (valtype
);
9439 if (TREE_CODE (valtype
) == REAL_TYPE
9440 && !AGGREGATE_TYPE_P (valtype
)
9441 && TYPE_MODE (valtype
) != TFmode
9442 && !TARGET_SOFT_FLOAT
)
9443 return gen_rtx_REG (valmode
, 32);
9445 return gen_rtx_REG (valmode
, 28);
9448 /* Implement the TARGET_LIBCALL_VALUE hook. */
9451 pa_libcall_value (machine_mode mode
,
9452 const_rtx fun ATTRIBUTE_UNUSED
)
9454 if (! TARGET_SOFT_FLOAT
9455 && (mode
== SFmode
|| mode
== DFmode
))
9456 return gen_rtx_REG (mode
, 32);
9458 return gen_rtx_REG (mode
, 28);
9461 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9464 pa_function_value_regno_p (const unsigned int regno
)
9467 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9473 /* Update the data in CUM to advance over an argument
9474 of mode MODE and data type TYPE.
9475 (TYPE is null for libcalls where that information may not be available.) */
9478 pa_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
9479 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9481 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9482 int arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9484 cum
->nargs_prototype
--;
9485 cum
->words
+= (arg_size
9486 + ((cum
->words
& 01)
9487 && type
!= NULL_TREE
9491 /* Return the location of a parameter that is passed in a register or NULL
9492 if the parameter has any component that is passed in memory.
9494 This is new code and will be pushed to into the net sources after
9497 ??? We might want to restructure this so that it looks more like other
9500 pa_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
9501 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9503 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9504 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9511 if (mode
== VOIDmode
)
9514 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9516 /* If this arg would be passed partially or totally on the stack, then
9517 this routine should return zero. pa_arg_partial_bytes will
9518 handle arguments which are split between regs and stack slots if
9519 the ABI mandates split arguments. */
9522 /* The 32-bit ABI does not split arguments. */
9523 if (cum
->words
+ arg_size
> max_arg_words
)
9529 alignment
= cum
->words
& 1;
9530 if (cum
->words
+ alignment
>= max_arg_words
)
9534 /* The 32bit ABIs and the 64bit ABIs are rather different,
9535 particularly in their handling of FP registers. We might
9536 be able to cleverly share code between them, but I'm not
9537 going to bother in the hope that splitting them up results
9538 in code that is more easily understood. */
9542 /* Advance the base registers to their current locations.
9544 Remember, gprs grow towards smaller register numbers while
9545 fprs grow to higher register numbers. Also remember that
9546 although FP regs are 32-bit addressable, we pretend that
9547 the registers are 64-bits wide. */
9548 gpr_reg_base
= 26 - cum
->words
;
9549 fpr_reg_base
= 32 + cum
->words
;
9551 /* Arguments wider than one word and small aggregates need special
9555 || (type
&& (AGGREGATE_TYPE_P (type
)
9556 || TREE_CODE (type
) == COMPLEX_TYPE
9557 || TREE_CODE (type
) == VECTOR_TYPE
)))
9559 /* Double-extended precision (80-bit), quad-precision (128-bit)
9560 and aggregates including complex numbers are aligned on
9561 128-bit boundaries. The first eight 64-bit argument slots
9562 are associated one-to-one, with general registers r26
9563 through r19, and also with floating-point registers fr4
9564 through fr11. Arguments larger than one word are always
9565 passed in general registers.
9567 Using a PARALLEL with a word mode register results in left
9568 justified data on a big-endian target. */
9571 int i
, offset
= 0, ub
= arg_size
;
9573 /* Align the base register. */
9574 gpr_reg_base
-= alignment
;
9576 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9577 for (i
= 0; i
< ub
; i
++)
9579 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9580 gen_rtx_REG (DImode
, gpr_reg_base
),
9586 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9591 /* If the argument is larger than a word, then we know precisely
9592 which registers we must use. */
9606 /* Structures 5 to 8 bytes in size are passed in the general
9607 registers in the same manner as other non floating-point
9608 objects. The data is right-justified and zero-extended
9609 to 64 bits. This is opposite to the normal justification
9610 used on big endian targets and requires special treatment.
9611 We now define BLOCK_REG_PADDING to pad these objects.
9612 Aggregates, complex and vector types are passed in the same
9613 manner as structures. */
9615 || (type
&& (AGGREGATE_TYPE_P (type
)
9616 || TREE_CODE (type
) == COMPLEX_TYPE
9617 || TREE_CODE (type
) == VECTOR_TYPE
)))
9619 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9620 gen_rtx_REG (DImode
, gpr_reg_base
),
9622 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9627 /* We have a single word (32 bits). A simple computation
9628 will get us the register #s we need. */
9629 gpr_reg_base
= 26 - cum
->words
;
9630 fpr_reg_base
= 32 + 2 * cum
->words
;
9634 /* Determine if the argument needs to be passed in both general and
9635 floating point registers. */
9636 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9637 /* If we are doing soft-float with portable runtime, then there
9638 is no need to worry about FP regs. */
9639 && !TARGET_SOFT_FLOAT
9640 /* The parameter must be some kind of scalar float, else we just
9641 pass it in integer registers. */
9642 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9643 /* The target function must not have a prototype. */
9644 && cum
->nargs_prototype
<= 0
9645 /* libcalls do not need to pass items in both FP and general
9647 && type
!= NULL_TREE
9648 /* All this hair applies to "outgoing" args only. This includes
9649 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9651 /* Also pass outgoing floating arguments in both registers in indirect
9652 calls with the 32 bit ABI and the HP assembler since there is no
9653 way to the specify argument locations in static functions. */
9658 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
9664 gen_rtx_EXPR_LIST (VOIDmode
,
9665 gen_rtx_REG (mode
, fpr_reg_base
),
9667 gen_rtx_EXPR_LIST (VOIDmode
,
9668 gen_rtx_REG (mode
, gpr_reg_base
),
9673 /* See if we should pass this parameter in a general register. */
9674 if (TARGET_SOFT_FLOAT
9675 /* Indirect calls in the normal 32bit ABI require all arguments
9676 to be passed in general registers. */
9677 || (!TARGET_PORTABLE_RUNTIME
9681 /* If the parameter is not a scalar floating-point parameter,
9682 then it belongs in GPRs. */
9683 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9684 /* Structure with single SFmode field belongs in GPR. */
9685 || (type
&& AGGREGATE_TYPE_P (type
)))
9686 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9688 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9693 /* Arguments larger than one word are double word aligned. */
9696 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
9698 bool singleword
= (type
9699 ? (integer_zerop (TYPE_SIZE (type
))
9700 || !TREE_CONSTANT (TYPE_SIZE (type
))
9701 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9702 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9704 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9707 /* If this arg would be passed totally in registers or totally on the stack,
9708 then this routine should return zero. */
9711 pa_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
9712 tree type
, bool named ATTRIBUTE_UNUSED
)
9714 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9715 unsigned int max_arg_words
= 8;
9716 unsigned int offset
= 0;
9721 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9724 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9725 /* Arg fits fully into registers. */
9727 else if (cum
->words
+ offset
>= max_arg_words
)
9728 /* Arg fully on the stack. */
9732 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9736 /* A get_unnamed_section callback for switching to the text section.
9738 This function is only used with SOM. Because we don't support
9739 named subspaces, we can only create a new subspace or switch back
9740 to the default text subspace. */
9743 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9745 gcc_assert (TARGET_SOM
);
9748 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9750 /* We only want to emit a .nsubspa directive once at the
9751 start of the function. */
9752 cfun
->machine
->in_nsubspa
= 1;
9754 /* Create a new subspace for the text. This provides
9755 better stub placement and one-only functions. */
9757 && DECL_ONE_ONLY (cfun
->decl
)
9758 && !DECL_WEAK (cfun
->decl
))
9760 output_section_asm_op ("\t.SPACE $TEXT$\n"
9761 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9762 "ACCESS=44,SORT=24,COMDAT");
9768 /* There isn't a current function or the body of the current
9769 function has been completed. So, we are changing to the
9770 text section to output debugging information. Thus, we
9771 need to forget that we are in the text section so that
9772 varasm.c will call us when text_section is selected again. */
9773 gcc_assert (!cfun
|| !cfun
->machine
9774 || cfun
->machine
->in_nsubspa
== 2);
9777 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9780 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  DATA is the
   assembler directive string registered for the section.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  output_section_asm_op (data);
}
9793 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9796 pa_som_asm_init_sections (void)
9799 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9801 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9802 is not being generated. */
9803 som_readonly_data_section
9804 = get_unnamed_section (0, output_section_asm_op
,
9805 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9807 /* When secondary definitions are not supported, SOM makes readonly
9808 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9810 som_one_only_readonly_data_section
9811 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9813 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9814 "ACCESS=0x2c,SORT=16,COMDAT");
9817 /* When secondary definitions are not supported, SOM makes data one-only
9818 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9819 som_one_only_data_section
9820 = get_unnamed_section (SECTION_WRITE
,
9821 som_output_comdat_data_section_asm_op
,
9822 "\t.SPACE $PRIVATE$\n"
9823 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9824 "ACCESS=31,SORT=24,COMDAT");
9827 som_tm_clone_table_section
9828 = get_unnamed_section (0, output_section_asm_op
,
9829 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9831 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9832 which reference data within the $TEXT$ space (for example constant
9833 strings in the $LIT$ subspace).
9835 The assemblers (GAS and HP as) both have problems with handling
9836 the difference of two symbols which is the other correct way to
9837 reference constant data during PIC code generation.
9839 So, there's no way to reference constant data which is in the
9840 $TEXT$ space during PIC generation. Instead place all constant
9841 data into the $PRIVATE$ subspace (this reduces sharing, but it
9842 works correctly). */
9843 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9845 /* We must not have a reference to an external symbol defined in a
9846 shared library in a readonly section, else the SOM linker will
9849 So, we force exception information into the data section. */
9850 exception_section
= data_section
;
9853 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9856 pa_som_tm_clone_table_section (void)
9858 return som_tm_clone_table_section
;
9861 /* On hpux10, the linker will give an error if we have a reference
9862 in the read-only data section to a symbol defined in a shared
9863 library. Therefore, expressions that might require a reloc can
9864 not be placed in the read-only data section. */
9867 pa_select_section (tree exp
, int reloc
,
9868 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9870 if (TREE_CODE (exp
) == VAR_DECL
9871 && TREE_READONLY (exp
)
9872 && !TREE_THIS_VOLATILE (exp
)
9873 && DECL_INITIAL (exp
)
9874 && (DECL_INITIAL (exp
) == error_mark_node
9875 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9879 && DECL_ONE_ONLY (exp
)
9880 && !DECL_WEAK (exp
))
9881 return som_one_only_readonly_data_section
;
9883 return readonly_data_section
;
9885 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9886 return readonly_data_section
;
9888 && TREE_CODE (exp
) == VAR_DECL
9889 && DECL_ONE_ONLY (exp
)
9890 && !DECL_WEAK (exp
))
9891 return som_one_only_data_section
;
9893 return data_section
;
9896 /* Implement pa_reloc_rw_mask. */
9899 pa_reloc_rw_mask (void)
9901 /* We force (const (plus (symbol) (const_int))) to memory when the
9902 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9903 handle this construct in read-only memory and we want to avoid
9904 this for ELF. So, we always force an RTX needing relocation to
9905 the data section. */
/* Globalize the label NAME by emitting an .EXPORT directive to STREAM.
   We only handle DATA objects here; functions are globalized in
   ASM_DECLARE_FUNCTION_NAME.  */

void
pa_globalize_label (FILE *stream, const char *name)
{
  /* Function names are exported elsewhere; ignore them here.  */
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
9922 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9925 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9926 int incoming ATTRIBUTE_UNUSED
)
9928 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9931 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9934 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9936 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9937 PA64 ABI says that objects larger than 128 bits are returned in memory.
9938 Note, int_size_in_bytes can return -1 if the size of the object is
9939 variable or larger than the maximum value that can be expressed as
9940 a HOST_WIDE_INT. It can also return zero for an empty type. The
9941 simplest way to handle variable and empty types is to pass them in
9942 memory. This avoids problems in defining the boundaries of argument
9943 slots, allocating registers, etc. */
9944 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9945 || int_size_in_bytes (type
) <= 0);
9948 /* Structure to hold declaration and name of external symbols that are
9949 emitted by GCC. We generate a vector of these symbols and output them
9950 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9951 This avoids putting out names that are never really used. */
9953 typedef struct GTY(()) extern_symbol
9959 /* Define gc'd vector type for extern_symbol. */
9961 /* Vector of extern_symbol pointers. */
9962 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
9964 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9965 /* Mark DECL (name NAME) as an external reference (assembler output
9966 file FILE). This saves the names to output at the end of the file
9967 if actually referenced. */
9970 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
9972 gcc_assert (file
== asm_out_file
);
9973 extern_symbol p
= {decl
, name
};
9974 vec_safe_push (extern_symbols
, p
);
9978 /* Output text required at the end of an assembler file.
9979 This includes deferred plabels and .import directives for
9980 all external symbols that were actually referenced. */
9985 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9989 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9990 output_deferred_profile_counters ();
9993 output_deferred_plabels ();
9995 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9996 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
9998 tree decl
= p
->decl
;
10000 if (!TREE_ASM_WRITTEN (decl
)
10001 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
10002 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
10005 vec_free (extern_symbols
);
10008 if (NEED_INDICATE_EXEC_STACK
)
10009 file_end_indicate_exec_stack ();
10012 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10015 pa_can_change_mode_class (machine_mode from
, machine_mode to
,
10016 reg_class_t rclass
)
10021 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
10024 /* Reject changes to/from modes with zero size. */
10025 if (!GET_MODE_SIZE (from
) || !GET_MODE_SIZE (to
))
10028 /* Reject changes to/from complex and vector modes. */
10029 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
10030 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
10033 /* There is no way to load QImode or HImode values directly from memory
10034 to a FP register. SImode loads to the FP registers are not zero
10035 extended. On the 64-bit target, this conflicts with the definition
10036 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10037 different sizes in the floating-point registers. */
10038 if (MAYBE_FP_REG_CLASS_P (rclass
))
10041 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10042 in specific sets of registers. Thus, we cannot allow changing
10043 to a larger mode when it's larger than a word. */
10044 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
10045 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
10051 /* Implement TARGET_MODES_TIEABLE_P.
10053 We should return FALSE for QImode and HImode because these modes
10054 are not ok in the floating-point registers. However, this prevents
10055 tieing these modes to SImode and DImode in the general registers.
10056 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10057 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10058 in the floating-point registers. */
10061 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10063 /* Don't tie modes in different classes. */
10064 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
10071 /* Length in units of the trampoline instruction code. */
10073 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10076 /* Output assembler code for a block containing the constant parts
10077 of a trampoline, leaving space for the variable parts.\
10079 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10080 and then branches to the specified routine.
10082 This code template is copied from text segment to stack location
10083 and then patched with pa_trampoline_init to contain valid values,
10084 and then entered as a subroutine.
10086 It is best to keep this as small as possible to avoid having to
10087 flush multiple lines in the cache. */
10090 pa_asm_trampoline_template (FILE *f
)
10094 fputs ("\tldw 36(%r22),%r21\n", f
);
10095 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
10096 if (ASSEMBLER_DIALECT
== 0)
10097 fputs ("\tdepi 0,31,2,%r21\n", f
);
10099 fputs ("\tdepwi 0,31,2,%r21\n", f
);
10100 fputs ("\tldw 4(%r21),%r19\n", f
);
10101 fputs ("\tldw 0(%r21),%r21\n", f
);
10104 fputs ("\tbve (%r21)\n", f
);
10105 fputs ("\tldw 40(%r22),%r29\n", f
);
10106 fputs ("\t.word 0\n", f
);
10107 fputs ("\t.word 0\n", f
);
10111 fputs ("\tldsid (%r21),%r1\n", f
);
10112 fputs ("\tmtsp %r1,%sr0\n", f
);
10113 fputs ("\tbe 0(%sr0,%r21)\n", f
);
10114 fputs ("\tldw 40(%r22),%r29\n", f
);
10116 fputs ("\t.word 0\n", f
);
10117 fputs ("\t.word 0\n", f
);
10118 fputs ("\t.word 0\n", f
);
10119 fputs ("\t.word 0\n", f
);
10123 fputs ("\t.dword 0\n", f
);
10124 fputs ("\t.dword 0\n", f
);
10125 fputs ("\t.dword 0\n", f
);
10126 fputs ("\t.dword 0\n", f
);
10127 fputs ("\tmfia %r31\n", f
);
10128 fputs ("\tldd 24(%r31),%r1\n", f
);
10129 fputs ("\tldd 24(%r1),%r27\n", f
);
10130 fputs ("\tldd 16(%r1),%r1\n", f
);
10131 fputs ("\tbve (%r1)\n", f
);
10132 fputs ("\tldd 32(%r31),%r31\n", f
);
10133 fputs ("\t.dword 0 ; fptr\n", f
);
10134 fputs ("\t.dword 0 ; static link\n", f
);
10138 /* Emit RTL insns to initialize the variable parts of a trampoline.
10139 FNADDR is an RTX for the address of the function's pure code.
10140 CXT is an RTX for the static chain value for the function.
10142 Move the function address to the trampoline template at offset 36.
10143 Move the static chain value to trampoline template at offset 40.
10144 Move the trampoline address to trampoline template at offset 44.
10145 Move r19 to trampoline template at offset 48. The latter two
10146 words create a plabel for the indirect call to the trampoline.
10148 A similar sequence is used for the 64-bit port but the plabel is
10149 at the beginning of the trampoline.
10151 Finally, the cache entries for the trampoline code are flushed.
10152 This is necessary to ensure that the trampoline instruction sequence
10153 is written to memory prior to any attempts at prefetching the code
10157 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10159 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10160 rtx start_addr
= gen_reg_rtx (Pmode
);
10161 rtx end_addr
= gen_reg_rtx (Pmode
);
10162 rtx line_length
= gen_reg_rtx (Pmode
);
10165 emit_block_move (m_tramp
, assemble_trampoline_template (),
10166 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10167 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10171 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10172 emit_move_insn (tmp
, fnaddr
);
10173 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10174 emit_move_insn (tmp
, chain_value
);
10176 /* Create a fat pointer for the trampoline. */
10177 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10178 emit_move_insn (tmp
, r_tramp
);
10179 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10180 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10182 /* fdc and fic only use registers for the address to flush,
10183 they do not accept integer displacements. We align the
10184 start and end addresses to the beginning of their respective
10185 cache lines to minimize the number of lines flushed. */
10186 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10187 GEN_INT (-MIN_CACHELINE_SIZE
)));
10188 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10189 TRAMPOLINE_CODE_SIZE
-1));
10190 emit_insn (gen_andsi3 (end_addr
, tmp
,
10191 GEN_INT (-MIN_CACHELINE_SIZE
)));
10192 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10193 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10194 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10195 gen_reg_rtx (Pmode
),
10196 gen_reg_rtx (Pmode
)));
10200 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10201 emit_move_insn (tmp
, fnaddr
);
10202 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10203 emit_move_insn (tmp
, chain_value
);
10205 /* Create a fat pointer for the trampoline. */
10206 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10207 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10209 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10210 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10212 /* fdc and fic only use registers for the address to flush,
10213 they do not accept integer displacements. We align the
10214 start and end addresses to the beginning of their respective
10215 cache lines to minimize the number of lines flushed. */
10216 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10217 emit_insn (gen_anddi3 (start_addr
, tmp
,
10218 GEN_INT (-MIN_CACHELINE_SIZE
)));
10219 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10220 TRAMPOLINE_CODE_SIZE
- 1));
10221 emit_insn (gen_anddi3 (end_addr
, tmp
,
10222 GEN_INT (-MIN_CACHELINE_SIZE
)));
10223 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10224 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10225 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10226 gen_reg_rtx (Pmode
),
10227 gen_reg_rtx (Pmode
)));
10230 #ifdef HAVE_ENABLE_EXECUTE_STACK
10231 Â
emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10232 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
10236 /* Perform any machine-specific adjustment in the address of the trampoline.
10237 ADDR contains the address that was passed to pa_trampoline_init.
10238 Adjust the trampoline address to point to the plabel at offset 44. */
10241 pa_trampoline_adjust_address (rtx addr
)
10244 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 46));
10249 pa_delegitimize_address (rtx orig_x
)
10251 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10253 if (GET_CODE (x
) == LO_SUM
10254 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10255 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10256 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10261 pa_internal_arg_pointer (void)
10263 /* The argument pointer and the hard frame pointer are the same in
10264 the 32-bit runtime, so we don't need a copy. */
10266 return copy_to_reg (virtual_incoming_args_rtx
);
10268 return virtual_incoming_args_rtx
;
10271 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10272 Frame pointer elimination is automatically handled. */
10275 pa_can_eliminate (const int from
, const int to
)
10277 /* The argument cannot be eliminated in the 64-bit runtime. */
10278 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10281 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10282 ? ! frame_pointer_needed
10286 /* Define the offset between two registers, FROM to be eliminated and its
10287 replacement TO, at the start of a routine. */
10289 pa_initial_elimination_offset (int from
, int to
)
10291 HOST_WIDE_INT offset
;
10293 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10294 && to
== STACK_POINTER_REGNUM
)
10295 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10296 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10299 gcc_unreachable ();
10305 pa_conditional_register_usage (void)
10309 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10311 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10312 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10313 for (i
= 33; i
< 56; i
+= 2)
10314 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10316 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10318 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10319 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10322 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10325 /* Target hook for c_mode_for_suffix. */
10327 static machine_mode
10328 pa_c_mode_for_suffix (char suffix
)
10330 if (HPUX_LONG_DOUBLE_LIBRARY
)
10339 /* Target hook for function_section. */
10342 pa_function_section (tree decl
, enum node_frequency freq
,
10343 bool startup
, bool exit
)
10345 /* Put functions in text section if target doesn't have named sections. */
10346 if (!targetm_common
.have_named_sections
)
10347 return text_section
;
10349 /* Force nested functions into the same section as the containing
10352 && DECL_SECTION_NAME (decl
) == NULL
10353 && DECL_CONTEXT (decl
) != NULL_TREE
10354 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10355 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10356 return function_section (DECL_CONTEXT (decl
));
10358 /* Otherwise, use the default function section. */
10359 return default_function_section (decl
, freq
, startup
, exit
);
10362 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10364 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10365 that need more than three instructions to load prior to reload. This
10366 limit is somewhat arbitrary. It takes three instructions to load a
10367 CONST_INT from memory but two are memory accesses. It may be better
10368 to increase the allowed range for CONST_INTS. We may also be able
10369 to handle CONST_DOUBLES. */
10372 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10374 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10377 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10380 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10381 legitimate constants. The other variants can't be handled by
10382 the move patterns after reload starts. */
10383 if (tls_referenced_p (x
))
10386 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10390 && HOST_BITS_PER_WIDE_INT
> 32
10391 && GET_CODE (x
) == CONST_INT
10392 && !reload_in_progress
10393 && !reload_completed
10394 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10395 && !pa_cint_ok_for_move (UINTVAL (x
)))
10398 if (function_label_operand (x
, mode
))
10404 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10406 static unsigned int
10407 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10409 unsigned int flags
;
10411 flags
= default_section_type_flags (decl
, name
, reloc
);
10413 /* Function labels are placed in the constant pool. This can
10414 cause a section conflict if decls are put in ".data.rel.ro"
10415 or ".data.rel.ro.local" using the __attribute__ construct. */
10416 if (strcmp (name
, ".data.rel.ro") == 0
10417 || strcmp (name
, ".data.rel.ro.local") == 0)
10418 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10423 /* pa_legitimate_address_p recognizes an RTL expression that is a
10424 valid memory address for an instruction. The MODE argument is the
10425 machine mode for the MEM expression that wants to use this address.
10427 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10428 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10429 available with floating point loads and stores, and integer loads.
10430 We get better code by allowing indexed addresses in the initial
10433 The acceptance of indexed addresses as legitimate implies that we
10434 must provide patterns for doing indexed integer stores, or the move
10435 expanders must force the address of an indexed store to a register.
10436 We have adopted the latter approach.
10438 Another function of pa_legitimate_address_p is to ensure that
10439 the base register is a valid pointer for indexed instructions.
10440 On targets that have non-equivalent space registers, we have to
10441 know at the time of assembler output which register in a REG+REG
10442 pair is the base register. The REG_POINTER flag is sometimes lost
10443 in reload and the following passes, so it can't be relied on during
10444 code generation. Thus, we either have to canonicalize the order
10445 of the registers in REG+REG indexed addresses, or treat REG+REG
10446 addresses separately and provide patterns for both permutations.
10448 The latter approach requires several hundred additional lines of
10449 code in pa.md. The downside to canonicalizing is that a PLUS
10450 in the wrong order can't combine to form to make a scaled indexed
10451 memory operand. As we won't need to canonicalize the operands if
10452 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10454 We initially break out scaled indexed addresses in canonical order
10455 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10456 scaled indexed addresses during RTL generation. However, fold_rtx
10457 has its own opinion on how the operands of a PLUS should be ordered.
10458 If one of the operands is equivalent to a constant, it will make
10459 that operand the second operand. As the base register is likely to
10460 be equivalent to a SYMBOL_REF, we have made it the second operand.
10462 pa_legitimate_address_p accepts REG+REG as legitimate when the
10463 operands are in the order INDEX+BASE on targets with non-equivalent
10464 space registers, and in any order on targets with equivalent space
10465 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10467 We treat a SYMBOL_REF as legitimate if it is part of the current
10468 function's constant-pool, because such addresses can actually be
10469 output as REG+SMALLINT. */
10472 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
)
10475 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10476 : REG_OK_FOR_BASE_P (x
)))
10477 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10478 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10479 && REG_P (XEXP (x
, 0))
10480 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10481 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10484 if (GET_CODE (x
) == PLUS
)
10488 /* For REG+REG, the base register should be in XEXP (x, 1),
10489 so check it first. */
10490 if (REG_P (XEXP (x
, 1))
10491 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10492 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10493 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10494 else if (REG_P (XEXP (x
, 0))
10495 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10496 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10497 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10501 if (GET_CODE (index
) == CONST_INT
)
10503 if (INT_5_BITS (index
))
10506 /* When INT14_OK_STRICT is false, a secondary reload is needed
10507 to adjust the displacement of SImode and DImode floating point
10508 instructions but this may fail when the register also needs
10509 reloading. So, we return false when STRICT is true. We
10510 also reject long displacements for float mode addresses since
10511 the majority of accesses will use floating point instructions
10512 that don't support 14-bit offsets. */
10513 if (!INT14_OK_STRICT
10514 && (strict
|| !(reload_in_progress
|| reload_completed
))
10519 return base14_operand (index
, mode
);
10522 if (!TARGET_DISABLE_INDEXING
10523 /* Only accept the "canonical" INDEX+BASE operand order
10524 on targets with non-equivalent space registers. */
10525 && (TARGET_NO_SPACE_REGS
10527 : (base
== XEXP (x
, 1) && REG_P (index
)
10528 && (reload_completed
10529 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10530 || REG_POINTER (base
))
10531 && (reload_completed
10532 || (reload_in_progress
&& HARD_REGISTER_P (index
))
10533 || !REG_POINTER (index
))))
10534 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
10535 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
10536 : REG_OK_FOR_INDEX_P (index
))
10537 && borx_reg_operand (base
, Pmode
)
10538 && borx_reg_operand (index
, Pmode
))
10541 if (!TARGET_DISABLE_INDEXING
10542 && GET_CODE (index
) == MULT
10543 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
10544 && REG_P (XEXP (index
, 0))
10545 && GET_MODE (XEXP (index
, 0)) == Pmode
10546 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
10547 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
10548 && GET_CODE (XEXP (index
, 1)) == CONST_INT
10549 && INTVAL (XEXP (index
, 1))
10550 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
10551 && borx_reg_operand (base
, Pmode
))
10557 if (GET_CODE (x
) == LO_SUM
)
10559 rtx y
= XEXP (x
, 0);
10561 if (GET_CODE (y
) == SUBREG
)
10562 y
= SUBREG_REG (y
);
10565 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
10566 : REG_OK_FOR_BASE_P (y
)))
10568 /* Needed for -fPIC */
10570 && GET_CODE (XEXP (x
, 1)) == UNSPEC
)
10573 if (!INT14_OK_STRICT
10574 && (strict
|| !(reload_in_progress
|| reload_completed
))
10579 if (CONSTANT_P (XEXP (x
, 1)))
10585 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
10591 /* Look for machine dependent ways to make the invalid address AD a
10594 For the PA, transform:
10596 memory(X + <large int>)
10600 if (<large int> & mask) >= 16
10601 Y = (<large int> & ~mask) + mask + 1 Round up.
10603 Y = (<large int> & ~mask) Round down.
10605 memory (Z + (<large int> - Y));
10607 This makes reload inheritance and reload_cse work better since Z
10610 There may be more opportunities to improve code with this hook. */
10613 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
10614 int opnum
, int type
,
10615 int ind_levels ATTRIBUTE_UNUSED
)
10617 long offset
, newoffset
, mask
;
10618 rtx new_rtx
, temp
= NULL_RTX
;
10620 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
10621 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
10623 if (optimize
&& GET_CODE (ad
) == PLUS
)
10624 temp
= simplify_binary_operation (PLUS
, Pmode
,
10625 XEXP (ad
, 0), XEXP (ad
, 1));
10627 new_rtx
= temp
? temp
: ad
;
10630 && GET_CODE (new_rtx
) == PLUS
10631 && GET_CODE (XEXP (new_rtx
, 0)) == REG
10632 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
10634 offset
= INTVAL (XEXP ((new_rtx
), 1));
10636 /* Choose rounding direction. Round up if we are >= halfway. */
10637 if ((offset
& mask
) >= ((mask
+ 1) / 2))
10638 newoffset
= (offset
& ~mask
) + mask
+ 1;
10640 newoffset
= offset
& ~mask
;
10642 /* Ensure that long displacements are aligned. */
10644 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
10645 || (TARGET_64BIT
&& (mode
) == DImode
)))
10646 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
10648 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
10650 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
10651 GEN_INT (newoffset
));
10652 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
10653 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
10654 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10655 opnum
, (enum reload_type
) type
);
10663 /* Output address vector. */
10666 pa_output_addr_vec (rtx lab
, rtx body
)
10668 int idx
, vlen
= XVECLEN (body
, 0);
10670 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10672 fputs ("\t.begin_brtab\n", asm_out_file
);
10673 for (idx
= 0; idx
< vlen
; idx
++)
10675 ASM_OUTPUT_ADDR_VEC_ELT
10676 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
10679 fputs ("\t.end_brtab\n", asm_out_file
);
10682 /* Output address difference vector. */
10685 pa_output_addr_diff_vec (rtx lab
, rtx body
)
10687 rtx base
= XEXP (XEXP (body
, 0), 0);
10688 int idx
, vlen
= XVECLEN (body
, 1);
10690 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10692 fputs ("\t.begin_brtab\n", asm_out_file
);
10693 for (idx
= 0; idx
< vlen
; idx
++)
10695 ASM_OUTPUT_ADDR_DIFF_ELT
10698 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
10699 CODE_LABEL_NUMBER (base
));
10702 fputs ("\t.end_brtab\n", asm_out_file
);
10705 /* This is a helper function for the other atomic operations. This function
10706 emits a loop that contains SEQ that iterates until a compare-and-swap
10707 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10708 a set of instructions that takes a value from OLD_REG as an input and
10709 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10710 set to the current contents of MEM. After SEQ, a compare-and-swap will
10711 attempt to update MEM with NEW_REG. The function returns true when the
10712 loop was generated successfully. */
10715 pa_expand_compare_and_swap_loop (rtx mem
, rtx old_reg
, rtx new_reg
, rtx seq
)
10717 machine_mode mode
= GET_MODE (mem
);
10718 rtx_code_label
*label
;
10719 rtx cmp_reg
, success
, oldval
;
10721 /* The loop we want to generate looks like
10727 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10731 Note that we only do the plain load from memory once. Subsequent
10732 iterations use the value loaded by the compare-and-swap pattern. */
10734 label
= gen_label_rtx ();
10735 cmp_reg
= gen_reg_rtx (mode
);
10737 emit_move_insn (cmp_reg
, mem
);
10738 emit_label (label
);
10739 emit_move_insn (old_reg
, cmp_reg
);
10743 success
= NULL_RTX
;
10745 if (!expand_atomic_compare_and_swap (&success
, &oldval
, mem
, old_reg
,
10746 new_reg
, false, MEMMODEL_SYNC_SEQ_CST
,
10750 if (oldval
!= cmp_reg
)
10751 emit_move_insn (cmp_reg
, oldval
);
10753 /* Mark this jump predicted not taken. */
10754 emit_cmp_and_jump_insns (success
, const0_rtx
, EQ
, const0_rtx
,
10755 GET_MODE (success
), 1, label
,
10756 profile_probability::guessed_never ());
10760 /* This function tries to implement an atomic exchange operation using a
10761 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10762 *MEM are returned, using TARGET if possible. No memory model is required
10763 since a compare_and_swap loop is seq-cst. */
10766 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target
, rtx mem
, rtx val
)
10768 machine_mode mode
= GET_MODE (mem
);
10770 if (can_compare_and_swap_p (mode
, true))
10772 if (!target
|| !register_operand (target
, mode
))
10773 target
= gen_reg_rtx (mode
);
10774 if (pa_expand_compare_and_swap_loop (mem
, target
, val
, NULL_RTX
))
10781 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10782 arguments passed by hidden reference in the 32-bit HP runtime. Users
10783 can override this behavior for better compatibility with openmp at the
10784 risk of library incompatibilities. Arguments are always passed by value
10785 in the 64-bit HP runtime. */
10788 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED
,
10789 machine_mode mode ATTRIBUTE_UNUSED
,
10790 const_tree type ATTRIBUTE_UNUSED
,
10791 bool named ATTRIBUTE_UNUSED
)
10793 return !TARGET_CALLER_COPIES
;
10796 /* Implement TARGET_HARD_REGNO_NREGS. */
10798 static unsigned int
10799 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED
, machine_mode mode
)
10801 return PA_HARD_REGNO_NREGS (regno
, mode
);
10804 /* Implement TARGET_HARD_REGNO_MODE_OK. */
10807 pa_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10809 return PA_HARD_REGNO_MODE_OK (regno
, mode
);