/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2017 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "common/common-target.h"
#include "langhooks.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *) ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree, machine_mode,
                                              int *, const_tree, int);
static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
                              const_tree, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
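
/* For example, -mfixed-range=fr4-fr31 marks fr4 through fr31 as fixed;
   per the comment above that is the usual kernel-mode range, and since
   it covers the registers scanned by the final loop it would also end
   up setting MASK_DISABLE_FPREGS.  */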
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
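
/* For example, 27 fits in 14 bits and loads with ldo, 0x7f800 has its
   low 11 bits clear and loads with ldil, and 0x1e0 is a contiguous run
   of bits that zdepi can deposit.  */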
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
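
/* For example, 0x7f800 passes: its low 11 bits are zero and bit 31 is
   clear.  0x80000000 fails because the value would change sign when
   extended from 32 to 64 bits.  */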
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
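
/* Worked example: for X = 0x1e0, LSB_MASK = 0x20 and
   T = (0x1e + 0x20) & ~0x1f = 0x20, a power of two, so the test
   succeeds; 0x1e0 is a contiguous field of ones that zdepi can produce
   by depositing the 5-bit value 15 at bit position 5.  */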
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
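
/* For example, a mask with only its low four bits clear gives
   ~MASK = 0xf; adding the lowest set bit yields 0x10, a power of two,
   so the test succeeds.  A scattered mask such as 0xa5 fails.  */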
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
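
/* For example, MASK = 0x7f8 plus its lowest set bit (8) is 0x800, a
   power of two, so a single depi can OR in that contiguous run; a
   non-contiguous mask such as 0x505 is rejected.  */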
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
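
/* In short: the two dynamic models above build an argument for the
   __tls_get_addr call from a tgd_load or tld_load pattern, while the
   initial- and local-exec models read the thread pointer with tp_load
   and add an offset obtained from tie_load or tle_load.  */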
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   This respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
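
/* For example, (mult (reg) (const_int 4)) and (ashift (reg)
   (const_int 2)) both satisfy this predicate: the MULT form is how
   scaled addresses arrive from a MEM, while the ASHIFT form is the
   canonical shift-add rtx this file generates.  */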
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1      Round up.
        else
          Y = (<large int> & ~mask)                 Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
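
/* For a concrete feel of the transformation above, take the 14-bit mask
   0x3fff and memory (SYM + 70000): 70000 & 0x3fff = 4464, which is
   below halfway, so Y = 70000 & ~0x3fff = 65536.  Z = SYM + 65536 goes
   into a register and the reference becomes memory (Z + 4464), whose
   displacement fits in 14 bits.  */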
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }
  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;
              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_ASHIFT (Pmode,
                                                              reg2,
                                                              GEN_INT (shift_val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case SYMBOL_REF:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
      operands[0] = operand0;
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);
  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases, and reloads for other unsupported
     memory operands.  */
  if (scratch_reg
      && FP_REG_P (operand0)
      && (MEM_P (operand1)
          || (GET_CODE (operand1) == SUBREG
              && MEM_P (XEXP (operand1, 0)))))
    {
      rtx op1 = operand1;

      if (GET_CODE (op1) == SUBREG)
        op1 = XEXP (op1, 0);

      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op1, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op1, 0));
              emit_insn (gen_rtx_SET (operand0,
                                      replace_equiv_address (op1, scratch_reg)));
              return 1;
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
               || IS_INDEX_ADDR_P (XEXP (op1, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
          scratch_reg = force_mode (word_mode, scratch_reg);
          emit_move_insn (scratch_reg, XEXP (op1, 0));
          emit_insn (gen_rtx_SET (operand0,
                                  replace_equiv_address (op1, scratch_reg)));
          return 1;
        }
    }
  else if (scratch_reg
           && FP_REG_P (operand1)
           && (MEM_P (operand0)
               || (GET_CODE (operand0) == SUBREG
                   && MEM_P (XEXP (operand0, 0)))))
    {
      rtx op0 = operand0;

      if (GET_CODE (op0) == SUBREG)
        op0 = XEXP (op0, 0);

      if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op0, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op0, 0));
              emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
                                      operand1));
              return 1;
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
               || IS_INDEX_ADDR_P (XEXP (op0, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
          scratch_reg = force_mode (word_mode, scratch_reg);
          emit_move_insn (scratch_reg, XEXP (op0, 0));
          emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
                                  operand1));
          return 1;
        }
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.  For the most part,
     this is only necessary for SImode and DImode.

     Use scratch_reg to hold the address of the memory location.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && FP_REG_P (operand0))
    {
      rtx const_mem, xoperands[2];

      if (operand1 == CONST0_RTX (mode))
        {
          emit_insn (gen_rtx_SET (operand0, operand1));
          return 1;
        }

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                       0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
         that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
        operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && pa_cint_ok_for_move (UINTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  */
              if (decl)
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  type = strip_array_types (type);

                  if (POINTER_TYPE_P (type))
                    mark_reg_pointer (operand0, BITS_PER_UNIT);
                }
            }

          emit_insn (gen_rtx_SET (operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx_SET (temp, operand1));
          emit_insn (gen_rtx_SET (operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx_SET (operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }
  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode))
      || function_label_operand (operand1, VOIDmode)
      || tls_referenced_p (operand1))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.

             So we force the plabel into memory, load operand0 from
             the memory location, then add in the constant part.  */
          if ((GET_CODE (operand1) == CONST
               && GET_CODE (XEXP (operand1, 0)) == PLUS
               && function_label_operand (XEXP (XEXP (operand1, 0), 0),
                                          VOIDmode))
              || function_label_operand (operand1, VOIDmode))
            {
              rtx temp, const_part;

              /* Figure out what (if any) scratch register to use.  */
              if (reload_in_progress || reload_completed)
                {
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
                  /* SCRATCH_REG will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  scratch_reg = force_mode (word_mode, scratch_reg);
                }
              else if (flag_pic)
                scratch_reg = gen_reg_rtx (Pmode);

              if (GET_CODE (operand1) == CONST)
                {
                  /* Save away the constant part of the expression.  */
                  const_part = XEXP (XEXP (operand1, 0), 1);
                  gcc_assert (GET_CODE (const_part) == CONST_INT);

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
                }
              else
                {
                  /* No constant part.  */
                  const_part = NULL_RTX;

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, operand1);
                }

              /* Get the address of the memory location.  PIC-ify it if
                 necessary.  */
              temp = XEXP (temp, 0);
              if (flag_pic)
                temp = legitimize_pic_address (temp, mode, scratch_reg);

              /* Put the address of the memory location into our destination
                 register.  */
              operands[1] = temp;
              pa_emit_move_sequence (operands, mode, scratch_reg);

              /* Now load from the memory location into our destination
                 register.  */
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
              pa_emit_move_sequence (operands, mode, scratch_reg);

              /* And add back in the constant part.  */
              if (const_part != NULL_RTX)
                expand_inc (operand0, const_part);

              return 1;
            }
2061 if (reload_in_progress
|| reload_completed
)
2063 temp
= scratch_reg
? scratch_reg
: operand0
;
2064 /* TEMP will hold an address and maybe the actual
2065 data. We want it in WORD_MODE regardless of what mode it
2066 was originally given to us. */
2067 temp
= force_mode (word_mode
, temp
);
2070 temp
= gen_reg_rtx (Pmode
);
2072 /* Force (const (plus (symbol) (const_int))) to memory
2073 if the const_int will not fit in 14 bits. Although
2074 this requires a relocation, the instruction sequence
2075 needed to load the value is shorter. */
2076 if (GET_CODE (operand1
) == CONST
2077 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2078 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2079 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2081 rtx x
, m
= force_const_mem (mode
, operand1
);
2083 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2084 x
= replace_equiv_address (m
, x
);
2085 insn
= emit_move_insn (operand0
, x
);
2089 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2090 if (REG_P (operand0
) && REG_P (operands
[1]))
2091 copy_reg_pointer (operand0
, operands
[1]);
2092 insn
= emit_move_insn (operand0
, operands
[1]);
2095 /* Put a REG_EQUAL note on this insn. */
2096 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2098 /* On the HPPA, references to data space are supposed to use dp,
2099 register 27, but showing it in the RTL inhibits various cse
2100 and loop optimizations. */
2105 if (reload_in_progress
|| reload_completed
)
2107 temp
= scratch_reg
? scratch_reg
: operand0
;
2108 /* TEMP will hold an address and maybe the actual
2109 data. We want it in WORD_MODE regardless of what mode it
2110 was originally given to us. */
2111 temp
= force_mode (word_mode
, temp
);
2114 temp
= gen_reg_rtx (mode
);
2116 /* Loading a SYMBOL_REF into a register makes that register
2117 safe to be used as the base in an indexed address.
2119 Don't mark hard registers though. That loses. */
2120 if (GET_CODE (operand0
) == REG
2121 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2122 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2123 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2124 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2127 set
= gen_rtx_SET (operand0
, temp
);
2129 set
= gen_rtx_SET (operand0
,
2130 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2132 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2138 else if (tls_referenced_p (operand1
))
2143 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2145 addend
= XEXP (XEXP (tmp
, 0), 1);
2146 tmp
= XEXP (XEXP (tmp
, 0), 0);
2149 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2150 tmp
= legitimize_tls_address (tmp
);
2153 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2154 tmp
= force_operand (tmp
, operands
[0]);
2158 else if (GET_CODE (operand1
) != CONST_INT
2159 || !pa_cint_ok_for_move (UINTVAL (operand1
)))
2164 HOST_WIDE_INT value
= 0;
2165 HOST_WIDE_INT insv
= 0;
2168 if (GET_CODE (operand1
) == CONST_INT
)
2169 value
= INTVAL (operand1
);
2172 && GET_CODE (operand1
) == CONST_INT
2173 && HOST_BITS_PER_WIDE_INT
> 32
2174 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2178 /* Extract the low order 32 bits of the value and sign extend.
2179 If the new value is the same as the original value, we can
2180 can use the original value as-is. If the new value is
2181 different, we use it and insert the most-significant 32-bits
2182 of the original value into the final result. */
2183 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2184 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2187 #if HOST_BITS_PER_WIDE_INT > 32
2188 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2192 operand1
= GEN_INT (nval
);
2196 if (reload_in_progress
|| reload_completed
)
2197 temp
= scratch_reg
? scratch_reg
: operand0
;
2199 temp
= gen_reg_rtx (mode
);
2201 /* We don't directly split DImode constants on 32-bit targets
2202 because PLUS uses an 11-bit immediate and the insn sequence
2203 generated is not as efficient as the one using HIGH/LO_SUM. */
2204 if (GET_CODE (operand1
) == CONST_INT
2205 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2206 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2209 /* Directly break constant into high and low parts. This
2210 provides better optimization opportunities because various
2211 passes recognize constants split with PLUS but not LO_SUM.
2212 We use a 14-bit signed low part except when the addition
2213 of 0x4000 to the high part might change the sign of the
2215 HOST_WIDE_INT low
= value
& 0x3fff;
2216 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2220 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2228 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2229 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2233 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2234 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2237 insn
= emit_move_insn (operands
[0], operands
[1]);
2239 /* Now insert the most significant 32 bits of the value
2240 into the register. When we don't have a second register
2241 available, it could take up to nine instructions to load
2242 a 64-bit integer constant. Prior to reload, we force
2243 constants that would take more than three instructions
2244 to load to the constant pool. During and after reload,
2245 we have to handle all possible values. */
2248 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2249 register and the value to be inserted is outside the
2250 range that can be loaded with three depdi instructions. */
2251 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2253 operand1
= GEN_INT (insv
);
2255 emit_insn (gen_rtx_SET (temp
,
2256 gen_rtx_HIGH (mode
, operand1
)));
2257 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2259 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2262 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2267 int len
= 5, pos
= 27;
2269 /* Insert the bits using the depdi instruction. */
2272 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2273 HOST_WIDE_INT sign
= v5
< 0;
2275 /* Left extend the insertion. */
2276 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2277 while (pos
> 0 && (insv
& 1) == sign
)
2279 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2285 insn
= emit_insn (gen_insvdi (operand0
,
2290 insn
= emit_insn (gen_insvsi (operand0
,
2295 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2301 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2306 /* Now have insn-emit do whatever it normally does. */
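
/* A worked example of the PLUS split above (values here are illustrative,
   not from any particular build): for value == 0x12345 we get
   low == 0x2345 and high == 0x10000.  Since low >= 0x2000, high is bumped
   to 0x14000 and low is recomputed as 0x12345 - 0x14000 == -0x1cbb, which
   still fits in 14 signed bits.  The RTL emitted is then
   (set temp 0x14000) followed by (set dest (plus temp -7355)), a form the
   later passes fold more readily than a HIGH/LO_SUM pair.  */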
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
pa_reloc_needed (tree exp)
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
      break;

    CASE_CONVERT:
    case NON_LVALUE_EXPR:
      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	tree value;
	unsigned HOST_WIDE_INT ix;

	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
	  if (value)
	    reloc |= pa_reloc_needed (value);
      }
      break;

    default:
      break;
    }
  return reloc;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
pa_singlemove_string (rtx *operands)
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;

      gcc_assert (GET_MODE (operands[1]) == SFmode);

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (pa_zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32 - lsb; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb - len + 1;
  op[2] = len;
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */

static void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len, maxlen;

  maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < maxlen; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < maxlen - lsb; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Extend length if host is narrow and IMM is negative.  */
      if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
	len += 32;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb - len + 1;
  op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
pa_output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;
  int highonly = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  rtx xoperands[4];

	  /* Load address into left half of destination register.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 1);
	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
      else if (GET_CODE (addr) == PLUS
	       && REG_P (XEXP (addr, 0))
	       && REG_P (XEXP (addr, 1)))
	{
	  rtx xoperands[3];

	  /* Load address into left half of destination register.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 0);
	  xoperands[2] = XEXP (addr, 1);
	  output_asm_insn ("{addl|add,l} %1,%2,%0",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    {
      if (GET_CODE (operands[1]) == HIGH)
	{
	  operands[1] = XEXP (operands[1], 0);
	  highonly = 1;
	}
      split_double (operands[1], &operands[1], &latehalf[1]);
    }
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), operands[1]))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return pa_singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
      return pa_singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (pa_singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do high-numbered word.  */
  if (highonly)
    output_asm_insn ("ldil L'%1,%0", latehalf);
  else
    output_asm_insn (pa_singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
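
/* Example of the ordering logic above (illustrative): a DImode load
   from 0(%r4) into %r4/%r5 would clobber the address if the low word
   were moved first, so the late half is emitted first:
	ldw 4(%r4),%r5
	ldw 0(%r4),%r4  */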
const char *
pa_output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
	output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
	output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
	 arbitrary address here including pre/post increment/decrement.

	 so avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG);
  return addr;
}
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

const char *
pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("ldd,ma 8(%1),%3", operands);
	output_asm_insn ("ldd,ma 8(%1),%6", operands);
	output_asm_insn ("std,ma %3,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%2,.-12", operands);
	output_asm_insn ("std,ma %6,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("ldd 0(%1),%6", operands);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %3,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%2,.-12", operands);
	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("ldw 0(%1),%6", operands);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%2,.-12", operands);
	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("ldb 0(%1),%6", operands);
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %6,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%2,.-12", operands);
	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  {
	    output_asm_insn ("ldb 0(%1),%3", operands);
	    output_asm_insn ("stb %3,0(%0)", operands);
	  }
	return "";

      default:
	gcc_unreachable ();
    }
}
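
/* Illustrative expansion for align == 4 and n_bytes == 12 (the loop is
   known to run at least twice; the "{old|new}" alternatives above pick
   the assembler dialect, shown here in the new syntax):
	ldi 4,%2		; counter = 12 - 8
	ldw,ma 4(%1),%3
	ldw,ma 4(%1),%6
	stw,ma %3,4(%0)
	addib,>= -8,%2,.-12
	stw,ma %6,4(%0)
	ldw,ma 4(%1),%3		; residual word (12 % 8 == 4)
	stw,ma %3,4(%0)  */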
/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_movmem_length (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than four bytes at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns += 2;

      if ((n_bytes % align) != 0)
	n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
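
/* E.g. with align == 4 and n_bytes == 23: the loop accounts for 6 insns,
   23 % 8 == 7 >= 4 adds 2, and 23 % 4 == 3 adds 2 more, so this returns
   10 * 4 == 40 bytes.  */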
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */

const char *
pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("std,ma %%r0,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%1,.-4", operands);
	output_asm_insn ("std,ma %%r0,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%1,.-4", operands);
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%1,.-4", operands);
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %%r0,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%1,.-4", operands);
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  output_asm_insn ("stb %%r0,0(%0)", operands);

	return "";

      default:
	gcc_unreachable ();
    }
}
/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_clrmem_length (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic loop.  */
  n_insns = 4;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns += 1;

      if ((n_bytes % align) != 0)
	n_insns += 1;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
const char *
pa_output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < 32; ls0++)
	if ((mask & (1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < 32; ls1++)
	if ((mask & (1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < 32; ms0++)
	if ((mask & (1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == 32);

      if (ls1 == 32)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "{extru|extrw,u} %1,31,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 31 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "{depi|depwi} 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
pa_output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);

      if (ls1 == HOST_BITS_PER_WIDE_INT)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "extrd,u %1,63,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 63 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "depdi 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
const char *
pa_output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
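
/* Example (illustrative): mask 0x00ff0000 gives bs0 == 16 and bs1 == 24,
   hence p == 31 - 16 == 15 and len == 8, producing "depi -1,15,8,%0".  */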
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
pa_output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  bool result;
  tree decl = NULL;

  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
     assemble_external and set the SYMBOL_REF_DECL to NULL before
     calling output_addr_const.  Otherwise, it may call assemble_external
     in the midst of outputting the assembler code for the SYMBOL_REF.
     We restore the SYMBOL_REF_DECL after the output is done.  */
  if (GET_CODE (x) == SYMBOL_REF)
    {
      decl = SYMBOL_REF_DECL (x);
      if (decl)
	{
	  assemble_external (decl);
	  SET_SYMBOL_REF_DECL (x, NULL);
	}
    }

  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);

      /* We don't want an OPD when generating fast indirect calls.  */
      if (!TARGET_FAST_INDIRECT_CALLS)
	fputs ("P%", asm_out_file);

      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      result = true;
    }
  else
    result = default_assemble_integer (x, size, aligned_p);

  if (decl)
    SET_SYMBOL_REF_DECL (x, decl);

  return result;
}
/* Output an ascii string.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co, io;

      /* Transform into escaped characters.  */
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
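
/* For instance, a 4-byte string "a\"b\n" would be emitted as
   (illustrative):
	.STRING "a\"b\x0a"
   with a new .STRING directive started whenever the output line
   approaches the assembler's input line limit.  */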
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx_insn *insn;
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP, then we've found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction, bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}

      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;
	  rtx_insn *next;

	  /* Ignore anything that isn't an INSN.  */
	  if (! NONJUMP_INSN_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
		break;

	      /* As does another fcmp insn.  */
	      if (NONJUMP_INSN_P (next)
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next && JUMP_P (next))
	    {
	      rtx pattern = PATTERN (next);

	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}

      pass++;
    }
}
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments	(optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
      SP-56		arg word 5
      SP-52		arg word 4

   Fixed arguments	(must be allocated; may remain unused)

      SP-48		arg word 3
      SP-44		arg word 2
      SP-40		arg word 1
      SP-36		arg word 0

   Frame Marker

      SP-32		External Data Pointer (DP)
      SP-28		External sr4
      SP-24		External/stub RP (RP')
      SP-20		Current RP
      SP-16		Static Link
      SP-12		Clean up
      SP-8		Calling Stub RP (RP'')
      SP-4		Previous SP

   Top of Frame

      SP-0		Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

   Top of Frame

       SP (FP')		Previous FP
       SP + 4		Alignment filler (sigh)
       SP + 8		Space for locals reserved here.
       .
       .
       .
       SP + n		All call saved register used.
       .
       .
       .
       SP + o		All call saved fp registers used.
       .
       .
       .
       SP + p (SP')	points to next available address.
*/

/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest, src, basereg;
  rtx_insn *insn;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (tmpreg,
				     gen_rtx_PLUS (Pmode, basereg, delta)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (gen_rtx_MEM (word_mode,
						gen_rtx_PLUS (word_mode,
							      basereg,
							      delta)),
				   src));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx basereg, srcreg, delta;
  rtx_insn *insn;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
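
/* E.g. store_reg_modify (STACK_POINTER_REGNUM, 1, 64) emits the
   equivalent of "stwm %r1,64(%sp)" on 32-bit targets: %r1 is stored at
   *sp and %sp is bumped by 64 in a single insn, which is why the
   prologue below can merge the first register save with the frame
   allocation.  */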
/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx_insn *insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (Pmode,
					    gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (tmpreg,
				   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
HOST_WIDE_INT
pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in pa_expand_prologue and pa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!crtl->is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* pa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (crtl->is_leaf)
    fputs (",NO_CALLS", file);
  else
    fputs (",CALLS", file);
  if (rp_saved)
    fputs (",SAVE_RP", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  remove_useless_addtr_insns (0);
}
3893 pa_expand_prologue (void)
3895 int merge_sp_adjust_with_store
= 0;
3896 HOST_WIDE_INT size
= get_frame_size ();
3897 HOST_WIDE_INT offset
;
3906 /* Compute total size for frame pointer, filler, locals and rounding to
3907 the next word boundary. Similar code appears in pa_compute_frame_size
3908 and must be changed in tandem with this code. */
3909 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3910 if (local_fsize
|| frame_pointer_needed
)
3911 local_fsize
+= STARTING_FRAME_OFFSET
;
3913 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3914 if (flag_stack_usage_info
)
3915 current_function_static_stack_size
= actual_fsize
;
3917 /* Compute a few things we will use often. */
3918 tmpreg
= gen_rtx_REG (word_mode
, 1);
3920 /* Save RP first. The calling conventions manual states RP will
3921 always be stored into the caller's frame at sp - 20 or sp - 16
3922 depending on which ABI is in use. */
3923 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3925 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3931 /* Allocate the local frame and set up the frame pointer if needed. */
3932 if (actual_fsize
!= 0)
3934 if (frame_pointer_needed
)
3936 /* Copy the old frame pointer temporarily into %r1. Set up the
3937 new stack pointer, then store away the saved old frame pointer
3938 into the stack at sp and at the same time update the stack
3939 pointer by actual_fsize bytes. Two versions, first
3940 handles small (<8k) frames. The second handles large (>=8k)
3942 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3944 RTX_FRAME_RELATED_P (insn
) = 1;
3946 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3948 RTX_FRAME_RELATED_P (insn
) = 1;
3950 if (VAL_14_BITS_P (actual_fsize
))
3951 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3954 /* It is incorrect to store the saved frame pointer at *sp,
3955 then increment sp (writes beyond the current stack boundary).
3957 So instead use stwm to store at *sp and post-increment the
3958 stack pointer as an atomic operation. Then increment sp to
3959 finish allocating the new frame. */
3960 HOST_WIDE_INT adjust1
= 8192 - 64;
3961 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3963 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3964 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3968 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3969 we need to store the previous stack pointer (frame pointer)
3970 into the frame marker on targets that use the HP unwind
3971 library. This allows the HP unwind library to be used to
3972 unwind GCC frames. However, we are not fully compatible
3973 with the HP library because our frame layout differs from
3974 that specified in the HP runtime specification.
3976 We don't want a frame note on this instruction as the frame
3977 marker moves during dynamic stack allocation.
3979 This instruction also serves as a blockage to prevent
3980 register spills from being scheduled before the stack
3981 pointer is raised. This is necessary as we store
3982 registers using the frame pointer as a base register,
3983 and the frame pointer is set before sp is raised. */
3984 if (TARGET_HPUX_UNWIND_LIBRARY
)
3986 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3987 GEN_INT (TARGET_64BIT
? -8 : -4));
3989 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3990 hard_frame_pointer_rtx
);
3993 emit_insn (gen_blockage ());
3995 /* no frame pointer needed. */
3998 /* In some cases we can perform the first callee register save
3999 and allocating the stack frame at the same time. If so, just
4000 make a note of it and defer allocating the frame until saving
4001 the callee registers. */
4002 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
4003 merge_sp_adjust_with_store
= 1;
4004 /* Can not optimize. Adjust the stack frame by actual_fsize
4007 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4012 /* Normal register save.
4014 Do not save the frame pointer in the frame_pointer_needed case. It
4015 was done earlier. */
4016 if (frame_pointer_needed
)
4018 offset
= local_fsize
;
4020 /* Saving the EH return data registers in the frame is the simplest
4021 way to get the frame unwind information emitted. We put them
4022 just before the general registers. */
4023 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4025 unsigned int i
, regno
;
4029 regno
= EH_RETURN_DATA_REGNO (i
);
4030 if (regno
== INVALID_REGNUM
)
4033 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4034 offset
+= UNITS_PER_WORD
;
4038 for (i
= 18; i
>= 4; i
--)
4039 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4041 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4042 offset
+= UNITS_PER_WORD
;
4045 /* Account for %r3 which is saved in a special place. */
4048 /* No frame pointer needed. */
4051 offset
= local_fsize
- actual_fsize
;
4053 /* Saving the EH return data registers in the frame is the simplest
4054 way to get the frame unwind information emitted. */
4055 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4057 unsigned int i
, regno
;
4061 regno
= EH_RETURN_DATA_REGNO (i
);
4062 if (regno
== INVALID_REGNUM
)
4065 /* If merge_sp_adjust_with_store is nonzero, then we can
4066 optimize the first save. */
4067 if (merge_sp_adjust_with_store
)
4069 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4070 merge_sp_adjust_with_store
= 0;
4073 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4074 offset
+= UNITS_PER_WORD
;
      for (i = 18; i >= 3; i--)
	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }
      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }
  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?   For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }
  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
	  base = hard_frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}
      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (df_regs_ever_live_p (i)
	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	    {
	      rtx addr, reg;
	      rtx_insn *insn;
	      addr = gen_rtx_MEM (DFmode,
				  gen_rtx_POST_INC (word_mode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (Pmode, base,
							    offset));
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SET (mem, reg));
		    }
		  else
		    {
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (meml, regl);
		      rtx setr = gen_rtx_SET (memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SEQUENCE (VOIDmode, vec));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}
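/* Illustrative aside (sketch only): the "(offset + 7) & ~7" idiom used in
   the prologue and epilogue rounds a byte offset up to the next doubleword
   boundary, e.g. 0 -> 0, 1..8 -> 8, 9..16 -> 16.  */
#if 0
static inline long
align_up_to_doubleword (long offset)
{
  return (offset + 7) & ~7;	/* assumes 8-byte doublewords */
}
#endif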
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */

static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;

  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
	{
	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
	  src = gen_rtx_MEM (word_mode, tmpreg);
	}
      else
	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  emit_move_insn (dest, src);
}
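/* Sketch (assumption-labelled, not the port's code): for displacements that
   don't fit in 14 bits, load_reg splits BASE+DISP into a HIGH part added
   into %r1 and a LO_SUM part used in the memory reference.  Conceptually,
   with PA's left/right field split (low 11 bits in the LO_SUM):  */
#if 0
static void
split_disp (long disp, long *hi, long *lo)
{
  *lo = disp & 0x7ff;		/* what the LO_SUM contributes */
  *hi = disp & ~0x7ff;		/* what gen_rtx_HIGH selects */
}
#endif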
/* Update the total code bytes output to the text section.  */

static void
update_total_code_bytes (unsigned int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      unsigned int old_total = total_code_bytes;

      total_code_bytes += nbytes;

      /* Be prepared to handle overflows.  */
      if (old_total > total_code_bytes)
	total_code_bytes = UINT_MAX;
    }
}
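/* Standalone sketch of the saturating accumulation above (illustration
   only): when the unsigned addition wraps, the running total clamps to
   UINT_MAX instead of silently restarting from a small value.  */
#if 0
static unsigned int
saturating_add (unsigned int total, unsigned int nbytes)
{
  unsigned int old_total = total;

  total += nbytes;
  if (old_total > total)	/* unsigned overflow detected */
    total = UINT_MAX;
  return total;
}
#endif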
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx_insn *insn = get_last_insn ();
  bool extra_nop;

  /* pa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (NOTE_P (insn))
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && CALL_P (insn))
    {
      fputs ("\tnop\n", file);
      extra_nop = true;
    }
  else
    extra_nop = false;

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We are done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  /* Thunks do their own insn accounting.  */
  if (cfun->is_thunk)
    return;

  if (INSN_ADDRESSES_SET_P ())
    {
      last_address = extra_nop ? 4 : 0;
      insn = get_last_nonnote_insn ();
      if (insn)
	{
	  last_address += INSN_ADDRESSES (INSN_UID (insn));
	  if (INSN_P (insn))
	    last_address += insn_default_length (insn);
	}
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }
  else
    last_address = UINT_MAX;

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
void
pa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	  {
	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	    {
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode,
				   gen_rtx_POST_INC (word_mode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
			       stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
bool
pa_can_use_return_insn (void)
{
  if (!reload_completed)
    return false;

  if (frame_pointer_needed)
    return false;

  if (df_regs_ever_live_p (2))
    return false;

  if (crtl->profile)
    return false;

  return pa_compute_frame_size (get_frame_size (), 0) == 0;
}

rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif

/* Vector of funcdef numbers.  */
static vec<int> funcdef_nos;

/* Output deferred profile counters.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;

  if (funcdef_nos.is_empty ())
    return;

  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));

  for (i = 0; funcdef_nos.iterate (i, &n); i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  funcdef_nos.release ();
}
void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx_code_label *label_rtx = gen_label_rtx ();
  rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
  int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
  rtx arg_bytes, begin_label_rtx;
  rtx_insn *call_insn;
  char begin_label_name[16];
  bool use_mcount_pcrel_call;

  /* If we can reach _mcount with a pc-relative call, we can optimize
     loading the address of the current function.  This requires linker
     long branch stub support.  */
  if (!TARGET_PORTABLE_RUNTIME
      && !TARGET_LONG_CALLS
      && (TARGET_SOM || flag_function_sections))
    use_mcount_pcrel_call = TRUE;
  else
    use_mcount_pcrel_call = FALSE;

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  if (!use_mcount_pcrel_call)
    {
      /* The address of the function is loaded into %r25 with an instruction-
	 relative sequence that avoids the use of relocations.  The sequence
	 is split so that the load_offset_label_address instruction can
	 occupy the delay slot of the call to _mcount.  */
      if (TARGET_PA_20)
	emit_insn (gen_lcla2 (reg, label_rtx));
      else
	emit_insn (gen_lcla1 (reg, label_rtx));

      emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
						reg,
						begin_label_rtx,
						label_rtx));
    }

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    {
      rtx count_label_rtx, addr, r24;
      char count_label_name[16];

      funcdef_nos.safe_push (label_no);
      ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
      count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
					    ggc_strdup (count_label_name));

      addr = force_reg (Pmode, count_label_rtx);
      r24 = gen_rtx_REG (Pmode, 24);
      emit_move_insn (r24, addr);

      arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));

      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
    }
  else
    {
      arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
    }

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* Allocate space for fixed arguments.  */
  if (reg_parm_stack_space > crtl->outgoing_args_size)
    crtl->outgoing_args_size = reg_parm_stack_space;
}
/* Fetch the return address for the frame COUNT steps up from
   the current frame, after the prologue.  FRAMEADDR is the
   frame pointer of the COUNT frame.

   We want to ignore any export stub remnants here.  To handle this,
   we examine the code at the return address, and if it is an export
   stub, we return a memory rtx for the stub return address stored
   at frame - 24.

   The value returned is used in two different ways:

	1. To find a function's caller.

	2. To change the return address for a function.

   This function handles most instances of case 1; however, it will
   fail if there are two levels of stubs to execute on the return
   path.  The only way I believe that can happen is if the return value
   needs a parameter relocation, which never happens for C code.

   This function handles most instances of case 2; however, it will
   fail if we did not originally have stub code on the return path
   but will need stub code on the new return path.  This can happen if
   the caller & callee are both in the main program, but the new
   return location is in a shared library.  */

rtx
pa_return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* The instruction stream at the return address of a PA1.X export stub is:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.

     The instruction stream at the return address of a PA2.0 export stub is:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0xe840d002 | stub+12:  bve,n (rp)
  */

  HOST_WIDE_INT insns[4];
  int i, len;

  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  if (TARGET_PA_20)
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = -398405630;
      len = 2;
    }
  else
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = 0x004010a1;
      insns[2] = 0x00011820;
      insns[3] = -532676606;
      len = 4;
    }

  /* Check the instruction stream at the normal return address for the
     export stub.  If it is an export stub, then our return address is
     really in -24[frameaddr].  */

  for (i = 0; i < len; i++)
    {
      rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
      rtx op1 = GEN_INT (insns[i]);
      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
    }

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (Pmode, frameaddr,
							      -24))));

  emit_label (label);

  return saved_rp;
}
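/* Sketch (illustration only; assumes MASK_RETURN_ADDR clears the two
   privilege-level bits and that <stdint.h> is available): how a return
   address is turned into an instruction pointer and matched against the
   PA2.0 export-stub signature checked above.  */
#if 0
static int
is_pa20_export_stub (unsigned long rp)
{
  const unsigned int *ins = (const unsigned int *) (rp & ~3UL);

  return ins[0] == 0x4bc23fd1		/* ldw -18(sr0,sp),rp */
	 && ins[1] == 0xe840d002;	/* bve,n (rp) */
}
#endif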
void
pa_emit_bcond_fp (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[0]);
  rtx operand0 = operands[1];
  rtx operand1 = operands[2];
  rtx label = operands[3];

  emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
			  gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));

  emit_jump_insn (gen_rtx_SET (pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode,
						     gen_rtx_fmt_ee (NE,
							      VOIDmode,
							      gen_rtx_REG (CCFPmode, 0),
							      const0_rtx),
						     gen_rtx_LABEL_REF (VOIDmode, label),
						     pc_rtx)));
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		unsigned int)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip, also do not adjust any
     true dependencies as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
    return cost;

  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (dep_type)
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */

      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is any of the sources
		     (or destination) of the arithmetic operation.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is any of the sources
		     (or destination) of the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is the destination of the
		     arithmetic operation.

		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.   We also
		     pay the penalty if the second insn is a fpload.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is also the target of
		     the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}
/* Adjust scheduling priorities.  We use this to try and keep addil
   and the next use of %r1 close together.  */

static int
pa_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set = single_set (insn);
  rtx src, dest;
  if (set)
    {
      src = SET_SRC (set);
      dest = SET_DEST (set);
      if (GET_CODE (src) == LO_SUM
	  && symbolic_operand (XEXP (src, 1), VOIDmode)
	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
	priority >>= 3;

      else if (GET_CODE (src) == MEM
	       && GET_CODE (XEXP (src, 0)) == LO_SUM
	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
	priority >>= 1;

      else if (GET_CODE (dest) == MEM
	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
	priority >>= 3;
    }
  return priority;
}
/* The 700 can only issue a single insn at a time.
   The 7XXX processors can issue two insns at a time.
   The 8000 can issue 4 insns at a time.  */
static int
pa_issue_rate (void)
{
  switch (pa_cpu)
    {
    case PROCESSOR_700:		return 1;
    case PROCESSOR_7100:	return 2;
    case PROCESSOR_7100LC:	return 2;
    case PROCESSOR_7200:	return 2;
    case PROCESSOR_7300:	return 2;
    case PROCESSOR_8000:	return 4;

    default:
      gcc_unreachable ();
    }
}
/* Return any length plus adjustment needed by INSN which already has
   its length computed as LENGTH.   Return LENGTH if no adjustment is
   necessary.

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
int
pa_adjust_insn_length (rtx_insn *insn, int length)
{
  rtx pat = PATTERN (insn);

  /* If length is negative or undefined, provide initial length.  */
  if ((unsigned int) length >= INT_MAX)
    {
      if (GET_CODE (pat) == SEQUENCE)
	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));

      switch (get_attr_type (insn))
	{
	case TYPE_MILLI:
	  length = pa_attr_length_millicode_call (insn);
	  break;
	case TYPE_CALL:
	  length = pa_attr_length_call (insn, 0);
	  break;
	case TYPE_SIBCALL:
	  length = pa_attr_length_call (insn, 1);
	  break;
	case TYPE_DYNCALL:
	  length = pa_attr_length_indirect_call (insn);
	  break;
	case TYPE_SH_FUNC_ADRS:
	  length = pa_attr_length_millicode_call (insn) + 20;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Block move pattern.  */
  if (NONJUMP_INSN_P (insn)
      && GET_CODE (pat) == PARALLEL
      && GET_CODE (XVECEXP (pat, 0, 0)) == SET
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    length += compute_movmem_length (insn) - 4;
  /* Block clear pattern.  */
  else if (NONJUMP_INSN_P (insn)
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    length += compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (JUMP_P (insn) && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && JUMP_LABEL (insn) != NULL_RTX
	  && ! forward_branch_p (insn))
	length += 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	length += 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	length += 4;
    }
  return length;
}
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  if (code == '@'
      || code == '#'
      || code == '*'
      || code == '^')
    return true;

  return false;
}
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
pa_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the
	 delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
        fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;
    case 'A':
      {
	rtx xoperands[2];

	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	pa_output_global_address (file, xoperands[1], 0);
        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs ("!?<=", file);  break;
	case UNLT:
	  fputs ("!?<", file);  break;
	case UNGE:
	  fputs ("!?>=", file);  break;
	case UNGT:
	  fputs ("!?>", file);  break;
	case UNEQ:
	  fputs ("!?=", file);  break;
	case UNORDERED:
	  fputs ("!?", file);  break;
	case ORDERED:
	  fputs ("?", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'k':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'o':
      gcc_assert (GET_CODE (x) == CONST_INT
		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      return;
    case 'O':
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
    case 'I':
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x", file);
	    }
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      pa_output_global_address (file, x, 0);
      return;
    case 'H':
      pa_output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      {
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  fputs ("R", file);
	  return;
	}
      if (!TARGET_64BIT
	  && FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
          base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
          base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* Because the REG_POINTER flag can get lost during reload,
		 pa_legitimate_address_p canonicalizes the order of the
		 index and base registers in the combined move patterns.  */
	      rtx base = XEXP (XEXP (x, 0), 1);
	      rtx index = XEXP (XEXP (x, 0), 0);

	      fprintf (file, "%s(%s)",
		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
	    }
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	  break;
	default:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */

void
pa_output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
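/* Sketch of the LR-selector rounding applied above (illustration only):
   the offset is rounded to the nearest multiple of 0x2000 so that the
   addil and the dependent ldw/stw/ldo agree on the same split.  */
#if 0
static int
round_lr_offset (int offset)
{
  return (offset + 0x1000) & ~0x1fff;	/* e.g. 0x17ff -> 0x2000 */
}
#endif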
/* Output boilerplate text to appear at the beginning of the file.
   There are several possible versions.  */
#define aputs(x) fputs(x, asm_out_file)

static void
pa_file_start_level (void)
{
  if (TARGET_64BIT)
    aputs ("\t.LEVEL 2.0w\n");
  else if (TARGET_PA_20)
    aputs ("\t.LEVEL 2.0\n");
  else if (TARGET_PA_11)
    aputs ("\t.LEVEL 1.1\n");
  else
    aputs ("\t.LEVEL 1.0\n");
}

static void
pa_file_start_space (int sortspace)
{
  aputs ("\t.SPACE $PRIVATE$");
  if (sortspace)
    aputs (",SORT=16");
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
  if (flag_tm)
    aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
  aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
         "\n\t.SPACE $TEXT$");
  if (sortspace)
    aputs (",SORT=8");
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
}

static void
pa_file_start_file (int want_version)
{
  if (write_symbols != NO_DEBUG)
    {
      output_file_directive (asm_out_file, main_input_filename);
      if (want_version)
	aputs ("\t.version\t\"01.01\"\n");
    }
}

static void
pa_file_start_mcount (const char *aswhat)
{
  if (profile_flag)
    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
}

static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}

static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}

static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}

static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}

static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
#undef aputs
/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
pa_get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      if (deferred_plabels == 0)
	deferred_plabels = ggc_alloc<deferred_plabel> ();
      else
        deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
                                          deferred_plabels,
                                          n_deferred_plabels + 1);

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
static void
output_deferred_plabels (void)
{
  size_t i;

  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a 4 byte boundary
     before outputting the deferred plabels.  */
  if (n_deferred_plabels)
    {
      switch_to_section (flag_pic ? data_section : readonly_data_section);
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "L",
		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol,
			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
    }
}
/* Initialize optabs to point to emulation routines.  */

static void
pa_init_libfuncs (void)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      set_conv_libfunc (sfix_optab, SImode, TFmode,
			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
				     : "_U_Qfcnvfxt_quad_to_sgl");
      set_conv_libfunc (sfix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_dbl");
      set_conv_libfunc (ufix_optab, SImode, TFmode,
			"_U_Qfcnvfxt_quad_to_usgl");
      set_conv_libfunc (ufix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_udbl");

      set_conv_libfunc (sfloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_sgl_to_quad");
      set_conv_libfunc (sfloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_dbl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_usgl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_udbl_to_quad");
    }

  if (TARGET_SYNC_LIBCALL)
    init_sync_libfuncs (8);
}
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

static void
import_milli (enum millicodes code)
{
  char str[sizeof (import_string)];

  if (!imported[(int) code])
    {
      imported[(int) code] = 1;
      strcpy (str, import_string);
      strncpy (str + MILLI_START, milli_names[(int) code], 4);
      output_asm_insn (str, 0);
    }
}
/* The register constraints have put the operands and return value in
   the proper registers.  */

const char *
pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  import_milli (mulI);
  return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}

/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
int
pa_emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && pa_magic_milli[INTVAL (operands[2])])
    {
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx_PARALLEL
	 (VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
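/* Usage sketch (illustration only, not part of the port): pa_magic_milli[]
   is indexed by the divisor, so only divisors 3, 5, 6, 7, 9, 10, 12, 14
   and 15 take the special $$divI_<n>/$$divU_<n> millicode path.  */
#if 0
static int
has_magic_millicode (long divisor)
{
  return divisor > 0 && divisor < 16 && pa_magic_milli[divisor];
}
#endif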
const char *
pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     millicode routines.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      static char buf[100];
      divisor = INTVAL (operands[0]);
      if (!div_milli[divisor][unsignedp])
	{
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return pa_output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return pa_output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
	}
    }
}
/* Output a $$rem millicode to do mod.  */

const char *
pa_output_mod_insn (int unsignedp, rtx_insn *insn)
{
  if (unsignedp)
    {
      import_milli (remU);
      return pa_output_millicode_call (insn,
				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
    }
  else
    {
      import_milli (remI);
      return pa_output_millicode_call (insn,
				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
    }
}
void
pa_output_arg_descriptor (rtx_insn *call_insn)
{
  const char *arg_regs[4];
  machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  gcc_assert (CALL_P (call_insn));
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
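/* Illustrative example (hedged, not taken from a real compile): for a call
   such as f(1, 2.5) in the 32-bit SOM runtime, the loop above would
   typically emit something along the lines of

	.CALL ARGW0=GR,ARGW1=FR,ARGW2=FU

   with GR for a word passed in %r26..%r23 and FR/FU for the two halves of
   a double-precision FP argument.  The exact descriptor depends on the
   argument modes recorded in CALL_INSN_FUNCTION_USAGE.  */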
/* Inform reload about cases where moving X with a mode MODE to or from
   a register in RCLASS requires an extra scratch or immediate register.
   Return the class needed for the immediate register.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  For float modes,
     the only legitimate constant is CONST0_RTX.  However, there are
     a few patterns that accept constant double operands.  */
  if (flag_pic
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      switch (mode)
	{
	case SImode:
	  sri->icode = CODE_FOR_reload_insi_r1;
	  break;

	case DImode:
	  sri->icode = CODE_FOR_reload_indi_r1;
	  break;

	case SFmode:
	  sri->icode = CODE_FOR_reload_insf_r1;
	  break;

	case DFmode:
	  sri->icode = CODE_FOR_reload_indf_r1;
	  break;

	default:
	  gcc_unreachable ();
	}
      return NO_REGS;
    }

  /* Secondary reloads of symbolic expressions require %r1 as a scratch
     register when we're generating PIC code or when the operand isn't
     readonly.  */
  if (pa_symbolic_expression_p (x))
    {
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
	{
	  switch (mode)
	    {
	    case SImode:
	      sri->icode = CODE_FOR_reload_insi_r1;
	      break;

	    case DImode:
	      sri->icode = CODE_FOR_reload_indi_r1;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  return NO_REGS;
	}
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle reloads for floating point loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && FP_REG_CLASS_P (rclass))
    {
      if (MEM_P (x))
	{
	  x = XEXP (x, 0);

	  /* We don't need a secondary reload for indexed memory addresses.

	     When INT14_OK_STRICT is true, it might appear that we could
	     directly allow register indirect memory addresses.  However,
	     this doesn't work because we don't support SUBREGs in
	     floating-point register copies and reload doesn't tell us
	     when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x))
	    return NO_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = (in_p
		    ? direct_optab_handler (reload_in_optab, mode)
		    : direct_optab_handler (reload_out_optab, mode));
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
	{
	  sri->icode = (in_p
			? direct_optab_handler (reload_in_optab, mode)
			: direct_optab_handler (reload_out_optab, mode));
	  return NO_REGS;
	}

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
	return GENERAL_REGS;
    }

  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
   is only marked as live on entry by df-scan when it is a fixed
   register.  It isn't a fixed register in the 64-bit runtime,
   so we need to mark it here.  */

static void
pa_extra_live_on_entry (bitmap regs)
{
  if (TARGET_64BIT)
    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
}

/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
   to prevent it from being deleted.  */

rtx
pa_eh_return_handler_rtx (void)
{
  rtx tmp;

  tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
  tmp = gen_rtx_MEM (word_mode, tmp);
  tmp->volatil = 1;
  return tmp;
}
/* In the 32-bit runtime, arguments larger than eight bytes are passed
   by invisible reference.  As a GCC extension, we also pass anything
   with a zero or variable size by reference.

   The 64-bit runtime does not describe passing any types by invisible
   reference.  The internals of GCC can't currently handle passing
   empty structures, and zero or variable length arrays when they are
   not passed entirely on the stack or by reference.  Thus, as a GCC
   extension, we pass these types by reference.  The HP compiler doesn't
   support these types, so hopefully there shouldn't be any compatibility
   issues.  This may have to be revisited when HP releases a C99 compiler
   or updates the ABI.  */

static bool
pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
		      machine_mode mode, const_tree type,
		      bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;

  if (type)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  if (TARGET_64BIT)
    return size <= 0;
  else
    return size <= 0 || size > 8;
}
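/* Standalone sketch of the predicate above (illustration only): in the
   32-bit runtime a 12-byte struct is passed by invisible reference while
   an 8-byte struct is passed by value; the 64-bit runtime only forces
   zero- or variable-sized objects by reference.  */
#if 0
static int
passed_by_reference_p (long size, int is_64bit)
{
  return is_64bit ? size <= 0 : (size <= 0 || size > 8);
}
#endif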
enum direction
pa_function_arg_padding (machine_mode mode, const_tree type)
{
  if (mode == BLKmode
      || (TARGET_64BIT
	  && type
	  && (AGGREGATE_TYPE_P (type)
	      || TREE_CODE (type) == COMPLEX_TYPE
	      || TREE_CODE (type) == VECTOR_TYPE)))
    {
      /* Return none if justification is not required.  */
      if (type
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
	return none;

      /* The directions set here are ignored when a BLKmode argument larger
	 than a word is placed in a register.  Different code is used for
	 the stack and registers.  This makes it difficult to have a
	 consistent data representation for both the stack and registers.
	 For both runtimes, the justification and padding for arguments on
	 the stack and in registers should be identical.  */
      if (TARGET_64BIT)
	/* The 64-bit runtime specifies left justification for aggregates.  */
	return upward;

      /* The 32-bit runtime architecture specifies right justification.
	 When the argument is passed on the stack, the argument is padded
	 with garbage on the left.  The HP compiler pads with zeros.  */
      return downward;
    }

  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  int argadj = ((!stdarg_p (fntype))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
      else
	offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (Pmode,
						    arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (Pmode, arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    crtl->args.internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
6357 hppa_va_start (tree valist
, rtx nextarg
)
6359 nextarg
= expand_builtin_saveregs ();
6360 std_expand_builtin_va_start (valist
, nextarg
);

static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                           gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
        {
          type = ptr;
          ptr = build_pointer_type (type);
        }
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = fold_build_pointer_plus (valist, u);

      /* Align to 4 or 8 byte boundary depending on argument size.  */

      u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
      t = fold_convert (valist_type, t);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      ofs = (8 - size) % 4;
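
      /* For example, size == 1 gives ofs == 3 and size == 2 gives
         ofs == 2, while size == 4 or size == 8 give ofs == 0; small
         arguments are right justified, so the value itself sits at
         the end of its word-sized slot.  */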
      if (ofs != 0)
        t = fold_build_pointer_plus_hwi (t, ofs);

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
        t = build_va_arg_indirect_ref (t);

      return t;
    }
}

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.

   Currently, TImode is not valid as the HP 64-bit runtime documentation
   doesn't document the alignment and calling conventions for this type.
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */

static bool
pa_scalar_mode_supported_p (machine_mode mode)
{
  int precision = GET_MODE_PRECISION (mode);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_PARTIAL_INT:
    case MODE_INT:
      if (precision == CHAR_TYPE_SIZE)
        return true;
      if (precision == SHORT_TYPE_SIZE)
        return true;
      if (precision == INT_TYPE_SIZE)
        return true;
      if (precision == LONG_TYPE_SIZE)
        return true;
      if (precision == LONG_LONG_TYPE_SIZE)
        return true;
      return false;

    case MODE_FLOAT:
      if (precision == FLOAT_TYPE_SIZE)
        return true;
      if (precision == DOUBLE_TYPE_SIZE)
        return true;
      if (precision == LONG_DOUBLE_TYPE_SIZE)
        return true;
      return false;

    case MODE_DECIMAL_FLOAT:
      return false;

    default:
      gcc_unreachable ();
    }
}

/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches into the delay slot.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (jump_insn == insn)
        return TRUE;

      /* We can't rely on the length of asms.  So, we return FALSE when
         the branch is followed by an asm.  */
      if (!insn
          || GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0
          || get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
        return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
            || asm_noperands (PATTERN (insn)) >= 0)
          && get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx_insn *insn)
{
  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
          || GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        break;
      if (get_attr_length (insn) == 4
          && jump_insn == next_active_insn (insn))
        return TRUE;
      if (get_attr_length (insn) > 0)
        break;
    }

  return FALSE;
}
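
/* For example, a forward conditional branch over a single 4-byte insn
   can typically be replaced by a comclr/cmpclr that nullifies that
   insn, roughly

        comclr,= %r26,%r25,%r0
        ldo 1(%r28),%r28

   where the ldo is skipped when the condition holds, so the fall
   through path never pays a branch misprediction penalty.  */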

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
        else
          strcpy (buf, "{com%I2b,|cmp%I2b,}");
        if (GET_MODE (operands[1]) == DImode)
          strcat (buf, "*");
        if (negated)
          strcat (buf, "%B3");
        else
          strcat (buf, "%S3");
        if (useskip)
          strcat (buf, " %2,%r1,%%r0");
        else if (nullify)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %2,%r1,%0%#");
            else
              strcat (buf, ",n %2,%r1,%0");
          }
        else
          strcat (buf, " %2,%r1,%0");
        break;

      /* All long conditionals.  Note a short backward branch with an
         unfilled delay slot is treated just like a long backward branch
         with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%S3");
            else
              strcat (buf, "%B3");
            strcat (buf, ",n %2,%r1,.+12\n\tb %0");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a comb;nop rather than comiclr;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%B3 %2,%r1,%0%#");
            else
              strcat (buf, "%S3 %2,%r1,%0%#");
          }
        else
          {
            strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%S3");
            else
              strcat (buf, "%B3");
            if (nullify)
              strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
            else
              strcat (buf, " %2,%r1,%%r0\n\tb %0");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by pa_output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        /* Create a reversed conditional branch which branches around
           the following insns.  */
        if (GET_MODE (operands[1]) != DImode)
          {
            if (nullify)
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
              }
            else
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
              }
          }
        else
          {
            if (nullify)
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
              }
            else
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
              }
          }

        output_asm_insn (buf, operands);
        return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}

/* Output a PIC pc-relative instruction sequence to load the address of
   OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
   or a code label.  OPERANDS[1] specifies the register to use to load
   the program counter.  OPERANDS[3] may be used for label generation.
   The sequence is always three instructions in length.  The program
   counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
   Register %r1 is clobbered.  */

static void
pa_output_pic_pcrel_sequence (rtx *operands)
{
  gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
  if (TARGET_PA_20)
    {
      /* We can use mfia to determine the current program counter.  */
      if (TARGET_SOM || !TARGET_GAS)
        {
          operands[3] = gen_label_rtx ();
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (operands[3]));
          output_asm_insn ("mfia %1", operands);
          output_asm_insn ("addil L'%0-%l3,%1", operands);
          output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
        }
      else
        {
          output_asm_insn ("mfia %1", operands);
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
        }
    }
  else
    {
      /* We need to use a branch to determine the current program counter.  */
      output_asm_insn ("{bl|b,l} .+8,%1", operands);
      if (TARGET_SOM || !TARGET_GAS)
        {
          operands[3] = gen_label_rtx ();
          output_asm_insn ("addil L'%0-%l3,%1", operands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (operands[3]));
          output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
        }
      else
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
        }
    }
}
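
/* E.g., with PA 2.0 and GAS the emitted sequence for a symbol "sym",
   %r1 as the pc register and %r2 as the destination is roughly

        mfia %r1
        addil L'sym-$PIC_pcrel$0+12,%r1
        ldo R'sym-$PIC_pcrel$0+16(%r1),%r2

   The PA 1.X form differs only in using bl .+8 and offsets of 4 and 8,
   matching the eight byte difference noted above.  */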

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[4];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
                       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
        /* Use the slot at -40 in the frame marker since HP builtin
           alloca doesn't copy it.  */
        output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
        /* Use the "Clean Up" slot in the frame marker.  In GCC,
           the only other use of this location is for copying a
           floating point double argument from a floating-point
           register to two general registers.  The copy is done
           as an "atomic" operation when outputting a call, so it
           won't interfere with our using the location here.  */
        output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      xoperands[1] = gen_rtx_REG (Pmode, 1);
      xoperands[2] = xoperands[1];
      pa_output_pic_pcrel_sequence (xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldd -16(%%r30),%%r1";
      else
        return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldw -20(%%r30),%%r1";
      else
        return "ldw -12(%%r30),%%r1";
    }
}
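
/* The byte counts in the comment above follow directly from the emitted
   insns: non-PIC is stw + ldil + be + the delay-slot ldw (16 bytes);
   the portable runtime is stw + ldil + ldo + bv + ldw (20 bytes); and
   PIC is stw + the three insn pc-relative sequence + bv + ldw (24
   bytes).  */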

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated,
              rtx_insn *insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{extrs,|extrw,s,}");
        else
          strcpy (buf, "bb,");
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
            || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, " %0,%1,1,%%r0");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%3%#");
            else
              strcat (buf, ",n %0,%1,%3");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%2%#");
            else
              strcat (buf, ",n %0,%1,%2");
          }
        else if (! nullify && negated)
          strcat (buf, " %0,%1,%3");
        else if (! nullify && ! negated)
          strcat (buf, " %0,%1,%2");
        break;

      /* All long conditionals.  Note a short backward branch with an
         unfilled delay slot is treated just like a long backward branch
         with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, ",n %0,%1,.+12\n\tb %3");
            else
              strcat (buf, ",n %0,%1,.+12\n\tb %2");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, " %0,%1,%3%#");
            else
              strcat (buf, " %0,%1,%2%#");
          }
        else
          {
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            else
              strcpy (buf, "{extrs,|extrw,s,}");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
            else if (nullify && ! negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
            else if (negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb %3");
            else
              strcat (buf, " %0,%1,1,%%r0\n\tb %2");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by pa_output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "bb,");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n %0,%1,.+%4");
        else
          strcat (buf, " %0,%1,.+%4");
        output_asm_insn (buf, operands);
        return pa_output_lbranch (negated ? operands[3] : operands[2],
                                  insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
               int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{vextrs,|extrw,s,}");
        else
          strcpy (buf, "{bvb,|bb,}");
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
            || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
            else
              strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
            else
              strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
          }
        else if (! nullify && negated)
          strcat (buf, "{ %0,%3| %0,%%sar,%3}");
        else if (! nullify && ! negated)
          strcat (buf, "{ %0,%2| %0,%%sar,%2}");
        break;

      /* All long conditionals.  Note a short backward branch with an
         unfilled delay slot is treated just like a long backward branch
         with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
            else
              strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
            else
              strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
          }
        else
          {
            strcpy (buf, "{vextrs,|extrw,s,}");
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
            else if (nullify && ! negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
            else if (negated)
              strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
            else
              strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by pa_output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "{bvb,|bb,}");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
        else
          strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
        output_asm_insn (buf, operands);
        return pa_output_lbranch (negated ? operands[3] : operands[2],
                                  insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */

const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "ldo %1(%0),%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
          output_asm_insn ("ldw -16(%%r30),%4", operands);
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else
        {
          output_asm_insn ("ldw %0,%4", operands);
          return "ldo %1(%4),%4\n\tstw %4,%0";
        }
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "addib,%C2,n %1,%0,%3%#";
              else
                return "addib,%C2,n %1,%0,%3";
            }
          else
            return "addib,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
          /* Handle short backwards branch with an unfilled delay slot.
             Using a addb;nop rather than addi;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "addib,%C2 %1,%0,%3%#";

          /* Handle normal cases.  */
          if (nullify)
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
          else
            return "addi,%N2 %1,%0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by pa_output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

          return pa_output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
         increment the GR, store the GR into MEM, and finally reload
         the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
                       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
        return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
        return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[5] = GEN_INT (length - 16);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return pa_output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
        return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
        return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
        {
          operands[5] = GEN_INT (length - 4);
          output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
          return pa_output_lbranch (operands[3], insn, 0);
        }
    }
}
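
/* A typical latch emitted by the first alternative is, e.g.,

        addib,> -1,%r26,L$0002

   which decrements the loop counter and branches while the result is
   still positive, in a single insn with a delay slot.  */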

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */

const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
                int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "copy %1,%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("stw %1,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else if (which_alternative == 2)
        return "stw %1,%0";
      else
        return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "movb,%C2,n %1,%0,%3%#";
              else
                return "movb,%C2,n %1,%0,%3";
            }
          else
            return "movb,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "movb,%N2,n %1,%0,.+12\n\tb %3";

          /* Handle short backwards branch with an unfilled delay slot.
             Using a movb;nop rather than or;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "movb,%C2 %1,%0,%3%#";
          /* Handle normal cases.  */
          if (nullify)
            return "or,%N2 %1,%%r0,%0\n\tb,n %3";
          else
            return "or,%N2 %1,%%r0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by pa_output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

          return pa_output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
         finally load the FP register from MEM from within the branch's
         delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[4] = GEN_INT (length - 4);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return pa_output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
                           operands);
          return pa_output_lbranch (operands[3], insn, 0);
        }
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
                           operands);
          return pa_output_lbranch (operands[3], insn, 0);
        }
    }
}

/* Copy any FP arguments in INSN into integer registers.  */

static void
copy_fp_args (rtx_insn *insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
          && GET_CODE (XEXP (use, 0)) == REG
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          /* Copy the FP register into an integer register via memory.  */
          if (arg_mode == SFmode)
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
              output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
          else
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
              output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
        }
    }
}

/* Compute length of the FP argument copy sequence for INSN.  */

static int
length_fp_args (rtx_insn *insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
          && GET_CODE (XEXP (use, 0)) == REG
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            length += 8;
          else
            length += 12;
        }
    }

  return length;
}
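
/* E.g., a call whose FP arguments are one SFmode and one DFmode value
   costs 8 + 12 = 20 bytes of copy code: fstw/ldw for the single, and
   fstd plus two ldw's for the double, matching copy_fp_args above.  */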

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
pa_attr_length_millicode_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
        return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
        return 8;

      if (!flag_pic)
        return 12;

      return 24;
    }
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
          || (attr_length == 28
              && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
        {
          /* It might seem that one insn could be saved by accessing
             the millicode function using the linkage table.  However,
             this doesn't work in shared libraries and other dynamically
             loaded objects.  Using a pc-relative sequence also avoids
             problems related to the implicit use of the gp register.  */
          xoperands[1] = gen_rtx_REG (Pmode, 1);
          xoperands[2] = xoperands[1];
          pa_output_pic_pcrel_sequence (xoperands);
          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
        }
      else if (TARGET_PORTABLE_RUNTIME)
        {
          /* Pure portable runtime doesn't allow be/ble; we also don't
             have PIC support in the assembler/linker, so this sequence
             is needed.  */

          /* Get the address of our target into %r1.  */
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

          /* Get our return address into %r31.  */
          output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
          output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
      else if (!flag_pic)
        {
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          if (TARGET_PA_20)
            output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
          else
            output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
        }
      else
        {
          xoperands[1] = gen_rtx_REG (Pmode, 31);
          xoperands[2] = gen_rtx_REG (Pmode, 1);
          pa_output_pic_pcrel_sequence (xoperands);

          /* Adjust return address.  */
          output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx_insn *insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (CALL_P (insn));

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
          || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
           || (TARGET_GAS && !TARGET_SOM && local_call))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
        length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
        length += length_fp_args (insn);

      if (flag_pic)
        length += 4;

      if (!TARGET_PA_20)
        {
          if (!sibcall)
            length += 8;

          if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
            length += 8;
        }
    }

  return length;
}
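
/* E.g., a call that is within reach is estimated at 8 bytes: the bl
   plus its delay slot.  The 32-bit plabel case starts at 32 bytes and
   grows by the FP argument copies on SOM, 4 bytes for PIC, and the
   PA 1.X return-pointer and space-register fixups counted above.  */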

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
        {
          /* ??? As far as I can tell, the HP linker doesn't support the
             long pc-relative sequence described in the 64-bit runtime
             architecture.  So, we use a slightly longer indirect call.  */
          xoperands[0] = pa_get_deferred_plabel (call_dest);
          xoperands[1] = gen_label_rtx ();

          /* If this isn't a sibcall, we put the load of %r27 into the
             delay slot.  We can't do this in a sibcall as we don't
             have a second call-clobbered scratch register available.
             We don't need to do anything when generating fast indirect
             calls.  */
          if (seq_length != 0 && !sibcall)
            {
              final_scan_insn (NEXT_INSN (insn), asm_out_file,
                               optimize, 0, NULL);

              /* Now delete the delay insn.  */
              SET_INSN_DELETED (NEXT_INSN (insn));
              seq_length = 0;
            }

          output_asm_insn ("addil LT'%0,%%r27", xoperands);
          output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
          output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

          if (sibcall)
            {
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
              output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
              output_asm_insn ("bve (%%r1)", xoperands);
            }
          else
            {
              output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
              output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
              seq_length = 1;
            }
        }
      else
        {
          int indirect_call = 0;

          /* Emit a long call.  There are several different sequences
             of increasing length and complexity.  In most cases,
             they don't allow an instruction in the delay slot.  */
          if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
              && !TARGET_LONG_PIC_SDIFF_CALL
              && !(TARGET_GAS && !TARGET_SOM && local_call)
              && !TARGET_64BIT)
            indirect_call = 1;

          if (seq_length != 0
              && !sibcall
              && (!TARGET_PA_20
                  || indirect_call
                  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
            {
              /* A non-jump insn in the delay slot.  By definition we can
                 emit this insn before the call (and in fact before argument
                 relocating).  */
              final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
                               NULL);

              /* Now delete the delay insn.  */
              SET_INSN_DELETED (NEXT_INSN (insn));
              seq_length = 0;
            }

          if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
            {
              /* This is the best sequence for making long calls in
                 non-pic code.  Unfortunately, GNU ld doesn't provide
                 the stub needed for external calls, and GAS's support
                 for this with the SOM linker is buggy.  It is safe
                 to use this for local calls.  */
              output_asm_insn ("ldil L'%0,%%r1", xoperands);
              if (sibcall)
                output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
              else
                {
                  if (TARGET_PA_20)
                    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
                                     xoperands);
                  else
                    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

                  output_asm_insn ("copy %%r31,%%r2", xoperands);
                  seq_length = 1;
                }
            }
          else
            {
              /* The HP assembler and linker can handle relocations for
                 the difference of two symbols.  The HP assembler
                 recognizes the sequence as a pc-relative call and
                 the linker provides stubs when needed.  */

              /* GAS currently can't generate the relocations that
                 are needed for the SOM linker under HP-UX using this
                 sequence.  The GNU linker doesn't generate the stubs
                 that are needed for external calls on TARGET_ELF32
                 with this sequence.  For now, we have to use a longer
                 plabel sequence when using GAS for non local calls.  */
              if (TARGET_LONG_PIC_SDIFF_CALL
                  || (TARGET_GAS && !TARGET_SOM && local_call))
                {
                  xoperands[1] = gen_rtx_REG (Pmode, 1);
                  xoperands[2] = xoperands[1];
                  pa_output_pic_pcrel_sequence (xoperands);
                }
              else
                {
                  /* Emit a long plabel-based call sequence.  This is
                     essentially an inline implementation of $$dyncall.
                     We don't actually try to call $$dyncall as this is
                     as difficult as calling the function itself.  */
                  xoperands[0] = pa_get_deferred_plabel (call_dest);
                  xoperands[1] = gen_label_rtx ();

                  /* Since the call is indirect, FP arguments in registers
                     need to be copied to the general registers.  Then, the
                     argument relocation stub will copy them back.  */
                  if (TARGET_SOM)
                    copy_fp_args (insn);

                  if (flag_pic)
                    {
                      output_asm_insn ("addil LT'%0,%%r19", xoperands);
                      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
                      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
                    }
                  else
                    {
                      output_asm_insn ("addil LR'%0-$global$,%%r27",
                                       xoperands);
                      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
                                       xoperands);
                    }

                  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
                  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
                  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
                  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

                  if (!sibcall && !TARGET_PA_20)
                    {
                      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
                      else
                        output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
                    }
                }

              if (TARGET_PA_20)
                {
                  if (sibcall)
                    output_asm_insn ("bve (%%r1)", xoperands);
                  else
                    {
                      if (indirect_call)
                        {
                          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                          output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
                          seq_length = 1;
                        }
                      else
                        output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                    }
                }
              else
                {
                  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
                    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
                                     xoperands);

                  if (sibcall)
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
                    }
                  else
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

                      if (indirect_call)
                        output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
                      else
                        output_asm_insn ("copy %%r31,%%r2", xoperands);
                      seq_length = 1;
                    }
                }
            }
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
pa_attr_length_indirect_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS)
    return 8;

  if (TARGET_PORTABLE_RUNTIME)
    return 16;

  /* Inline version of $$dyncall.  */
  if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
    return 20;

  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
          || distance < MAX_PCREL17F_OFFSET))
    return 8;

  /* Out of reach, can use ble.  */
  if (!flag_pic)
    return 12;

  /* Inline version of $$dyncall.  */
  if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
    return 20;

  if (!optimize_size)
    return 36;

  /* Long PIC pc-relative call.  */
  return 20;
}

const char *
pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
{
  rtx xoperands[4];
  int length;

  if (TARGET_64BIT)
    {
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2\n\t"
                       "bve,l (%%r2),%%r2\n\t"
                       "ldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    {
      pa_output_arg_descriptor (insn);
      if (TARGET_PA_20)
        return "bve,l,n (%%r22),%%r2\n\tnop";
      return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
                       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
      pa_output_arg_descriptor (insn);
      return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
    }

  /* Maybe emit a fast inline version of $$dyncall.  */
  if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
    {
      output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
                       "ldw 2(%%r22),%%r19\n\t"
                       "ldw -2(%%r22),%%r22", xoperands);
      pa_output_arg_descriptor (insn);
      if (TARGET_NO_SPACE_REGS)
        {
          if (TARGET_PA_20)
            return "bve,l,n (%%r22),%%r2\n\tnop";
          return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
        }
      return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
    }

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  length = pa_attr_length_indirect_call (insn);
  if (length == 8)
    {
      pa_output_arg_descriptor (insn);

      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
         $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
         variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
        return "b,l,n $$dyncall,%%r2\n\tnop";
      else
        return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (length == 12)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
      pa_output_arg_descriptor (insn);
      return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
    }

  /* Maybe emit a fast inline version of $$dyncall.  The long PIC
     pc-relative call sequence is five instructions.  The inline PA 2.0
     version of $$dyncall is also five instructions.  The PA 1.X versions
     are longer but still an overall win.  */
  if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
    {
      output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
                       "ldw 2(%%r22),%%r19\n\t"
                       "ldw -2(%%r22),%%r22", xoperands);
      if (TARGET_NO_SPACE_REGS)
        {
          pa_output_arg_descriptor (insn);
          if (TARGET_PA_20)
            return "bve,l,n (%%r22),%%r2\n\tnop";
          return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
        }
      if (TARGET_PA_20)
        {
          pa_output_arg_descriptor (insn);
          return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
        }
      output_asm_insn ("bl .+8,%%r2\n\t"
                       "ldo 16(%%r2),%%r2\n\t"
                       "ldsid (%%r22),%%r1\n\t"
                       "mtsp %%r1,%%sr0", xoperands);
      pa_output_arg_descriptor (insn);
      return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
    }

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
  xoperands[1] = gen_rtx_REG (Pmode, 2);
  xoperands[2] = gen_rtx_REG (Pmode, 1);
  pa_output_pic_pcrel_sequence (xoperands);
  pa_output_arg_descriptor (insn);
  return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
}

/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */

void
pa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}
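
/* E.g., the assembler name "foo" becomes "@foo"; pa_strip_name_encoding
   below removes the prefix again.  */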

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
        pa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}

/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */

int
pa_is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
          && function_label_operand (XEXP (op, 0), VOIDmode)
          && GET_CODE (XEXP (op, 1)) == CONST_INT);
}
/* Output assembly code for a thunk to FUNCTION.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
                        HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                        tree function)
{
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[17];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  final_start_function (emit_barrier (), file, 1);

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
          && ((targetm_common.have_named_sections
               && DECL_SECTION_NAME (thunk_fndecl) != NULL
               /* The GNU 64-bit linker has rather poor stub management.
                  So, we use a long branch from thunks that aren't in
                  the same section as the target function.  */
               && ((!TARGET_64BIT
                    && (DECL_SECTION_NAME (thunk_fndecl)
                        != DECL_SECTION_NAME (function)))
                   || ((DECL_SECTION_NAME (thunk_fndecl)
                        == DECL_SECTION_NAME (function))
                       && last_address < 262132)))
              /* In this case, we need to be able to reach the start of
                 the stub table even though the function is likely closer
                 and can be jumped to directly.  */
              || (targetm_common.have_named_sections
                  && DECL_SECTION_NAME (thunk_fndecl) == NULL
                  && DECL_SECTION_NAME (function) == NULL
                  && total_code_bytes < MAX_PCREL17F_OFFSET)
              /* Likewise.  */
              || (!targetm_common.have_named_sections
                  && total_code_bytes < MAX_PCREL17F_OFFSET))))
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("b %0", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 8;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 12;
        }
    }
  else if (TARGET_64BIT)
    {
      rtx xop[4];

      /* We only have one call-clobbered scratch register, so we can't
         make use of the delay slot if delta doesn't fit in 14 bits.  */
      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
        }

      /* Load function address into %r1.  */
      xop[0] = xoperands[0];
      xop[1] = gen_rtx_REG (Pmode, 1);
      xop[2] = xop[1];
      pa_output_pic_pcrel_sequence (xop);

      if (val_14)
        {
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
          nbytes += 24;
        }
    }
  else if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

      if (!val_14)
        output_asm_insn ("ldil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 16;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
    }
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* The function is accessible from outside this module.  The only
         way to avoid an import stub between the thunk and function is to
         call the function directly with an indirect sequence similar to
         that used by $$dyncall.  This is possible because $$dyncall acts
         as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          nbytes += 4;
        }

      if (TARGET_PA_20)
        {
          output_asm_insn ("bve (%%r22)", xoperands);
          nbytes += 36;
        }
      else if (TARGET_NO_SPACE_REGS)
        {
          output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
          nbytes += 36;
        }
      else
        {
          output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
          output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
          output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
          nbytes += 44;
        }

      if (val_14)
        output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
        output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      rtx xop[4];

      /* Load function address into %r22.  */
      xop[0] = xoperands[0];
      xop[1] = gen_rtx_REG (Pmode, 1);
      xop[2] = gen_rtx_REG (Pmode, 22);
      pa_output_pic_pcrel_sequence (xop);

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 24;
        }
    }
  else
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 12;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 16;
        }
    }

  final_end_function ();

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
            & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
}
/* Only direct calls to static functions are allowed to be sibling (tail)
   call optimized.

   This restriction is necessary because some linker generated stubs will
   store return pointers into rp' in some cases which might clobber a
   live value already in rp'.

   In a sibcall the current function and the target function share stack
   space.  Thus if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */

static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return (decl && !TREE_PUBLIC (decl));
}
/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */

static bool
pa_commutative_p (const_rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
          && (TARGET_NO_SPACE_REGS
              || (outer_code != UNKNOWN && outer_code != MEM)
              || GET_CODE (x) != PLUS));
}
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
int
pa_fmpyaddoperands (rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
      || rtx_equal_p (operands[3], operands[1])
      || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply cannot feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}
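
/* A hypothetical DFmode operand set accepted above (illustration only,
   not from the original sources):

       operands[0] = fr6    fmpy output
       operands[1] = fr4    fmpy input 1
       operands[2] = fr5    fmpy input 2
       operands[3] = fr8    fadd inout
       operands[4] = fr8    fadd input, same as operands[3]
       operands[5] = fr7    fadd input

   The inout add operand matches one add input, and neither the add's
   inputs nor its output overlap the multiply's operands, so this set
   could map onto a single fmpyadd.  */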
#if !defined(USE_COLLECT2)
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}

static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
#endif
/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.  */

void
pa_asm_output_aligned_bss (FILE *stream,
                           const char *name,
                           unsigned HOST_WIDE_INT size,
                           unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
}
/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */

void
pa_asm_output_aligned_common (FILE *stream,
                              const char *name,
                              unsigned HOST_WIDE_INT size,
                              unsigned int align)
{
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
               "for global common data.  Using %u",
               align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
           MAX (size, align / BITS_PER_UNIT));
}
/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
                             const char *name,
                             unsigned HOST_WIDE_INT size,
                             unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
}
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.  */
int
pa_fmpysuboperands (rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
     operation, so operands[4] must be the same as operands[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply cannot feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
      || rtx_equal_p (operands[3], operands[1])
      || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}

/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  if (val == 1 || val == 2 || val == 3)
    return 1;
  else
    return 0;
}
/* Return TRUE if INSN branches forward.  */

static bool
forward_branch_p (rtx_insn *insn)
{
  rtx lab = JUMP_LABEL (insn);

  /* The INSN must have a jump label.  */
  gcc_assert (lab != NULL_RTX);

  if (INSN_ADDRESSES_SET_P ())
    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));

  while (insn)
    {
      if (insn == lab)
        return true;
      else
        insn = NEXT_INSN (insn);
    }

  return false;
}
/* Output an unconditional move and branch insn.  */

const char *
pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        {
          /* Nothing in the delay slot, fake it by putting the combined
             insn (the copy or add) in the delay slot of a bl.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "b %2\n\tldi %1,%0";
          else
            return "b %2\n\tcopy %1,%0";
        }
      else
        {
          /* Something in the delay slot, but we've got a long branch.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "ldi %1,%0\n\tb %2";
          else
            return "copy %1,%0\n\tb %2";
        }
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return pa_output_lbranch (operands[2], insn, 1);
}
/* Output an unconditional add and branch insn.  */

const char *
pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      /* Nothing in the delay slot, fake it by putting the combined
         insn (the copy or add) in the delay slot of a bl.  */
      if (dbr_sequence_length () == 0)
        return "b %3\n\tadd%I1 %1,%0,%0";
      /* Something in the delay slot, but we've got a long branch.  */
      else
        return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return pa_output_lbranch (operands[3], insn, 1);
}
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.  */

static void
pa_reorg (void)
{
  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
}
/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

   * addb can add two registers or a register and a small integer
   and jump to a nearby (+-8k) location.  Normally the jump to the
   nearby location is conditional on the result of the add, but by
   using the "true" condition we can make the jump unconditional.
   Thus addb can perform two independent operations in one insn.

   * movb is similar to addb in that it can perform a reg->reg
   or small immediate->reg copy and jump to a nearby (+-8k) location.

   * fmpyadd and fmpysub can perform an FP multiply and either an
   FP add or FP sub if the operands of the multiply and add/sub are
   independent (there are other minor restrictions).  Note both
   the fmpy and fadd/fsub can in theory move to better spots according
   to data dependencies, but for now we require the fmpy stay at a
   fixed location.  (A hypothetical example of this combination is
   sketched just after this comment.)

   * Many of the memory operations can perform pre & post updates
   of index registers.  GCC's pre/post increment/decrement addressing
   is far too simple to take advantage of all the possibilities.  This
   pass may not be suitable since those insns may not be independent.

   * comclr can compare two ints or an int and a register, nullify
   the following instruction and zero some other register.  This
   is more difficult to use as it's harder to find an insn which
   will generate a comclr than finding something like an unconditional
   branch.  (conditional moves & long branches create comclr insns).

   * Most arithmetic operations can conditionally skip the next
   instruction.  They can be viewed as "perform this operation
   and conditionally jump to this nearby location" (where nearby
   is an insn away).  These are difficult to use due to the
   branch length restrictions.  */
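
/* For illustration only (hypothetical insns, not from the original
   sources): on a PA1.1 machine the pass below can merge two independent
   FP insns such as

       fmpy,dbl %fr4,%fr5,%fr6
       fadd,dbl %fr7,%fr8,%fr8

   into a single two-output fmpyadd insn, provided the operand
   independence checks in pa_can_combine_p succeed.  */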
static void
pa_combine_instructions (void)
{
  rtx_insn *anchor;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  rtx_insn *new_rtx = make_insn_raw (par);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx_insn *floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (NOTE_P (floater)
                  || (NONJUMP_INSN_P (floater)
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (! NONJUMP_INSN_P (floater))
                {
                  floater = NULL;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }

              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backwards scan try forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (NOTE_P (floater)
                      || (NONJUMP_INSN_P (floater)
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (! NONJUMP_INSN_P (floater))
                    {
                      floater = NULL;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  0),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
                                       copy_rtx (PATTERN (floater)));
              rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
              emit_insn_before (temp, anchor);

              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              temp = copy_rtx (PATTERN (floater));
              emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              /* Emit the new_jump instruction and delete the old anchor.  */
              rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
                                       copy_rtx (PATTERN (floater)));
              rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
              temp = emit_jump_insn_before (temp, anchor);

              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              temp = copy_rtx (PATTERN (floater));
              emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
                  int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
          !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */

int
pa_insn_refs_are_delayed (rtx_insn *insn)
{
  return ((NONJUMP_INSN_P (insn)
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}
/* Promote the return value, but not the arguments.  */

static machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if ((valsize & (UNITS_PER_WORD - 1)) == 0)
        return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (valsize > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
                  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
                         const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = FUNCTION_ARG_SIZE (mode, type);

  cum->nargs_prototype--;
  cum->words += (arg_size
                 + ((cum->words & 01)
                    && type != NULL_TREE
                    && arg_size > 1));
}
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
                 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
                     ? (integer_zerop (TYPE_SIZE (type))
                        || !TREE_CONSTANT (TYPE_SIZE (type))
                        || int_size_in_bytes (type) <= UNITS_PER_WORD)
                     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
                      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
                             "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc can
   not be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, we always force an RTX needing relocation to
     the data section.  */
  return 3;
}
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
/* Worker function for TARGET_RETURN_IN_MEMORY.  */

static bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
}
#endif
/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (machine_mode from, machine_mode to,
                             enum reg_class rclass)
{
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we can't allow changing between modes with
     different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tieing these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}
/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
        fputs ("\tdepi 0,31,2,%r21\n", f);
      else
        fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
        {
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0  ; fptr\n", f);
      fputs ("\t.dword 0  ; static link\n", f);
    }
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
                                                            r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
  return addr;
}
static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));

  return x;
}
static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
        return TFmode;
    }

  return VOIDmode;
}
/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
                     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}
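
/* For example (an illustrative declaration, not from the testsuite),

	void (*fp) (void) __attribute__ ((section (".data.rel.ro"))) = f;

   places a function label in ".data.rel.ro"; without forcing
   SECTION_WRITE and SECTION_RELRO here, the flags computed for the
   attribute-placed decl could disagree with those of the section
   itself and provoke a section type conflict.  */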

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed memory
   operand.  As we won't need to canonicalize the operands if the
   REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base = 0, index = 0;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
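
/* Some concrete shapes the predicate above accepts (register numbers
   are illustrative; addresses are Pmode, i.e. SImode on 32-bit
   targets):

	(reg %r26)					REG
	(plus (reg %r26) (const_int 12))		REG+SMALLINT
	(plus (reg %r25) (reg %r26))			REG+REG
	(plus (mult (reg %r25) (const_int 4))
	      (reg %r26))				REG+(REG*SCALE),
							e.g. an SFmode access
	(lo_sum (reg %r27) (symbol_ref "x"))		LO_SUM

   The MULT scale must equal GET_MODE_SIZE of the access mode, and the
   two indexed forms are additionally gated by TARGET_DISABLE_INDEXING
   and the space-register ordering checks.  */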

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory (X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}
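
/* A worked example of the transform above: for an SFmode access with
   !INT14_OK_STRICT the mask is 0x1f, so for

	memory (X + 0x4f)

   we have (0x4f & 0x1f) = 0xf, below the halfway point 0x10, hence we
   round down to Y = 0x40.  VAL_14_BITS_P (0x40) holds, so the address
   is rewritten as

	memory ((X + 0x40) + 0xf)

   and X + 0x40 is pushed as a base-register reload whose result later
   references near X + 0x40 can inherit, leaving a residual offset that
   fits the 5-bit FP displacement.  */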

/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}
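
/* For illustration, a hypothetical address difference vector for a
   small switch might be emitted roughly as

	L$0012:
		.begin_brtab
		.word L$0004-L$0012
		.word L$0005-L$0012
		.end_brtab

   where the .begin_brtab/.end_brtab bracketing, emitted only for GAS,
   marks the enclosed words as a branch table.  */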

/* This is a helper function for the other atomic operations.  This function
   emits a loop that contains SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label, 0);
  return true;
}
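
/* A hedged usage sketch (hypothetical, not code from this file): a
   64-bit expander for a seq-cst atomic OR with no native pattern could
   be built on the helper above along these lines:

	rtx old = gen_reg_rtx (DImode);
	rtx tmp = gen_reg_rtx (DImode);
	start_sequence ();
	emit_insn (gen_iordi3 (tmp, old, val));	  /* tmp = old | val  */
	rtx seq = get_insns ();
	end_sequence ();
	pa_expand_compare_and_swap_loop (mem, old, tmp, seq);

   Each iteration recomputes TMP from the freshly observed value in OLD
   before retrying the compare-and-swap.  */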

/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
	target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
	return target;
    }

  return NULL_RTX;
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
		  machine_mode mode ATTRIBUTE_UNUSED,
		  const_tree type ATTRIBUTE_UNUSED,
		  bool named ATTRIBUTE_UNUSED)
{
  return !TARGET_CALLER_COPIES;
}