1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "stringpool.h"
35 #include "diagnostic-core.h"
36 #include "insn-attr.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
47 #include "common/common-target.h"
48 #include "langhooks.h"
53 /* This file should be included last. */
54 #include "target-def.h"
56 /* Return nonzero if there is a bypass for the output of
57 OUT_INSN and the fp store IN_INSN. */
59 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
61 machine_mode store_mode
;
62 machine_mode other_mode
;
65 if (recog_memoized (in_insn
) < 0
66 || (get_attr_type (in_insn
) != TYPE_FPSTORE
67 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
68 || recog_memoized (out_insn
) < 0)
71 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
73 set
= single_set (out_insn
);
77 other_mode
= GET_MODE (SET_SRC (set
));
79 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
/* Emit DWARF frame notes only when the target provides a way to
   recover the incoming return address (required for unwinding).  */
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
91 static void pa_option_override (void);
92 static void copy_reg_pointer (rtx
, rtx
);
93 static void fix_range (const char *);
94 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
96 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
97 static bool hppa_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
98 static inline rtx
force_mode (machine_mode
, rtx
);
99 static void pa_reorg (void);
100 static void pa_combine_instructions (void);
101 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
103 static bool forward_branch_p (rtx_insn
*);
104 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
105 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
106 static int compute_movmem_length (rtx_insn
*);
107 static int compute_clrmem_length (rtx_insn
*);
108 static bool pa_assemble_integer (rtx
, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, HOST_WIDE_INT
, int);
111 static void store_reg_modify (int, int, HOST_WIDE_INT
);
112 static void load_reg (int, HOST_WIDE_INT
, int);
113 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
114 static rtx
pa_function_value (const_tree
, const_tree
, bool);
115 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
116 static bool pa_function_value_regno_p (const unsigned int);
117 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT
);
118 static void update_total_code_bytes (unsigned int);
119 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT
);
120 static int pa_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
121 static int pa_adjust_priority (rtx_insn
*, int);
122 static int pa_issue_rate (void);
123 static int pa_reloc_rw_mask (void);
124 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
125 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
126 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
128 static void pa_encode_section_info (tree
, rtx
, int);
129 static const char *pa_strip_name_encoding (const char *);
130 static bool pa_function_ok_for_sibcall (tree
, tree
);
131 static void pa_globalize_label (FILE *, const char *)
133 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
134 HOST_WIDE_INT
, tree
);
135 #if !defined(USE_COLLECT2)
136 static void pa_asm_out_constructor (rtx
, int);
137 static void pa_asm_out_destructor (rtx
, int);
139 static void pa_init_builtins (void);
140 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
141 static rtx
hppa_builtin_saveregs (void);
142 static void hppa_va_start (tree
, rtx
);
143 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
144 static bool pa_scalar_mode_supported_p (machine_mode
);
145 static bool pa_commutative_p (const_rtx x
, int outer_code
);
146 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
147 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
148 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
149 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
150 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
151 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
152 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
153 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
154 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
155 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
156 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
157 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
158 static void output_deferred_plabels (void);
159 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
160 #ifdef ASM_OUTPUT_EXTERNAL_REAL
161 static void pa_hpux_file_end (void);
163 static void pa_init_libfuncs (void);
164 static rtx
pa_struct_value_rtx (tree
, int);
165 static bool pa_pass_by_reference (cumulative_args_t
, machine_mode
,
167 static int pa_arg_partial_bytes (cumulative_args_t
, machine_mode
,
169 static void pa_function_arg_advance (cumulative_args_t
, machine_mode
,
171 static rtx
pa_function_arg (cumulative_args_t
, machine_mode
,
173 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
174 static struct machine_function
* pa_init_machine_status (void);
175 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
177 secondary_reload_info
*);
178 static void pa_extra_live_on_entry (bitmap
);
179 static machine_mode
pa_promote_function_mode (const_tree
,
183 static void pa_asm_trampoline_template (FILE *);
184 static void pa_trampoline_init (rtx
, tree
, rtx
);
185 static rtx
pa_trampoline_adjust_address (rtx
);
186 static rtx
pa_delegitimize_address (rtx
);
187 static bool pa_print_operand_punct_valid_p (unsigned char);
188 static rtx
pa_internal_arg_pointer (void);
189 static bool pa_can_eliminate (const int, const int);
190 static void pa_conditional_register_usage (void);
191 static machine_mode
pa_c_mode_for_suffix (char);
192 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
193 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
194 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
195 static unsigned int pa_section_type_flags (tree
, const char *, int);
196 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool);
198 /* The following extra sections are only used for SOM. */
199 static GTY(()) section
*som_readonly_data_section
;
200 static GTY(()) section
*som_one_only_readonly_data_section
;
201 static GTY(()) section
*som_one_only_data_section
;
202 static GTY(()) section
*som_tm_clone_table_section
;
204 /* Counts for the number of callee-saved general and floating point
205 registers which were saved by the current function's prologue. */
206 static int gr_saved
, fr_saved
;
208 /* Boolean indicating whether the return pointer was saved by the
209 current function's prologue. */
210 static bool rp_saved
;
212 static rtx
find_addr_reg (rtx
);
214 /* Keep track of the number of bytes we have output in the CODE subspace
215 during this compilation so we'll know when to emit inline long-calls. */
216 unsigned long total_code_bytes
;
218 /* The last address of the previous function plus the number of bytes in
219 associated thunks that have been output. This is used to determine if
220 a thunk can use an IA-relative branch to reach its target function. */
221 static unsigned int last_address
;
223 /* Variables to handle plabels that we discover are necessary at assembly
224 output time. They are output after the current function. */
225 struct GTY(()) deferred_plabel
230 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
232 static size_t n_deferred_plabels
= 0;
234 /* Initialize the GCC target structure. */
236 #undef TARGET_OPTION_OVERRIDE
237 #define TARGET_OPTION_OVERRIDE pa_option_override
239 #undef TARGET_ASM_ALIGNED_HI_OP
240 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
241 #undef TARGET_ASM_ALIGNED_SI_OP
242 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
243 #undef TARGET_ASM_ALIGNED_DI_OP
244 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
245 #undef TARGET_ASM_UNALIGNED_HI_OP
246 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
247 #undef TARGET_ASM_UNALIGNED_SI_OP
248 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
249 #undef TARGET_ASM_UNALIGNED_DI_OP
250 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
251 #undef TARGET_ASM_INTEGER
252 #define TARGET_ASM_INTEGER pa_assemble_integer
254 #undef TARGET_ASM_FUNCTION_PROLOGUE
255 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
256 #undef TARGET_ASM_FUNCTION_EPILOGUE
257 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
259 #undef TARGET_FUNCTION_VALUE
260 #define TARGET_FUNCTION_VALUE pa_function_value
261 #undef TARGET_LIBCALL_VALUE
262 #define TARGET_LIBCALL_VALUE pa_libcall_value
263 #undef TARGET_FUNCTION_VALUE_REGNO_P
264 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
266 #undef TARGET_LEGITIMIZE_ADDRESS
267 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
269 #undef TARGET_SCHED_ADJUST_COST
270 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
271 #undef TARGET_SCHED_ADJUST_PRIORITY
272 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
273 #undef TARGET_SCHED_ISSUE_RATE
274 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
276 #undef TARGET_ENCODE_SECTION_INFO
277 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
278 #undef TARGET_STRIP_NAME_ENCODING
279 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
281 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
282 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
284 #undef TARGET_COMMUTATIVE_P
285 #define TARGET_COMMUTATIVE_P pa_commutative_p
287 #undef TARGET_ASM_OUTPUT_MI_THUNK
288 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
289 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
290 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
292 #undef TARGET_ASM_FILE_END
293 #ifdef ASM_OUTPUT_EXTERNAL_REAL
294 #define TARGET_ASM_FILE_END pa_hpux_file_end
296 #define TARGET_ASM_FILE_END output_deferred_plabels
299 #undef TARGET_ASM_RELOC_RW_MASK
300 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
302 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
303 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
305 #if !defined(USE_COLLECT2)
306 #undef TARGET_ASM_CONSTRUCTOR
307 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
308 #undef TARGET_ASM_DESTRUCTOR
309 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
312 #undef TARGET_INIT_BUILTINS
313 #define TARGET_INIT_BUILTINS pa_init_builtins
315 #undef TARGET_EXPAND_BUILTIN
316 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
318 #undef TARGET_REGISTER_MOVE_COST
319 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
320 #undef TARGET_RTX_COSTS
321 #define TARGET_RTX_COSTS hppa_rtx_costs
322 #undef TARGET_ADDRESS_COST
323 #define TARGET_ADDRESS_COST hppa_address_cost
325 #undef TARGET_MACHINE_DEPENDENT_REORG
326 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
328 #undef TARGET_INIT_LIBFUNCS
329 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
331 #undef TARGET_PROMOTE_FUNCTION_MODE
332 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
333 #undef TARGET_PROMOTE_PROTOTYPES
334 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
336 #undef TARGET_STRUCT_VALUE_RTX
337 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
338 #undef TARGET_RETURN_IN_MEMORY
339 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
340 #undef TARGET_MUST_PASS_IN_STACK
341 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
342 #undef TARGET_PASS_BY_REFERENCE
343 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
344 #undef TARGET_CALLEE_COPIES
345 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
346 #undef TARGET_ARG_PARTIAL_BYTES
347 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
348 #undef TARGET_FUNCTION_ARG
349 #define TARGET_FUNCTION_ARG pa_function_arg
350 #undef TARGET_FUNCTION_ARG_ADVANCE
351 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
352 #undef TARGET_FUNCTION_ARG_BOUNDARY
353 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
355 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
356 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
357 #undef TARGET_EXPAND_BUILTIN_VA_START
358 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
359 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
360 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
362 #undef TARGET_SCALAR_MODE_SUPPORTED_P
363 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
365 #undef TARGET_CANNOT_FORCE_CONST_MEM
366 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
368 #undef TARGET_SECONDARY_RELOAD
369 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
371 #undef TARGET_EXTRA_LIVE_ON_ENTRY
372 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
376 #undef TARGET_TRAMPOLINE_INIT
377 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
380 #undef TARGET_DELEGITIMIZE_ADDRESS
381 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
382 #undef TARGET_INTERNAL_ARG_POINTER
383 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
384 #undef TARGET_CAN_ELIMINATE
385 #define TARGET_CAN_ELIMINATE pa_can_eliminate
386 #undef TARGET_CONDITIONAL_REGISTER_USAGE
387 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
388 #undef TARGET_C_MODE_FOR_SUFFIX
389 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
390 #undef TARGET_ASM_FUNCTION_SECTION
391 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
393 #undef TARGET_LEGITIMATE_CONSTANT_P
394 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
395 #undef TARGET_SECTION_TYPE_FLAGS
396 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
397 #undef TARGET_LEGITIMATE_ADDRESS_P
398 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
400 struct gcc_target targetm
= TARGET_INITIALIZER
;
402 /* Parse the -mfixed-range= option string. */
405 fix_range (const char *const_str
)
408 char *str
, *dash
, *comma
;
410 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
411 REG2 are either register names or register numbers. The effect
412 of this option is to mark the registers in the range from REG1 to
413 REG2 as ``fixed'' so they won't be used by the compiler. This is
414 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
416 i
= strlen (const_str
);
417 str
= (char *) alloca (i
+ 1);
418 memcpy (str
, const_str
, i
+ 1);
422 dash
= strchr (str
, '-');
425 warning (0, "value of -mfixed-range must have form REG1-REG2");
430 comma
= strchr (dash
+ 1, ',');
434 first
= decode_reg_name (str
);
437 warning (0, "unknown register name: %s", str
);
441 last
= decode_reg_name (dash
+ 1);
444 warning (0, "unknown register name: %s", dash
+ 1);
452 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
456 for (i
= first
; i
<= last
; ++i
)
457 fixed_regs
[i
] = call_used_regs
[i
] = 1;
466 /* Check if all floating point registers have been fixed. */
467 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
472 target_flags
|= MASK_DISABLE_FPREGS
;
475 /* Implement the TARGET_OPTION_OVERRIDE hook. */
478 pa_option_override (void)
481 cl_deferred_option
*opt
;
482 vec
<cl_deferred_option
> *v
483 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
486 FOR_EACH_VEC_ELT (*v
, i
, opt
)
488 switch (opt
->opt_index
)
490 case OPT_mfixed_range_
:
491 fix_range (opt
->arg
);
499 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
501 warning (0, "PIC code generation is not supported in the portable runtime model");
504 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
506 warning (0, "PIC code generation is not compatible with fast indirect calls");
509 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
511 warning (0, "-g is only supported when using GAS on this processor,");
512 warning (0, "-g option disabled");
513 write_symbols
= NO_DEBUG
;
516 /* We only support the "big PIC" model now. And we always generate PIC
517 code when in 64bit mode. */
518 if (flag_pic
== 1 || TARGET_64BIT
)
521 /* Disable -freorder-blocks-and-partition as we don't support hot and
522 cold partitioning. */
523 if (flag_reorder_blocks_and_partition
)
525 inform (input_location
,
526 "-freorder-blocks-and-partition does not work "
527 "on this architecture");
528 flag_reorder_blocks_and_partition
= 0;
529 flag_reorder_blocks
= 1;
532 /* We can't guarantee that .dword is available for 32-bit targets. */
533 if (UNITS_PER_WORD
== 4)
534 targetm
.asm_out
.aligned_op
.di
= NULL
;
536 /* The unaligned ops are only available when using GAS. */
539 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
540 targetm
.asm_out
.unaligned_op
.si
= NULL
;
541 targetm
.asm_out
.unaligned_op
.di
= NULL
;
544 init_machine_status
= pa_init_machine_status
;
549 PA_BUILTIN_COPYSIGNQ
,
552 PA_BUILTIN_HUGE_VALQ
,
556 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
559 pa_init_builtins (void)
561 #ifdef DONT_HAVE_FPUTC_UNLOCKED
563 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
564 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
565 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
572 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
573 set_user_assembler_name (decl
, "_Isfinite");
574 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
575 set_user_assembler_name (decl
, "_Isfinitef");
579 if (HPUX_LONG_DOUBLE_LIBRARY
)
583 /* Under HPUX, the __float128 type is a synonym for "long double". */
584 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
587 /* TFmode support builtins. */
588 ftype
= build_function_type_list (long_double_type_node
,
589 long_double_type_node
,
591 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
592 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
593 "_U_Qfabs", NULL_TREE
);
594 TREE_READONLY (decl
) = 1;
595 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
597 ftype
= build_function_type_list (long_double_type_node
,
598 long_double_type_node
,
599 long_double_type_node
,
601 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
602 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
603 "_U_Qfcopysign", NULL_TREE
);
604 TREE_READONLY (decl
) = 1;
605 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
607 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
608 decl
= add_builtin_function ("__builtin_infq", ftype
,
609 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
611 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
613 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
614 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
616 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
621 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
622 machine_mode mode ATTRIBUTE_UNUSED
,
623 int ignore ATTRIBUTE_UNUSED
)
625 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
626 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
630 case PA_BUILTIN_FABSQ
:
631 case PA_BUILTIN_COPYSIGNQ
:
632 return expand_call (exp
, target
, ignore
);
634 case PA_BUILTIN_INFQ
:
635 case PA_BUILTIN_HUGE_VALQ
:
637 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
642 tmp
= const_double_from_real_value (inf
, target_mode
);
644 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
647 target
= gen_reg_rtx (target_mode
);
649 emit_move_insn (target
, tmp
);
660 /* Function to init struct machine_function.
661 This will be called, via a pointer variable,
662 from push_function_context. */
664 static struct machine_function
*
665 pa_init_machine_status (void)
667 return ggc_cleared_alloc
<machine_function
> ();
670 /* If FROM is a probable pointer register, mark TO as a probable
671 pointer register with the same pointer alignment as FROM. */
674 copy_reg_pointer (rtx to
, rtx from
)
676 if (REG_POINTER (from
))
677 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
680 /* Return 1 if X contains a symbolic expression. We know these
681 expressions will have one of a few well defined forms, so
682 we need only check those forms. */
684 pa_symbolic_expression_p (rtx x
)
687 /* Strip off any HIGH. */
688 if (GET_CODE (x
) == HIGH
)
691 return symbolic_operand (x
, VOIDmode
);
694 /* Accept any constant that can be moved in one instruction into a
697 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival
)
699 /* OK if ldo, ldil, or zdepi, can be used. */
700 return (VAL_14_BITS_P (ival
)
701 || pa_ldil_cint_p (ival
)
702 || pa_zdepi_cint_p (ival
));
705 /* True iff ldil can be used to load this CONST_INT. The least
706 significant 11 bits of the value must be zero and the value must
707 not change sign when extended from 32 to 64 bits. */
709 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival
)
711 unsigned HOST_WIDE_INT x
;
713 x
= ival
& (((unsigned HOST_WIDE_INT
) -1 << 31) | 0x7ff);
714 return x
== 0 || x
== ((unsigned HOST_WIDE_INT
) -1 << 31);
717 /* True iff zdepi can be used to generate this CONST_INT.
718 zdepi first sign extends a 5-bit signed number to a given field
719 length, then places this field anywhere in a zero. */
721 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
723 unsigned HOST_WIDE_INT lsb_mask
, t
;
725 /* This might not be obvious, but it's at least fast.
726 This function is critical; we don't have the time loops would take. */
728 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
729 /* Return true iff t is a power of two. */
730 return ((t
& (t
- 1)) == 0);
733 /* True iff depi or extru can be used to compute (reg & mask).
734 Accept bit pattern like these:
739 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
742 mask
+= mask
& -mask
;
743 return (mask
& (mask
- 1)) == 0;
746 /* True iff depi can be used to compute (reg | MASK). */
748 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
750 mask
+= mask
& -mask
;
751 return (mask
& (mask
- 1)) == 0;
754 /* Legitimize PIC addresses. If the address is already
755 position-independent, we return ORIG. Newly generated
756 position-independent addresses go to REG. If we need more
757 than one register, we lose. */
760 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
764 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
766 /* Labels need special handling. */
767 if (pic_label_operand (orig
, mode
))
771 /* We do not want to go through the movXX expanders here since that
772 would create recursion.
774 Nor do we really want to call a generator for a named pattern
775 since that requires multiple patterns if we want to support
778 So instead we just emit the raw set, which avoids the movXX
779 expanders completely. */
780 mark_reg_pointer (reg
, BITS_PER_UNIT
);
781 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
783 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
784 add_reg_note (insn
, REG_EQUAL
, orig
);
786 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
787 and update LABEL_NUSES because this is not done automatically. */
788 if (reload_in_progress
|| reload_completed
)
790 /* Extract LABEL_REF. */
791 if (GET_CODE (orig
) == CONST
)
792 orig
= XEXP (XEXP (orig
, 0), 0);
793 /* Extract CODE_LABEL. */
794 orig
= XEXP (orig
, 0);
795 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
796 /* Make sure we have label and not a note. */
798 LABEL_NUSES (orig
)++;
800 crtl
->uses_pic_offset_table
= 1;
803 if (GET_CODE (orig
) == SYMBOL_REF
)
810 /* Before reload, allocate a temporary register for the intermediate
811 result. This allows the sequence to be deleted when the final
812 result is unused and the insns are trivially dead. */
813 tmp_reg
= ((reload_in_progress
|| reload_completed
)
814 ? reg
: gen_reg_rtx (Pmode
));
816 if (function_label_operand (orig
, VOIDmode
))
818 /* Force function label into memory in word mode. */
819 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
820 /* Load plabel address from DLT. */
821 emit_move_insn (tmp_reg
,
822 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
823 gen_rtx_HIGH (word_mode
, orig
)));
825 = gen_const_mem (Pmode
,
826 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
827 gen_rtx_UNSPEC (Pmode
,
830 emit_move_insn (reg
, pic_ref
);
831 /* Now load address of function descriptor. */
832 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
836 /* Load symbol reference from DLT. */
837 emit_move_insn (tmp_reg
,
838 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
839 gen_rtx_HIGH (word_mode
, orig
)));
841 = gen_const_mem (Pmode
,
842 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
843 gen_rtx_UNSPEC (Pmode
,
848 crtl
->uses_pic_offset_table
= 1;
849 mark_reg_pointer (reg
, BITS_PER_UNIT
);
850 insn
= emit_move_insn (reg
, pic_ref
);
852 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
853 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
857 else if (GET_CODE (orig
) == CONST
)
861 if (GET_CODE (XEXP (orig
, 0)) == PLUS
862 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
866 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
868 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
869 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
870 base
== reg
? 0 : reg
);
872 if (GET_CODE (orig
) == CONST_INT
)
874 if (INT_14_BITS (orig
))
875 return plus_constant (Pmode
, base
, INTVAL (orig
));
876 orig
= force_reg (Pmode
, orig
);
878 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
879 /* Likewise, should we set special REG_NOTEs here? */
885 static GTY(()) rtx gen_tls_tga
;
888 gen_tls_get_addr (void)
891 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
896 hppa_tls_call (rtx arg
)
900 ret
= gen_reg_rtx (Pmode
);
901 emit_library_call_value (gen_tls_get_addr (), ret
,
902 LCT_CONST
, Pmode
, 1, arg
, Pmode
);
908 legitimize_tls_address (rtx addr
)
910 rtx ret
, tmp
, t1
, t2
, tp
;
913 /* Currently, we can't handle anything but a SYMBOL_REF. */
914 if (GET_CODE (addr
) != SYMBOL_REF
)
917 switch (SYMBOL_REF_TLS_MODEL (addr
))
919 case TLS_MODEL_GLOBAL_DYNAMIC
:
920 tmp
= gen_reg_rtx (Pmode
);
922 emit_insn (gen_tgd_load_pic (tmp
, addr
));
924 emit_insn (gen_tgd_load (tmp
, addr
));
925 ret
= hppa_tls_call (tmp
);
928 case TLS_MODEL_LOCAL_DYNAMIC
:
929 ret
= gen_reg_rtx (Pmode
);
930 tmp
= gen_reg_rtx (Pmode
);
933 emit_insn (gen_tld_load_pic (tmp
, addr
));
935 emit_insn (gen_tld_load (tmp
, addr
));
936 t1
= hppa_tls_call (tmp
);
939 t2
= gen_reg_rtx (Pmode
);
940 emit_libcall_block (insn
, t2
, t1
,
941 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
943 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
946 case TLS_MODEL_INITIAL_EXEC
:
947 tp
= gen_reg_rtx (Pmode
);
948 tmp
= gen_reg_rtx (Pmode
);
949 ret
= gen_reg_rtx (Pmode
);
950 emit_insn (gen_tp_load (tp
));
952 emit_insn (gen_tie_load_pic (tmp
, addr
));
954 emit_insn (gen_tie_load (tmp
, addr
));
955 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
958 case TLS_MODEL_LOCAL_EXEC
:
959 tp
= gen_reg_rtx (Pmode
);
960 ret
= gen_reg_rtx (Pmode
);
961 emit_insn (gen_tp_load (tp
));
962 emit_insn (gen_tle_load (ret
, addr
, tp
));
972 /* Helper for hppa_legitimize_address. Given X, return true if it
973 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
975 This respectively represent canonical shift-add rtxs or scaled
978 mem_shadd_or_shadd_rtx_p (rtx x
)
980 return ((GET_CODE (x
) == ASHIFT
981 || GET_CODE (x
) == MULT
)
982 && GET_CODE (XEXP (x
, 1)) == CONST_INT
983 && ((GET_CODE (x
) == ASHIFT
984 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
985 || (GET_CODE (x
) == MULT
986 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
989 /* Try machine-dependent ways of modifying an illegitimate address
990 to be legitimate. If we find one, return the new, valid address.
991 This macro is used in only one place: `memory_address' in explow.c.
993 OLDX is the address as it was before break_out_memory_refs was called.
994 In some cases it is useful to look at this to decide what needs to be done.
996 It is always safe for this macro to do nothing. It exists to recognize
997 opportunities to optimize the output.
999 For the PA, transform:
1001 memory(X + <large int>)
1005 if (<large int> & mask) >= 16
1006 Y = (<large int> & ~mask) + mask + 1 Round up.
1008 Y = (<large int> & ~mask) Round down.
1010 memory (Z + (<large int> - Y));
1012 This is for CSE to find several similar references, and only use one Z.
1014 X can either be a SYMBOL_REF or REG, but because combine cannot
1015 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1016 D will not fit in 14 bits.
1018 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1021 MODE_INT references allow displacements which fit in 14 bits, so use
1024 This relies on the fact that most mode MODE_FLOAT references will use FP
1025 registers and most mode MODE_INT references will use integer registers.
1026 (In the rare case of an FP register used in an integer MODE, we depend
1027 on secondary reloads to clean things up.)
1030 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1031 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1032 addressing modes to be used).
1034 Note that the addresses passed into hppa_legitimize_address always
1035 come from a MEM, so we only have to match the MULT form on incoming
1036 addresses. But to be future proof we also match the ASHIFT form.
1038 However, this routine always places those shift-add sequences into
1039 registers, so we have to generate the ASHIFT form as our output.
1041 Put X and Z into registers. Then put the entire expression into
1045 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1050 /* We need to canonicalize the order of operands in unscaled indexed
1051 addresses since the code that checks if an address is valid doesn't
1052 always try both orders. */
1053 if (!TARGET_NO_SPACE_REGS
1054 && GET_CODE (x
) == PLUS
1055 && GET_MODE (x
) == Pmode
1056 && REG_P (XEXP (x
, 0))
1057 && REG_P (XEXP (x
, 1))
1058 && REG_POINTER (XEXP (x
, 0))
1059 && !REG_POINTER (XEXP (x
, 1)))
1060 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1062 if (tls_referenced_p (x
))
1063 return legitimize_tls_address (x
);
1065 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1067 /* Strip off CONST. */
1068 if (GET_CODE (x
) == CONST
)
1071 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1072 That should always be safe. */
1073 if (GET_CODE (x
) == PLUS
1074 && GET_CODE (XEXP (x
, 0)) == REG
1075 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1077 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1078 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1081 /* Note we must reject symbols which represent function addresses
1082 since the assembler/linker can't handle arithmetic on plabels. */
1083 if (GET_CODE (x
) == PLUS
1084 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1085 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1086 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1087 || GET_CODE (XEXP (x
, 0)) == REG
))
1089 rtx int_part
, ptr_reg
;
1091 int offset
= INTVAL (XEXP (x
, 1));
1094 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1095 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1097 /* Choose which way to round the offset. Round up if we
1098 are >= halfway to the next boundary. */
1099 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1100 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1102 newoffset
= (offset
& ~ mask
);
1104 /* If the newoffset will not fit in 14 bits (ldo), then
1105 handling this would take 4 or 5 instructions (2 to load
1106 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1107 add the new offset and the SYMBOL_REF.) Combine can
1108 not handle 4->2 or 5->2 combinations, so do not create
1110 if (! VAL_14_BITS_P (newoffset
)
1111 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1113 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1116 gen_rtx_HIGH (Pmode
, const_part
));
1119 gen_rtx_LO_SUM (Pmode
,
1120 tmp_reg
, const_part
));
1124 if (! VAL_14_BITS_P (newoffset
))
1125 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1127 int_part
= GEN_INT (newoffset
);
1129 ptr_reg
= force_reg (Pmode
,
1130 gen_rtx_PLUS (Pmode
,
1131 force_reg (Pmode
, XEXP (x
, 0)),
1134 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1137 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1139 if (GET_CODE (x
) == PLUS
1140 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1141 && (OBJECT_P (XEXP (x
, 1))
1142 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1143 && GET_CODE (XEXP (x
, 1)) != CONST
)
1145 /* If we were given a MULT, we must fix the constant
1146 as we're going to create the ASHIFT form. */
1147 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1148 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1149 shift_val
= exact_log2 (shift_val
);
1153 if (GET_CODE (reg1
) != REG
)
1154 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1156 reg2
= XEXP (XEXP (x
, 0), 0);
1157 if (GET_CODE (reg2
) != REG
)
1158 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1160 return force_reg (Pmode
,
1161 gen_rtx_PLUS (Pmode
,
1162 gen_rtx_ASHIFT (Pmode
, reg2
,
1163 GEN_INT (shift_val
)),
1167 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1169 Only do so for floating point modes since this is more speculative
1170 and we lose if it's an integer store. */
1171 if (GET_CODE (x
) == PLUS
1172 && GET_CODE (XEXP (x
, 0)) == PLUS
1173 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1174 && (mode
== SFmode
|| mode
== DFmode
))
1176 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1178 /* If we were given a MULT, we must fix the constant
1179 as we're going to create the ASHIFT form. */
1180 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1181 shift_val
= exact_log2 (shift_val
);
1183 /* Try and figure out what to use as a base register. */
1184 rtx reg1
, reg2
, base
, idx
;
1186 reg1
= XEXP (XEXP (x
, 0), 1);
1191 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1192 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1193 it's a base register below. */
1194 if (GET_CODE (reg1
) != REG
)
1195 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1197 if (GET_CODE (reg2
) != REG
)
1198 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1200 /* Figure out what the base and index are. */
1202 if (GET_CODE (reg1
) == REG
1203 && REG_POINTER (reg1
))
1206 idx
= gen_rtx_PLUS (Pmode
,
1207 gen_rtx_ASHIFT (Pmode
,
1208 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1209 GEN_INT (shift_val
)),
1212 else if (GET_CODE (reg2
) == REG
1213 && REG_POINTER (reg2
))
1222 /* If the index adds a large constant, try to scale the
1223 constant so that it can be loaded with only one insn. */
1224 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1225 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1226 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1227 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1229 /* Divide the CONST_INT by the scale factor, then add it to A. */
1230 int val
= INTVAL (XEXP (idx
, 1));
1231 val
/= (1 << shift_val
);
1233 reg1
= XEXP (XEXP (idx
, 0), 0);
1234 if (GET_CODE (reg1
) != REG
)
1235 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1237 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1239 /* We can now generate a simple scaled indexed address. */
1242 (Pmode
, gen_rtx_PLUS (Pmode
,
1243 gen_rtx_ASHIFT (Pmode
, reg1
,
1244 GEN_INT (shift_val
)),
1248 /* If B + C is still a valid base register, then add them. */
1249 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1250 && INTVAL (XEXP (idx
, 1)) <= 4096
1251 && INTVAL (XEXP (idx
, 1)) >= -4096)
1255 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1257 reg2
= XEXP (XEXP (idx
, 0), 0);
1258 if (GET_CODE (reg2
) != CONST_INT
)
1259 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1261 return force_reg (Pmode
,
1262 gen_rtx_PLUS (Pmode
,
1263 gen_rtx_ASHIFT (Pmode
, reg2
,
1264 GEN_INT (shift_val
)),
1268 /* Get the index into a register, then add the base + index and
1269 return a register holding the result. */
1271 /* First get A into a register. */
1272 reg1
= XEXP (XEXP (idx
, 0), 0);
1273 if (GET_CODE (reg1
) != REG
)
1274 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1276 /* And get B into a register. */
1277 reg2
= XEXP (idx
, 1);
1278 if (GET_CODE (reg2
) != REG
)
1279 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1281 reg1
= force_reg (Pmode
,
1282 gen_rtx_PLUS (Pmode
,
1283 gen_rtx_ASHIFT (Pmode
, reg1
,
1284 GEN_INT (shift_val
)),
1287 /* Add the result to our base register and return. */
1288 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1292 /* Uh-oh. We might have an address for x[n-100000]. This needs
1293 special handling to avoid creating an indexed memory address
1294 with x-100000 as the base.
1296 If the constant part is small enough, then it's still safe because
1297 there is a guard page at the beginning and end of the data segment.
1299 Scaled references are common enough that we want to try and rearrange the
1300 terms so that we can use indexing for these addresses too. Only
1301 do the optimization for floatint point modes. */
1303 if (GET_CODE (x
) == PLUS
1304 && pa_symbolic_expression_p (XEXP (x
, 1)))
1306 /* Ugly. We modify things here so that the address offset specified
1307 by the index expression is computed first, then added to x to form
1308 the entire address. */
1310 rtx regx1
, regx2
, regy1
, regy2
, y
;
1312 /* Strip off any CONST. */
1314 if (GET_CODE (y
) == CONST
)
1317 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1319 /* See if this looks like
1320 (plus (mult (reg) (mem_shadd_const))
1321 (const (plus (symbol_ref) (const_int))))
1323 Where const_int is small. In that case the const
1324 expression is a valid pointer for indexing.
1326 If const_int is big, but can be divided evenly by shadd_const
1327 and added to (reg). This allows more scaled indexed addresses. */
1328 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1329 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1330 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1331 && INTVAL (XEXP (y
, 1)) >= -4096
1332 && INTVAL (XEXP (y
, 1)) <= 4095)
1334 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1336 /* If we were given a MULT, we must fix the constant
1337 as we're going to create the ASHIFT form. */
1338 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1339 shift_val
= exact_log2 (shift_val
);
1344 if (GET_CODE (reg1
) != REG
)
1345 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1347 reg2
= XEXP (XEXP (x
, 0), 0);
1348 if (GET_CODE (reg2
) != REG
)
1349 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1353 gen_rtx_PLUS (Pmode
,
1354 gen_rtx_ASHIFT (Pmode
,
1356 GEN_INT (shift_val
)),
1359 else if ((mode
== DFmode
|| mode
== SFmode
)
1360 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1361 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1362 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1363 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1365 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1367 /* If we were given a MULT, we must fix the constant
1368 as we're going to create the ASHIFT form. */
1369 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1370 shift_val
= exact_log2 (shift_val
);
1373 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1374 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1375 regx2
= XEXP (XEXP (x
, 0), 0);
1376 if (GET_CODE (regx2
) != REG
)
1377 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1378 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1382 gen_rtx_PLUS (Pmode
,
1383 gen_rtx_ASHIFT (Pmode
, regx2
,
1384 GEN_INT (shift_val
)),
1385 force_reg (Pmode
, XEXP (y
, 0))));
1387 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1388 && INTVAL (XEXP (y
, 1)) >= -4096
1389 && INTVAL (XEXP (y
, 1)) <= 4095)
1391 /* This is safe because of the guard page at the
1392 beginning and end of the data space. Just
1393 return the original address. */
1398 /* Doesn't look like one we can optimize. */
1399 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1400 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1401 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1402 regx1
= force_reg (Pmode
,
1403 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1405 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1413 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1415 Compute extra cost of moving data between one register class
1418 Make moves from SAR so expensive they should never happen. We used to
1419 have 0xffff here, but that generates overflow in rare cases.
1421 Copies involving a FP register and a non-FP register are relatively
1422 expensive because they must go through memory.
1424 Other copies are reasonably cheap. */
1427 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1428 reg_class_t from
, reg_class_t to
)
1430 if (from
== SHIFT_REGS
)
1432 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1434 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1435 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1441 /* For the HPPA, REG and REG+CONST is cost 0
1442 and addresses involving symbolic constants are cost 2.
1444 PIC addresses are very expensive.
1446 It is no coincidence that this has the same structure
1447 as pa_legitimate_address_p. */
1450 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1451 addr_space_t as ATTRIBUTE_UNUSED
,
1452 bool speed ATTRIBUTE_UNUSED
)
1454 switch (GET_CODE (X
))
1467 /* Compute a (partial) cost for rtx X. Return true if the complete
1468 cost has been computed, and false if subexpressions should be
1469 scanned. In either case, *TOTAL contains the cost result. */
1472 hppa_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
1473 int opno ATTRIBUTE_UNUSED
,
1474 int *total
, bool speed ATTRIBUTE_UNUSED
)
1477 int code
= GET_CODE (x
);
1482 if (INTVAL (x
) == 0)
1484 else if (INT_14_BITS (x
))
1501 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1502 && outer_code
!= SET
)
1509 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1511 *total
= COSTS_N_INSNS (3);
1515 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1516 factor
= GET_MODE_SIZE (mode
) / 4;
1520 if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1521 *total
= factor
* factor
* COSTS_N_INSNS (8);
1523 *total
= factor
* factor
* COSTS_N_INSNS (20);
1527 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1529 *total
= COSTS_N_INSNS (14);
1537 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1538 factor
= GET_MODE_SIZE (mode
) / 4;
1542 *total
= factor
* factor
* COSTS_N_INSNS (60);
1545 case PLUS
: /* this includes shNadd insns */
1547 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1549 *total
= COSTS_N_INSNS (3);
1553 /* A size N times larger than UNITS_PER_WORD needs N times as
1554 many insns, taking N times as long. */
1555 factor
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
1558 *total
= factor
* COSTS_N_INSNS (1);
1564 *total
= COSTS_N_INSNS (1);
1572 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1573 new rtx with the correct mode. */
1575 force_mode (machine_mode mode
, rtx orig
)
1577 if (mode
== GET_MODE (orig
))
1580 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1582 return gen_rtx_REG (mode
, REGNO (orig
));
1585 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1588 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1590 return tls_referenced_p (x
);
1593 /* Emit insns to move operands[1] into operands[0].
1595 Return 1 if we have written out everything that needs to be done to
1596 do the move. Otherwise, return 0 and the caller will emit the move
1599 Note SCRATCH_REG may not be in the proper mode depending on how it
1600 will be used. This routine is responsible for creating a new copy
1601 of SCRATCH_REG in the proper mode. */
1604 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1606 register rtx operand0
= operands
[0];
1607 register rtx operand1
= operands
[1];
1610 /* We can only handle indexed addresses in the destination operand
1611 of floating point stores. Thus, we need to break out indexed
1612 addresses from the destination operand. */
1613 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1615 gcc_assert (can_create_pseudo_p ());
1617 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1618 operand0
= replace_equiv_address (operand0
, tem
);
1621 /* On targets with non-equivalent space registers, break out unscaled
1622 indexed addresses from the source operand before the final CSE.
1623 We have to do this because the REG_POINTER flag is not correctly
1624 carried through various optimization passes and CSE may substitute
1625 a pseudo without the pointer set for one with the pointer set. As
1626 a result, we loose various opportunities to create insns with
1627 unscaled indexed addresses. */
1628 if (!TARGET_NO_SPACE_REGS
1629 && !cse_not_expected
1630 && GET_CODE (operand1
) == MEM
1631 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1632 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1633 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1635 = replace_equiv_address (operand1
,
1636 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1639 && reload_in_progress
&& GET_CODE (operand0
) == REG
1640 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1641 operand0
= reg_equiv_mem (REGNO (operand0
));
1642 else if (scratch_reg
1643 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1644 && GET_CODE (SUBREG_REG (operand0
)) == REG
1645 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1647 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1648 the code which tracks sets/uses for delete_output_reload. */
1649 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1650 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1651 SUBREG_BYTE (operand0
));
1652 operand0
= alter_subreg (&temp
, true);
1656 && reload_in_progress
&& GET_CODE (operand1
) == REG
1657 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1658 operand1
= reg_equiv_mem (REGNO (operand1
));
1659 else if (scratch_reg
1660 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1661 && GET_CODE (SUBREG_REG (operand1
)) == REG
1662 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1664 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1665 the code which tracks sets/uses for delete_output_reload. */
1666 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1667 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1668 SUBREG_BYTE (operand1
));
1669 operand1
= alter_subreg (&temp
, true);
1672 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1673 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1674 != XEXP (operand0
, 0)))
1675 operand0
= replace_equiv_address (operand0
, tem
);
1677 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1678 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1679 != XEXP (operand1
, 0)))
1680 operand1
= replace_equiv_address (operand1
, tem
);
1682 /* Handle secondary reloads for loads/stores of FP registers from
1683 REG+D addresses where D does not fit in 5 or 14 bits, including
1684 (subreg (mem (addr))) cases. */
1686 && FP_REG_P (operand0
)
1687 && (MEM_P (operand1
)
1688 || (GET_CODE (operand1
) == SUBREG
1689 && MEM_P (XEXP (operand1
, 0)))))
1693 if (GET_CODE (op1
) == SUBREG
)
1694 op1
= XEXP (op1
, 0);
1696 if (reg_plus_base_memory_operand (op1
, GET_MODE (op1
))
1699 && INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1700 && !INT_5_BITS (XEXP (XEXP (op1
, 0), 1)))
1702 /* SCRATCH_REG will hold an address and maybe the actual data.
1703 We want it in WORD_MODE regardless of what mode it was
1704 originally given to us. */
1705 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1707 /* D might not fit in 14 bits either; for such cases load D into
1709 if (!INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1711 emit_move_insn (scratch_reg
, XEXP (XEXP (op1
, 0), 1));
1712 emit_move_insn (scratch_reg
,
1713 gen_rtx_fmt_ee (GET_CODE (XEXP (op1
, 0)),
1715 XEXP (XEXP (op1
, 0), 0),
1719 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
1720 emit_insn (gen_rtx_SET (operand0
,
1721 replace_equiv_address (op1
, scratch_reg
)));
1725 else if (scratch_reg
1726 && FP_REG_P (operand1
)
1727 && (MEM_P (operand0
)
1728 || (GET_CODE (operand0
) == SUBREG
1729 && MEM_P (XEXP (operand0
, 0)))))
1733 if (GET_CODE (op0
) == SUBREG
)
1734 op0
= XEXP (op0
, 0);
1736 if (reg_plus_base_memory_operand (op0
, GET_MODE (op0
))
1739 && INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1740 && !INT_5_BITS (XEXP (XEXP (op0
, 0), 1)))
1742 /* SCRATCH_REG will hold an address and maybe the actual data.
1743 We want it in WORD_MODE regardless of what mode it was
1744 originally given to us. */
1745 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1747 /* D might not fit in 14 bits either; for such cases load D into
1749 if (!INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1751 emit_move_insn (scratch_reg
, XEXP (XEXP (op0
, 0), 1));
1752 emit_move_insn (scratch_reg
,
1753 gen_rtx_fmt_ee (GET_CODE (XEXP (op0
, 0)),
1755 XEXP (XEXP (op0
, 0), 0),
1759 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
1760 emit_insn (gen_rtx_SET (replace_equiv_address (op0
, scratch_reg
),
1765 /* Handle secondary reloads for loads of FP registers from constant
1766 expressions by forcing the constant into memory. For the most part,
1767 this is only necessary for SImode and DImode.
1769 Use scratch_reg to hold the address of the memory location. */
1770 else if (scratch_reg
1771 && CONSTANT_P (operand1
)
1772 && FP_REG_P (operand0
))
1774 rtx const_mem
, xoperands
[2];
1776 if (operand1
== CONST0_RTX (mode
))
1778 emit_insn (gen_rtx_SET (operand0
, operand1
));
1782 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1783 it in WORD_MODE regardless of what mode it was originally given
1785 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1787 /* Force the constant into memory and put the address of the
1788 memory location into scratch_reg. */
1789 const_mem
= force_const_mem (mode
, operand1
);
1790 xoperands
[0] = scratch_reg
;
1791 xoperands
[1] = XEXP (const_mem
, 0);
1792 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1794 /* Now load the destination register. */
1795 emit_insn (gen_rtx_SET (operand0
,
1796 replace_equiv_address (const_mem
, scratch_reg
)));
1799 /* Handle secondary reloads for SAR. These occur when trying to load
1800 the SAR from memory or a constant. */
1801 else if (scratch_reg
1802 && GET_CODE (operand0
) == REG
1803 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1804 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1805 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1807 /* D might not fit in 14 bits either; for such cases load D into
1809 if (GET_CODE (operand1
) == MEM
1810 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1812 /* We are reloading the address into the scratch register, so we
1813 want to make sure the scratch register is a full register. */
1814 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1816 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1817 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1820 XEXP (XEXP (operand1
, 0),
1824 /* Now we are going to load the scratch register from memory,
1825 we want to load it in the same width as the original MEM,
1826 which must be the same as the width of the ultimate destination,
1828 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1830 emit_move_insn (scratch_reg
,
1831 replace_equiv_address (operand1
, scratch_reg
));
1835 /* We want to load the scratch register using the same mode as
1836 the ultimate destination. */
1837 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1839 emit_move_insn (scratch_reg
, operand1
);
1842 /* And emit the insn to set the ultimate destination. We know that
1843 the scratch register has the same mode as the destination at this
1845 emit_move_insn (operand0
, scratch_reg
);
1849 /* Handle the most common case: storing into a register. */
1850 if (register_operand (operand0
, mode
))
1852 /* Legitimize TLS symbol references. This happens for references
1853 that aren't a legitimate constant. */
1854 if (PA_SYMBOL_REF_TLS_P (operand1
))
1855 operand1
= legitimize_tls_address (operand1
);
1857 if (register_operand (operand1
, mode
)
1858 || (GET_CODE (operand1
) == CONST_INT
1859 && pa_cint_ok_for_move (UINTVAL (operand1
)))
1860 || (operand1
== CONST0_RTX (mode
))
1861 || (GET_CODE (operand1
) == HIGH
1862 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1863 /* Only `general_operands' can come here, so MEM is ok. */
1864 || GET_CODE (operand1
) == MEM
)
1866 /* Various sets are created during RTL generation which don't
1867 have the REG_POINTER flag correctly set. After the CSE pass,
1868 instruction recognition can fail if we don't consistently
1869 set this flag when performing register copies. This should
1870 also improve the opportunities for creating insns that use
1871 unscaled indexing. */
1872 if (REG_P (operand0
) && REG_P (operand1
))
1874 if (REG_POINTER (operand1
)
1875 && !REG_POINTER (operand0
)
1876 && !HARD_REGISTER_P (operand0
))
1877 copy_reg_pointer (operand0
, operand1
);
1880 /* When MEMs are broken out, the REG_POINTER flag doesn't
1881 get set. In some cases, we can set the REG_POINTER flag
1882 from the declaration for the MEM. */
1883 if (REG_P (operand0
)
1884 && GET_CODE (operand1
) == MEM
1885 && !REG_POINTER (operand0
))
1887 tree decl
= MEM_EXPR (operand1
);
1889 /* Set the register pointer flag and register alignment
1890 if the declaration for this memory reference is a
1896 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1898 if (TREE_CODE (decl
) == COMPONENT_REF
)
1899 decl
= TREE_OPERAND (decl
, 1);
1901 type
= TREE_TYPE (decl
);
1902 type
= strip_array_types (type
);
1904 if (POINTER_TYPE_P (type
))
1908 type
= TREE_TYPE (type
);
1909 /* Using TYPE_ALIGN_OK is rather conservative as
1910 only the ada frontend actually sets it. */
1911 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1913 mark_reg_pointer (operand0
, align
);
1918 emit_insn (gen_rtx_SET (operand0
, operand1
));
1922 else if (GET_CODE (operand0
) == MEM
)
1924 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1925 && !(reload_in_progress
|| reload_completed
))
1927 rtx temp
= gen_reg_rtx (DFmode
);
1929 emit_insn (gen_rtx_SET (temp
, operand1
));
1930 emit_insn (gen_rtx_SET (operand0
, temp
));
1933 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1935 /* Run this case quickly. */
1936 emit_insn (gen_rtx_SET (operand0
, operand1
));
1939 if (! (reload_in_progress
|| reload_completed
))
1941 operands
[0] = validize_mem (operand0
);
1942 operands
[1] = operand1
= force_reg (mode
, operand1
);
1946 /* Simplify the source if we need to.
1947 Note we do have to handle function labels here, even though we do
1948 not consider them legitimate constants. Loop optimizations can
1949 call the emit_move_xxx with one as a source. */
1950 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1951 || (GET_CODE (operand1
) == HIGH
1952 && symbolic_operand (XEXP (operand1
, 0), mode
))
1953 || function_label_operand (operand1
, VOIDmode
)
1954 || tls_referenced_p (operand1
))
1958 if (GET_CODE (operand1
) == HIGH
)
1961 operand1
= XEXP (operand1
, 0);
1963 if (symbolic_operand (operand1
, mode
))
1965 /* Argh. The assembler and linker can't handle arithmetic
1968 So we force the plabel into memory, load operand0 from
1969 the memory location, then add in the constant part. */
1970 if ((GET_CODE (operand1
) == CONST
1971 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1972 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
1974 || function_label_operand (operand1
, VOIDmode
))
1976 rtx temp
, const_part
;
1978 /* Figure out what (if any) scratch register to use. */
1979 if (reload_in_progress
|| reload_completed
)
1981 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
1982 /* SCRATCH_REG will hold an address and maybe the actual
1983 data. We want it in WORD_MODE regardless of what mode it
1984 was originally given to us. */
1985 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1988 scratch_reg
= gen_reg_rtx (Pmode
);
1990 if (GET_CODE (operand1
) == CONST
)
1992 /* Save away the constant part of the expression. */
1993 const_part
= XEXP (XEXP (operand1
, 0), 1);
1994 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
1996 /* Force the function label into memory. */
1997 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2001 /* No constant part. */
2002 const_part
= NULL_RTX
;
2004 /* Force the function label into memory. */
2005 temp
= force_const_mem (mode
, operand1
);
2009 /* Get the address of the memory location. PIC-ify it if
2011 temp
= XEXP (temp
, 0);
2013 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2015 /* Put the address of the memory location into our destination
2018 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2020 /* Now load from the memory location into our destination
2022 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2023 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2025 /* And add back in the constant part. */
2026 if (const_part
!= NULL_RTX
)
2027 expand_inc (operand0
, const_part
);
2037 if (reload_in_progress
|| reload_completed
)
2039 temp
= scratch_reg
? scratch_reg
: operand0
;
2040 /* TEMP will hold an address and maybe the actual
2041 data. We want it in WORD_MODE regardless of what mode it
2042 was originally given to us. */
2043 temp
= force_mode (word_mode
, temp
);
2046 temp
= gen_reg_rtx (Pmode
);
2048 /* Force (const (plus (symbol) (const_int))) to memory
2049 if the const_int will not fit in 14 bits. Although
2050 this requires a relocation, the instruction sequence
2051 needed to load the value is shorter. */
2052 if (GET_CODE (operand1
) == CONST
2053 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2054 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2055 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2057 rtx x
, m
= force_const_mem (mode
, operand1
);
2059 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2060 x
= replace_equiv_address (m
, x
);
2061 insn
= emit_move_insn (operand0
, x
);
2065 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2066 if (REG_P (operand0
) && REG_P (operands
[1]))
2067 copy_reg_pointer (operand0
, operands
[1]);
2068 insn
= emit_move_insn (operand0
, operands
[1]);
2071 /* Put a REG_EQUAL note on this insn. */
2072 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2074 /* On the HPPA, references to data space are supposed to use dp,
2075 register 27, but showing it in the RTL inhibits various cse
2076 and loop optimizations. */
2081 if (reload_in_progress
|| reload_completed
)
2083 temp
= scratch_reg
? scratch_reg
: operand0
;
2084 /* TEMP will hold an address and maybe the actual
2085 data. We want it in WORD_MODE regardless of what mode it
2086 was originally given to us. */
2087 temp
= force_mode (word_mode
, temp
);
2090 temp
= gen_reg_rtx (mode
);
2092 /* Loading a SYMBOL_REF into a register makes that register
2093 safe to be used as the base in an indexed address.
2095 Don't mark hard registers though. That loses. */
2096 if (GET_CODE (operand0
) == REG
2097 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2098 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2099 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2100 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2103 set
= gen_rtx_SET (operand0
, temp
);
2105 set
= gen_rtx_SET (operand0
,
2106 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2108 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2114 else if (tls_referenced_p (operand1
))
2119 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2121 addend
= XEXP (XEXP (tmp
, 0), 1);
2122 tmp
= XEXP (XEXP (tmp
, 0), 0);
2125 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2126 tmp
= legitimize_tls_address (tmp
);
2129 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2130 tmp
= force_operand (tmp
, operands
[0]);
2134 else if (GET_CODE (operand1
) != CONST_INT
2135 || !pa_cint_ok_for_move (UINTVAL (operand1
)))
2140 HOST_WIDE_INT value
= 0;
2141 HOST_WIDE_INT insv
= 0;
2144 if (GET_CODE (operand1
) == CONST_INT
)
2145 value
= INTVAL (operand1
);
2148 && GET_CODE (operand1
) == CONST_INT
2149 && HOST_BITS_PER_WIDE_INT
> 32
2150 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2154 /* Extract the low order 32 bits of the value and sign extend.
2155 If the new value is the same as the original value, we can
2156 can use the original value as-is. If the new value is
2157 different, we use it and insert the most-significant 32-bits
2158 of the original value into the final result. */
2159 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2160 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2163 #if HOST_BITS_PER_WIDE_INT > 32
2164 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2168 operand1
= GEN_INT (nval
);
2172 if (reload_in_progress
|| reload_completed
)
2173 temp
= scratch_reg
? scratch_reg
: operand0
;
2175 temp
= gen_reg_rtx (mode
);
2177 /* We don't directly split DImode constants on 32-bit targets
2178 because PLUS uses an 11-bit immediate and the insn sequence
2179 generated is not as efficient as the one using HIGH/LO_SUM. */
2180 if (GET_CODE (operand1
) == CONST_INT
2181 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2182 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2185 /* Directly break constant into high and low parts. This
2186 provides better optimization opportunities because various
2187 passes recognize constants split with PLUS but not LO_SUM.
2188 We use a 14-bit signed low part except when the addition
2189 of 0x4000 to the high part might change the sign of the
2191 HOST_WIDE_INT low
= value
& 0x3fff;
2192 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2196 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2204 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2205 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2209 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2210 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2213 insn
= emit_move_insn (operands
[0], operands
[1]);
2215 /* Now insert the most significant 32 bits of the value
2216 into the register. When we don't have a second register
2217 available, it could take up to nine instructions to load
2218 a 64-bit integer constant. Prior to reload, we force
2219 constants that would take more than three instructions
2220 to load to the constant pool. During and after reload,
2221 we have to handle all possible values. */
2224 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2225 register and the value to be inserted is outside the
2226 range that can be loaded with three depdi instructions. */
2227 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2229 operand1
= GEN_INT (insv
);
2231 emit_insn (gen_rtx_SET (temp
,
2232 gen_rtx_HIGH (mode
, operand1
)));
2233 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2235 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2238 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2243 int len
= 5, pos
= 27;
2245 /* Insert the bits using the depdi instruction. */
2248 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2249 HOST_WIDE_INT sign
= v5
< 0;
2251 /* Left extend the insertion. */
2252 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2253 while (pos
> 0 && (insv
& 1) == sign
)
2255 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2261 insn
= emit_insn (gen_insvdi (operand0
,
2266 insn
= emit_insn (gen_insvsi (operand0
,
2271 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2277 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2282 /* Now have insn-emit do whatever it normally does. */
2286 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2287 it will need a link/runtime reloc). */
2290 pa_reloc_needed (tree exp
)
2294 switch (TREE_CODE (exp
))
2299 case POINTER_PLUS_EXPR
:
2302 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2303 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2307 case NON_LVALUE_EXPR
:
2308 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2314 unsigned HOST_WIDE_INT ix
;
2316 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2318 reloc
|= pa_reloc_needed (value
);
2332 /* Return the best assembler insn template
2333 for moving operands[1] into operands[0] as a fullword. */
2335 pa_singlemove_string (rtx
*operands
)
2337 HOST_WIDE_INT intval
;
2339 if (GET_CODE (operands
[0]) == MEM
)
2340 return "stw %r1,%0";
2341 if (GET_CODE (operands
[1]) == MEM
)
2343 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2347 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2349 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2351 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands
[1]), i
);
2353 operands
[1] = GEN_INT (i
);
2354 /* Fall through to CONST_INT case. */
2356 if (GET_CODE (operands
[1]) == CONST_INT
)
2358 intval
= INTVAL (operands
[1]);
2360 if (VAL_14_BITS_P (intval
))
2362 else if ((intval
& 0x7ff) == 0)
2363 return "ldil L'%1,%0";
2364 else if (pa_zdepi_cint_p (intval
))
2365 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2367 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2369 return "copy %1,%0";
2373 /* Compute position (in OP[1]) and width (in OP[2])
2374 useful for copying IMM to a register using the zdepi
2375 instructions. Store the immediate value to insert in OP[0]. */
2377 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2381 /* Find the least significant set bit in IMM. */
2382 for (lsb
= 0; lsb
< 32; lsb
++)
2389 /* Choose variants based on *sign* of the 5-bit field. */
2390 if ((imm
& 0x10) == 0)
2391 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2394 /* Find the width of the bitstring in IMM. */
2395 for (len
= 5; len
< 32 - lsb
; len
++)
2397 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2401 /* Sign extend IMM as a 5-bit value. */
2402 imm
= (imm
& 0xf) - 0x10;
2410 /* Compute position (in OP[1]) and width (in OP[2])
2411 useful for copying IMM to a register using the depdi,z
2412 instructions. Store the immediate value to insert in OP[0]. */
2415 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2417 int lsb
, len
, maxlen
;
2419 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2421 /* Find the least significant set bit in IMM. */
2422 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2429 /* Choose variants based on *sign* of the 5-bit field. */
2430 if ((imm
& 0x10) == 0)
2431 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2434 /* Find the width of the bitstring in IMM. */
2435 for (len
= 5; len
< maxlen
- lsb
; len
++)
2437 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2441 /* Extend length if host is narrow and IMM is negative. */
2442 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2445 /* Sign extend IMM as a 5-bit value. */
2446 imm
= (imm
& 0xf) - 0x10;
2454 /* Output assembler code to perform a doubleword move insn
2455 with operands OPERANDS. */
2458 pa_output_move_double (rtx
*operands
)
2460 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2462 rtx addreg0
= 0, addreg1
= 0;
2465 /* First classify both operands. */
2467 if (REG_P (operands
[0]))
2469 else if (offsettable_memref_p (operands
[0]))
2471 else if (GET_CODE (operands
[0]) == MEM
)
2476 if (REG_P (operands
[1]))
2478 else if (CONSTANT_P (operands
[1]))
2480 else if (offsettable_memref_p (operands
[1]))
2482 else if (GET_CODE (operands
[1]) == MEM
)
2487 /* Check for the cases that the operand constraints are not
2488 supposed to allow to happen. */
2489 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2491 /* Handle copies between general and floating registers. */
2493 if (optype0
== REGOP
&& optype1
== REGOP
2494 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2496 if (FP_REG_P (operands
[0]))
2498 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2499 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2500 return "{fldds|fldd} -16(%%sp),%0";
2504 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2505 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2506 return "{ldws|ldw} -12(%%sp),%R0";
2510 /* Handle auto decrementing and incrementing loads and stores
2511 specifically, since the structure of the function doesn't work
2512 for them without major modification. Do it better when we learn
2513 this port about the general inc/dec addressing of PA.
2514 (This was written by tege. Chide him if it doesn't work.) */
2516 if (optype0
== MEMOP
)
2518 /* We have to output the address syntax ourselves, since print_operand
2519 doesn't deal with the addresses we want to use. Fix this later. */
2521 rtx addr
= XEXP (operands
[0], 0);
2522 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2524 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2526 operands
[0] = XEXP (addr
, 0);
2527 gcc_assert (GET_CODE (operands
[1]) == REG
2528 && GET_CODE (operands
[0]) == REG
);
2530 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2532 /* No overlap between high target register and address
2533 register. (We do this in a non-obvious way to
2534 save a register file writeback) */
2535 if (GET_CODE (addr
) == POST_INC
)
2536 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2537 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2539 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2541 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2543 operands
[0] = XEXP (addr
, 0);
2544 gcc_assert (GET_CODE (operands
[1]) == REG
2545 && GET_CODE (operands
[0]) == REG
);
2547 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2548 /* No overlap between high target register and address
2549 register. (We do this in a non-obvious way to save a
2550 register file writeback) */
2551 if (GET_CODE (addr
) == PRE_INC
)
2552 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2553 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2556 if (optype1
== MEMOP
)
2558 /* We have to output the address syntax ourselves, since print_operand
2559 doesn't deal with the addresses we want to use. Fix this later. */
2561 rtx addr
= XEXP (operands
[1], 0);
2562 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2564 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2566 operands
[1] = XEXP (addr
, 0);
2567 gcc_assert (GET_CODE (operands
[0]) == REG
2568 && GET_CODE (operands
[1]) == REG
);
2570 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2572 /* No overlap between high target register and address
2573 register. (We do this in a non-obvious way to
2574 save a register file writeback) */
2575 if (GET_CODE (addr
) == POST_INC
)
2576 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2577 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2581 /* This is an undefined situation. We should load into the
2582 address register *and* update that register. Probably
2583 we don't need to handle this at all. */
2584 if (GET_CODE (addr
) == POST_INC
)
2585 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2586 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2589 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2591 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2593 operands
[1] = XEXP (addr
, 0);
2594 gcc_assert (GET_CODE (operands
[0]) == REG
2595 && GET_CODE (operands
[1]) == REG
);
2597 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2599 /* No overlap between high target register and address
2600 register. (We do this in a non-obvious way to
2601 save a register file writeback) */
2602 if (GET_CODE (addr
) == PRE_INC
)
2603 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2604 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2608 /* This is an undefined situation. We should load into the
2609 address register *and* update that register. Probably
2610 we don't need to handle this at all. */
2611 if (GET_CODE (addr
) == PRE_INC
)
2612 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2613 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2616 else if (GET_CODE (addr
) == PLUS
2617 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2621 /* Load address into left half of destination register. */
2622 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2623 xoperands
[1] = XEXP (addr
, 1);
2624 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2625 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2626 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2628 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2630 else if (GET_CODE (addr
) == PLUS
2631 && REG_P (XEXP (addr
, 0))
2632 && REG_P (XEXP (addr
, 1)))
2636 /* Load address into left half of destination register. */
2637 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2638 xoperands
[1] = XEXP (addr
, 0);
2639 xoperands
[2] = XEXP (addr
, 1);
2640 output_asm_insn ("{addl|add,l} %1,%2,%0",
2642 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2646 /* If an operand is an unoffsettable memory ref, find a register
2647 we can increment temporarily to make it refer to the second word. */
2649 if (optype0
== MEMOP
)
2650 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2652 if (optype1
== MEMOP
)
2653 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2655 /* Ok, we can do one word at a time.
2656 Normally we do the low-numbered word first.
2658 In either case, set up in LATEHALF the operands to use
2659 for the high-numbered word and in some cases alter the
2660 operands in OPERANDS to be suitable for the low-numbered word. */
2662 if (optype0
== REGOP
)
2663 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2664 else if (optype0
== OFFSOP
)
2665 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2667 latehalf
[0] = operands
[0];
2669 if (optype1
== REGOP
)
2670 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2671 else if (optype1
== OFFSOP
)
2672 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
2673 else if (optype1
== CNSTOP
)
2675 if (GET_CODE (operands
[1]) == HIGH
)
2677 operands
[1] = XEXP (operands
[1], 0);
2680 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2683 latehalf
[1] = operands
[1];
2685 /* If the first move would clobber the source of the second one,
2686 do them in the other order.
2688 This can happen in two cases:
2690 mem -> register where the first half of the destination register
2691 is the same register used in the memory's address. Reload
2692 can create such insns.
2694 mem in this case will be either register indirect or register
2695 indirect plus a valid offset.
2697 register -> register move where REGNO(dst) == REGNO(src + 1)
2698 someone (Tim/Tege?) claimed this can happen for parameter loads.
2700 Handle mem -> register case first. */
2701 if (optype0
== REGOP
2702 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2703 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
2705 /* Do the late half first. */
2707 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2708 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2712 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2713 return pa_singlemove_string (operands
);
2716 /* Now handle register -> register case. */
2717 if (optype0
== REGOP
&& optype1
== REGOP
2718 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2720 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2721 return pa_singlemove_string (operands
);
2724 /* Normal case: do the two words, low-numbered first. */
2726 output_asm_insn (pa_singlemove_string (operands
), operands
);
2728 /* Make any unoffsettable addresses point at high-numbered word. */
2730 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2732 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2734 /* Do high-numbered word. */
2736 output_asm_insn ("ldil L'%1,%0", latehalf
);
2738 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2740 /* Undo the adds we just did. */
2742 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2744 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2750 pa_output_fp_move_double (rtx
*operands
)
2752 if (FP_REG_P (operands
[0]))
2754 if (FP_REG_P (operands
[1])
2755 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2756 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2758 output_asm_insn ("fldd%F1 %1,%0", operands
);
2760 else if (FP_REG_P (operands
[1]))
2762 output_asm_insn ("fstd%F0 %1,%0", operands
);
2768 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2770 /* This is a pain. You have to be prepared to deal with an
2771 arbitrary address here including pre/post increment/decrement.
2773 so avoid this in the MD. */
2774 gcc_assert (GET_CODE (operands
[0]) == REG
);
2776 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2777 xoperands
[0] = operands
[0];
2778 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2783 /* Return a REG that occurs in ADDR with coefficient 1.
2784 ADDR can be effectively incremented by incrementing REG. */
2787 find_addr_reg (rtx addr
)
2789 while (GET_CODE (addr
) == PLUS
)
2791 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2792 addr
= XEXP (addr
, 0);
2793 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2794 addr
= XEXP (addr
, 1);
2795 else if (CONSTANT_P (XEXP (addr
, 0)))
2796 addr
= XEXP (addr
, 1);
2797 else if (CONSTANT_P (XEXP (addr
, 1)))
2798 addr
= XEXP (addr
, 0);
2802 gcc_assert (GET_CODE (addr
) == REG
);
2806 /* Emit code to perform a block move.
2808 OPERANDS[0] is the destination pointer as a REG, clobbered.
2809 OPERANDS[1] is the source pointer as a REG, clobbered.
2810 OPERANDS[2] is a register for temporary storage.
2811 OPERANDS[3] is a register for temporary storage.
2812 OPERANDS[4] is the size as a CONST_INT
2813 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2814 OPERANDS[6] is another temporary register. */
2817 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2819 int align
= INTVAL (operands
[5]);
2820 unsigned long n_bytes
= INTVAL (operands
[4]);
2822 /* We can't move more than a word at a time because the PA
2823 has no longer integer move insns. (Could use fp mem ops?) */
2824 if (align
> (TARGET_64BIT
? 8 : 4))
2825 align
= (TARGET_64BIT
? 8 : 4);
2827 /* Note that we know each loop below will execute at least twice
2828 (else we would have open-coded the copy). */
2832 /* Pre-adjust the loop counter. */
2833 operands
[4] = GEN_INT (n_bytes
- 16);
2834 output_asm_insn ("ldi %4,%2", operands
);
2837 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2838 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2839 output_asm_insn ("std,ma %3,8(%0)", operands
);
2840 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2841 output_asm_insn ("std,ma %6,8(%0)", operands
);
2843 /* Handle the residual. There could be up to 7 bytes of
2844 residual to copy! */
2845 if (n_bytes
% 16 != 0)
2847 operands
[4] = GEN_INT (n_bytes
% 8);
2848 if (n_bytes
% 16 >= 8)
2849 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2850 if (n_bytes
% 8 != 0)
2851 output_asm_insn ("ldd 0(%1),%6", operands
);
2852 if (n_bytes
% 16 >= 8)
2853 output_asm_insn ("std,ma %3,8(%0)", operands
);
2854 if (n_bytes
% 8 != 0)
2855 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2860 /* Pre-adjust the loop counter. */
2861 operands
[4] = GEN_INT (n_bytes
- 8);
2862 output_asm_insn ("ldi %4,%2", operands
);
2865 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2866 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2867 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2868 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2869 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2871 /* Handle the residual. There could be up to 7 bytes of
2872 residual to copy! */
2873 if (n_bytes
% 8 != 0)
2875 operands
[4] = GEN_INT (n_bytes
% 4);
2876 if (n_bytes
% 8 >= 4)
2877 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2878 if (n_bytes
% 4 != 0)
2879 output_asm_insn ("ldw 0(%1),%6", operands
);
2880 if (n_bytes
% 8 >= 4)
2881 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2882 if (n_bytes
% 4 != 0)
2883 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2888 /* Pre-adjust the loop counter. */
2889 operands
[4] = GEN_INT (n_bytes
- 4);
2890 output_asm_insn ("ldi %4,%2", operands
);
2893 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2894 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2895 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2896 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2897 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2899 /* Handle the residual. */
2900 if (n_bytes
% 4 != 0)
2902 if (n_bytes
% 4 >= 2)
2903 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2904 if (n_bytes
% 2 != 0)
2905 output_asm_insn ("ldb 0(%1),%6", operands
);
2906 if (n_bytes
% 4 >= 2)
2907 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2908 if (n_bytes
% 2 != 0)
2909 output_asm_insn ("stb %6,0(%0)", operands
);
2914 /* Pre-adjust the loop counter. */
2915 operands
[4] = GEN_INT (n_bytes
- 2);
2916 output_asm_insn ("ldi %4,%2", operands
);
2919 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2920 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2921 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2922 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2923 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2925 /* Handle the residual. */
2926 if (n_bytes
% 2 != 0)
2928 output_asm_insn ("ldb 0(%1),%3", operands
);
2929 output_asm_insn ("stb %3,0(%0)", operands
);
2938 /* Count the number of insns necessary to handle this block move.
2940 Basic structure is the same as emit_block_move, except that we
2941 count insns rather than emit them. */
2944 compute_movmem_length (rtx_insn
*insn
)
2946 rtx pat
= PATTERN (insn
);
2947 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2948 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2949 unsigned int n_insns
= 0;
2951 /* We can't move more than four bytes at a time because the PA
2952 has no longer integer move insns. (Could use fp mem ops?) */
2953 if (align
> (TARGET_64BIT
? 8 : 4))
2954 align
= (TARGET_64BIT
? 8 : 4);
2956 /* The basic copying loop. */
2960 if (n_bytes
% (2 * align
) != 0)
2962 if ((n_bytes
% (2 * align
)) >= align
)
2965 if ((n_bytes
% align
) != 0)
2969 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2973 /* Emit code to perform a block clear.
2975 OPERANDS[0] is the destination pointer as a REG, clobbered.
2976 OPERANDS[1] is a register for temporary storage.
2977 OPERANDS[2] is the size as a CONST_INT
2978 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2981 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2983 int align
= INTVAL (operands
[3]);
2984 unsigned long n_bytes
= INTVAL (operands
[2]);
2986 /* We can't clear more than a word at a time because the PA
2987 has no longer integer move insns. */
2988 if (align
> (TARGET_64BIT
? 8 : 4))
2989 align
= (TARGET_64BIT
? 8 : 4);
2991 /* Note that we know each loop below will execute at least twice
2992 (else we would have open-coded the copy). */
2996 /* Pre-adjust the loop counter. */
2997 operands
[2] = GEN_INT (n_bytes
- 16);
2998 output_asm_insn ("ldi %2,%1", operands
);
3001 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3002 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
3003 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3005 /* Handle the residual. There could be up to 7 bytes of
3006 residual to copy! */
3007 if (n_bytes
% 16 != 0)
3009 operands
[2] = GEN_INT (n_bytes
% 8);
3010 if (n_bytes
% 16 >= 8)
3011 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3012 if (n_bytes
% 8 != 0)
3013 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3018 /* Pre-adjust the loop counter. */
3019 operands
[2] = GEN_INT (n_bytes
- 8);
3020 output_asm_insn ("ldi %2,%1", operands
);
3023 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3024 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3025 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3027 /* Handle the residual. There could be up to 7 bytes of
3028 residual to copy! */
3029 if (n_bytes
% 8 != 0)
3031 operands
[2] = GEN_INT (n_bytes
% 4);
3032 if (n_bytes
% 8 >= 4)
3033 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3034 if (n_bytes
% 4 != 0)
3035 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3040 /* Pre-adjust the loop counter. */
3041 operands
[2] = GEN_INT (n_bytes
- 4);
3042 output_asm_insn ("ldi %2,%1", operands
);
3045 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3046 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3047 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3049 /* Handle the residual. */
3050 if (n_bytes
% 4 != 0)
3052 if (n_bytes
% 4 >= 2)
3053 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3054 if (n_bytes
% 2 != 0)
3055 output_asm_insn ("stb %%r0,0(%0)", operands
);
3060 /* Pre-adjust the loop counter. */
3061 operands
[2] = GEN_INT (n_bytes
- 2);
3062 output_asm_insn ("ldi %2,%1", operands
);
3065 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3066 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3067 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3069 /* Handle the residual. */
3070 if (n_bytes
% 2 != 0)
3071 output_asm_insn ("stb %%r0,0(%0)", operands
);
3080 /* Count the number of insns necessary to handle this block move.
3082 Basic structure is the same as emit_block_move, except that we
3083 count insns rather than emit them. */
3086 compute_clrmem_length (rtx_insn
*insn
)
3088 rtx pat
= PATTERN (insn
);
3089 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3090 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3091 unsigned int n_insns
= 0;
3093 /* We can't clear more than a word at a time because the PA
3094 has no longer integer move insns. */
3095 if (align
> (TARGET_64BIT
? 8 : 4))
3096 align
= (TARGET_64BIT
? 8 : 4);
3098 /* The basic loop. */
3102 if (n_bytes
% (2 * align
) != 0)
3104 if ((n_bytes
% (2 * align
)) >= align
)
3107 if ((n_bytes
% align
) != 0)
3111 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3117 pa_output_and (rtx
*operands
)
3119 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3121 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3122 int ls0
, ls1
, ms0
, p
, len
;
3124 for (ls0
= 0; ls0
< 32; ls0
++)
3125 if ((mask
& (1 << ls0
)) == 0)
3128 for (ls1
= ls0
; ls1
< 32; ls1
++)
3129 if ((mask
& (1 << ls1
)) != 0)
3132 for (ms0
= ls1
; ms0
< 32; ms0
++)
3133 if ((mask
& (1 << ms0
)) == 0)
3136 gcc_assert (ms0
== 32);
3144 operands
[2] = GEN_INT (len
);
3145 return "{extru|extrw,u} %1,31,%2,%0";
3149 /* We could use this `depi' for the case above as well, but `depi'
3150 requires one more register file access than an `extru'. */
3155 operands
[2] = GEN_INT (p
);
3156 operands
[3] = GEN_INT (len
);
3157 return "{depi|depwi} 0,%2,%3,%0";
3161 return "and %1,%2,%0";
3164 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3165 storing the result in operands[0]. */
3167 pa_output_64bit_and (rtx
*operands
)
3169 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3171 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3172 int ls0
, ls1
, ms0
, p
, len
;
3174 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3175 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3178 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3179 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3182 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3183 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3186 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3188 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3194 operands
[2] = GEN_INT (len
);
3195 return "extrd,u %1,63,%2,%0";
3199 /* We could use this `depi' for the case above as well, but `depi'
3200 requires one more register file access than an `extru'. */
3205 operands
[2] = GEN_INT (p
);
3206 operands
[3] = GEN_INT (len
);
3207 return "depdi 0,%2,%3,%0";
3211 return "and %1,%2,%0";
3215 pa_output_ior (rtx
*operands
)
3217 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3218 int bs0
, bs1
, p
, len
;
3220 if (INTVAL (operands
[2]) == 0)
3221 return "copy %1,%0";
3223 for (bs0
= 0; bs0
< 32; bs0
++)
3224 if ((mask
& (1 << bs0
)) != 0)
3227 for (bs1
= bs0
; bs1
< 32; bs1
++)
3228 if ((mask
& (1 << bs1
)) == 0)
3231 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3236 operands
[2] = GEN_INT (p
);
3237 operands
[3] = GEN_INT (len
);
3238 return "{depi|depwi} -1,%2,%3,%0";
3241 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3242 storing the result in operands[0]. */
3244 pa_output_64bit_ior (rtx
*operands
)
3246 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3247 int bs0
, bs1
, p
, len
;
3249 if (INTVAL (operands
[2]) == 0)
3250 return "copy %1,%0";
3252 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3253 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3256 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3257 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3260 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3261 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3266 operands
[2] = GEN_INT (p
);
3267 operands
[3] = GEN_INT (len
);
3268 return "depdi -1,%2,%3,%0";
3271 /* Target hook for assembling integer objects. This code handles
3272 aligned SI and DI integers specially since function references
3273 must be preceded by P%. */
3276 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3278 if (size
== UNITS_PER_WORD
3280 && function_label_operand (x
, VOIDmode
))
3282 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3284 /* We don't want an OPD when generating fast indirect calls. */
3285 if (!TARGET_FAST_INDIRECT_CALLS
)
3286 fputs ("P%", asm_out_file
);
3288 output_addr_const (asm_out_file
, x
);
3289 fputc ('\n', asm_out_file
);
3292 return default_assemble_integer (x
, size
, aligned_p
);
/* Output an ascii string.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  /* Quote characters special to the assembler string syntax.  */
	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Non-printable: emit a \xNN hex escape.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd =  c  / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd =  c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Break the directive before the assembler's input-line limit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3351 /* Try to rewrite floating point comparisons & branches to avoid
3352 useless add,tr insns.
3354 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3355 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3356 first attempt to remove useless add,tr insns. It is zero
3357 for the second pass as reorg sometimes leaves bogus REG_DEAD
3360 When CHECK_NOTES is zero we can only eliminate add,tr insns
3361 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3364 remove_useless_addtr_insns (int check_notes
)
3367 static int pass
= 0;
3369 /* This is fairly cheap, so always run it when optimizing. */
3373 int fbranch_count
= 0;
3375 /* Walk all the insns in this function looking for fcmp & fbranch
3376 instructions. Keep track of how many of each we find. */
3377 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3381 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3382 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3385 tmp
= PATTERN (insn
);
3387 /* It must be a set. */
3388 if (GET_CODE (tmp
) != SET
)
3391 /* If the destination is CCFP, then we've found an fcmp insn. */
3392 tmp
= SET_DEST (tmp
);
3393 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3399 tmp
= PATTERN (insn
);
3400 /* If this is an fbranch instruction, bump the fbranch counter. */
3401 if (GET_CODE (tmp
) == SET
3402 && SET_DEST (tmp
) == pc_rtx
3403 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3404 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3405 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3406 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3414 /* Find all floating point compare + branch insns. If possible,
3415 reverse the comparison & the branch to avoid add,tr insns. */
3416 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3421 /* Ignore anything that isn't an INSN. */
3422 if (! NONJUMP_INSN_P (insn
))
3425 tmp
= PATTERN (insn
);
3427 /* It must be a set. */
3428 if (GET_CODE (tmp
) != SET
)
3431 /* The destination must be CCFP, which is register zero. */
3432 tmp
= SET_DEST (tmp
);
3433 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3436 /* INSN should be a set of CCFP.
3438 See if the result of this insn is used in a reversed FP
3439 conditional branch. If so, reverse our condition and
3440 the branch. Doing so avoids useless add,tr insns. */
3441 next
= next_insn (insn
);
3444 /* Jumps, calls and labels stop our search. */
3445 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3448 /* As does another fcmp insn. */
3449 if (NONJUMP_INSN_P (next
)
3450 && GET_CODE (PATTERN (next
)) == SET
3451 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3452 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3455 next
= next_insn (next
);
3458 /* Is NEXT_INSN a branch? */
3459 if (next
&& JUMP_P (next
))
3461 rtx pattern
= PATTERN (next
);
3463 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3464 and CCFP dies, then reverse our conditional and the branch
3465 to avoid the add,tr. */
3466 if (GET_CODE (pattern
) == SET
3467 && SET_DEST (pattern
) == pc_rtx
3468 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3469 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3470 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3471 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3472 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3473 && (fcmp_count
== fbranch_count
3475 && find_regno_note (next
, REG_DEAD
, 0))))
3477 /* Reverse the branch. */
3478 tmp
= XEXP (SET_SRC (pattern
), 1);
3479 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3480 XEXP (SET_SRC (pattern
), 2) = tmp
;
3481 INSN_CODE (next
) = -1;
3483 /* Reverse our condition. */
3484 tmp
= PATTERN (insn
);
3485 PUT_CODE (XEXP (tmp
, 1),
3486 (reverse_condition_maybe_unordered
3487 (GET_CODE (XEXP (tmp
, 1)))));
3497 /* You may have trouble believing this, but this is the 32 bit HP-PA
3502 Variable arguments (optional; any number may be allocated)
3504 SP-(4*(N+9)) arg word N
3509 Fixed arguments (must be allocated; may remain unused)
3518 SP-32 External Data Pointer (DP)
3520 SP-24 External/stub RP (RP')
3524 SP-8 Calling Stub RP (RP'')
3529 SP-0 Stack Pointer (points to next available address)
3533 /* This function saves registers as follows. Registers marked with ' are
3534 this function's registers (as opposed to the previous function's).
3535 If a frame_pointer isn't needed, r4 is saved as a general register;
3536 the space for the frame pointer is still allocated, though, to keep
3542 SP (FP') Previous FP
3543 SP + 4 Alignment filler (sigh)
3544 SP + 8 Space for locals reserved here.
3548 SP + n All call saved register used.
3552 SP + o All call saved fp registers used.
3556 SP + p (SP') points to next available address.
3560 /* Global variables set by output_function_prologue(). */
3561 /* Size of frame. Need to know this to emit return insns from
/* File-scope frame state shared by pa_expand_prologue/pa_expand_epilogue
   and the assembler-directive routines below.  actual_fsize is the full
   rounded frame size; local_fsize covers only locals + frame marker.
   NOTE(review): this chunk is an extraction with physical lines dropped
   (embedded original line numbers are not contiguous); all code tokens
   are preserved byte-for-byte.  */
3563 static HOST_WIDE_INT actual_fsize
, local_fsize
;
/* Nonzero when any callee-saved FP register must be saved/restored;
   filled in via pa_compute_frame_size (size, &save_fregs).  */
3564 static int save_fregs
;
3566 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3567 Handle case where DISP > 8k by using the add_high_const patterns.
3569 Note in DISP > 8k case, we will leave the high part of the address
3570 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
/* Emit RTL to store register REG at memory BASE+DISP (see the comment
   block above).  Three displacement strategies follow: 14-bit reg+d,
   64-bit wide displacement via %r1, and HIGH/LO_SUM (which leaves the
   high part of the address in %r1 — prologue/epilogue code relies on
   that).  NOTE(review): extraction dropped lines here (braces, the
   final `else`, DO_FRAME_NOTES guards, note operands around original
   lines 3614-3621); code tokens kept byte-identical.  */
3573 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3575 rtx dest
, src
, basereg
;
/* Source value register and base address register.  */
3578 src
= gen_rtx_REG (word_mode
, reg
);
3579 basereg
= gen_rtx_REG (Pmode
, base
);
/* Case 1: DISP fits in 14 bits -- single reg+d addressed store.  */
3580 if (VAL_14_BITS_P (disp
))
3582 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3583 insn
= emit_move_insn (dest
, src
);
/* Case 2: 64-bit target, DISP outside 32 bits -- materialize the full
   address in temporary %r1 and store through it.  */
3585 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3587 rtx delta
= GEN_INT (disp
);
3588 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3590 emit_move_insn (tmpreg
, delta
);
3591 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Describe the %r1 = base + delta computation for the unwinder.  */
3594 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3595 gen_rtx_SET (tmpreg
,
3596 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3597 RTX_FRAME_RELATED_P (insn
) = 1;
3599 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3600 insn
= emit_move_insn (dest
, src
);
/* Case 3: HIGH/LO_SUM sequence; high half of the address remains
   in %r1 afterwards (documented contract -- see header comment).  */
3604 rtx delta
= GEN_INT (disp
);
3605 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3606 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3608 emit_move_insn (tmpreg
, high
);
3609 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3610 insn
= emit_move_insn (dest
, src
);
/* Frame note expressing the store as mem(base+disp) = src so the
   unwind info is independent of the %r1 temporary.  NOTE(review):
   the note's inner operands (orig. lines 3615-3620) are missing
   from this extraction.  */
3612 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3613 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3614 gen_rtx_PLUS (word_mode
,
3621 RTX_FRAME_RELATED_P (insn
) = 1;
3624 /* Emit RTL to store REG at the memory location specified by BASE and then
3625 add MOD to BASE. MOD must be <= 8k. */
/* Emit a post-modify store: store REG at *BASE, then BASE += MOD
   (MOD must fit in 14 bits -- asserted below).  Used to combine a
   callee save with the stack-pointer adjustment.
   NOTE(review): extraction dropped lines (braces, the DO_FRAME_NOTES
   guard around orig. line 3641); code tokens kept byte-identical.  */
3628 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3630 rtx basereg
, srcreg
, delta
;
3633 gcc_assert (VAL_14_BITS_P (mod
));
3635 basereg
= gen_rtx_REG (Pmode
, base
);
3636 srcreg
= gen_rtx_REG (word_mode
, reg
);
3637 delta
= GEN_INT (mod
);
/* gen_post_store emits a PARALLEL: the store and the base update.  */
3639 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3642 RTX_FRAME_RELATED_P (insn
) = 1;
3644 /* RTX_FRAME_RELATED_P must be set on each frame related set
3645 in a parallel with more than one element. */
3646 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3647 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3651 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3652 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3653 whether to add a frame note or not.
3655 In the DISP > 8k case, we leave the high part of the address in %r1.
3656 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
/* Emit RTL computing REG = BASE + DISP (see comment block above).
   NOTE indicates whether to mark the final insn frame-related.
   Mirrors store_reg's three displacement strategies; in the
   HIGH/LO_SUM case the high part of the address is left in %r1.
   NOTE(review): extraction dropped lines (braces, the final `else`,
   a DO_FRAME_NOTES guard before orig. 3679); tokens byte-identical.  */
3659 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
/* Case 1: 14-bit displacement -- one add.  */
3663 if (VAL_14_BITS_P (disp
))
3665 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3666 plus_constant (Pmode
,
3667 gen_rtx_REG (Pmode
, base
), disp
));
/* Case 2: 64-bit target with DISP outside 32 bits -- load DISP into
   %r1 and add.  */
3669 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3671 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3672 rtx delta
= GEN_INT (disp
);
3673 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3675 emit_move_insn (tmpreg
, delta
);
3676 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3677 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Frame note so the unwinder sees %r1 = base + delta.  */
3679 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3680 gen_rtx_SET (tmpreg
,
3681 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
/* Case 3: HIGH/LO_SUM; %r1 holds the high part afterwards.  */
3685 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3686 rtx delta
= GEN_INT (disp
);
3687 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3689 emit_move_insn (tmpreg
,
3690 gen_rtx_PLUS (Pmode
, basereg
,
3691 gen_rtx_HIGH (Pmode
, delta
)));
3692 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3693 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
/* Only tag the insn frame-related when the caller asked for it.  */
3696 if (DO_FRAME_NOTES
&& note
)
3697 RTX_FRAME_RELATED_P (insn
) = 1;
/* Compute the total stack-frame size for a frame whose locals occupy
   SIZE bytes.  If FREGS_LIVE is non-null it is set nonzero when any
   callee-saved FP register is live (the store happens on a line the
   extraction dropped -- TODO confirm against full source).  Must stay
   in sync with pa_expand_prologue/pa_expand_epilogue.
   NOTE(review): extraction gaps (braces, declarations of i/j,
   per-register FP size increment near orig. 3747).  */
3701 pa_compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3706 /* The code in pa_expand_prologue and pa_expand_epilogue must
3707 be consistent with the rounding and size calculation done here.
3708 Change them at the same time. */
3710 /* We do our own stack alignment. First, round the size of the
3711 stack locals up to a word boundary. */
3712 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3714 /* Space for previous frame pointer + filler. If any frame is
3715 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3716 waste some space here for the sake of HP compatibility. The
3717 first slot is only used when the frame pointer is needed. */
3718 if (size
|| frame_pointer_needed
)
3719 size
+= STARTING_FRAME_OFFSET
;
3721 /* If the current function calls __builtin_eh_return, then we need
3722 to allocate stack space for registers that will hold data for
3723 the exception handler. */
3724 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
/* Count the EH data registers; one word each.  */
3728 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3730 size
+= i
* UNITS_PER_WORD
;
3733 /* Account for space used by the callee general register saves. */
/* %r3 doubles as the frame pointer, so start at %r4 when one is
   needed, else at %r3.  */
3734 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3735 if (df_regs_ever_live_p (i
))
3736 size
+= UNITS_PER_WORD
;
3738 /* Account for space used by the callee floating point register saves. */
3739 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
>= FP_SAVED_REG_FIRST -- NOTE(review): this duplicated tail is NOT in the code; see next line.  */
3776 /* Generate the assembly code for function entry. FILE is a stdio
3777 stream to output the code to. SIZE is an int: how many units of
3778 temporary storage to allocate.
3780 Refer to the array `regs_ever_live' to determine which registers to
3781 save; `regs_ever_live[I]' is nonzero if register number I is ever
3782 used in the function. This function is responsible for knowing
3783 which registers should not be saved even if used. */
3785 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3786 of memory. If any fpu reg is used in the function, we allocate
3787 such a block here, at the bottom of the frame, just in case it's needed.
3789 If this function is a leaf procedure, then we may choose not
3790 to do a "save" insn. The decision about whether or not
3791 to do this is made in regclass.c. */
/* TARGET_ASM_FUNCTION_PROLOGUE hook: emit the SOM assembler directives
   (.PROC/.CALLINFO/.ENTRY) that describe the frame built earlier by
   pa_expand_prologue.  SIZE is unused; actual_fsize/gr_saved/fr_saved
   were recorded during RTL expansion.
   NOTE(review): extraction dropped guard lines (the crtl->is_leaf and
   regs_ever_live tests selecting NO_CALLS/CALLS/SAVE_RP, and the
   gr_saved/fr_saved zero checks); code tokens kept byte-identical.  */
3794 pa_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3796 /* The function's label and associated .PROC must never be
3797 separated and must be output *after* any profiling declarations
3798 to avoid changing spaces/subspaces within a procedure. */
3799 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3800 fputs ("\t.PROC\n", file
);
3802 /* pa_expand_prologue does the dirty work now. We just need
3803 to output the assembler directives which denote the start
3805 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
/* One of NO_CALLS/CALLS is chosen by a dropped leaf-function test.  */
3807 fputs (",NO_CALLS", file
);
3809 fputs (",CALLS", file
);
3811 fputs (",SAVE_RP", file
);
3813 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3814 at the beginning of the frame and that it is used as the frame
3815 pointer for the frame. We do this because our current frame
3816 layout doesn't conform to that specified in the HP runtime
3817 documentation and we need a way to indicate to programs such as
3818 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3819 isn't used by HP compilers but is supported by the assembler.
3820 However, SAVE_SP is supposed to indicate that the previous stack
3821 pointer has been saved in the frame marker. */
3822 if (frame_pointer_needed
)
3823 fputs (",SAVE_SP", file
);
3825 /* Pass on information about the number of callee register saves
3826 performed in the prologue.
3828 The compiler is supposed to pass the highest register number
3829 saved, the assembler then has to adjust that number before
3830 entering it into the unwind descriptor (to account for any
3831 caller saved registers with lower register numbers than the
3832 first callee saved register). */
/* +2 / +11 bias: lowest callee-saved GR is %r3, FR is %fr12.  */
3834 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3837 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3839 fputs ("\n\t.ENTRY\n", file
);
3841 remove_useless_addtr_insns (0);
/* Expand the function prologue to RTL: save RP, allocate the frame
   (with or without a frame pointer), save callee-saved general and
   floating-point registers, and stash %r19 for PIC code.  Sets the
   file-scope actual_fsize/local_fsize/save_fregs used later by the
   epilogue and the .CALLINFO output.
   NOTE(review): this block has many extraction gaps (braces, `else`
   branches, declarations of insn/tmpreg/base/i, literal offsets,
   gr_saved/fr_saved updates); code tokens are kept byte-identical.  */
3845 pa_expand_prologue (void)
3847 int merge_sp_adjust_with_store
= 0;
3848 HOST_WIDE_INT size
= get_frame_size ();
3849 HOST_WIDE_INT offset
;
3858 /* Compute total size for frame pointer, filler, locals and rounding to
3859 the next word boundary. Similar code appears in pa_compute_frame_size
3860 and must be changed in tandem with this code. */
3861 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3862 if (local_fsize
|| frame_pointer_needed
)
3863 local_fsize
+= STARTING_FRAME_OFFSET
;
3865 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3866 if (flag_stack_usage_info
)
3867 current_function_static_stack_size
= actual_fsize
;
3869 /* Compute a few things we will use often. */
3870 tmpreg
= gen_rtx_REG (word_mode
, 1);
3872 /* Save RP first. The calling conventions manual states RP will
3873 always be stored into the caller's frame at sp - 20 or sp - 16
3874 depending on which ABI is in use. */
3875 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3877 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3883 /* Allocate the local frame and set up the frame pointer if needed. */
3884 if (actual_fsize
!= 0)
3886 if (frame_pointer_needed
)
3888 /* Copy the old frame pointer temporarily into %r1. Set up the
3889 new stack pointer, then store away the saved old frame pointer
3890 into the stack at sp and at the same time update the stack
3891 pointer by actual_fsize bytes. Two versions, first
3892 handles small (<8k) frames. The second handles large (>=8k)
3894 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3896 RTX_FRAME_RELATED_P (insn
) = 1;
3898 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3900 RTX_FRAME_RELATED_P (insn
) = 1;
/* Small-frame path: one post-modify store of the old FP (now in %r1)
   that also bumps sp by the whole frame.  */
3902 if (VAL_14_BITS_P (actual_fsize
))
3903 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3906 /* It is incorrect to store the saved frame pointer at *sp,
3907 then increment sp (writes beyond the current stack boundary).
3909 So instead use stwm to store at *sp and post-increment the
3910 stack pointer as an atomic operation. Then increment sp to
3911 finish allocating the new frame. */
3912 HOST_WIDE_INT adjust1
= 8192 - 64;
3913 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3915 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
/* Second sp bump by adjust2; amount operand lost to extraction.  */
3916 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3920 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3921 we need to store the previous stack pointer (frame pointer)
3922 into the frame marker on targets that use the HP unwind
3923 library. This allows the HP unwind library to be used to
3924 unwind GCC frames. However, we are not fully compatible
3925 with the HP library because our frame layout differs from
3926 that specified in the HP runtime specification.
3928 We don't want a frame note on this instruction as the frame
3929 marker moves during dynamic stack allocation.
3931 This instruction also serves as a blockage to prevent
3932 register spills from being scheduled before the stack
3933 pointer is raised. This is necessary as we store
3934 registers using the frame pointer as a base register,
3935 and the frame pointer is set before sp is raised. */
3936 if (TARGET_HPUX_UNWIND_LIBRARY
)
3938 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3939 GEN_INT (TARGET_64BIT
? -8 : -4));
3941 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3942 hard_frame_pointer_rtx
);
3945 emit_insn (gen_blockage ());
3947 /* no frame pointer needed. */
3950 /* In some cases we can perform the first callee register save
3951 and allocating the stack frame at the same time. If so, just
3952 make a note of it and defer allocating the frame until saving
3953 the callee registers. */
3954 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3955 merge_sp_adjust_with_store
= 1;
3956 /* Can not optimize. Adjust the stack frame by actual_fsize
3959 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3964 /* Normal register save.
3966 Do not save the frame pointer in the frame_pointer_needed case. It
3967 was done earlier. */
3968 if (frame_pointer_needed
)
3970 offset
= local_fsize
;
3972 /* Saving the EH return data registers in the frame is the simplest
3973 way to get the frame unwind information emitted. We put them
3974 just before the general registers. */
3975 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3977 unsigned int i
, regno
;
/* Loop over EH data registers (loop header lost to extraction).  */
3981 regno
= EH_RETURN_DATA_REGNO (i
);
3982 if (regno
== INVALID_REGNUM
)
3985 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
3986 offset
+= UNITS_PER_WORD
;
/* Callee-saved GRs %r18..%r4, FP-relative.  */
3990 for (i
= 18; i
>= 4; i
--)
3991 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3993 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
3994 offset
+= UNITS_PER_WORD
;
3997 /* Account for %r3 which is saved in a special place. */
4000 /* No frame pointer needed. */
4003 offset
= local_fsize
- actual_fsize
;
4005 /* Saving the EH return data registers in the frame is the simplest
4006 way to get the frame unwind information emitted. */
4007 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4009 unsigned int i
, regno
;
4013 regno
= EH_RETURN_DATA_REGNO (i
);
4014 if (regno
== INVALID_REGNUM
)
4017 /* If merge_sp_adjust_with_store is nonzero, then we can
4018 optimize the first save. */
4019 if (merge_sp_adjust_with_store
)
4021 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4022 merge_sp_adjust_with_store
= 0;
4025 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4026 offset
+= UNITS_PER_WORD
;
/* Callee-saved GRs %r18..%r3, SP-relative.  */
4030 for (i
= 18; i
>= 3; i
--)
4031 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4033 /* If merge_sp_adjust_with_store is nonzero, then we can
4034 optimize the first GR save. */
4035 if (merge_sp_adjust_with_store
)
4037 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4038 merge_sp_adjust_with_store
= 0;
4041 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4042 offset
+= UNITS_PER_WORD
;
4046 /* If we wanted to merge the SP adjustment with a GR save, but we never
4047 did any GR saves, then just emit the adjustment here. */
4048 if (merge_sp_adjust_with_store
)
4049 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4053 /* The hppa calling conventions say that %r19, the pic offset
4054 register, is saved at sp - 32 (in this function's frame)
4055 when generating PIC code. FIXME: What is the correct thing
4056 to do for functions which make no calls and allocate no
4057 frame? Do we need to allocate a frame, or can we just omit
4058 the save? For now we'll just omit the save.
4060 We don't want a note on this insn as the frame marker can
4061 move if there is a dynamic stack allocation. */
4062 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4064 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4066 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4070 /* Align pointer properly (doubleword boundary). */
4071 offset
= (offset
+ 7) & ~7;
4073 /* Floating point register store. */
4078 /* First get the frame or stack pointer to the start of the FP register
4080 if (frame_pointer_needed
)
4082 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4083 base
= hard_frame_pointer_rtx
;
4087 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4088 base
= stack_pointer_rtx
;
4091 /* Now actually save the FP registers. */
4092 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4094 if (df_regs_ever_live_p (i
)
4095 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
/* Post-increment store through %r1 (tmpreg).  */
4099 addr
= gen_rtx_MEM (DFmode
,
4100 gen_rtx_POST_INC (word_mode
, tmpreg
));
4101 reg
= gen_rtx_REG (DFmode
, i
);
4102 insn
= emit_move_insn (addr
, reg
);
4105 RTX_FRAME_RELATED_P (insn
) = 1;
/* Frame note: DF store expressed against BASE+offset (the offset
   operand was lost to extraction -- TODO confirm).  */
4108 rtx mem
= gen_rtx_MEM (DFmode
,
4109 plus_constant (Pmode
, base
,
4111 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4112 gen_rtx_SET (mem
, reg
));
/* 32-bit path: describe the save as two SF halves (left/right) so
   the unwinder can find each word.  */
4116 rtx meml
= gen_rtx_MEM (SFmode
,
4117 plus_constant (Pmode
, base
,
4119 rtx memr
= gen_rtx_MEM (SFmode
,
4120 plus_constant (Pmode
, base
,
4122 rtx regl
= gen_rtx_REG (SFmode
, i
);
4123 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4124 rtx setl
= gen_rtx_SET (meml
, regl
);
4125 rtx setr
= gen_rtx_SET (memr
, regr
);
4128 RTX_FRAME_RELATED_P (setl
) = 1;
4129 RTX_FRAME_RELATED_P (setr
) = 1;
4130 vec
= gen_rtvec (2, setl
, setr
);
4131 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4132 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4135 offset
+= GET_MODE_SIZE (DFmode
);
4142 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4143 Handle case where DISP > 8k by using the add_high_const patterns. */
/* Emit RTL to load register REG from memory BASE+DISP; dual of
   store_reg above, with the same three displacement strategies plus a
   TARGET_DISABLE_INDEXING split in the 64-bit case.
   NOTE(review): extraction dropped lines (braces, `else` keywords);
   code tokens kept byte-identical.  */
4146 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4148 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4149 rtx basereg
= gen_rtx_REG (Pmode
, base
);
/* Case 1: 14-bit displacement -- direct reg+d load.  */
4152 if (VAL_14_BITS_P (disp
))
4153 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
/* Case 2: 64-bit wide displacement via %r1.  */
4154 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4156 rtx delta
= GEN_INT (disp
);
4157 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4159 emit_move_insn (tmpreg
, delta
);
/* With indexing disabled, fold the add into %r1 and load through
   it; otherwise use an indexed (reg+reg) address directly.  */
4160 if (TARGET_DISABLE_INDEXING
)
4162 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4163 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4166 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Case 3: HIGH/LO_SUM addressing via %r1.  */
4170 rtx delta
= GEN_INT (disp
);
4171 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4172 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4174 emit_move_insn (tmpreg
, high
);
4175 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4178 emit_move_insn (dest
, src
);
4181 /* Update the total code bytes output to the text section. */
/* Add NBYTES to the running total of code bytes emitted to the text
   section, saturating at UINT_MAX on overflow.  Skipped for functions
   placed in their own named section.
   NOTE(review): braces were dropped by extraction; tokens intact.  */
4184 update_total_code_bytes (unsigned int nbytes
)
4186 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4187 && !IN_NAMED_SECTION_P (cfun
->decl
))
4189 unsigned int old_total
= total_code_bytes
;
4191 total_code_bytes
+= nbytes
;
4193 /* Be prepared to handle overflows. */
/* Unsigned wrap-around detection: sum smaller than old value.  */
4194 if (old_total
> total_code_bytes
)
4195 total_code_bytes
= UINT_MAX
;
4199 /* This function generates the assembly code for function exit.
4200 Args are as for output_function_prologue ().
4202 The function epilogue should not depend on the current stack
4203 pointer! It should use the frame pointer only. This is mandatory
4204 because of alloca; we also take advantage of it to omit stack
4205 adjustments before returning. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: emit .EXIT/.PROCEND, pad with a
   nop when the last real insn is a (noreturn) call so %r2 stays inside
   the function, then update the code-size accounting.
   NOTE(review): extraction gaps (braces, extra_nop assignment, thunk
   early-return near orig. 4251-4253); code tokens byte-identical.  */
4208 pa_output_function_epilogue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4210 rtx_insn
*insn
= get_last_insn ();
4213 /* pa_expand_epilogue does the dirty work now. We just need
4214 to output the assembler directives which denote the end
4217 To make debuggers happy, emit a nop if the epilogue was completely
4218 eliminated due to a volatile call as the last insn in the
4219 current function. That way the return address (in %r2) will
4220 always point to a valid instruction in the current function. */
4222 /* Get the last real insn. */
4224 insn
= prev_real_insn (insn
);
4226 /* If it is a sequence, then look inside. */
4227 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4228 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4230 /* If insn is a CALL_INSN, then it must be a call to a volatile
4231 function (otherwise there would be epilogue insns). */
4232 if (insn
&& CALL_P (insn
))
4234 fputs ("\tnop\n", file
);
4240 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4242 if (TARGET_SOM
&& TARGET_GAS
)
4244 /* We are done with this subspace except possibly for some additional
4245 debug information. Forget that we are in this subspace to ensure
4246 that the next function is output in its own subspace. */
4248 cfun
->machine
->in_nsubspa
= 2;
4251 /* Thunks do their own insn accounting. */
/* Compute the function's byte length from recorded insn addresses,
   rounded to the function alignment; fall back to UINT_MAX when no
   addresses were recorded.  */
4255 if (INSN_ADDRESSES_SET_P ())
4257 last_address
= extra_nop
? 4 : 0;
4258 insn
= get_last_nonnote_insn ();
4261 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4263 last_address
+= insn_default_length (insn
);
4265 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4266 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4269 last_address
= UINT_MAX
;
4271 /* Finally, update the total number of code bytes output so far. */
4272 update_total_code_bytes (last_address
);
/* Expand the function epilogue to RTL: restore RP early, restore
   callee-saved general and FP registers, cut the stack back (possibly
   merging the sp adjustment with the last register load), and apply
   the __builtin_eh_return stack adjustment.  Mirrors
   pa_expand_prologue; relies on actual_fsize/local_fsize/save_fregs
   set there.
   NOTE(review): many extraction gaps (braces, `else` branches,
   declarations of tmpreg/i/ret_off guards); tokens byte-identical.  */
4276 pa_expand_epilogue (void)
4279 HOST_WIDE_INT offset
;
4280 HOST_WIDE_INT ret_off
= 0;
4282 int merge_sp_adjust_with_load
= 0;
4284 /* We will use this often. */
4285 tmpreg
= gen_rtx_REG (word_mode
, 1);
4287 /* Try to restore RP early to avoid load/use interlocks when
4288 RP gets used in the return (bv) instruction. This appears to still
4289 be necessary even when we schedule the prologue and epilogue. */
4292 ret_off
= TARGET_64BIT
? -16 : -20;
4293 if (frame_pointer_needed
)
4295 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4300 /* No frame pointer, and stack is smaller than 8k. */
4301 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4303 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4309 /* General register restores. */
4310 if (frame_pointer_needed
)
4312 offset
= local_fsize
;
4314 /* If the current function calls __builtin_eh_return, then we need
4315 to restore the saved EH data registers. */
4316 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4318 unsigned int i
, regno
;
/* Loop over EH data registers (loop header lost to extraction).  */
4322 regno
= EH_RETURN_DATA_REGNO (i
);
4323 if (regno
== INVALID_REGNUM
)
4326 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4327 offset
+= UNITS_PER_WORD
;
/* Callee-saved GRs %r18..%r4, FP-relative (%r3 handled later).  */
4331 for (i
= 18; i
>= 4; i
--)
4332 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4334 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4335 offset
+= UNITS_PER_WORD
;
4340 offset
= local_fsize
- actual_fsize
;
4342 /* If the current function calls __builtin_eh_return, then we need
4343 to restore the saved EH data registers. */
4344 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4346 unsigned int i
, regno
;
4350 regno
= EH_RETURN_DATA_REGNO (i
);
4351 if (regno
== INVALID_REGNUM
)
4354 /* Only for the first load.
4355 merge_sp_adjust_with_load holds the register load
4356 with which we will merge the sp adjustment. */
4357 if (merge_sp_adjust_with_load
== 0
4359 && VAL_14_BITS_P (-actual_fsize
))
4360 merge_sp_adjust_with_load
= regno
;
4362 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4363 offset
+= UNITS_PER_WORD
;
/* Callee-saved GRs %r18..%r3, SP-relative.  */
4367 for (i
= 18; i
>= 3; i
--)
4369 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4371 /* Only for the first load.
4372 merge_sp_adjust_with_load holds the register load
4373 with which we will merge the sp adjustment. */
4374 if (merge_sp_adjust_with_load
== 0
4376 && VAL_14_BITS_P (-actual_fsize
))
4377 merge_sp_adjust_with_load
= i
;
4379 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4380 offset
+= UNITS_PER_WORD
;
4385 /* Align pointer properly (doubleword boundary). */
4386 offset
= (offset
+ 7) & ~7;
4388 /* FP register restores. */
4391 /* Adjust the register to index off of. */
4392 if (frame_pointer_needed
)
4393 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4395 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4397 /* Actually do the restores now. */
4398 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4399 if (df_regs_ever_live_p (i
)
4400 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
/* Post-increment load through %r1 (tmpreg), mirroring the save.  */
4402 rtx src
= gen_rtx_MEM (DFmode
,
4403 gen_rtx_POST_INC (word_mode
, tmpreg
));
4404 rtx dest
= gen_rtx_REG (DFmode
, i
);
4405 emit_move_insn (dest
, src
);
4409 /* Emit a blockage insn here to keep these insns from being moved to
4410 an earlier spot in the epilogue, or into the main instruction stream.
4412 This is necessary as we must not cut the stack back before all the
4413 restores are finished. */
4414 emit_insn (gen_blockage ());
4416 /* Reset stack pointer (and possibly frame pointer). The stack
4417 pointer is initially set to fp + 64 to avoid a race condition. */
4418 if (frame_pointer_needed
)
4420 rtx delta
= GEN_INT (-64);
4422 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
/* Restore old FP from *(sp - 64) with a pre-decrement load.  */
4423 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4424 stack_pointer_rtx
, delta
));
4426 /* If we were deferring a callee register restore, do it now. */
4427 else if (merge_sp_adjust_with_load
)
4429 rtx delta
= GEN_INT (-actual_fsize
);
4430 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4432 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4434 else if (actual_fsize
!= 0)
4435 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4438 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4439 frame greater than 8k), do so now. */
4441 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4443 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4445 rtx sa
= EH_RETURN_STACKADJ_RTX
;
/* Blockage keeps the EH stack adjustment ordered after restores.  */
4447 emit_insn (gen_blockage ());
4448 emit_insn (TARGET_64BIT
4449 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4450 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
/* Return true when the function can use a simple `return` pattern:
   after reload, with no frame pointer, no live %r2, and a zero-size
   frame.  NOTE(review): the early `return false` statements following
   each guard were dropped by extraction; tokens byte-identical.  */
4455 pa_can_use_return_insn (void)
4457 if (!reload_completed
)
4460 if (frame_pointer_needed
)
4463 if (df_regs_ever_live_p (2))
4469 return pa_compute_frame_size (get_frame_size (), 0) == 0;
/* Return the pseudo holding the entry value of the PIC offset table
   register (%r19), creating it on first use.
   NOTE(review): surrounding braces dropped by extraction.  */
4473 hppa_pic_save_rtx (void)
4475 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
/* Default to emitting deferred per-function profile counters unless a
   target header overrides it.  NOTE(review): the matching #endif was
   dropped by extraction.  */
4478 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4479 #define NO_DEFERRED_PROFILE_COUNTERS 0
4483 /* Vector of funcdef numbers. */
/* Pushed by hppa_profile_hook, drained by
   output_deferred_profile_counters below.  */
4484 static vec
<int> funcdef_nos
;
4486 /* Output deferred profile counters. */
/* Emit one zero-initialized long-sized counter (label LPn) in the data
   section for every function number queued in funcdef_nos, then empty
   the vector.  NOTE(review): braces and the declarations of i/n/align
   were dropped by extraction; tokens byte-identical.  */
4488 output_deferred_profile_counters (void)
4493 if (funcdef_nos
.is_empty ())
4496 switch_to_section (data_section
);
4497 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4498 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
/* One label + zero word per queued funcdef number.  */
4500 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4502 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4503 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4506 funcdef_nos
.release ();
/* Profiling hook: emit the call to _mcount for function LABEL_NO,
   loading the function's begin label into %r25 position-independently
   and (unless NO_DEFERRED_PROFILE_COUNTERS) the address of a deferred
   counter (label LPn) into %r24.
   NOTE(review): extraction dropped lines (braces, the #else arm
   markers, label-number operands of ASM_GENERATE_INTERNAL_LABEL and
   gen_call, emit_label calls); tokens byte-identical.  */
4510 hppa_profile_hook (int label_no
)
4512 /* We use SImode for the address of the function in both 32 and
4513 64-bit code to avoid having to provide DImode versions of the
4514 lcla2 and load_offset_label_address insn patterns. */
4515 rtx reg
= gen_reg_rtx (SImode
);
4516 rtx_code_label
*label_rtx
= gen_label_rtx ();
4517 rtx begin_label_rtx
;
4518 rtx_insn
*call_insn
;
4519 char begin_label_name
[16];
4521 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4523 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
/* Point the argument pointer past the outgoing args area.  */
4526 emit_move_insn (arg_pointer_rtx
,
4527 gen_rtx_PLUS (word_mode
, virtual_outgoing_args_rtx
,
/* _mcount receives the caller's return address in %r26.  */
4530 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4532 /* The address of the function is loaded into %r25 with an instruction-
4533 relative sequence that avoids the use of relocations. The sequence
4534 is split so that the load_offset_label_address instruction can
4535 occupy the delay slot of the call to _mcount. */
4537 emit_insn (gen_lcla2 (reg
, label_rtx
));
4539 emit_insn (gen_lcla1 (reg
, label_rtx
));
4541 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4542 reg
, begin_label_rtx
, label_rtx
));
4544 #if !NO_DEFERRED_PROFILE_COUNTERS
4546 rtx count_label_rtx
, addr
, r24
;
4547 char count_label_name
[16];
/* Queue this function's counter for output_deferred_profile_counters
   and pass its address to _mcount in %r24.  */
4549 funcdef_nos
.safe_push (label_no
);
4550 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4551 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (count_label_name
));
4553 addr
= force_reg (Pmode
, count_label_rtx
);
4554 r24
= gen_rtx_REG (Pmode
, 24);
4555 emit_move_insn (r24
, addr
);
4558 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4559 gen_rtx_SYMBOL_REF (Pmode
,
4561 GEN_INT (TARGET_64BIT
? 24 : 12)));
4563 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
/* Counters disabled: call _mcount without the %r24 argument.  */
4568 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4569 gen_rtx_SYMBOL_REF (Pmode
,
4571 GEN_INT (TARGET_64BIT
? 16 : 8)));
/* Record the argument registers the call implicitly uses.  */
4575 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4576 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4578 /* Indicate the _mcount call cannot throw, nor will it execute a
4580 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4583 /* Fetch the return address for the frame COUNT steps up from
4584 the current frame, after the prologue. FRAMEADDR is the
4585 frame pointer of the COUNT frame.
4587 We want to ignore any export stub remnants here. To handle this,
4588 we examine the code at the return address, and if it is an export
4589 stub, we return a memory rtx for the stub return address stored
4592 The value returned is used in two different ways:
4594 1. To find a function's caller.
4596 2. To change the return address for a function.
4598 This function handles most instances of case 1; however, it will
4599 fail if there are two levels of stubs to execute on the return
4600 path. The only way I believe that can happen is if the return value
4601 needs a parameter relocation, which never happens for C code.
4603 This function handles most instances of case 2; however, it will
4604 fail if we did not originally have stub code on the return path
4605 but will need stub code on the new return path. This can happen if
4606 the caller & callee are both in the main program, but the new
4607 return location is in a shared library. */
4610 pa_return_addr_rtx (int count
, rtx frameaddr
)
4617 /* The instruction stream at the return address of a PA1.X export stub is:
4619 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4620 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4621 0x00011820 | stub+16: mtsp r1,sr0
4622 0xe0400002 | stub+20: be,n 0(sr0,rp)
4624 0xe0400002 must be specified as -532676606 so that it won't be
4625 rejected as an invalid immediate operand on 64-bit hosts.
4627 The instruction stream at the return address of a PA2.0 export stub is:
4629 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4630 0xe840d002 | stub+12: bve,n (rp)
4633 HOST_WIDE_INT insns
[4];
4639 rp
= get_hard_reg_initial_val (Pmode
, 2);
4641 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4644 /* If there is no export stub then just use the value saved from
4645 the return pointer register. */
4647 saved_rp
= gen_reg_rtx (Pmode
);
4648 emit_move_insn (saved_rp
, rp
);
4650 /* Get pointer to the instruction stream. We have to mask out the
4651 privilege level from the two low order bits of the return address
4652 pointer here so that ins will point to the start of the first
4653 instruction that would have been executed if we returned. */
4654 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4655 label
= gen_label_rtx ();
4659 insns
[0] = 0x4bc23fd1;
4660 insns
[1] = -398405630;
4665 insns
[0] = 0x4bc23fd1;
4666 insns
[1] = 0x004010a1;
4667 insns
[2] = 0x00011820;
4668 insns
[3] = -532676606;
4672 /* Check the instruction stream at the normal return address for the
4673 export stub. If it is an export stub, than our return address is
4674 really in -24[frameaddr]. */
4676 for (i
= 0; i
< len
; i
++)
4678 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
4679 rtx op1
= GEN_INT (insns
[i
]);
4680 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4683 /* Here we know that our return address points to an export
4684 stub. We don't want to return the address of the export stub,
4685 but rather the return address of the export stub. That return
4686 address is stored at -24[frameaddr]. */
4688 emit_move_insn (saved_rp
,
4690 memory_address (Pmode
,
4691 plus_constant (Pmode
, frameaddr
,
/* Emit RTL for a floating-point conditional branch: first a compare that
   sets the CCFP "register" 0, then a conditional jump on its result.
   operands[0] is the comparison code, operands[1]/[2] the compared values,
   operands[3] the branch target label.
   NOTE(review): this chunk is a damaged extraction — brace/return lines are
   missing; comments below describe only what the visible calls show.  */
4700 pa_emit_bcond_fp (rtx operands
[])
4702 enum rtx_code code
= GET_CODE (operands
[0]);
4703 rtx operand0
= operands
[1];
4704 rtx operand1
= operands
[2];
4705 rtx label
= operands
[3];
/* Emit the FP compare: (set (reg:CCFP 0) (code operand0 operand1)).  */
4707 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
4708 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
/* Emit the conditional jump testing CCFP reg 0 against the label.  */
4710 emit_jump_insn (gen_rtx_SET (pc_rtx
,
4711 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4714 gen_rtx_REG (CCFPmode
, 0),
4716 gen_rtx_LABEL_REF (VOIDmode
, label
),
4721 /* Adjust the cost of a scheduling dependency. Return the new cost of
4722 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4725 pa_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
4727 enum attr_type attr_type
;
4729 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4730 true dependencies as they are described with bypasses now. */
4731 if (pa_cpu
>= PROCESSOR_8000
|| REG_NOTE_KIND (link
) == 0)
4734 if (! recog_memoized (insn
))
4737 attr_type
= get_attr_type (insn
);
4739 switch (REG_NOTE_KIND (link
))
4742 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4745 if (attr_type
== TYPE_FPLOAD
)
4747 rtx pat
= PATTERN (insn
);
4748 rtx dep_pat
= PATTERN (dep_insn
);
4749 if (GET_CODE (pat
) == PARALLEL
)
4751 /* This happens for the fldXs,mb patterns. */
4752 pat
= XVECEXP (pat
, 0, 0);
4754 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4755 /* If this happens, we have to extend this to schedule
4756 optimally. Return 0 for now. */
4759 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4761 if (! recog_memoized (dep_insn
))
4763 switch (get_attr_type (dep_insn
))
4770 case TYPE_FPSQRTSGL
:
4771 case TYPE_FPSQRTDBL
:
4772 /* A fpload can't be issued until one cycle before a
4773 preceding arithmetic operation has finished if
4774 the target of the fpload is any of the sources
4775 (or destination) of the arithmetic operation. */
4776 return insn_default_latency (dep_insn
) - 1;
4783 else if (attr_type
== TYPE_FPALU
)
4785 rtx pat
= PATTERN (insn
);
4786 rtx dep_pat
= PATTERN (dep_insn
);
4787 if (GET_CODE (pat
) == PARALLEL
)
4789 /* This happens for the fldXs,mb patterns. */
4790 pat
= XVECEXP (pat
, 0, 0);
4792 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4793 /* If this happens, we have to extend this to schedule
4794 optimally. Return 0 for now. */
4797 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4799 if (! recog_memoized (dep_insn
))
4801 switch (get_attr_type (dep_insn
))
4805 case TYPE_FPSQRTSGL
:
4806 case TYPE_FPSQRTDBL
:
4807 /* An ALU flop can't be issued until two cycles before a
4808 preceding divide or sqrt operation has finished if
4809 the target of the ALU flop is any of the sources
4810 (or destination) of the divide or sqrt operation. */
4811 return insn_default_latency (dep_insn
) - 2;
4819 /* For other anti dependencies, the cost is 0. */
4822 case REG_DEP_OUTPUT
:
4823 /* Output dependency; DEP_INSN writes a register that INSN writes some
4825 if (attr_type
== TYPE_FPLOAD
)
4827 rtx pat
= PATTERN (insn
);
4828 rtx dep_pat
= PATTERN (dep_insn
);
4829 if (GET_CODE (pat
) == PARALLEL
)
4831 /* This happens for the fldXs,mb patterns. */
4832 pat
= XVECEXP (pat
, 0, 0);
4834 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4835 /* If this happens, we have to extend this to schedule
4836 optimally. Return 0 for now. */
4839 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4841 if (! recog_memoized (dep_insn
))
4843 switch (get_attr_type (dep_insn
))
4850 case TYPE_FPSQRTSGL
:
4851 case TYPE_FPSQRTDBL
:
4852 /* A fpload can't be issued until one cycle before a
4853 preceding arithmetic operation has finished if
4854 the target of the fpload is the destination of the
4855 arithmetic operation.
4857 Exception: For PA7100LC, PA7200 and PA7300, the cost
4858 is 3 cycles, unless they bundle together. We also
4859 pay the penalty if the second insn is a fpload. */
4860 return insn_default_latency (dep_insn
) - 1;
4867 else if (attr_type
== TYPE_FPALU
)
4869 rtx pat
= PATTERN (insn
);
4870 rtx dep_pat
= PATTERN (dep_insn
);
4871 if (GET_CODE (pat
) == PARALLEL
)
4873 /* This happens for the fldXs,mb patterns. */
4874 pat
= XVECEXP (pat
, 0, 0);
4876 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4877 /* If this happens, we have to extend this to schedule
4878 optimally. Return 0 for now. */
4881 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4883 if (! recog_memoized (dep_insn
))
4885 switch (get_attr_type (dep_insn
))
4889 case TYPE_FPSQRTSGL
:
4890 case TYPE_FPSQRTDBL
:
4891 /* An ALU flop can't be issued until two cycles before a
4892 preceding divide or sqrt operation has finished if
4893 the target of the ALU flop is also the target of
4894 the divide or sqrt operation. */
4895 return insn_default_latency (dep_insn
) - 2;
4903 /* For other output dependencies, the cost is 0. */
4911 /* Adjust scheduling priorities. We use this to try and keep addil
4912 and the next use of %r1 close together. */
/* Scheduling-priority hook: per the comment above (original lines
   4911-4912), tries to keep an addil and the next use of %r1 close
   together.  Examines INSN's single_set and pattern-matches three cases:
   a LO_SUM source, a MEM-of-LO_SUM source, and a MEM-of-LO_SUM
   destination, each restricted to symbolic, non-read-only operands.
   NOTE(review): the branch bodies (priority adjustments and returns) were
   dropped by the extraction — confirm against the original pa.c.  */
4914 pa_adjust_priority (rtx_insn
*insn
, int priority
)
4916 rtx set
= single_set (insn
);
4920 src
= SET_SRC (set
);
4921 dest
= SET_DEST (set
);
/* Case 1: source is (lo_sum ... symbolic), not read-only data.  */
4922 if (GET_CODE (src
) == LO_SUM
4923 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
4924 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
/* Case 2: source is a MEM whose address is such a LO_SUM.  */
4927 else if (GET_CODE (src
) == MEM
4928 && GET_CODE (XEXP (src
, 0)) == LO_SUM
4929 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
4930 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
/* Case 3: destination is a MEM whose address is such a LO_SUM.  */
4933 else if (GET_CODE (dest
) == MEM
4934 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
4935 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
4936 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
4942 /* The 700 can only issue a single insn at a time.
4943 The 7XXX processors can issue two insns at a time.
4944 The 8000 can issue 4 insns at a time. */
/* TARGET_SCHED_ISSUE_RATE hook: insns issuable per cycle by processor, as
   stated in the comment above (original lines 4942-4944): the 700 issues 1,
   the 7xxx parts issue 2, the 8000 issues 4.
   NOTE(review): the switch header on pa_cpu and default case were dropped
   by the extraction.  */
4946 pa_issue_rate (void)
4950 case PROCESSOR_700
: return 1;
4951 case PROCESSOR_7100
: return 2;
4952 case PROCESSOR_7100LC
: return 2;
4953 case PROCESSOR_7200
: return 2;
4954 case PROCESSOR_7300
: return 2;
4955 case PROCESSOR_8000
: return 4;
4964 /* Return any length plus adjustment needed by INSN which already has
4965 its length computed as LENGTH. Return LENGTH if no adjustment is
4968 Also compute the length of an inline block move here as it is too
4969 complicated to express as a length attribute in pa.md. */
4971 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
4973 rtx pat
= PATTERN (insn
);
4975 /* If length is negative or undefined, provide initial length. */
4976 if ((unsigned int) length
>= INT_MAX
)
4978 if (GET_CODE (pat
) == SEQUENCE
)
4979 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
4981 switch (get_attr_type (insn
))
4984 length
= pa_attr_length_millicode_call (insn
);
4987 length
= pa_attr_length_call (insn
, 0);
4990 length
= pa_attr_length_call (insn
, 1);
4993 length
= pa_attr_length_indirect_call (insn
);
4995 case TYPE_SH_FUNC_ADRS
:
4996 length
= pa_attr_length_millicode_call (insn
) + 20;
5003 /* Block move pattern. */
5004 if (NONJUMP_INSN_P (insn
)
5005 && GET_CODE (pat
) == PARALLEL
5006 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5007 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5008 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
5009 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5010 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5011 length
+= compute_movmem_length (insn
) - 4;
5012 /* Block clear pattern. */
5013 else if (NONJUMP_INSN_P (insn
)
5014 && GET_CODE (pat
) == PARALLEL
5015 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5016 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5017 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5018 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5019 length
+= compute_clrmem_length (insn
) - 4;
5020 /* Conditional branch with an unfilled delay slot. */
5021 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5023 /* Adjust a short backwards conditional with an unfilled delay slot. */
5024 if (GET_CODE (pat
) == SET
5026 && JUMP_LABEL (insn
) != NULL_RTX
5027 && ! forward_branch_p (insn
))
5029 else if (GET_CODE (pat
) == PARALLEL
5030 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5033 /* Adjust dbra insn with short backwards conditional branch with
5034 unfilled delay slot -- only for case where counter is in a
5035 general register register. */
5036 else if (GET_CODE (pat
) == PARALLEL
5037 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5038 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5039 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5041 && ! forward_branch_p (insn
))
5047 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5050 pa_print_operand_punct_valid_p (unsigned char code
)
5061 /* Print operand X (an rtx) in assembler syntax to file FILE.
5062 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5063 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5066 pa_print_operand (FILE *file
, rtx x
, int code
)
5071 /* Output a 'nop' if there's nothing for the delay slot. */
5072 if (dbr_sequence_length () == 0)
5073 fputs ("\n\tnop", file
);
5076 /* Output a nullification completer if there's nothing for the */
5077 /* delay slot or nullification is requested. */
5078 if (dbr_sequence_length () == 0 ||
5080 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5084 /* Print out the second register name of a register pair.
5085 I.e., R (6) => 7. */
5086 fputs (reg_names
[REGNO (x
) + 1], file
);
5089 /* A register or zero. */
5091 || (x
== CONST0_RTX (DFmode
))
5092 || (x
== CONST0_RTX (SFmode
)))
5094 fputs ("%r0", file
);
5100 /* A register or zero (floating point). */
5102 || (x
== CONST0_RTX (DFmode
))
5103 || (x
== CONST0_RTX (SFmode
)))
5105 fputs ("%fr0", file
);
5114 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5115 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5116 pa_output_global_address (file
, xoperands
[1], 0);
5117 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5121 case 'C': /* Plain (C)ondition */
5123 switch (GET_CODE (x
))
5126 fputs ("=", file
); break;
5128 fputs ("<>", file
); break;
5130 fputs (">", file
); break;
5132 fputs (">=", file
); break;
5134 fputs (">>=", file
); break;
5136 fputs (">>", file
); break;
5138 fputs ("<", file
); break;
5140 fputs ("<=", file
); break;
5142 fputs ("<<=", file
); break;
5144 fputs ("<<", file
); break;
5149 case 'N': /* Condition, (N)egated */
5150 switch (GET_CODE (x
))
5153 fputs ("<>", file
); break;
5155 fputs ("=", file
); break;
5157 fputs ("<=", file
); break;
5159 fputs ("<", file
); break;
5161 fputs ("<<", file
); break;
5163 fputs ("<<=", file
); break;
5165 fputs (">=", file
); break;
5167 fputs (">", file
); break;
5169 fputs (">>", file
); break;
5171 fputs (">>=", file
); break;
5176 /* For floating point comparisons. Note that the output
5177 predicates are the complement of the desired mode. The
5178 conditions for GT, GE, LT, LE and LTGT cause an invalid
5179 operation exception if the result is unordered and this
5180 exception is enabled in the floating-point status register. */
5182 switch (GET_CODE (x
))
5185 fputs ("!=", file
); break;
5187 fputs ("=", file
); break;
5189 fputs ("!>", file
); break;
5191 fputs ("!>=", file
); break;
5193 fputs ("!<", file
); break;
5195 fputs ("!<=", file
); break;
5197 fputs ("!<>", file
); break;
5199 fputs ("!?<=", file
); break;
5201 fputs ("!?<", file
); break;
5203 fputs ("!?>=", file
); break;
5205 fputs ("!?>", file
); break;
5207 fputs ("!?=", file
); break;
5209 fputs ("!?", file
); break;
5211 fputs ("?", file
); break;
5216 case 'S': /* Condition, operands are (S)wapped. */
5217 switch (GET_CODE (x
))
5220 fputs ("=", file
); break;
5222 fputs ("<>", file
); break;
5224 fputs ("<", file
); break;
5226 fputs ("<=", file
); break;
5228 fputs ("<<=", file
); break;
5230 fputs ("<<", file
); break;
5232 fputs (">", file
); break;
5234 fputs (">=", file
); break;
5236 fputs (">>=", file
); break;
5238 fputs (">>", file
); break;
5243 case 'B': /* Condition, (B)oth swapped and negate. */
5244 switch (GET_CODE (x
))
5247 fputs ("<>", file
); break;
5249 fputs ("=", file
); break;
5251 fputs (">=", file
); break;
5253 fputs (">", file
); break;
5255 fputs (">>", file
); break;
5257 fputs (">>=", file
); break;
5259 fputs ("<=", file
); break;
5261 fputs ("<", file
); break;
5263 fputs ("<<", file
); break;
5265 fputs ("<<=", file
); break;
5271 gcc_assert (GET_CODE (x
) == CONST_INT
);
5272 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5275 gcc_assert (GET_CODE (x
) == CONST_INT
);
5276 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5279 gcc_assert (GET_CODE (x
) == CONST_INT
);
5280 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5283 gcc_assert (GET_CODE (x
) == CONST_INT
5284 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5285 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5288 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5289 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5292 gcc_assert (GET_CODE (x
) == CONST_INT
);
5293 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5296 gcc_assert (GET_CODE (x
) == CONST_INT
);
5297 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5300 if (GET_CODE (x
) == CONST_INT
)
5305 switch (GET_CODE (XEXP (x
, 0)))
5309 if (ASSEMBLER_DIALECT
== 0)
5310 fputs ("s,mb", file
);
5312 fputs (",mb", file
);
5316 if (ASSEMBLER_DIALECT
== 0)
5317 fputs ("s,ma", file
);
5319 fputs (",ma", file
);
5322 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5323 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5325 if (ASSEMBLER_DIALECT
== 0)
5328 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5329 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5331 if (ASSEMBLER_DIALECT
== 0)
5332 fputs ("x,s", file
);
5336 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5340 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5346 pa_output_global_address (file
, x
, 0);
5349 pa_output_global_address (file
, x
, 1);
5351 case 0: /* Don't do anything special */
5356 compute_zdepwi_operands (INTVAL (x
), op
);
5357 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5363 compute_zdepdi_operands (INTVAL (x
), op
);
5364 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5368 /* We can get here from a .vtable_inherit due to our
5369 CONSTANT_ADDRESS_P rejecting perfectly good constant
5375 if (GET_CODE (x
) == REG
)
5377 fputs (reg_names
[REGNO (x
)], file
);
5378 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5384 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5385 && (REGNO (x
) & 1) == 0)
5388 else if (GET_CODE (x
) == MEM
)
5390 int size
= GET_MODE_SIZE (GET_MODE (x
));
5391 rtx base
= NULL_RTX
;
5392 switch (GET_CODE (XEXP (x
, 0)))
5396 base
= XEXP (XEXP (x
, 0), 0);
5397 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5401 base
= XEXP (XEXP (x
, 0), 0);
5402 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5405 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5406 fprintf (file
, "%s(%s)",
5407 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5408 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5409 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5410 fprintf (file
, "%s(%s)",
5411 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5412 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5413 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5414 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5416 /* Because the REG_POINTER flag can get lost during reload,
5417 pa_legitimate_address_p canonicalizes the order of the
5418 index and base registers in the combined move patterns. */
5419 rtx base
= XEXP (XEXP (x
, 0), 1);
5420 rtx index
= XEXP (XEXP (x
, 0), 0);
5422 fprintf (file
, "%s(%s)",
5423 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5426 output_address (GET_MODE (x
), XEXP (x
, 0));
5429 output_address (GET_MODE (x
), XEXP (x
, 0));
5434 output_addr_const (file
, x
);
5437 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5440 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5443 /* Imagine (high (const (plus ...))). */
5444 if (GET_CODE (x
) == HIGH
)
5447 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5448 output_addr_const (file
, x
);
5449 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5451 output_addr_const (file
, x
);
5452 fputs ("-$global$", file
);
5454 else if (GET_CODE (x
) == CONST
)
5456 const char *sep
= "";
5457 int offset
= 0; /* assembler wants -$global$ at end */
5458 rtx base
= NULL_RTX
;
5460 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5464 base
= XEXP (XEXP (x
, 0), 0);
5465 output_addr_const (file
, base
);
5468 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5474 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5478 base
= XEXP (XEXP (x
, 0), 1);
5479 output_addr_const (file
, base
);
5482 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5488 /* How bogus. The compiler is apparently responsible for
5489 rounding the constant if it uses an LR field selector.
5491 The linker and/or assembler seem a better place since
5492 they have to do this kind of thing already.
5494 If we fail to do this, HP's optimizing linker may eliminate
5495 an addil, but not update the ldw/stw/ldo instruction that
5496 uses the result of the addil. */
5498 offset
= ((offset
+ 0x1000) & ~0x1fff);
5500 switch (GET_CODE (XEXP (x
, 0)))
5513 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5521 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5522 fputs ("-$global$", file
);
5524 fprintf (file
, "%s%d", sep
, offset
);
5527 output_addr_const (file
, x
);
5530 /* Output boilerplate text to appear at the beginning of the file.
5531 There are several possible versions. */
5532 #define aputs(x) fputs(x, asm_out_file)
/* Emit the assembler ".LEVEL" directive selecting the PA architecture
   revision: 2.0w for the 64-bit target, else 2.0 / 1.1 / 1.0 by CPU
   capability.  NOTE(review): the leading TARGET_64BIT test was dropped by
   the extraction — the first aputs is presumably under it.  */
5534 pa_file_start_level (void)
5537 aputs ("\t.LEVEL 2.0w\n");
5538 else if (TARGET_PA_20
)
5539 aputs ("\t.LEVEL 2.0\n");
5540 else if (TARGET_PA_11
)
5541 aputs ("\t.LEVEL 1.1\n");
5543 aputs ("\t.LEVEL 1.0\n");
/* Emit SOM ".SPACE"/".SUBSPA" boilerplate declaring the $PRIVATE$ data
   subspaces ($DATA$, $TM_CLONE_TABLE$, $BSS$) and the $TEXT$ space with
   its $LIT$ and $CODE$ subspaces.  SORTSPACE presumably selects extra
   SORT= attributes — the conditional using it was dropped; verify against
   the original pa.c.  */
5547 pa_file_start_space (int sortspace
)
5549 aputs ("\t.SPACE $PRIVATE$");
5552 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5554 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5555 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5556 "\n\t.SPACE $TEXT$");
5559 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5560 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
/* Emit the source-file directive (when debugging info is enabled) and,
   presumably when WANT_VERSION is set, a ".version" directive — the test
   on WANT_VERSION was dropped by the extraction; confirm.  */
5564 pa_file_start_file (int want_version
)
5566 if (write_symbols
!= NO_DEBUG
)
5568 output_file_directive (asm_out_file
, main_input_filename
);
5570 aputs ("\t.version\t\"01.01\"\n");
/* Emit an ".IMPORT _mcount,<ASWHAT>" directive for profiling; ASWHAT is
   the import kind ("ENTRY" or "CODE" at the call sites below).
   NOTE(review): the guard (presumably on profile_flag) was dropped by the
   extraction.  */
5575 pa_file_start_mcount (const char *aswhat
)
5578 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
/* TARGET_ASM_FILE_START for the ELF32 target: .LEVEL directive, _mcount
   import as ENTRY, then the file directive without a version string.  */
5582 pa_elf_file_start (void)
5584 pa_file_start_level ();
5585 pa_file_start_mcount ("ENTRY");
5586 pa_file_start_file (0);
/* TARGET_ASM_FILE_START for the SOM target: level, space/subspace
   boilerplate, imports of $global$ and the $$dyncall millicode, _mcount
   as CODE, then the file directive.  */
5590 pa_som_file_start (void)
5592 pa_file_start_level ();
5593 pa_file_start_space (0);
5594 aputs ("\t.IMPORT $global$,DATA\n"
5595 "\t.IMPORT $$dyncall,MILLICODE\n");
5596 pa_file_start_mcount ("CODE");
5597 pa_file_start_file (0);
/* TARGET_ASM_FILE_START for Linux: file directive with version, then the
   .LEVEL directive and the _mcount import as CODE.  */
5601 pa_linux_file_start (void)
5603 pa_file_start_file (1);
5604 pa_file_start_level ();
5605 pa_file_start_mcount ("CODE");
/* TARGET_ASM_FILE_START for 64-bit HP-UX with GAS: .LEVEL, an optional
   type directive marking _mcount as a function (guard on the _mcount
   import was dropped by the extraction — confirm), then file+version.  */
5609 pa_hpux64_gas_file_start (void)
5611 pa_file_start_level ();
5612 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5614 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file
, "_mcount", "function");
5616 pa_file_start_file (1);
/* TARGET_ASM_FILE_START for 64-bit HP-UX with the HP assembler: level,
   space boilerplate (sortspace=1), _mcount import as CODE, file
   directive without version.  */
5620 pa_hpux64_hpas_file_start (void)
5622 pa_file_start_level ();
5623 pa_file_start_space (1);
5624 pa_file_start_mcount ("CODE");
5625 pa_file_start_file (0);
5629 /* Search the deferred plabel list for SYMBOL and return its internal
5630 label. If an entry for SYMBOL is not found, a new entry is created. */
5633 pa_get_deferred_plabel (rtx symbol
)
5635 const char *fname
= XSTR (symbol
, 0);
5638 /* See if we have already put this function on the list of deferred
5639 plabels. This list is generally small, so a liner search is not
5640 too ugly. If it proves too slow replace it with something faster. */
5641 for (i
= 0; i
< n_deferred_plabels
; i
++)
5642 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5645 /* If the deferred plabel list is empty, or this entry was not found
5646 on the list, create a new entry on the list. */
5647 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5651 if (deferred_plabels
== 0)
5652 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5654 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5656 n_deferred_plabels
+ 1);
5658 i
= n_deferred_plabels
++;
5659 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5660 deferred_plabels
[i
].symbol
= symbol
;
5662 /* Gross. We have just implicitly taken the address of this
5663 function. Mark it in the same manner as assemble_name. */
5664 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5666 mark_referenced (id
);
5669 return deferred_plabels
[i
].internal_label
;
/* Emit all deferred plabels collected by pa_get_deferred_plabel: switch
   to the data (PIC) or readonly-data section, align to a word boundary,
   then for each entry emit its internal label followed by the function
   symbol as a pointer-sized integer.
   NOTE(review): this chunk is a damaged extraction; brace lines and the
   declaration of I are missing.  */
5673 output_deferred_plabels (void)
5677 /* If we have some deferred plabels, then we need to switch into the
5678 data or readonly data section, and align it to a 4 byte boundary
5679 before outputting the deferred plabels. */
5680 if (n_deferred_plabels
)
5682 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
/* Align 2^3 = 8 bytes on 64-bit, 2^2 = 4 bytes on 32-bit.  */
5683 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5686 /* Now output the deferred plabels. */
5687 for (i
= 0; i
< n_deferred_plabels
; i
++)
5689 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5690 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
/* 8-byte/64-bit aligned word on the 64-bit target, else 4-byte/32-bit.  */
5691 assemble_integer (deferred_plabels
[i
].symbol
,
5692 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5696 /* Initialize optabs to point to emulation routines. */
5699 pa_init_libfuncs (void)
5701 if (HPUX_LONG_DOUBLE_LIBRARY
)
5703 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5704 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5705 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5706 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5707 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5708 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5709 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5710 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5711 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5713 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5714 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5715 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5716 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5717 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5718 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5719 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5721 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5722 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5723 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5724 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5726 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
5727 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
5728 : "_U_Qfcnvfxt_quad_to_sgl");
5729 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
5730 "_U_Qfcnvfxt_quad_to_dbl");
5731 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
5732 "_U_Qfcnvfxt_quad_to_usgl");
5733 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
5734 "_U_Qfcnvfxt_quad_to_udbl");
5736 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
5737 "_U_Qfcnvxf_sgl_to_quad");
5738 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
5739 "_U_Qfcnvxf_dbl_to_quad");
5740 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
5741 "_U_Qfcnvxf_usgl_to_quad");
5742 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
5743 "_U_Qfcnvxf_udbl_to_quad");
5746 if (TARGET_SYNC_LIBCALL
)
5747 init_sync_libfuncs (8);
5750 /* HP's millicode routines mean something special to the assembler.
5751 Keep track of which ones we have used. */
5753 enum millicodes
{ remI
, remU
, divI
, divU
, mulI
, end1000
};
5754 static void import_milli (enum millicodes
);
5755 static char imported
[(int) end1000
];
5756 static const char * const milli_names
[] = {"remI", "remU", "divI", "divU", "mulI"};
5757 static const char import_string
[] = ".IMPORT $$....,MILLICODE";
5758 #define MILLI_START 10
/* Emit a ".IMPORT $$<name>,MILLICODE" directive for millicode routine
   CODE, at most once per routine (tracked in the imported[] array).  The
   4-character routine name from milli_names[] is patched into a copy of
   import_string at offset MILLI_START (10), overwriting the "...." 
   placeholder.  strncpy's no-terminator behavior is intentional here: it
   splices exactly 4 chars into the middle of an already-terminated
   buffer.  */
5761 import_milli (enum millicodes code
)
5763 char str
[sizeof (import_string
)];
5765 if (!imported
[(int) code
])
5767 imported
[(int) code
] = 1;
5768 strcpy (str
, import_string
);
5769 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5770 output_asm_insn (str
, 0);
5774 /* The register constraints have put the operands and return value in
5775 the proper registers. */
/* Output assembly for a multiply via the $$mulI millicode routine:
   ensure the import directive has been emitted, then generate the
   millicode call.  UNSIGNEDP is unused — $$mulI serves both
   signednesses (operands/result already placed by register
   constraints, per the comment above).  */
5778 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
5780 import_milli (mulI
);
5781 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
5784 /* Emit the rtl for doing a division by a constant. */
5786 /* Do magic division millicodes exist for this value? */
5787 const int pa_magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5789 /* We'll use an array to keep track of the magic millicodes and
5790 whether or not we've used them already. [n][0] is signed, [n][1] is
5793 static int div_milli
[16][2];
5796 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5798 if (GET_CODE (operands
[2]) == CONST_INT
5799 && INTVAL (operands
[2]) > 0
5800 && INTVAL (operands
[2]) < 16
5801 && pa_magic_milli
[INTVAL (operands
[2])])
5803 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5805 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5809 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
5810 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5812 gen_rtx_REG (SImode
, 26),
5814 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5815 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5816 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5817 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5818 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5819 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5826 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
5830 /* If the divisor is a constant, try to use one of the special
5832 if (GET_CODE (operands
[0]) == CONST_INT
)
5834 static char buf
[100];
5835 divisor
= INTVAL (operands
[0]);
5836 if (!div_milli
[divisor
][unsignedp
])
5838 div_milli
[divisor
][unsignedp
] = 1;
5840 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5842 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5846 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5847 INTVAL (operands
[0]));
5848 return pa_output_millicode_call (insn
,
5849 gen_rtx_SYMBOL_REF (SImode
, buf
));
5853 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5854 INTVAL (operands
[0]));
5855 return pa_output_millicode_call (insn
,
5856 gen_rtx_SYMBOL_REF (SImode
, buf
));
5859 /* Divisor isn't a special constant. */
5864 import_milli (divU
);
5865 return pa_output_millicode_call (insn
,
5866 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5870 import_milli (divI
);
5871 return pa_output_millicode_call (insn
,
5872 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5877 /* Output a $$rem millicode to do mod. */
/* Output assembly for a modulo via the $$remU (unsigned) or $$remI
   (signed) millicode routine, importing it first.
   NOTE(review): the if/else on UNSIGNEDP selecting between the two
   halves was dropped by the extraction — the first pair of calls is
   presumably the unsigned branch.  */
5880 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
5884 import_milli (remU
);
5885 return pa_output_millicode_call (insn
,
5886 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5890 import_milli (remI
);
5891 return pa_output_millicode_call (insn
,
5892 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5897 pa_output_arg_descriptor (rtx_insn
*call_insn
)
5899 const char *arg_regs
[4];
5900 machine_mode arg_mode
;
5902 int i
, output_flag
= 0;
5905 /* We neither need nor want argument location descriptors for the
5906 64bit runtime environment or the ELF32 environment. */
5907 if (TARGET_64BIT
|| TARGET_ELF32
)
5910 for (i
= 0; i
< 4; i
++)
5913 /* Specify explicitly that no argument relocations should take place
5914 if using the portable runtime calling conventions. */
5915 if (TARGET_PORTABLE_RUNTIME
)
5917 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5922 gcc_assert (CALL_P (call_insn
));
5923 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5924 link
; link
= XEXP (link
, 1))
5926 rtx use
= XEXP (link
, 0);
5928 if (! (GET_CODE (use
) == USE
5929 && GET_CODE (XEXP (use
, 0)) == REG
5930 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5933 arg_mode
= GET_MODE (XEXP (use
, 0));
5934 regno
= REGNO (XEXP (use
, 0));
5935 if (regno
>= 23 && regno
<= 26)
5937 arg_regs
[26 - regno
] = "GR";
5938 if (arg_mode
== DImode
)
5939 arg_regs
[25 - regno
] = "GR";
5941 else if (regno
>= 32 && regno
<= 39)
5943 if (arg_mode
== SFmode
)
5944 arg_regs
[(regno
- 32) / 2] = "FR";
5947 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5948 arg_regs
[(regno
- 34) / 2] = "FR";
5949 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
5951 arg_regs
[(regno
- 34) / 2] = "FU";
5952 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
5957 fputs ("\t.CALL ", asm_out_file
);
5958 for (i
= 0; i
< 4; i
++)
5963 fputc (',', asm_out_file
);
5964 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
5967 fputc ('\n', asm_out_file
);
5970 /* Inform reload about cases where moving X with a mode MODE to or from
5971 a register in RCLASS requires an extra scratch or immediate register.
5972 Return the class needed for the immediate register. */
5975 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
5976 machine_mode mode
, secondary_reload_info
*sri
)
5979 enum reg_class rclass
= (enum reg_class
) rclass_i
;
5981 /* Handle the easy stuff first. */
5982 if (rclass
== R1_REGS
)
5988 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
5994 /* If we have something like (mem (mem (...)), we can safely assume the
5995 inner MEM will end up in a general register after reloading, so there's
5996 no need for a secondary reload. */
5997 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
6000 /* Trying to load a constant into a FP register during PIC code
6001 generation requires %r1 as a scratch register. For float modes,
6002 the only legitimate constant is CONST0_RTX. However, there are
6003 a few patterns that accept constant double operands. */
6005 && FP_REG_CLASS_P (rclass
)
6006 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
6011 sri
->icode
= CODE_FOR_reload_insi_r1
;
6015 sri
->icode
= CODE_FOR_reload_indi_r1
;
6019 sri
->icode
= CODE_FOR_reload_insf_r1
;
6023 sri
->icode
= CODE_FOR_reload_indf_r1
;
6032 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6033 register when we're generating PIC code or when the operand isn't
6035 if (pa_symbolic_expression_p (x
))
6037 if (GET_CODE (x
) == HIGH
)
6040 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
6045 sri
->icode
= CODE_FOR_reload_insi_r1
;
6049 sri
->icode
= CODE_FOR_reload_indi_r1
;
6059 /* Profiling showed the PA port spends about 1.3% of its compilation
6060 time in true_regnum from calls inside pa_secondary_reload_class. */
6061 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6062 regno
= true_regnum (x
);
6064 /* Handle reloads for floating point loads and stores. */
6065 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6066 && FP_REG_CLASS_P (rclass
))
6072 /* We don't need a secondary reload for indexed memory addresses.
6074 When INT14_OK_STRICT is true, it might appear that we could
6075 directly allow register indirect memory addresses. However,
6076 this doesn't work because we don't support SUBREGs in
6077 floating-point register copies and reload doesn't tell us
6078 when it's going to use a SUBREG. */
6079 if (IS_INDEX_ADDR_P (x
))
6083 /* Request a secondary reload with a general scratch register
6084 for everything else. ??? Could symbolic operands be handled
6085 directly when generating non-pic PA 2.0 code? */
6087 ? direct_optab_handler (reload_in_optab
, mode
)
6088 : direct_optab_handler (reload_out_optab
, mode
));
6092 /* A SAR<->FP register copy requires an intermediate general register
6093 and secondary memory. We need a secondary reload with a general
6094 scratch register for spills. */
6095 if (rclass
== SHIFT_REGS
)
6098 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6101 ? direct_optab_handler (reload_in_optab
, mode
)
6102 : direct_optab_handler (reload_out_optab
, mode
));
6106 /* Handle FP copy. */
6107 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6108 return GENERAL_REGS
;
6111 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6112 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6113 && FP_REG_CLASS_P (rclass
))
6114 return GENERAL_REGS
;
6119 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6120 is only marked as live on entry by df-scan when it is a fixed
6121 register. It isn't a fixed register in the 64-bit runtime,
6122 so we need to mark it here. */
6125 pa_extra_live_on_entry (bitmap regs
)
6128 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6131 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6132 to prevent it from being deleted. */
6135 pa_eh_return_handler_rtx (void)
6139 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6140 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6141 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6146 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6147 by invisible reference. As a GCC extension, we also pass anything
6148 with a zero or variable size by reference.
6150 The 64-bit runtime does not describe passing any types by invisible
6151 reference. The internals of GCC can't currently handle passing
6152 empty structures, and zero or variable length arrays when they are
6153 not passed entirely on the stack or by reference. Thus, as a GCC
6154 extension, we pass these types by reference. The HP compiler doesn't
6155 support these types, so hopefully there shouldn't be any compatibility
6156 issues. This may have to be revisited when HP releases a C99 compiler
6157 or updates the ABI. */
6160 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
6161 machine_mode mode
, const_tree type
,
6162 bool named ATTRIBUTE_UNUSED
)
6167 size
= int_size_in_bytes (type
);
6169 size
= GET_MODE_SIZE (mode
);
6174 return size
<= 0 || size
> 8;
6178 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6183 && (AGGREGATE_TYPE_P (type
)
6184 || TREE_CODE (type
) == COMPLEX_TYPE
6185 || TREE_CODE (type
) == VECTOR_TYPE
)))
6187 /* Return none if justification is not required. */
6189 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6190 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6193 /* The directions set here are ignored when a BLKmode argument larger
6194 than a word is placed in a register. Different code is used for
6195 the stack and registers. This makes it difficult to have a
6196 consistent data representation for both the stack and registers.
6197 For both runtimes, the justification and padding for arguments on
6198 the stack and in registers should be identical. */
6200 /* The 64-bit runtime specifies left justification for aggregates. */
6203 /* The 32-bit runtime architecture specifies right justification.
6204 When the argument is passed on the stack, the argument is padded
6205 with garbage on the left. The HP compiler pads with zeros. */
6209 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6216 /* Do what is necessary for `va_start'. We look at the current function
6217 to determine if stdargs or varargs is used and fill in an initial
6218 va_list. A pointer to this constructor is returned. */
6221 hppa_builtin_saveregs (void)
6224 tree fntype
= TREE_TYPE (current_function_decl
);
6225 int argadj
= ((!stdarg_p (fntype
))
6226 ? UNITS_PER_WORD
: 0);
6229 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6231 offset
= crtl
->args
.arg_offset_rtx
;
6237 /* Adjust for varargs/stdarg differences. */
6239 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6241 offset
= crtl
->args
.arg_offset_rtx
;
6243 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6244 from the incoming arg pointer and growing to larger addresses. */
6245 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6246 emit_move_insn (gen_rtx_MEM (word_mode
,
6247 plus_constant (Pmode
,
6248 arg_pointer_rtx
, off
)),
6249 gen_rtx_REG (word_mode
, i
));
6251 /* The incoming args pointer points just beyond the flushback area;
6252 normally this is not a serious concern. However, when we are doing
6253 varargs/stdargs we want to make the arg pointer point to the start
6254 of the incoming argument area. */
6255 emit_move_insn (virtual_incoming_args_rtx
,
6256 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6258 /* Now return a pointer to the first anonymous argument. */
6259 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6260 virtual_incoming_args_rtx
,
6261 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6264 /* Store general registers on the stack. */
6265 dest
= gen_rtx_MEM (BLKmode
,
6266 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6268 set_mem_alias_set (dest
, get_varargs_alias_set ());
6269 set_mem_align (dest
, BITS_PER_WORD
);
6270 move_block_from_reg (23, dest
, 4);
6272 /* move_block_from_reg will emit code to store the argument registers
6273 individually as scalar stores.
6275 However, other insns may later load from the same addresses for
6276 a structure load (passing a struct to a varargs routine).
6278 The alias code assumes that such aliasing can never happen, so we
6279 have to keep memory referencing insns from moving up beyond the
6280 last argument register store. So we emit a blockage insn here. */
6281 emit_insn (gen_blockage ());
6283 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6284 crtl
->args
.internal_arg_pointer
,
6285 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6289 hppa_va_start (tree valist
, rtx nextarg
)
6291 nextarg
= expand_builtin_saveregs ();
6292 std_expand_builtin_va_start (valist
, nextarg
);
6296 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6301 /* Args grow upward. We can use the generic routines. */
6302 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6304 else /* !TARGET_64BIT */
6306 tree ptr
= build_pointer_type (type
);
6309 unsigned int size
, ofs
;
6312 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6316 ptr
= build_pointer_type (type
);
6318 size
= int_size_in_bytes (type
);
6319 valist_type
= TREE_TYPE (valist
);
6321 /* Args grow down. Not handled by generic routines. */
6323 u
= fold_convert (sizetype
, size_in_bytes (type
));
6324 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6325 t
= fold_build_pointer_plus (valist
, u
);
6327 /* Align to 4 or 8 byte boundary depending on argument size. */
6329 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6330 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6331 t
= fold_convert (valist_type
, t
);
6333 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6335 ofs
= (8 - size
) % 4;
6337 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6339 t
= fold_convert (ptr
, t
);
6340 t
= build_va_arg_indirect_ref (t
);
6343 t
= build_va_arg_indirect_ref (t
);
6349 /* True if MODE is valid for the target. By "valid", we mean able to
6350 be manipulated in non-trivial ways. In particular, this means all
6351 the arithmetic is supported.
6353 Currently, TImode is not valid as the HP 64-bit runtime documentation
6354 doesn't document the alignment and calling conventions for this type.
6355 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6356 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6359 pa_scalar_mode_supported_p (machine_mode mode
)
6361 int precision
= GET_MODE_PRECISION (mode
);
6363 switch (GET_MODE_CLASS (mode
))
6365 case MODE_PARTIAL_INT
:
6367 if (precision
== CHAR_TYPE_SIZE
)
6369 if (precision
== SHORT_TYPE_SIZE
)
6371 if (precision
== INT_TYPE_SIZE
)
6373 if (precision
== LONG_TYPE_SIZE
)
6375 if (precision
== LONG_LONG_TYPE_SIZE
)
6380 if (precision
== FLOAT_TYPE_SIZE
)
6382 if (precision
== DOUBLE_TYPE_SIZE
)
6384 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6388 case MODE_DECIMAL_FLOAT
:
6396 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6397 it branches into the delay slot. Otherwise, return FALSE. */
6400 branch_to_delay_slot_p (rtx_insn
*insn
)
6402 rtx_insn
*jump_insn
;
6404 if (dbr_sequence_length ())
6407 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6410 insn
= next_active_insn (insn
);
6411 if (jump_insn
== insn
)
6414 /* We can't rely on the length of asms. So, we return FALSE when
6415 the branch is followed by an asm. */
6417 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6418 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
6419 || get_attr_length (insn
) > 0)
6426 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6428 This occurs when INSN has an unfilled delay slot and is followed
6429 by an asm. Disaster can occur if the asm is empty and the jump
6430 branches into the delay slot. So, we add a nop in the delay slot
6431 when this occurs. */
6434 branch_needs_nop_p (rtx_insn
*insn
)
6436 rtx_insn
*jump_insn
;
6438 if (dbr_sequence_length ())
6441 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6444 insn
= next_active_insn (insn
);
6445 if (!insn
|| jump_insn
== insn
)
6448 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6449 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6450 && get_attr_length (insn
) > 0)
6457 /* Return TRUE if INSN, a forward jump insn, can use nullification
6458 to skip the following instruction. This avoids an extra cycle due
6459 to a mis-predicted branch when we fall through. */
6462 use_skip_p (rtx_insn
*insn
)
6464 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6468 insn
= next_active_insn (insn
);
6470 /* We can't rely on the length of asms, so we can't skip asms. */
6472 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6473 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6475 if (get_attr_length (insn
) == 4
6476 && jump_insn
== next_active_insn (insn
))
6478 if (get_attr_length (insn
) > 0)
6485 /* This routine handles all the normal conditional branch sequences we
6486 might need to generate. It handles compare immediate vs compare
6487 register, nullification of delay slots, varying length branches,
6488 negated branches, and all combinations of the above. It returns the
6489 output appropriate to emit the branch corresponding to all given
6493 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6495 static char buf
[100];
6497 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6498 int length
= get_attr_length (insn
);
6501 /* A conditional branch to the following instruction (e.g. the delay slot)
6502 is asking for a disaster. This can happen when not optimizing and
6503 when jump optimization fails.
6505 While it is usually safe to emit nothing, this can fail if the
6506 preceding instruction is a nullified branch with an empty delay
6507 slot and the same branch target as this branch. We could check
6508 for this but jump optimization should eliminate nop jumps. It
6509 is always safe to emit a nop. */
6510 if (branch_to_delay_slot_p (insn
))
6513 /* The doubleword form of the cmpib instruction doesn't have the LEU
6514 and GTU conditions while the cmpb instruction does. Since we accept
6515 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6516 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6517 operands
[2] = gen_rtx_REG (DImode
, 0);
6518 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6519 operands
[1] = gen_rtx_REG (DImode
, 0);
6521 /* If this is a long branch with its delay slot unfilled, set `nullify'
6522 as it can nullify the delay slot and save a nop. */
6523 if (length
== 8 && dbr_sequence_length () == 0)
6526 /* If this is a short forward conditional branch which did not get
6527 its delay slot filled, the delay slot can still be nullified. */
6528 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6529 nullify
= forward_branch_p (insn
);
6531 /* A forward branch over a single nullified insn can be done with a
6532 comclr instruction. This avoids a single cycle penalty due to
6533 mis-predicted branch if we fall through (branch not taken). */
6534 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6538 /* All short conditional branches except backwards with an unfilled
6542 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6544 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6545 if (GET_MODE (operands
[1]) == DImode
)
6548 strcat (buf
, "%B3");
6550 strcat (buf
, "%S3");
6552 strcat (buf
, " %2,%r1,%%r0");
6555 if (branch_needs_nop_p (insn
))
6556 strcat (buf
, ",n %2,%r1,%0%#");
6558 strcat (buf
, ",n %2,%r1,%0");
6561 strcat (buf
, " %2,%r1,%0");
6564 /* All long conditionals. Note a short backward branch with an
6565 unfilled delay slot is treated just like a long backward branch
6566 with an unfilled delay slot. */
6568 /* Handle weird backwards branch with a filled delay slot
6569 which is nullified. */
6570 if (dbr_sequence_length () != 0
6571 && ! forward_branch_p (insn
)
6574 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6575 if (GET_MODE (operands
[1]) == DImode
)
6578 strcat (buf
, "%S3");
6580 strcat (buf
, "%B3");
6581 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6583 /* Handle short backwards branch with an unfilled delay slot.
6584 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6585 taken and untaken branches. */
6586 else if (dbr_sequence_length () == 0
6587 && ! forward_branch_p (insn
)
6588 && INSN_ADDRESSES_SET_P ()
6589 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6590 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6592 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6593 if (GET_MODE (operands
[1]) == DImode
)
6596 strcat (buf
, "%B3 %2,%r1,%0%#");
6598 strcat (buf
, "%S3 %2,%r1,%0%#");
6602 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6603 if (GET_MODE (operands
[1]) == DImode
)
6606 strcat (buf
, "%S3");
6608 strcat (buf
, "%B3");
6610 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6612 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6617 /* The reversed conditional branch must branch over one additional
6618 instruction if the delay slot is filled and needs to be extracted
6619 by pa_output_lbranch. If the delay slot is empty or this is a
6620 nullified forward branch, the instruction after the reversed
6621 condition branch must be nullified. */
6622 if (dbr_sequence_length () == 0
6623 || (nullify
&& forward_branch_p (insn
)))
6627 operands
[4] = GEN_INT (length
);
6632 operands
[4] = GEN_INT (length
+ 4);
6635 /* Create a reversed conditional branch which branches around
6636 the following insns. */
6637 if (GET_MODE (operands
[1]) != DImode
)
6643 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6646 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6652 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6655 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6664 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6667 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6673 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6676 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6680 output_asm_insn (buf
, operands
);
6681 return pa_output_lbranch (operands
[0], insn
, xdelay
);
6686 /* This routine handles output of long unconditional branches that
6687 exceed the maximum range of a simple branch instruction. Since
6688 we don't have a register available for the branch, we save register
6689 %r1 in the frame marker, load the branch destination DEST into %r1,
6690 execute the branch, and restore %r1 in the delay slot of the branch.
6692 Since long branches may have an insn in the delay slot and the
6693 delay slot is used to restore %r1, we in general need to extract
6694 this insn and execute it before the branch. However, to facilitate
6695 use of this function by conditional branches, we also provide an
6696 option to not extract the delay insn so that it will be emitted
6697 after the long branch. So, if there is an insn in the delay slot,
6698 it is extracted if XDELAY is nonzero.
6700 The lengths of the various long-branch sequences are 20, 16 and 24
6701 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6704 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
6708 xoperands
[0] = dest
;
6710 /* First, free up the delay slot. */
6711 if (xdelay
&& dbr_sequence_length () != 0)
6713 /* We can't handle a jump in the delay slot. */
6714 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
6716 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6719 /* Now delete the delay insn. */
6720 SET_INSN_DELETED (NEXT_INSN (insn
));
6723 /* Output an insn to save %r1. The runtime documentation doesn't
6724 specify whether the "Clean Up" slot in the callers frame can
6725 be clobbered by the callee. It isn't copied by HP's builtin
6726 alloca, so this suggests that it can be clobbered if necessary.
6727 The "Static Link" location is copied by HP builtin alloca, so
6728 we avoid using it. Using the cleanup slot might be a problem
6729 if we have to interoperate with languages that pass cleanup
6730 information. However, it should be possible to handle these
6731 situations with GCC's asm feature.
6733 The "Current RP" slot is reserved for the called procedure, so
6734 we try to use it when we don't have a frame of our own. It's
6735 rather unlikely that we won't have a frame when we need to emit
6738 Really the way to go long term is a register scavenger; goto
6739 the target of the jump and find a register which we can use
6740 as a scratch to hold the value in %r1. Then, we wouldn't have
6741 to free up the delay slot or clobber a slot that may be needed
6742 for other purposes. */
6745 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6746 /* Use the return pointer slot in the frame marker. */
6747 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6749 /* Use the slot at -40 in the frame marker since HP builtin
6750 alloca doesn't copy it. */
6751 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6755 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6756 /* Use the return pointer slot in the frame marker. */
6757 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6759 /* Use the "Clean Up" slot in the frame marker. In GCC,
6760 the only other use of this location is for copying a
6761 floating point double argument from a floating-point
6762 register to two general registers. The copy is done
6763 as an "atomic" operation when outputting a call, so it
6764 won't interfere with our using the location here. */
6765 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6768 if (TARGET_PORTABLE_RUNTIME
)
6770 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6771 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6772 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6776 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
6777 if (TARGET_SOM
|| !TARGET_GAS
)
6779 xoperands
[1] = gen_label_rtx ();
6780 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands
);
6781 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6782 CODE_LABEL_NUMBER (xoperands
[1]));
6783 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands
);
6787 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands
);
6788 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
6790 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6793 /* Now output a very long branch to the original target. */
6794 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6796 /* Now restore the value of %r1 in the delay slot. */
6799 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6800 return "ldd -16(%%r30),%%r1";
6802 return "ldd -40(%%r30),%%r1";
6806 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6807 return "ldw -20(%%r30),%%r1";
6809 return "ldw -12(%%r30),%%r1";
6813 /* This routine handles all the branch-on-bit conditional branch sequences we
6814 might need to generate. It handles nullification of delay slots,
6815 varying length branches, negated branches and all combinations of the
6816 above. it returns the appropriate output template to emit the branch. */
6819 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
6821 static char buf
[100];
6823 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6824 int length
= get_attr_length (insn
);
6827 /* A conditional branch to the following instruction (e.g. the delay slot) is
6828 asking for a disaster. I do not think this can happen as this pattern
6829 is only used when optimizing; jump optimization should eliminate the
6830 jump. But be prepared just in case. */
6832 if (branch_to_delay_slot_p (insn
))
6835 /* If this is a long branch with its delay slot unfilled, set `nullify'
6836 as it can nullify the delay slot and save a nop. */
6837 if (length
== 8 && dbr_sequence_length () == 0)
6840 /* If this is a short forward conditional branch which did not get
6841 its delay slot filled, the delay slot can still be nullified. */
6842 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6843 nullify
= forward_branch_p (insn
);
6845 /* A forward branch over a single nullified insn can be done with a
6846 extrs instruction. This avoids a single cycle penalty due to
6847 mis-predicted branch if we fall through (branch not taken). */
6848 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6853 /* All short conditional branches except backwards with an unfilled
6857 strcpy (buf
, "{extrs,|extrw,s,}");
6859 strcpy (buf
, "bb,");
6860 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6861 strcpy (buf
, "extrd,s,*");
6862 else if (GET_MODE (operands
[0]) == DImode
)
6863 strcpy (buf
, "bb,*");
6864 if ((which
== 0 && negated
)
6865 || (which
== 1 && ! negated
))
6870 strcat (buf
, " %0,%1,1,%%r0");
6871 else if (nullify
&& negated
)
6873 if (branch_needs_nop_p (insn
))
6874 strcat (buf
, ",n %0,%1,%3%#");
6876 strcat (buf
, ",n %0,%1,%3");
6878 else if (nullify
&& ! negated
)
6880 if (branch_needs_nop_p (insn
))
6881 strcat (buf
, ",n %0,%1,%2%#");
6883 strcat (buf
, ",n %0,%1,%2");
6885 else if (! nullify
&& negated
)
6886 strcat (buf
, " %0,%1,%3");
6887 else if (! nullify
&& ! negated
)
6888 strcat (buf
, " %0,%1,%2");
6891 /* All long conditionals. Note a short backward branch with an
6892 unfilled delay slot is treated just like a long backward branch
6893 with an unfilled delay slot. */
6895 /* Handle weird backwards branch with a filled delay slot
6896 which is nullified. */
6897 if (dbr_sequence_length () != 0
6898 && ! forward_branch_p (insn
)
6901 strcpy (buf
, "bb,");
6902 if (GET_MODE (operands
[0]) == DImode
)
6904 if ((which
== 0 && negated
)
6905 || (which
== 1 && ! negated
))
6910 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
6912 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
6914 /* Handle short backwards branch with an unfilled delay slot.
6915 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6916 taken and untaken branches. */
6917 else if (dbr_sequence_length () == 0
6918 && ! forward_branch_p (insn
)
6919 && INSN_ADDRESSES_SET_P ()
6920 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6921 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6923 strcpy (buf
, "bb,");
6924 if (GET_MODE (operands
[0]) == DImode
)
6926 if ((which
== 0 && negated
)
6927 || (which
== 1 && ! negated
))
6932 strcat (buf
, " %0,%1,%3%#");
6934 strcat (buf
, " %0,%1,%2%#");
6938 if (GET_MODE (operands
[0]) == DImode
)
6939 strcpy (buf
, "extrd,s,*");
6941 strcpy (buf
, "{extrs,|extrw,s,}");
6942 if ((which
== 0 && negated
)
6943 || (which
== 1 && ! negated
))
6947 if (nullify
&& negated
)
6948 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
6949 else if (nullify
&& ! negated
)
6950 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
6952 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
6954 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
6959 /* The reversed conditional branch must branch over one additional
6960 instruction if the delay slot is filled and needs to be extracted
6961 by pa_output_lbranch. If the delay slot is empty or this is a
6962 nullified forward branch, the instruction after the reversed
6963 condition branch must be nullified. */
6964 if (dbr_sequence_length () == 0
6965 || (nullify
&& forward_branch_p (insn
)))
6969 operands
[4] = GEN_INT (length
);
6974 operands
[4] = GEN_INT (length
+ 4);
6977 if (GET_MODE (operands
[0]) == DImode
)
6978 strcpy (buf
, "bb,*");
6980 strcpy (buf
, "bb,");
6981 if ((which
== 0 && negated
)
6982 || (which
== 1 && !negated
))
6987 strcat (buf
, ",n %0,%1,.+%4");
6989 strcat (buf
, " %0,%1,.+%4");
6990 output_asm_insn (buf
, operands
);
6991 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
6997 /* This routine handles all the branch-on-variable-bit conditional branch
6998 sequences we might need to generate. It handles nullification of delay
6999 slots, varying length branches, negated branches and all combinations
7000 of the above. it returns the appropriate output template to emit the
7004 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
7007 static char buf
[100];
7009 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7010 int length
= get_attr_length (insn
);
7013 /* A conditional branch to the following instruction (e.g. the delay slot) is
7014 asking for a disaster. I do not think this can happen as this pattern
7015 is only used when optimizing; jump optimization should eliminate the
7016 jump. But be prepared just in case. */
7018 if (branch_to_delay_slot_p (insn
))
7021 /* If this is a long branch with its delay slot unfilled, set `nullify'
7022 as it can nullify the delay slot and save a nop. */
7023 if (length
== 8 && dbr_sequence_length () == 0)
7026 /* If this is a short forward conditional branch which did not get
7027 its delay slot filled, the delay slot can still be nullified. */
7028 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7029 nullify
= forward_branch_p (insn
);
7031 /* A forward branch over a single nullified insn can be done with a
7032 extrs instruction. This avoids a single cycle penalty due to
7033 mis-predicted branch if we fall through (branch not taken). */
7034 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7039 /* All short conditional branches except backwards with an unfilled
7043 strcpy (buf
, "{vextrs,|extrw,s,}");
7045 strcpy (buf
, "{bvb,|bb,}");
7046 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7047 strcpy (buf
, "extrd,s,*");
7048 else if (GET_MODE (operands
[0]) == DImode
)
7049 strcpy (buf
, "bb,*");
7050 if ((which
== 0 && negated
)
7051 || (which
== 1 && ! negated
))
7056 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7057 else if (nullify
&& negated
)
7059 if (branch_needs_nop_p (insn
))
7060 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7062 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7064 else if (nullify
&& ! negated
)
7066 if (branch_needs_nop_p (insn
))
7067 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7069 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7071 else if (! nullify
&& negated
)
7072 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7073 else if (! nullify
&& ! negated
)
7074 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7077 /* All long conditionals. Note a short backward branch with an
7078 unfilled delay slot is treated just like a long backward branch
7079 with an unfilled delay slot. */
7081 /* Handle weird backwards branch with a filled delay slot
7082 which is nullified. */
7083 if (dbr_sequence_length () != 0
7084 && ! forward_branch_p (insn
)
7087 strcpy (buf
, "{bvb,|bb,}");
7088 if (GET_MODE (operands
[0]) == DImode
)
7090 if ((which
== 0 && negated
)
7091 || (which
== 1 && ! negated
))
7096 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7098 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7100 /* Handle short backwards branch with an unfilled delay slot.
7101 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7102 taken and untaken branches. */
7103 else if (dbr_sequence_length () == 0
7104 && ! forward_branch_p (insn
)
7105 && INSN_ADDRESSES_SET_P ()
7106 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7107 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7109 strcpy (buf
, "{bvb,|bb,}");
7110 if (GET_MODE (operands
[0]) == DImode
)
7112 if ((which
== 0 && negated
)
7113 || (which
== 1 && ! negated
))
7118 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7120 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7124 strcpy (buf
, "{vextrs,|extrw,s,}");
7125 if (GET_MODE (operands
[0]) == DImode
)
7126 strcpy (buf
, "extrd,s,*");
7127 if ((which
== 0 && negated
)
7128 || (which
== 1 && ! negated
))
7132 if (nullify
&& negated
)
7133 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7134 else if (nullify
&& ! negated
)
7135 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7137 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7139 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7144 /* The reversed conditional branch must branch over one additional
7145 instruction if the delay slot is filled and needs to be extracted
7146 by pa_output_lbranch. If the delay slot is empty or this is a
7147 nullified forward branch, the instruction after the reversed
7148 condition branch must be nullified. */
7149 if (dbr_sequence_length () == 0
7150 || (nullify
&& forward_branch_p (insn
)))
7154 operands
[4] = GEN_INT (length
);
7159 operands
[4] = GEN_INT (length
+ 4);
7162 if (GET_MODE (operands
[0]) == DImode
)
7163 strcpy (buf
, "bb,*");
7165 strcpy (buf
, "{bvb,|bb,}");
7166 if ((which
== 0 && negated
)
7167 || (which
== 1 && !negated
))
7172 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7174 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7175 output_asm_insn (buf
, operands
);
7176 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7182 /* Return the output template for emitting a dbra type insn.
7184 Note it may perform some output operations on its own before
7185 returning the final output string. */
7187 pa_output_dbra (rtx
*operands
, rtx_insn
*insn
, int which_alternative
)
7189 int length
= get_attr_length (insn
);
7191 /* A conditional branch to the following instruction (e.g. the delay slot) is
7192 asking for a disaster. Be prepared! */
7194 if (branch_to_delay_slot_p (insn
))
7196 if (which_alternative
== 0)
7197 return "ldo %1(%0),%0";
7198 else if (which_alternative
== 1)
7200 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
7201 output_asm_insn ("ldw -16(%%r30),%4", operands
);
7202 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7203 return "{fldws|fldw} -16(%%r30),%0";
7207 output_asm_insn ("ldw %0,%4", operands
);
7208 return "ldo %1(%4),%4\n\tstw %4,%0";
7212 if (which_alternative
== 0)
7214 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7217 /* If this is a long branch with its delay slot unfilled, set `nullify'
7218 as it can nullify the delay slot and save a nop. */
7219 if (length
== 8 && dbr_sequence_length () == 0)
7222 /* If this is a short forward conditional branch which did not get
7223 its delay slot filled, the delay slot can still be nullified. */
7224 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7225 nullify
= forward_branch_p (insn
);
7232 if (branch_needs_nop_p (insn
))
7233 return "addib,%C2,n %1,%0,%3%#";
7235 return "addib,%C2,n %1,%0,%3";
7238 return "addib,%C2 %1,%0,%3";
7241 /* Handle weird backwards branch with a fulled delay slot
7242 which is nullified. */
7243 if (dbr_sequence_length () != 0
7244 && ! forward_branch_p (insn
)
7246 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7247 /* Handle short backwards branch with an unfilled delay slot.
7248 Using a addb;nop rather than addi;bl saves 1 cycle for both
7249 taken and untaken branches. */
7250 else if (dbr_sequence_length () == 0
7251 && ! forward_branch_p (insn
)
7252 && INSN_ADDRESSES_SET_P ()
7253 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7254 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7255 return "addib,%C2 %1,%0,%3%#";
7257 /* Handle normal cases. */
7259 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7261 return "addi,%N2 %1,%0,%0\n\tb %3";
7264 /* The reversed conditional branch must branch over one additional
7265 instruction if the delay slot is filled and needs to be extracted
7266 by pa_output_lbranch. If the delay slot is empty or this is a
7267 nullified forward branch, the instruction after the reversed
7268 condition branch must be nullified. */
7269 if (dbr_sequence_length () == 0
7270 || (nullify
&& forward_branch_p (insn
)))
7274 operands
[4] = GEN_INT (length
);
7279 operands
[4] = GEN_INT (length
+ 4);
7283 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7285 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7287 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7291 /* Deal with gross reload from FP register case. */
7292 else if (which_alternative
== 1)
7294 /* Move loop counter from FP register to MEM then into a GR,
7295 increment the GR, store the GR into MEM, and finally reload
7296 the FP register from MEM from within the branch's delay slot. */
7297 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7299 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7301 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7302 else if (length
== 28)
7303 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7306 operands
[5] = GEN_INT (length
- 16);
7307 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7308 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7309 return pa_output_lbranch (operands
[3], insn
, 0);
7312 /* Deal with gross reload from memory case. */
7315 /* Reload loop counter from memory, the store back to memory
7316 happens in the branch's delay slot. */
7317 output_asm_insn ("ldw %0,%4", operands
);
7319 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7320 else if (length
== 16)
7321 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7324 operands
[5] = GEN_INT (length
- 4);
7325 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7326 return pa_output_lbranch (operands
[3], insn
, 0);
7331 /* Return the output template for emitting a movb type insn.
7333 Note it may perform some output operations on its own before
7334 returning the final output string. */
7336 pa_output_movb (rtx
*operands
, rtx_insn
*insn
, int which_alternative
,
7337 int reverse_comparison
)
7339 int length
= get_attr_length (insn
);
7341 /* A conditional branch to the following instruction (e.g. the delay slot) is
7342 asking for a disaster. Be prepared! */
7344 if (branch_to_delay_slot_p (insn
))
7346 if (which_alternative
== 0)
7347 return "copy %1,%0";
7348 else if (which_alternative
== 1)
7350 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7351 return "{fldws|fldw} -16(%%r30),%0";
7353 else if (which_alternative
== 2)
7359 /* Support the second variant. */
7360 if (reverse_comparison
)
7361 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7363 if (which_alternative
== 0)
7365 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7368 /* If this is a long branch with its delay slot unfilled, set `nullify'
7369 as it can nullify the delay slot and save a nop. */
7370 if (length
== 8 && dbr_sequence_length () == 0)
7373 /* If this is a short forward conditional branch which did not get
7374 its delay slot filled, the delay slot can still be nullified. */
7375 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7376 nullify
= forward_branch_p (insn
);
7383 if (branch_needs_nop_p (insn
))
7384 return "movb,%C2,n %1,%0,%3%#";
7386 return "movb,%C2,n %1,%0,%3";
7389 return "movb,%C2 %1,%0,%3";
7392 /* Handle weird backwards branch with a filled delay slot
7393 which is nullified. */
7394 if (dbr_sequence_length () != 0
7395 && ! forward_branch_p (insn
)
7397 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7399 /* Handle short backwards branch with an unfilled delay slot.
7400 Using a movb;nop rather than or;bl saves 1 cycle for both
7401 taken and untaken branches. */
7402 else if (dbr_sequence_length () == 0
7403 && ! forward_branch_p (insn
)
7404 && INSN_ADDRESSES_SET_P ()
7405 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7406 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7407 return "movb,%C2 %1,%0,%3%#";
7408 /* Handle normal cases. */
7410 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7412 return "or,%N2 %1,%%r0,%0\n\tb %3";
7415 /* The reversed conditional branch must branch over one additional
7416 instruction if the delay slot is filled and needs to be extracted
7417 by pa_output_lbranch. If the delay slot is empty or this is a
7418 nullified forward branch, the instruction after the reversed
7419 condition branch must be nullified. */
7420 if (dbr_sequence_length () == 0
7421 || (nullify
&& forward_branch_p (insn
)))
7425 operands
[4] = GEN_INT (length
);
7430 operands
[4] = GEN_INT (length
+ 4);
7434 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7436 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7438 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7441 /* Deal with gross reload for FP destination register case. */
7442 else if (which_alternative
== 1)
7444 /* Move source register to MEM, perform the branch test, then
7445 finally load the FP register from MEM from within the branch's
7447 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7449 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7450 else if (length
== 16)
7451 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7454 operands
[4] = GEN_INT (length
- 4);
7455 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7456 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7457 return pa_output_lbranch (operands
[3], insn
, 0);
7460 /* Deal with gross reload from memory case. */
7461 else if (which_alternative
== 2)
7463 /* Reload loop counter from memory, the store back to memory
7464 happens in the branch's delay slot. */
7466 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7467 else if (length
== 12)
7468 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7471 operands
[4] = GEN_INT (length
);
7472 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7474 return pa_output_lbranch (operands
[3], insn
, 0);
7477 /* Handle SAR as a destination. */
7481 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7482 else if (length
== 12)
7483 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7486 operands
[4] = GEN_INT (length
);
7487 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7489 return pa_output_lbranch (operands
[3], insn
, 0);
7494 /* Copy any FP arguments in INSN into integer registers. */
7496 copy_fp_args (rtx_insn
*insn
)
7501 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7503 int arg_mode
, regno
;
7504 rtx use
= XEXP (link
, 0);
7506 if (! (GET_CODE (use
) == USE
7507 && GET_CODE (XEXP (use
, 0)) == REG
7508 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7511 arg_mode
= GET_MODE (XEXP (use
, 0));
7512 regno
= REGNO (XEXP (use
, 0));
7514 /* Is it a floating point register? */
7515 if (regno
>= 32 && regno
<= 39)
7517 /* Copy the FP register into an integer register via memory. */
7518 if (arg_mode
== SFmode
)
7520 xoperands
[0] = XEXP (use
, 0);
7521 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7522 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7523 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7527 xoperands
[0] = XEXP (use
, 0);
7528 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7529 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7530 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7531 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7537 /* Compute length of the FP argument copy sequence for INSN. */
7539 length_fp_args (rtx_insn
*insn
)
7544 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7546 int arg_mode
, regno
;
7547 rtx use
= XEXP (link
, 0);
7549 if (! (GET_CODE (use
) == USE
7550 && GET_CODE (XEXP (use
, 0)) == REG
7551 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7554 arg_mode
= GET_MODE (XEXP (use
, 0));
7555 regno
= REGNO (XEXP (use
, 0));
7557 /* Is it a floating point register? */
7558 if (regno
>= 32 && regno
<= 39)
7560 if (arg_mode
== SFmode
)
7570 /* Return the attribute length for the millicode call instruction INSN.
7571 The length must match the code generated by pa_output_millicode_call.
7572 We include the delay slot in the returned length as it is better to
7573 over estimate the length than to under estimate it. */
7576 pa_attr_length_millicode_call (rtx_insn
*insn
)
7578 unsigned long distance
= -1;
7579 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7581 if (INSN_ADDRESSES_SET_P ())
7583 distance
= (total
+ insn_current_reference_address (insn
));
7584 if (distance
< total
)
7590 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7595 else if (TARGET_PORTABLE_RUNTIME
)
7599 if (!TARGET_LONG_CALLS
&& distance
< MAX_PCREL17F_OFFSET
)
7609 /* INSN is a function call.
7611 CALL_DEST is the routine we are calling. */
7614 pa_output_millicode_call (rtx_insn
*insn
, rtx call_dest
)
7616 int attr_length
= get_attr_length (insn
);
7617 int seq_length
= dbr_sequence_length ();
7620 xoperands
[0] = call_dest
;
7621 xoperands
[2] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7623 /* Handle the common case where we are sure that the branch will
7624 reach the beginning of the $CODE$ subspace. The within reach
7625 form of the $$sh_func_adrs call has a length of 28. Because it
7626 has an attribute type of sh_func_adrs, it never has a nonzero
7627 sequence length (i.e., the delay slot is never filled). */
7628 if (!TARGET_LONG_CALLS
7629 && (attr_length
== 8
7630 || (attr_length
== 28
7631 && get_attr_type (insn
) == TYPE_SH_FUNC_ADRS
)))
7633 output_asm_insn ("{bl|b,l} %0,%2", xoperands
);
7639 /* It might seem that one insn could be saved by accessing
7640 the millicode function using the linkage table. However,
7641 this doesn't work in shared libraries and other dynamically
7642 loaded objects. Using a pc-relative sequence also avoids
7643 problems related to the implicit use of the gp register. */
7644 output_asm_insn ("b,l .+8,%%r1", xoperands
);
7648 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
7649 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
7653 xoperands
[1] = gen_label_rtx ();
7654 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7655 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7656 CODE_LABEL_NUMBER (xoperands
[1]));
7657 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7660 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7662 else if (TARGET_PORTABLE_RUNTIME
)
7664 /* Pure portable runtime doesn't allow be/ble; we also don't
7665 have PIC support in the assembler/linker, so this sequence
7668 /* Get the address of our target into %r1. */
7669 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7670 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7672 /* Get our return address into %r31. */
7673 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7674 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7676 /* Jump to our target address in %r1. */
7677 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7681 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7683 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7685 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7689 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7690 output_asm_insn ("addi 16,%%r1,%%r31", xoperands
);
7692 if (TARGET_SOM
|| !TARGET_GAS
)
7694 /* The HP assembler can generate relocations for the
7695 difference of two symbols. GAS can do this for a
7696 millicode symbol but not an arbitrary external
7697 symbol when generating SOM output. */
7698 xoperands
[1] = gen_label_rtx ();
7699 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7700 CODE_LABEL_NUMBER (xoperands
[1]));
7701 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7702 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7706 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands
);
7707 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7711 /* Jump to our target address in %r1. */
7712 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7716 if (seq_length
== 0)
7717 output_asm_insn ("nop", xoperands
);
7722 /* Return the attribute length of the call instruction INSN. The SIBCALL
7723 flag indicates whether INSN is a regular call or a sibling call. The
7724 length returned must be longer than the code actually generated by
7725 pa_output_call. Since branch shortening is done before delay branch
7726 sequencing, there is no way to determine whether or not the delay
7727 slot will be filled during branch shortening. Even when the delay
7728 slot is filled, we may have to add a nop if the delay slot contains
7729 a branch that can't reach its target. Thus, we always have to include
7730 the delay slot in the length estimate. This used to be done in
7731 pa_adjust_insn_length but we do it here now as some sequences always
7732 fill the delay slot and we can save four bytes in the estimate for
7736 pa_attr_length_call (rtx_insn
*insn
, int sibcall
)
7739 rtx call
, call_dest
;
7742 rtx pat
= PATTERN (insn
);
7743 unsigned long distance
= -1;
7745 gcc_assert (CALL_P (insn
));
7747 if (INSN_ADDRESSES_SET_P ())
7749 unsigned long total
;
7751 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7752 distance
= (total
+ insn_current_reference_address (insn
));
7753 if (distance
< total
)
7757 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7759 /* Get the call rtx. */
7760 call
= XVECEXP (pat
, 0, 0);
7761 if (GET_CODE (call
) == SET
)
7762 call
= SET_SRC (call
);
7764 gcc_assert (GET_CODE (call
) == CALL
);
7766 /* Determine if this is a local call. */
7767 call_dest
= XEXP (XEXP (call
, 0), 0);
7768 call_decl
= SYMBOL_REF_DECL (call_dest
);
7769 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7771 /* pc-relative branch. */
7772 if (!TARGET_LONG_CALLS
7773 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7774 || distance
< MAX_PCREL17F_OFFSET
))
7777 /* 64-bit plabel sequence. */
7778 else if (TARGET_64BIT
&& !local_call
)
7779 length
+= sibcall
? 28 : 24;
7781 /* non-pic long absolute branch sequence. */
7782 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7785 /* long pc-relative branch sequence. */
7786 else if (TARGET_LONG_PIC_SDIFF_CALL
7787 || (TARGET_GAS
&& !TARGET_SOM
7788 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
)))
7792 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7796 /* 32-bit plabel sequence. */
7802 length
+= length_fp_args (insn
);
7812 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7820 /* INSN is a function call.
7822 CALL_DEST is the routine we are calling. */
7825 pa_output_call (rtx_insn
*insn
, rtx call_dest
, int sibcall
)
7827 int seq_length
= dbr_sequence_length ();
7828 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7829 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7832 xoperands
[0] = call_dest
;
7834 /* Handle the common case where we're sure that the branch will reach
7835 the beginning of the "$CODE$" subspace. This is the beginning of
7836 the current function if we are in a named section. */
7837 if (!TARGET_LONG_CALLS
&& pa_attr_length_call (insn
, sibcall
) == 8)
7839 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7840 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7844 if (TARGET_64BIT
&& !local_call
)
7846 /* ??? As far as I can tell, the HP linker doesn't support the
7847 long pc-relative sequence described in the 64-bit runtime
7848 architecture. So, we use a slightly longer indirect call. */
7849 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7850 xoperands
[1] = gen_label_rtx ();
7852 /* If this isn't a sibcall, we put the load of %r27 into the
7853 delay slot. We can't do this in a sibcall as we don't
7854 have a second call-clobbered scratch register available.
7855 We don't need to do anything when generating fast indirect
7857 if (seq_length
!= 0 && !sibcall
)
7859 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7862 /* Now delete the delay insn. */
7863 SET_INSN_DELETED (NEXT_INSN (insn
));
7867 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7868 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7869 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7873 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7874 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7875 output_asm_insn ("bve (%%r1)", xoperands
);
7879 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7880 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7881 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7887 int indirect_call
= 0;
7889 /* Emit a long call. There are several different sequences
7890 of increasing length and complexity. In most cases,
7891 they don't allow an instruction in the delay slot. */
7892 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7893 && !TARGET_LONG_PIC_SDIFF_CALL
7894 && !(TARGET_GAS
&& !TARGET_SOM
7895 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7903 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
7905 /* A non-jump insn in the delay slot. By definition we can
7906 emit this insn before the call (and in fact before argument
7908 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
7911 /* Now delete the delay insn. */
7912 SET_INSN_DELETED (NEXT_INSN (insn
));
7916 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7918 /* This is the best sequence for making long calls in
7919 non-pic code. Unfortunately, GNU ld doesn't provide
7920 the stub needed for external calls, and GAS's support
7921 for this with the SOM linker is buggy. It is safe
7922 to use this for local calls. */
7923 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7925 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
7929 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7932 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7934 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7940 if (TARGET_LONG_PIC_SDIFF_CALL
)
7942 /* The HP assembler and linker can handle relocations
7943 for the difference of two symbols. The HP assembler
7944 recognizes the sequence as a pc-relative call and
7945 the linker provides stubs when needed. */
7946 xoperands
[1] = gen_label_rtx ();
7947 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7948 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7949 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7950 CODE_LABEL_NUMBER (xoperands
[1]));
7951 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7953 else if (TARGET_GAS
&& !TARGET_SOM
7954 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7956 /* GAS currently can't generate the relocations that
7957 are needed for the SOM linker under HP-UX using this
7958 sequence. The GNU linker doesn't generate the stubs
7959 that are needed for external calls on TARGET_ELF32
7960 with this sequence. For now, we have to use a
7961 longer plabel sequence when using GAS. */
7962 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7963 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7965 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7970 /* Emit a long plabel-based call sequence. This is
7971 essentially an inline implementation of $$dyncall.
7972 We don't actually try to call $$dyncall as this is
7973 as difficult as calling the function itself. */
7974 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7975 xoperands
[1] = gen_label_rtx ();
7977 /* Since the call is indirect, FP arguments in registers
7978 need to be copied to the general registers. Then, the
7979 argument relocation stub will copy them back. */
7981 copy_fp_args (insn
);
7985 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
7986 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
7987 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
7991 output_asm_insn ("addil LR'%0-$global$,%%r27",
7993 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7997 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
7998 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
7999 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
8000 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
8002 if (!sibcall
&& !TARGET_PA_20
)
8004 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8005 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8006 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
8008 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
8015 output_asm_insn ("bve (%%r1)", xoperands
);
8020 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8021 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
8025 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8030 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8031 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8036 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8037 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
8039 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
8043 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8044 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
8046 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
8049 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
8051 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8059 if (seq_length
== 0)
8060 output_asm_insn ("nop", xoperands
);
8065 /* Return the attribute length of the indirect call instruction INSN.
8066 The length must match the code generated by output_indirect call.
8067 The returned length includes the delay slot. Currently, the delay
8068 slot of an indirect call sequence is not exposed and it is used by
8069 the sequence itself. */
8072 pa_attr_length_indirect_call (rtx_insn
*insn
)
8074 unsigned long distance
= -1;
8075 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8077 if (INSN_ADDRESSES_SET_P ())
8079 distance
= (total
+ insn_current_reference_address (insn
));
8080 if (distance
< total
)
8087 if (TARGET_FAST_INDIRECT_CALLS
8088 || (!TARGET_LONG_CALLS
8089 && !TARGET_PORTABLE_RUNTIME
8090 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
8091 || distance
< MAX_PCREL17F_OFFSET
)))
8097 if (TARGET_PORTABLE_RUNTIME
)
8100 /* Out of reach, can use ble. */
8105 pa_output_indirect_call (rtx_insn
*insn
, rtx call_dest
)
8111 xoperands
[0] = call_dest
;
8112 output_asm_insn ("ldd 16(%0),%%r2", xoperands
);
8113 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands
);
8117 /* First the special case for kernels, level 0 systems, etc. */
8118 if (TARGET_FAST_INDIRECT_CALLS
)
8119 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8121 /* Now the normal case -- we can reach $$dyncall directly or
8122 we're sure that we can get there via a long-branch stub.
8124 No need to check target flags as the length uniquely identifies
8125 the remaining cases. */
8126 if (pa_attr_length_indirect_call (insn
) == 8)
8128 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8129 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8130 variant of the B,L instruction can't be used on the SOM target. */
8131 if (TARGET_PA_20
&& !TARGET_SOM
)
8132 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8134 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8137 /* Long millicode call, but we are not generating PIC or portable runtime
8139 if (pa_attr_length_indirect_call (insn
) == 12)
8140 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8142 /* Long millicode call for portable runtime. */
8143 if (pa_attr_length_indirect_call (insn
) == 16)
8144 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8146 /* We need a long PIC call to $$dyncall. */
8147 xoperands
[0] = NULL_RTX
;
8148 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8149 if (TARGET_SOM
|| !TARGET_GAS
)
8151 xoperands
[0] = gen_label_rtx ();
8152 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands
);
8153 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8154 CODE_LABEL_NUMBER (xoperands
[0]));
8155 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands
);
8159 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands
);
8160 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8163 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8164 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands
);
8168 /* In HPUX 8.0's shared library scheme, special relocations are needed
8169 for function labels if they might be passed to a function
8170 in a shared library (because shared libraries don't live in code
8171 space), and special magic is needed to construct their address. */
8174 pa_encode_label (rtx sym
)
8176 const char *str
= XSTR (sym
, 0);
8177 int len
= strlen (str
) + 1;
8180 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8184 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8188 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8190 int old_referenced
= 0;
8192 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8194 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8196 default_encode_section_info (decl
, rtl
, first
);
8198 if (first
&& TEXT_SPACE_P (decl
))
8200 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8201 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8202 pa_encode_label (XEXP (rtl
, 0));
8204 else if (old_referenced
)
8205 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
8208 /* This is sort of inverse to pa_encode_section_info. */
8211 pa_strip_name_encoding (const char *str
)
8213 str
+= (*str
== '@');
8214 str
+= (*str
== '*');
8218 /* Returns 1 if OP is a function label involved in a simple addition
8219 with a constant. Used to keep certain patterns from matching
8220 during instruction combination. */
8222 pa_is_function_label_plus_const (rtx op
)
8224 /* Strip off any CONST. */
8225 if (GET_CODE (op
) == CONST
)
8228 return (GET_CODE (op
) == PLUS
8229 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8230 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8233 /* Output assembly code for a thunk to FUNCTION. */
8236 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8237 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8240 static unsigned int current_thunk_number
;
8241 int val_14
= VAL_14_BITS_P (delta
);
8242 unsigned int old_last_address
= last_address
, nbytes
= 0;
8246 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8247 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8248 xoperands
[2] = GEN_INT (delta
);
8250 final_start_function (emit_barrier (), file
, 1);
8252 /* Output the thunk. We know that the function is in the same
8253 translation unit (i.e., the same space) as the thunk, and that
8254 thunks are output after their method. Thus, we don't need an
8255 external branch to reach the function. With SOM and GAS,
8256 functions and thunks are effectively in different sections.
8257 Thus, we can always use a IA-relative branch and the linker
8258 will add a long branch stub if necessary.
8260 However, we have to be careful when generating PIC code on the
8261 SOM port to ensure that the sequence does not transfer to an
8262 import stub for the target function as this could clobber the
8263 return value saved at SP-24. This would also apply to the
8264 32-bit linux port if the multi-space model is implemented. */
8265 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8266 && !(flag_pic
&& TREE_PUBLIC (function
))
8267 && (TARGET_GAS
|| last_address
< 262132))
8268 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8269 && ((targetm_common
.have_named_sections
8270 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8271 /* The GNU 64-bit linker has rather poor stub management.
8272 So, we use a long branch from thunks that aren't in
8273 the same section as the target function. */
8275 && (DECL_SECTION_NAME (thunk_fndecl
)
8276 != DECL_SECTION_NAME (function
)))
8277 || ((DECL_SECTION_NAME (thunk_fndecl
)
8278 == DECL_SECTION_NAME (function
))
8279 && last_address
< 262132)))
8280 /* In this case, we need to be able to reach the start of
8281 the stub table even though the function is likely closer
8282 and can be jumped to directly. */
8283 || (targetm_common
.have_named_sections
8284 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8285 && DECL_SECTION_NAME (function
) == NULL
8286 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8288 || (!targetm_common
.have_named_sections
8289 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8292 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8294 output_asm_insn ("b %0", xoperands
);
8298 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8303 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8307 else if (TARGET_64BIT
)
8309 /* We only have one call-clobbered scratch register, so we can't
8310 make use of the delay slot if delta doesn't fit in 14 bits. */
8313 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8314 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8317 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8321 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8322 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8326 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8327 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8332 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8333 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8338 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8342 else if (TARGET_PORTABLE_RUNTIME
)
8344 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8345 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8348 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8350 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8354 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8359 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8363 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8365 /* The function is accessible from outside this module. The only
8366 way to avoid an import stub between the thunk and function is to
8367 call the function directly with an indirect sequence similar to
8368 that used by $$dyncall. This is possible because $$dyncall acts
8369 as the import stub in an indirect call. */
8370 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8371 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8372 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8373 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8374 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8375 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8376 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8377 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8378 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8382 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8388 output_asm_insn ("bve (%%r22)", xoperands
);
8391 else if (TARGET_NO_SPACE_REGS
)
8393 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8398 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8399 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8400 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8405 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8407 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8411 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8413 if (TARGET_SOM
|| !TARGET_GAS
)
8415 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands
);
8416 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands
);
8420 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8421 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands
);
8425 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8427 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8431 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8436 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8443 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8445 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8446 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8450 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8455 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8460 final_end_function ();
8462 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8464 switch_to_section (data_section
);
8465 output_asm_insn (".align 4", xoperands
);
8466 ASM_OUTPUT_LABEL (file
, label
);
8467 output_asm_insn (".word P'%0", xoperands
);
8470 current_thunk_number
++;
8471 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8472 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8473 last_address
+= nbytes
;
8474 if (old_last_address
> last_address
)
8475 last_address
= UINT_MAX
;
8476 update_total_code_bytes (nbytes
);
8479 /* Only direct calls to static functions are allowed to be sibling (tail)
8482 This restriction is necessary because some linker generated stubs will
8483 store return pointers into rp' in some cases which might clobber a
8484 live value already in rp'.
8486 In a sibcall the current function and the target function share stack
8487 space. Thus if the path to the current function and the path to the
8488 target function save a value in rp', they save the value into the
8489 same stack slot, which has undesirable consequences.
8491 Because of the deferred binding nature of shared libraries any function
8492 with external scope could be in a different load module and thus require
8493 rp' to be saved when calling that function. So sibcall optimizations
8494 can only be safe for static function.
8496 Note that GCC never needs return value relocations, so we don't have to
8497 worry about static calls with return value relocations (which require
8500 It is safe to perform a sibcall optimization when the target function
8501 will never return. */
8503 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8505 if (TARGET_PORTABLE_RUNTIME
)
8508 /* Sibcalls are not ok because the arg pointer register is not a fixed
8509 register. This prevents the sibcall optimization from occurring. In
8510 addition, there are problems with stub placement using GNU ld. This
8511 is because a normal sibcall branch uses a 17-bit relocation while
8512 a regular call branch uses a 22-bit relocation. As a result, more
8513 care needs to be taken in the placement of long-branch stubs. */
8517 /* Sibcalls are only ok within a translation unit. */
8518 return (decl
&& !TREE_PUBLIC (decl
));
8521 /* ??? Addition is not commutative on the PA due to the weird implicit
8522 space register selection rules for memory addresses. Therefore, we
8523 don't consider a + b == b + a, as this might be inside a MEM. */
8525 pa_commutative_p (const_rtx x
, int outer_code
)
8527 return (COMMUTATIVE_P (x
)
8528 && (TARGET_NO_SPACE_REGS
8529 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8530 || GET_CODE (x
) != PLUS
));
8533 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8534 use in fmpyadd instructions. */
8536 pa_fmpyaddoperands (rtx
*operands
)
8538 machine_mode mode
= GET_MODE (operands
[0]);
8540 /* Must be a floating point mode. */
8541 if (mode
!= SFmode
&& mode
!= DFmode
)
8544 /* All modes must be the same. */
8545 if (! (mode
== GET_MODE (operands
[1])
8546 && mode
== GET_MODE (operands
[2])
8547 && mode
== GET_MODE (operands
[3])
8548 && mode
== GET_MODE (operands
[4])
8549 && mode
== GET_MODE (operands
[5])))
8552 /* All operands must be registers. */
8553 if (! (GET_CODE (operands
[1]) == REG
8554 && GET_CODE (operands
[2]) == REG
8555 && GET_CODE (operands
[3]) == REG
8556 && GET_CODE (operands
[4]) == REG
8557 && GET_CODE (operands
[5]) == REG
))
8560 /* Only 2 real operands to the addition. One of the input operands must
8561 be the same as the output operand. */
8562 if (! rtx_equal_p (operands
[3], operands
[4])
8563 && ! rtx_equal_p (operands
[3], operands
[5]))
8566 /* Inout operand of add cannot conflict with any operands from multiply. */
8567 if (rtx_equal_p (operands
[3], operands
[0])
8568 || rtx_equal_p (operands
[3], operands
[1])
8569 || rtx_equal_p (operands
[3], operands
[2]))
8572 /* multiply cannot feed into addition operands. */
8573 if (rtx_equal_p (operands
[4], operands
[0])
8574 || rtx_equal_p (operands
[5], operands
[0]))
8577 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8579 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8580 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8581 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8582 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8583 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8584 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8587 /* Passed. Operands are suitable for fmpyadd. */
8591 #if !defined(USE_COLLECT2)
8593 pa_asm_out_constructor (rtx symbol
, int priority
)
8595 if (!function_label_operand (symbol
, VOIDmode
))
8596 pa_encode_label (symbol
);
8598 #ifdef CTORS_SECTION_ASM_OP
8599 default_ctor_section_asm_out_constructor (symbol
, priority
);
8601 # ifdef TARGET_ASM_NAMED_SECTION
8602 default_named_section_asm_out_constructor (symbol
, priority
);
8604 default_stabs_asm_out_constructor (symbol
, priority
);
8610 pa_asm_out_destructor (rtx symbol
, int priority
)
8612 if (!function_label_operand (symbol
, VOIDmode
))
8613 pa_encode_label (symbol
);
8615 #ifdef DTORS_SECTION_ASM_OP
8616 default_dtor_section_asm_out_destructor (symbol
, priority
);
8618 # ifdef TARGET_ASM_NAMED_SECTION
8619 default_named_section_asm_out_destructor (symbol
, priority
);
8621 default_stabs_asm_out_destructor (symbol
, priority
);
8627 /* This function places uninitialized global data in the bss section.
8628 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8629 function on the SOM port to prevent uninitialized global data from
8630 being placed in the data section. */
8633 pa_asm_output_aligned_bss (FILE *stream
,
8635 unsigned HOST_WIDE_INT size
,
8638 switch_to_section (bss_section
);
8639 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8641 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8642 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8645 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8646 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8649 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8650 ASM_OUTPUT_LABEL (stream
, name
);
8651 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8654 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8655 that doesn't allow the alignment of global common storage to be directly
8656 specified. The SOM linker aligns common storage based on the rounded
8657 value of the NUM_BYTES parameter in the .comm directive. It's not
8658 possible to use the .align directive as it doesn't affect the alignment
8659 of the label associated with a .comm directive. */
8662 pa_asm_output_aligned_common (FILE *stream
,
8664 unsigned HOST_WIDE_INT size
,
8667 unsigned int max_common_align
;
8669 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8670 if (align
> max_common_align
)
8672 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8673 "for global common data. Using %u",
8674 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8675 align
= max_common_align
;
8678 switch_to_section (bss_section
);
8680 assemble_name (stream
, name
);
8681 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8682 MAX (size
, align
/ BITS_PER_UNIT
));
8685 /* We can't use .comm for local common storage as the SOM linker effectively
8686 treats the symbol as universal and uses the same storage for local symbols
8687 with the same name in different object files. The .block directive
8688 reserves an uninitialized block of storage. However, it's not common
8689 storage. Fortunately, GCC never requests common storage with the same
8690 name in any given translation unit. */
8693 pa_asm_output_aligned_local (FILE *stream
,
8695 unsigned HOST_WIDE_INT size
,
8698 switch_to_section (bss_section
);
8699 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8702 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8703 assemble_name (stream
, name
);
8704 fprintf (stream
, "\n");
8707 ASM_OUTPUT_LABEL (stream
, name
);
8708 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8711 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8712 use in fmpysub instructions. */
8714 pa_fmpysuboperands (rtx
*operands
)
8716 machine_mode mode
= GET_MODE (operands
[0]);
8718 /* Must be a floating point mode. */
8719 if (mode
!= SFmode
&& mode
!= DFmode
)
8722 /* All modes must be the same. */
8723 if (! (mode
== GET_MODE (operands
[1])
8724 && mode
== GET_MODE (operands
[2])
8725 && mode
== GET_MODE (operands
[3])
8726 && mode
== GET_MODE (operands
[4])
8727 && mode
== GET_MODE (operands
[5])))
8730 /* All operands must be registers. */
8731 if (! (GET_CODE (operands
[1]) == REG
8732 && GET_CODE (operands
[2]) == REG
8733 && GET_CODE (operands
[3]) == REG
8734 && GET_CODE (operands
[4]) == REG
8735 && GET_CODE (operands
[5]) == REG
))
8738 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8739 operation, so operands[4] must be the same as operand[3]. */
8740 if (! rtx_equal_p (operands
[3], operands
[4]))
8743 /* multiply cannot feed into subtraction. */
8744 if (rtx_equal_p (operands
[5], operands
[0]))
8747 /* Inout operand of sub cannot conflict with any operands from multiply. */
8748 if (rtx_equal_p (operands
[3], operands
[0])
8749 || rtx_equal_p (operands
[3], operands
[1])
8750 || rtx_equal_p (operands
[3], operands
[2]))
8753 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8755 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8756 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8757 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8758 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8759 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8760 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8763 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for a MULT embedded inside a memory address.  */

int
pa_mem_shadd_constant_p (int val)
{
  return (val == 2 || val == 4 || val == 8) ? 1 : 0;
}
/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   constants for shadd instructions.  */

int
pa_shadd_constant_p (int val)
{
  return (val >= 1 && val <= 3) ? 1 : 0;
}
8789 /* Return TRUE if INSN branches forward. */
8792 forward_branch_p (rtx_insn
*insn
)
8794 rtx lab
= JUMP_LABEL (insn
);
8796 /* The INSN must have a jump label. */
8797 gcc_assert (lab
!= NULL_RTX
);
8799 if (INSN_ADDRESSES_SET_P ())
8800 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8807 insn
= NEXT_INSN (insn
);
8813 /* Output an unconditional move and branch insn. */
8816 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
8818 int length
= get_attr_length (insn
);
8820 /* These are the cases in which we win. */
8822 return "mov%I1b,tr %1,%0,%2";
8824 /* None of the following cases win, but they don't lose either. */
8827 if (dbr_sequence_length () == 0)
8829 /* Nothing in the delay slot, fake it by putting the combined
8830 insn (the copy or add) in the delay slot of a bl. */
8831 if (GET_CODE (operands
[1]) == CONST_INT
)
8832 return "b %2\n\tldi %1,%0";
8834 return "b %2\n\tcopy %1,%0";
8838 /* Something in the delay slot, but we've got a long branch. */
8839 if (GET_CODE (operands
[1]) == CONST_INT
)
8840 return "ldi %1,%0\n\tb %2";
8842 return "copy %1,%0\n\tb %2";
8846 if (GET_CODE (operands
[1]) == CONST_INT
)
8847 output_asm_insn ("ldi %1,%0", operands
);
8849 output_asm_insn ("copy %1,%0", operands
);
8850 return pa_output_lbranch (operands
[2], insn
, 1);
8853 /* Output an unconditional add and branch insn. */
8856 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
8858 int length
= get_attr_length (insn
);
8860 /* To make life easy we want operand0 to be the shared input/output
8861 operand and operand1 to be the readonly operand. */
8862 if (operands
[0] == operands
[1])
8863 operands
[1] = operands
[2];
8865 /* These are the cases in which we win. */
8867 return "add%I1b,tr %1,%0,%3";
8869 /* None of the following cases win, but they don't lose either. */
8872 if (dbr_sequence_length () == 0)
8873 /* Nothing in the delay slot, fake it by putting the combined
8874 insn (the copy or add) in the delay slot of a bl. */
8875 return "b %3\n\tadd%I1 %1,%0,%0";
8877 /* Something in the delay slot, but we've got a long branch. */
8878 return "add%I1 %1,%0,%0\n\tb %3";
8881 output_asm_insn ("add%I1 %1,%0,%0", operands
);
8882 return pa_output_lbranch (operands
[3], insn
, 1);
8885 /* We use this hook to perform a PA specific optimization which is difficult
8886 to do in earlier passes. */
8891 remove_useless_addtr_insns (1);
8893 if (pa_cpu
< PROCESSOR_8000
)
8894 pa_combine_instructions ();
8897 /* The PA has a number of odd instructions which can perform multiple
8898 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8899 it may be profitable to combine two instructions into one instruction
8900 with two outputs. It's not profitable PA2.0 machines because the
8901 two outputs would take two slots in the reorder buffers.
8903 This routine finds instructions which can be combined and combines
8904 them. We only support some of the potential combinations, and we
8905 only try common ways to find suitable instructions.
8907 * addb can add two registers or a register and a small integer
8908 and jump to a nearby (+-8k) location. Normally the jump to the
8909 nearby location is conditional on the result of the add, but by
8910 using the "true" condition we can make the jump unconditional.
8911 Thus addb can perform two independent operations in one insn.
8913 * movb is similar to addb in that it can perform a reg->reg
8914 or small immediate->reg copy and jump to a nearby (+-8k location).
8916 * fmpyadd and fmpysub can perform a FP multiply and either an
8917 FP add or FP sub if the operands of the multiply and add/sub are
8918 independent (there are other minor restrictions). Note both
8919 the fmpy and fadd/fsub can in theory move to better spots according
8920 to data dependencies, but for now we require the fmpy stay at a
8923 * Many of the memory operations can perform pre & post updates
8924 of index registers. GCC's pre/post increment/decrement addressing
8925 is far too simple to take advantage of all the possibilities. This
8926 pass may not be suitable since those insns may not be independent.
8928 * comclr can compare two ints or an int and a register, nullify
8929 the following instruction and zero some other register. This
8930 is more difficult to use as it's harder to find an insn which
8931 will generate a comclr than finding something like an unconditional
8932 branch. (conditional moves & long branches create comclr insns).
8934 * Most arithmetic operations can conditionally skip the next
8935 instruction. They can be viewed as "perform this operation
8936 and conditionally jump to this nearby location" (where nearby
8937 is an insns away). These are difficult to use due to the
8938 branch length restrictions. */
8941 pa_combine_instructions (void)
8945 /* This can get expensive since the basic algorithm is on the
8946 order of O(n^2) (or worse). Only do it for -O2 or higher
8947 levels of optimization. */
8951 /* Walk down the list of insns looking for "anchor" insns which
8952 may be combined with "floating" insns. As the name implies,
8953 "anchor" instructions don't move, while "floating" insns may
8955 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
8956 rtx_insn
*new_rtx
= make_insn_raw (par
);
8958 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
8960 enum attr_pa_combine_type anchor_attr
;
8961 enum attr_pa_combine_type floater_attr
;
8963 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8964 Also ignore any special USE insns. */
8965 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
8966 || GET_CODE (PATTERN (anchor
)) == USE
8967 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
8970 anchor_attr
= get_attr_pa_combine_type (anchor
);
8971 /* See if anchor is an insn suitable for combination. */
8972 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
8973 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
8974 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
8975 && ! forward_branch_p (anchor
)))
8979 for (floater
= PREV_INSN (anchor
);
8981 floater
= PREV_INSN (floater
))
8983 if (NOTE_P (floater
)
8984 || (NONJUMP_INSN_P (floater
)
8985 && (GET_CODE (PATTERN (floater
)) == USE
8986 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
8989 /* Anything except a regular INSN will stop our search. */
8990 if (! NONJUMP_INSN_P (floater
))
8996 /* See if FLOATER is suitable for combination with the
8998 floater_attr
= get_attr_pa_combine_type (floater
);
8999 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9000 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9001 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9002 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9004 /* If ANCHOR and FLOATER can be combined, then we're
9005 done with this pass. */
9006 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9007 SET_DEST (PATTERN (floater
)),
9008 XEXP (SET_SRC (PATTERN (floater
)), 0),
9009 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9013 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9014 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9016 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9018 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9019 SET_DEST (PATTERN (floater
)),
9020 XEXP (SET_SRC (PATTERN (floater
)), 0),
9021 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9026 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9027 SET_DEST (PATTERN (floater
)),
9028 SET_SRC (PATTERN (floater
)),
9029 SET_SRC (PATTERN (floater
))))
9035 /* If we didn't find anything on the backwards scan try forwards. */
9037 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9038 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9040 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9042 if (NOTE_P (floater
)
9043 || (NONJUMP_INSN_P (floater
)
9044 && (GET_CODE (PATTERN (floater
)) == USE
9045 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9049 /* Anything except a regular INSN will stop our search. */
9050 if (! NONJUMP_INSN_P (floater
))
9056 /* See if FLOATER is suitable for combination with the
9058 floater_attr
= get_attr_pa_combine_type (floater
);
9059 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9060 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9061 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9062 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9064 /* If ANCHOR and FLOATER can be combined, then we're
9065 done with this pass. */
9066 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9067 SET_DEST (PATTERN (floater
)),
9068 XEXP (SET_SRC (PATTERN (floater
)),
9070 XEXP (SET_SRC (PATTERN (floater
)),
9077 /* FLOATER will be nonzero if we found a suitable floating
9078 insn for combination with ANCHOR. */
9080 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9081 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9083 /* Emit the new instruction and delete the old anchor. */
9084 emit_insn_before (gen_rtx_PARALLEL
9086 gen_rtvec (2, PATTERN (anchor
),
9087 PATTERN (floater
))),
9090 SET_INSN_DELETED (anchor
);
9092 /* Emit a special USE insn for FLOATER, then delete
9093 the floating insn. */
9094 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9095 delete_insn (floater
);
9100 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9103 /* Emit the new_jump instruction and delete the old anchor. */
9105 = emit_jump_insn_before (gen_rtx_PARALLEL
9107 gen_rtvec (2, PATTERN (anchor
),
9108 PATTERN (floater
))),
9111 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9112 SET_INSN_DELETED (anchor
);
9114 /* Emit a special USE insn for FLOATER, then delete
9115 the floating insn. */
9116 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9117 delete_insn (floater
);
9125 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9126 int reversed
, rtx dest
,
9129 int insn_code_number
;
9130 rtx_insn
*start
, *end
;
9132 /* Create a PARALLEL with the patterns of ANCHOR and
9133 FLOATER, try to recognize it, then test constraints
9134 for the resulting pattern.
9136 If the pattern doesn't match or the constraints
9137 aren't met keep searching for a suitable floater
9139 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9140 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9141 INSN_CODE (new_rtx
) = -1;
9142 insn_code_number
= recog_memoized (new_rtx
);
9143 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9144 if (insn_code_number
< 0
9145 || (extract_insn (new_rtx
),
9146 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9160 /* There's up to three operands to consider. One
9161 output and two inputs.
9163 The output must not be used between FLOATER & ANCHOR
9164 exclusive. The inputs must not be set between
9165 FLOATER and ANCHOR exclusive. */
9167 if (reg_used_between_p (dest
, start
, end
))
9170 if (reg_set_between_p (src1
, start
, end
))
9173 if (reg_set_between_p (src2
, start
, end
))
9176 /* If we get here, then everything is good. */
9180 /* Return nonzero if references for INSN are delayed.
9182 Millicode insns are actually function calls with some special
9183 constraints on arguments and register usage.
9185 Millicode calls always expect their arguments in the integer argument
9186 registers, and always return their result in %r29 (ret1). They
9187 are expected to clobber their arguments, %r1, %r29, and the return
9188 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9190 This function tells reorg that the references to arguments and
9191 millicode calls do not appear to happen until after the millicode call.
9192 This allows reorg to put insns which set the argument registers into the
9193 delay slot of the millicode call -- thus they act more like traditional
9196 Note we cannot consider side effects of the insn to be delayed because
9197 the branch and link insn will clobber the return pointer. If we happened
9198 to use the return pointer in the delay slot of the call, then we lose.
9200 get_attr_type will try to recognize the given insn, so make sure to
9201 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9204 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9206 return ((NONJUMP_INSN_P (insn
)
9207 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9208 && GET_CODE (PATTERN (insn
)) != USE
9209 && GET_CODE (PATTERN (insn
)) != CLOBBER
9210 && get_attr_type (insn
) == TYPE_MILLI
));
9213 /* Promote the return value, but not the arguments. */
9216 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9218 int *punsignedp ATTRIBUTE_UNUSED
,
9219 const_tree fntype ATTRIBUTE_UNUSED
,
9222 if (for_return
== 0)
9224 return promote_mode (type
, mode
, punsignedp
);
9227 /* On the HP-PA the value is found in register(s) 28(-29), unless
9228 the mode is SF or DF. Then the value is returned in fr4 (32).
9230 This must perform the same promotions as PROMOTE_MODE, else promoting
9231 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9233 Small structures must be returned in a PARALLEL on PA64 in order
9234 to match the HP Compiler ABI. */
9237 pa_function_value (const_tree valtype
,
9238 const_tree func ATTRIBUTE_UNUSED
,
9239 bool outgoing ATTRIBUTE_UNUSED
)
9241 machine_mode valmode
;
9243 if (AGGREGATE_TYPE_P (valtype
)
9244 || TREE_CODE (valtype
) == COMPLEX_TYPE
9245 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9247 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9249 /* Handle aggregates that fit exactly in a word or double word. */
9250 if ((valsize
& (UNITS_PER_WORD
- 1)) == 0)
9251 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9255 /* Aggregates with a size less than or equal to 128 bits are
9256 returned in GR 28(-29). They are left justified. The pad
9257 bits are undefined. Larger aggregates are returned in
9261 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9263 for (i
= 0; i
< ub
; i
++)
9265 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9266 gen_rtx_REG (DImode
, 28 + i
),
9271 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9273 else if (valsize
> UNITS_PER_WORD
)
9275 /* Aggregates 5 to 8 bytes in size are returned in general
9276 registers r28-r29 in the same manner as other non
9277 floating-point objects. The data is right-justified and
9278 zero-extended to 64 bits. This is opposite to the normal
9279 justification used on big endian targets and requires
9280 special treatment. */
9281 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9282 gen_rtx_REG (DImode
, 28), const0_rtx
);
9283 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9287 if ((INTEGRAL_TYPE_P (valtype
)
9288 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9289 || POINTER_TYPE_P (valtype
))
9290 valmode
= word_mode
;
9292 valmode
= TYPE_MODE (valtype
);
9294 if (TREE_CODE (valtype
) == REAL_TYPE
9295 && !AGGREGATE_TYPE_P (valtype
)
9296 && TYPE_MODE (valtype
) != TFmode
9297 && !TARGET_SOFT_FLOAT
)
9298 return gen_rtx_REG (valmode
, 32);
9300 return gen_rtx_REG (valmode
, 28);
9303 /* Implement the TARGET_LIBCALL_VALUE hook. */
9306 pa_libcall_value (machine_mode mode
,
9307 const_rtx fun ATTRIBUTE_UNUSED
)
9309 if (! TARGET_SOFT_FLOAT
9310 && (mode
== SFmode
|| mode
== DFmode
))
9311 return gen_rtx_REG (mode
, 32);
9313 return gen_rtx_REG (mode
, 28);
9316 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9319 pa_function_value_regno_p (const unsigned int regno
)
9322 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9328 /* Update the data in CUM to advance over an argument
9329 of mode MODE and data type TYPE.
9330 (TYPE is null for libcalls where that information may not be available.) */
9333 pa_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
9334 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9336 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9337 int arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9339 cum
->nargs_prototype
--;
9340 cum
->words
+= (arg_size
9341 + ((cum
->words
& 01)
9342 && type
!= NULL_TREE
9346 /* Return the location of a parameter that is passed in a register or NULL
9347 if the parameter has any component that is passed in memory.
9349 This is new code and will be pushed to into the net sources after
9352 ??? We might want to restructure this so that it looks more like other
9355 pa_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
9356 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9358 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9359 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9366 if (mode
== VOIDmode
)
9369 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9371 /* If this arg would be passed partially or totally on the stack, then
9372 this routine should return zero. pa_arg_partial_bytes will
9373 handle arguments which are split between regs and stack slots if
9374 the ABI mandates split arguments. */
9377 /* The 32-bit ABI does not split arguments. */
9378 if (cum
->words
+ arg_size
> max_arg_words
)
9384 alignment
= cum
->words
& 1;
9385 if (cum
->words
+ alignment
>= max_arg_words
)
9389 /* The 32bit ABIs and the 64bit ABIs are rather different,
9390 particularly in their handling of FP registers. We might
9391 be able to cleverly share code between them, but I'm not
9392 going to bother in the hope that splitting them up results
9393 in code that is more easily understood. */
9397 /* Advance the base registers to their current locations.
9399 Remember, gprs grow towards smaller register numbers while
9400 fprs grow to higher register numbers. Also remember that
9401 although FP regs are 32-bit addressable, we pretend that
9402 the registers are 64-bits wide. */
9403 gpr_reg_base
= 26 - cum
->words
;
9404 fpr_reg_base
= 32 + cum
->words
;
9406 /* Arguments wider than one word and small aggregates need special
9410 || (type
&& (AGGREGATE_TYPE_P (type
)
9411 || TREE_CODE (type
) == COMPLEX_TYPE
9412 || TREE_CODE (type
) == VECTOR_TYPE
)))
9414 /* Double-extended precision (80-bit), quad-precision (128-bit)
9415 and aggregates including complex numbers are aligned on
9416 128-bit boundaries. The first eight 64-bit argument slots
9417 are associated one-to-one, with general registers r26
9418 through r19, and also with floating-point registers fr4
9419 through fr11. Arguments larger than one word are always
9420 passed in general registers.
9422 Using a PARALLEL with a word mode register results in left
9423 justified data on a big-endian target. */
9426 int i
, offset
= 0, ub
= arg_size
;
9428 /* Align the base register. */
9429 gpr_reg_base
-= alignment
;
9431 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9432 for (i
= 0; i
< ub
; i
++)
9434 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9435 gen_rtx_REG (DImode
, gpr_reg_base
),
9441 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9446 /* If the argument is larger than a word, then we know precisely
9447 which registers we must use. */
9461 /* Structures 5 to 8 bytes in size are passed in the general
9462 registers in the same manner as other non floating-point
9463 objects. The data is right-justified and zero-extended
9464 to 64 bits. This is opposite to the normal justification
9465 used on big endian targets and requires special treatment.
9466 We now define BLOCK_REG_PADDING to pad these objects.
9467 Aggregates, complex and vector types are passed in the same
9468 manner as structures. */
9470 || (type
&& (AGGREGATE_TYPE_P (type
)
9471 || TREE_CODE (type
) == COMPLEX_TYPE
9472 || TREE_CODE (type
) == VECTOR_TYPE
)))
9474 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9475 gen_rtx_REG (DImode
, gpr_reg_base
),
9477 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9482 /* We have a single word (32 bits). A simple computation
9483 will get us the register #s we need. */
9484 gpr_reg_base
= 26 - cum
->words
;
9485 fpr_reg_base
= 32 + 2 * cum
->words
;
9489 /* Determine if the argument needs to be passed in both general and
9490 floating point registers. */
9491 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9492 /* If we are doing soft-float with portable runtime, then there
9493 is no need to worry about FP regs. */
9494 && !TARGET_SOFT_FLOAT
9495 /* The parameter must be some kind of scalar float, else we just
9496 pass it in integer registers. */
9497 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9498 /* The target function must not have a prototype. */
9499 && cum
->nargs_prototype
<= 0
9500 /* libcalls do not need to pass items in both FP and general
9502 && type
!= NULL_TREE
9503 /* All this hair applies to "outgoing" args only. This includes
9504 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9506 /* Also pass outgoing floating arguments in both registers in indirect
9507 calls with the 32 bit ABI and the HP assembler since there is no
9508 way to the specify argument locations in static functions. */
9513 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
9519 gen_rtx_EXPR_LIST (VOIDmode
,
9520 gen_rtx_REG (mode
, fpr_reg_base
),
9522 gen_rtx_EXPR_LIST (VOIDmode
,
9523 gen_rtx_REG (mode
, gpr_reg_base
),
9528 /* See if we should pass this parameter in a general register. */
9529 if (TARGET_SOFT_FLOAT
9530 /* Indirect calls in the normal 32bit ABI require all arguments
9531 to be passed in general registers. */
9532 || (!TARGET_PORTABLE_RUNTIME
9536 /* If the parameter is not a scalar floating-point parameter,
9537 then it belongs in GPRs. */
9538 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9539 /* Structure with single SFmode field belongs in GPR. */
9540 || (type
&& AGGREGATE_TYPE_P (type
)))
9541 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9543 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9548 /* Arguments larger than one word are double word aligned. */
9551 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
9553 bool singleword
= (type
9554 ? (integer_zerop (TYPE_SIZE (type
))
9555 || !TREE_CONSTANT (TYPE_SIZE (type
))
9556 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9557 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9559 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9562 /* If this arg would be passed totally in registers or totally on the stack,
9563 then this routine should return zero. */
9566 pa_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
9567 tree type
, bool named ATTRIBUTE_UNUSED
)
9569 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9570 unsigned int max_arg_words
= 8;
9571 unsigned int offset
= 0;
9576 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9579 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9580 /* Arg fits fully into registers. */
9582 else if (cum
->words
+ offset
>= max_arg_words
)
9583 /* Arg fully on the stack. */
9587 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9591 /* A get_unnamed_section callback for switching to the text section.
9593 This function is only used with SOM. Because we don't support
9594 named subspaces, we can only create a new subspace or switch back
9595 to the default text subspace. */
9598 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9600 gcc_assert (TARGET_SOM
);
9603 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9605 /* We only want to emit a .nsubspa directive once at the
9606 start of the function. */
9607 cfun
->machine
->in_nsubspa
= 1;
9609 /* Create a new subspace for the text. This provides
9610 better stub placement and one-only functions. */
9612 && DECL_ONE_ONLY (cfun
->decl
)
9613 && !DECL_WEAK (cfun
->decl
))
9615 output_section_asm_op ("\t.SPACE $TEXT$\n"
9616 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9617 "ACCESS=44,SORT=24,COMDAT");
9623 /* There isn't a current function or the body of the current
9624 function has been completed. So, we are changing to the
9625 text section to output debugging information. Thus, we
9626 need to forget that we are in the text section so that
9627 varasm.c will call us when text_section is selected again. */
9628 gcc_assert (!cfun
|| !cfun
->machine
9629 || cfun
->machine
->in_nsubspa
== 2);
9632 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9635 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9638 /* A get_unnamed_section callback for switching to comdat data
9639 sections. This function is only used with SOM. */
9642 som_output_comdat_data_section_asm_op (const void *data
)
9645 output_section_asm_op (data
);
9648 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9651 pa_som_asm_init_sections (void)
9654 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9656 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9657 is not being generated. */
9658 som_readonly_data_section
9659 = get_unnamed_section (0, output_section_asm_op
,
9660 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9662 /* When secondary definitions are not supported, SOM makes readonly
9663 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9665 som_one_only_readonly_data_section
9666 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9668 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9669 "ACCESS=0x2c,SORT=16,COMDAT");
9672 /* When secondary definitions are not supported, SOM makes data one-only
9673 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9674 som_one_only_data_section
9675 = get_unnamed_section (SECTION_WRITE
,
9676 som_output_comdat_data_section_asm_op
,
9677 "\t.SPACE $PRIVATE$\n"
9678 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9679 "ACCESS=31,SORT=24,COMDAT");
9682 som_tm_clone_table_section
9683 = get_unnamed_section (0, output_section_asm_op
,
9684 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9686 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9687 which reference data within the $TEXT$ space (for example constant
9688 strings in the $LIT$ subspace).
9690 The assemblers (GAS and HP as) both have problems with handling
9691 the difference of two symbols which is the other correct way to
9692 reference constant data during PIC code generation.
9694 So, there's no way to reference constant data which is in the
9695 $TEXT$ space during PIC generation. Instead place all constant
9696 data into the $PRIVATE$ subspace (this reduces sharing, but it
9697 works correctly). */
9698 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9700 /* We must not have a reference to an external symbol defined in a
9701 shared library in a readonly section, else the SOM linker will
9704 So, we force exception information into the data section. */
9705 exception_section
= data_section
;
9708 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9711 pa_som_tm_clone_table_section (void)
9713 return som_tm_clone_table_section
;
9716 /* On hpux10, the linker will give an error if we have a reference
9717 in the read-only data section to a symbol defined in a shared
9718 library. Therefore, expressions that might require a reloc can
9719 not be placed in the read-only data section. */
9722 pa_select_section (tree exp
, int reloc
,
9723 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9725 if (TREE_CODE (exp
) == VAR_DECL
9726 && TREE_READONLY (exp
)
9727 && !TREE_THIS_VOLATILE (exp
)
9728 && DECL_INITIAL (exp
)
9729 && (DECL_INITIAL (exp
) == error_mark_node
9730 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9734 && DECL_ONE_ONLY (exp
)
9735 && !DECL_WEAK (exp
))
9736 return som_one_only_readonly_data_section
;
9738 return readonly_data_section
;
9740 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9741 return readonly_data_section
;
9743 && TREE_CODE (exp
) == VAR_DECL
9744 && DECL_ONE_ONLY (exp
)
9745 && !DECL_WEAK (exp
))
9746 return som_one_only_data_section
;
9748 return data_section
;
9751 /* Implement pa_reloc_rw_mask. */
9754 pa_reloc_rw_mask (void)
9756 /* We force (const (plus (symbol) (const_int))) to memory when the
9757 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9758 handle this construct in read-only memory and we want to avoid
9759 this for ELF. So, we always force an RTX needing relocation to
9760 the data section. */
9765 pa_globalize_label (FILE *stream
, const char *name
)
9767 /* We only handle DATA objects here, functions are globalized in
9768 ASM_DECLARE_FUNCTION_NAME. */
9769 if (! FUNCTION_NAME_P (name
))
9771 fputs ("\t.EXPORT ", stream
);
9772 assemble_name (stream
, name
);
9773 fputs (",DATA\n", stream
);
9777 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9780 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9781 int incoming ATTRIBUTE_UNUSED
)
9783 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9786 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9789 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9791 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9792 PA64 ABI says that objects larger than 128 bits are returned in memory.
9793 Note, int_size_in_bytes can return -1 if the size of the object is
9794 variable or larger than the maximum value that can be expressed as
9795 a HOST_WIDE_INT. It can also return zero for an empty type. The
9796 simplest way to handle variable and empty types is to pass them in
9797 memory. This avoids problems in defining the boundaries of argument
9798 slots, allocating registers, etc. */
9799 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9800 || int_size_in_bytes (type
) <= 0);
9803 /* Structure to hold declaration and name of external symbols that are
9804 emitted by GCC. We generate a vector of these symbols and output them
9805 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9806 This avoids putting out names that are never really used. */
9808 typedef struct GTY(()) extern_symbol
9814 /* Define gc'd vector type for extern_symbol. */
9816 /* Vector of extern_symbol pointers. */
9817 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
9819 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9820 /* Mark DECL (name NAME) as an external reference (assembler output
9821 file FILE). This saves the names to output at the end of the file
9822 if actually referenced. */
9825 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
9827 gcc_assert (file
== asm_out_file
);
9828 extern_symbol p
= {decl
, name
};
9829 vec_safe_push (extern_symbols
, p
);
9832 /* Output text required at the end of an assembler file.
9833 This includes deferred plabels and .import directives for
9834 all external symbols that were actually referenced. */
9837 pa_hpux_file_end (void)
9842 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9843 output_deferred_profile_counters ();
9845 output_deferred_plabels ();
9847 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
9849 tree decl
= p
->decl
;
9851 if (!TREE_ASM_WRITTEN (decl
)
9852 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9853 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
9856 vec_free (extern_symbols
);
9860 /* Return true if a change from mode FROM to mode TO for a register
9861 in register class RCLASS is invalid. */
9864 pa_cannot_change_mode_class (machine_mode from
, machine_mode to
,
9865 enum reg_class rclass
)
9870 /* Reject changes to/from complex and vector modes. */
9871 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
9872 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
9875 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
9878 /* There is no way to load QImode or HImode values directly from
9879 memory. SImode loads to the FP registers are not zero extended.
9880 On the 64-bit target, this conflicts with the definition of
9881 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9882 with different sizes in the floating-point registers. */
9883 if (MAYBE_FP_REG_CLASS_P (rclass
))
9886 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9887 in specific sets of registers. Thus, we cannot allow changing
9888 to a larger mode when it's larger than a word. */
9889 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
9890 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
9896 /* Returns TRUE if it is a good idea to tie two pseudo registers
9897 when one has mode MODE1 and one has mode MODE2.
9898 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9899 for any hard reg, then this must be FALSE for correct output.
9901 We should return FALSE for QImode and HImode because these modes
9902 are not ok in the floating-point registers. However, this prevents
9903 tieing these modes to SImode and DImode in the general registers.
9904 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9905 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9906 in the floating-point registers. */
9909 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
9911 /* Don't tie modes in different classes. */
9912 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
9919 /* Length in units of the trampoline instruction code. */
9921 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9924 /* Output assembler code for a block containing the constant parts
9925 of a trampoline, leaving space for the variable parts.\
9927 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9928 and then branches to the specified routine.
9930 This code template is copied from text segment to stack location
9931 and then patched with pa_trampoline_init to contain valid values,
9932 and then entered as a subroutine.
9934 It is best to keep this as small as possible to avoid having to
9935 flush multiple lines in the cache. */
9938 pa_asm_trampoline_template (FILE *f
)
9942 fputs ("\tldw 36(%r22),%r21\n", f
);
9943 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
9944 if (ASSEMBLER_DIALECT
== 0)
9945 fputs ("\tdepi 0,31,2,%r21\n", f
);
9947 fputs ("\tdepwi 0,31,2,%r21\n", f
);
9948 fputs ("\tldw 4(%r21),%r19\n", f
);
9949 fputs ("\tldw 0(%r21),%r21\n", f
);
9952 fputs ("\tbve (%r21)\n", f
);
9953 fputs ("\tldw 40(%r22),%r29\n", f
);
9954 fputs ("\t.word 0\n", f
);
9955 fputs ("\t.word 0\n", f
);
9959 fputs ("\tldsid (%r21),%r1\n", f
);
9960 fputs ("\tmtsp %r1,%sr0\n", f
);
9961 fputs ("\tbe 0(%sr0,%r21)\n", f
);
9962 fputs ("\tldw 40(%r22),%r29\n", f
);
9964 fputs ("\t.word 0\n", f
);
9965 fputs ("\t.word 0\n", f
);
9966 fputs ("\t.word 0\n", f
);
9967 fputs ("\t.word 0\n", f
);
9971 fputs ("\t.dword 0\n", f
);
9972 fputs ("\t.dword 0\n", f
);
9973 fputs ("\t.dword 0\n", f
);
9974 fputs ("\t.dword 0\n", f
);
9975 fputs ("\tmfia %r31\n", f
);
9976 fputs ("\tldd 24(%r31),%r1\n", f
);
9977 fputs ("\tldd 24(%r1),%r27\n", f
);
9978 fputs ("\tldd 16(%r1),%r1\n", f
);
9979 fputs ("\tbve (%r1)\n", f
);
9980 fputs ("\tldd 32(%r31),%r31\n", f
);
9981 fputs ("\t.dword 0 ; fptr\n", f
);
9982 fputs ("\t.dword 0 ; static link\n", f
);
9986 /* Emit RTL insns to initialize the variable parts of a trampoline.
9987 FNADDR is an RTX for the address of the function's pure code.
9988 CXT is an RTX for the static chain value for the function.
9990 Move the function address to the trampoline template at offset 36.
9991 Move the static chain value to trampoline template at offset 40.
9992 Move the trampoline address to trampoline template at offset 44.
9993 Move r19 to trampoline template at offset 48. The latter two
9994 words create a plabel for the indirect call to the trampoline.
9996 A similar sequence is used for the 64-bit port but the plabel is
9997 at the beginning of the trampoline.
9999 Finally, the cache entries for the trampoline code are flushed.
10000 This is necessary to ensure that the trampoline instruction sequence
10001 is written to memory prior to any attempts at prefetching the code
10005 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10007 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10008 rtx start_addr
= gen_reg_rtx (Pmode
);
10009 rtx end_addr
= gen_reg_rtx (Pmode
);
10010 rtx line_length
= gen_reg_rtx (Pmode
);
10013 emit_block_move (m_tramp
, assemble_trampoline_template (),
10014 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10015 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10019 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10020 emit_move_insn (tmp
, fnaddr
);
10021 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10022 emit_move_insn (tmp
, chain_value
);
10024 /* Create a fat pointer for the trampoline. */
10025 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10026 emit_move_insn (tmp
, r_tramp
);
10027 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10028 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10030 /* fdc and fic only use registers for the address to flush,
10031 they do not accept integer displacements. We align the
10032 start and end addresses to the beginning of their respective
10033 cache lines to minimize the number of lines flushed. */
10034 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10035 GEN_INT (-MIN_CACHELINE_SIZE
)));
10036 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10037 TRAMPOLINE_CODE_SIZE
-1));
10038 emit_insn (gen_andsi3 (end_addr
, tmp
,
10039 GEN_INT (-MIN_CACHELINE_SIZE
)));
10040 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10041 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10042 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10043 gen_reg_rtx (Pmode
),
10044 gen_reg_rtx (Pmode
)));
10048 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10049 emit_move_insn (tmp
, fnaddr
);
10050 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10051 emit_move_insn (tmp
, chain_value
);
10053 /* Create a fat pointer for the trampoline. */
10054 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10055 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10057 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10058 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10060 /* fdc and fic only use registers for the address to flush,
10061 they do not accept integer displacements. We align the
10062 start and end addresses to the beginning of their respective
10063 cache lines to minimize the number of lines flushed. */
10064 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10065 emit_insn (gen_anddi3 (start_addr
, tmp
,
10066 GEN_INT (-MIN_CACHELINE_SIZE
)));
10067 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10068 TRAMPOLINE_CODE_SIZE
- 1));
10069 emit_insn (gen_anddi3 (end_addr
, tmp
,
10070 GEN_INT (-MIN_CACHELINE_SIZE
)));
10071 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10072 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10073 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10074 gen_reg_rtx (Pmode
),
10075 gen_reg_rtx (Pmode
)));
10078 #ifdef HAVE_ENABLE_EXECUTE_STACK
10079 Â
emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10080 Â Â Â Â LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
10084 /* Perform any machine-specific adjustment in the address of the trampoline.
10085 ADDR contains the address that was passed to pa_trampoline_init.
10086 Adjust the trampoline address to point to the plabel at offset 44. */
10089 pa_trampoline_adjust_address (rtx addr
)
10092 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 46));
10097 pa_delegitimize_address (rtx orig_x
)
10099 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10101 if (GET_CODE (x
) == LO_SUM
10102 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10103 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10104 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10109 pa_internal_arg_pointer (void)
10111 /* The argument pointer and the hard frame pointer are the same in
10112 the 32-bit runtime, so we don't need a copy. */
10114 return copy_to_reg (virtual_incoming_args_rtx
);
10116 return virtual_incoming_args_rtx
;
10119 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10120 Frame pointer elimination is automatically handled. */
10123 pa_can_eliminate (const int from
, const int to
)
10125 /* The argument cannot be eliminated in the 64-bit runtime. */
10126 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10129 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10130 ? ! frame_pointer_needed
10134 /* Define the offset between two registers, FROM to be eliminated and its
10135 replacement TO, at the start of a routine. */
10137 pa_initial_elimination_offset (int from
, int to
)
10139 HOST_WIDE_INT offset
;
10141 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10142 && to
== STACK_POINTER_REGNUM
)
10143 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10144 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10147 gcc_unreachable ();
10153 pa_conditional_register_usage (void)
10157 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10159 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10160 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10161 for (i
= 33; i
< 56; i
+= 2)
10162 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10164 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10166 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10167 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10170 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10173 /* Target hook for c_mode_for_suffix. */
10175 static machine_mode
10176 pa_c_mode_for_suffix (char suffix
)
10178 if (HPUX_LONG_DOUBLE_LIBRARY
)
10187 /* Target hook for function_section. */
10190 pa_function_section (tree decl
, enum node_frequency freq
,
10191 bool startup
, bool exit
)
10193 /* Put functions in text section if target doesn't have named sections. */
10194 if (!targetm_common
.have_named_sections
)
10195 return text_section
;
10197 /* Force nested functions into the same section as the containing
10200 && DECL_SECTION_NAME (decl
) == NULL
10201 && DECL_CONTEXT (decl
) != NULL_TREE
10202 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10203 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10204 return function_section (DECL_CONTEXT (decl
));
10206 /* Otherwise, use the default function section. */
10207 return default_function_section (decl
, freq
, startup
, exit
);
10210 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10212 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10213 that need more than three instructions to load prior to reload. This
10214 limit is somewhat arbitrary. It takes three instructions to load a
10215 CONST_INT from memory but two are memory accesses. It may be better
10216 to increase the allowed range for CONST_INTS. We may also be able
10217 to handle CONST_DOUBLES. */
10220 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10222 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10225 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10228 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10229 legitimate constants. The other variants can't be handled by
10230 the move patterns after reload starts. */
10231 if (tls_referenced_p (x
))
10234 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10238 && HOST_BITS_PER_WIDE_INT
> 32
10239 && GET_CODE (x
) == CONST_INT
10240 && !reload_in_progress
10241 && !reload_completed
10242 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10243 && !pa_cint_ok_for_move (UINTVAL (x
)))
10246 if (function_label_operand (x
, mode
))
10252 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10254 static unsigned int
10255 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10257 unsigned int flags
;
10259 flags
= default_section_type_flags (decl
, name
, reloc
);
10261 /* Function labels are placed in the constant pool. This can
10262 cause a section conflict if decls are put in ".data.rel.ro"
10263 or ".data.rel.ro.local" using the __attribute__ construct. */
10264 if (strcmp (name
, ".data.rel.ro") == 0
10265 || strcmp (name
, ".data.rel.ro.local") == 0)
10266 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10271 /* pa_legitimate_address_p recognizes an RTL expression that is a
10272 valid memory address for an instruction. The MODE argument is the
10273 machine mode for the MEM expression that wants to use this address.
10275 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10276 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10277 available with floating point loads and stores, and integer loads.
10278 We get better code by allowing indexed addresses in the initial
10281 The acceptance of indexed addresses as legitimate implies that we
10282 must provide patterns for doing indexed integer stores, or the move
10283 expanders must force the address of an indexed store to a register.
10284 We have adopted the latter approach.
10286 Another function of pa_legitimate_address_p is to ensure that
10287 the base register is a valid pointer for indexed instructions.
10288 On targets that have non-equivalent space registers, we have to
10289 know at the time of assembler output which register in a REG+REG
10290 pair is the base register. The REG_POINTER flag is sometimes lost
10291 in reload and the following passes, so it can't be relied on during
10292 code generation. Thus, we either have to canonicalize the order
10293 of the registers in REG+REG indexed addresses, or treat REG+REG
10294 addresses separately and provide patterns for both permutations.
10296 The latter approach requires several hundred additional lines of
10297 code in pa.md. The downside to canonicalizing is that a PLUS
10298 in the wrong order can't combine to form to make a scaled indexed
10299 memory operand. As we won't need to canonicalize the operands if
10300 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10302 We initially break out scaled indexed addresses in canonical order
10303 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10304 scaled indexed addresses during RTL generation. However, fold_rtx
10305 has its own opinion on how the operands of a PLUS should be ordered.
10306 If one of the operands is equivalent to a constant, it will make
10307 that operand the second operand. As the base register is likely to
10308 be equivalent to a SYMBOL_REF, we have made it the second operand.
10310 pa_legitimate_address_p accepts REG+REG as legitimate when the
10311 operands are in the order INDEX+BASE on targets with non-equivalent
10312 space registers, and in any order on targets with equivalent space
10313 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10315 We treat a SYMBOL_REF as legitimate if it is part of the current
10316 function's constant-pool, because such addresses can actually be
10317 output as REG+SMALLINT. */
10320 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
)
10323 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10324 : REG_OK_FOR_BASE_P (x
)))
10325 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10326 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10327 && REG_P (XEXP (x
, 0))
10328 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10329 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10332 if (GET_CODE (x
) == PLUS
)
10336 /* For REG+REG, the base register should be in XEXP (x, 1),
10337 so check it first. */
10338 if (REG_P (XEXP (x
, 1))
10339 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10340 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10341 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10342 else if (REG_P (XEXP (x
, 0))
10343 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10344 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10345 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10349 if (GET_CODE (index
) == CONST_INT
)
10351 if (INT_5_BITS (index
))
10354 /* When INT14_OK_STRICT is false, a secondary reload is needed
10355 to adjust the displacement of SImode and DImode floating point
10356 instructions but this may fail when the register also needs
10357 reloading. So, we return false when STRICT is true. We
10358 also reject long displacements for float mode addresses since
10359 the majority of accesses will use floating point instructions
10360 that don't support 14-bit offsets. */
10361 if (!INT14_OK_STRICT
10362 && (strict
|| !(reload_in_progress
|| reload_completed
))
10367 return base14_operand (index
, mode
);
10370 if (!TARGET_DISABLE_INDEXING
10371 /* Only accept the "canonical" INDEX+BASE operand order
10372 on targets with non-equivalent space registers. */
10373 && (TARGET_NO_SPACE_REGS
10375 : (base
== XEXP (x
, 1) && REG_P (index
)
10376 && (reload_completed
10377 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10378 || REG_POINTER (base
))
10379 && (reload_completed
10380 || (reload_in_progress
&& HARD_REGISTER_P (index
))
10381 || !REG_POINTER (index
))))
10382 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
10383 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
10384 : REG_OK_FOR_INDEX_P (index
))
10385 && borx_reg_operand (base
, Pmode
)
10386 && borx_reg_operand (index
, Pmode
))
10389 if (!TARGET_DISABLE_INDEXING
10390 && GET_CODE (index
) == MULT
10391 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
10392 && REG_P (XEXP (index
, 0))
10393 && GET_MODE (XEXP (index
, 0)) == Pmode
10394 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
10395 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
10396 && GET_CODE (XEXP (index
, 1)) == CONST_INT
10397 && INTVAL (XEXP (index
, 1))
10398 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
10399 && borx_reg_operand (base
, Pmode
))
10405 if (GET_CODE (x
) == LO_SUM
)
10407 rtx y
= XEXP (x
, 0);
10409 if (GET_CODE (y
) == SUBREG
)
10410 y
= SUBREG_REG (y
);
10413 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
10414 : REG_OK_FOR_BASE_P (y
)))
10416 /* Needed for -fPIC */
10418 && GET_CODE (XEXP (x
, 1)) == UNSPEC
)
10421 if (!INT14_OK_STRICT
10422 && (strict
|| !(reload_in_progress
|| reload_completed
))
10427 if (CONSTANT_P (XEXP (x
, 1)))
10433 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
10439 /* Look for machine dependent ways to make the invalid address AD a
10442 For the PA, transform:
10444 memory(X + <large int>)
10448 if (<large int> & mask) >= 16
10449 Y = (<large int> & ~mask) + mask + 1 Round up.
10451 Y = (<large int> & ~mask) Round down.
10453 memory (Z + (<large int> - Y));
10455 This makes reload inheritance and reload_cse work better since Z
10458 There may be more opportunities to improve code with this hook. */
10461 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
10462 int opnum
, int type
,
10463 int ind_levels ATTRIBUTE_UNUSED
)
10465 long offset
, newoffset
, mask
;
10466 rtx new_rtx
, temp
= NULL_RTX
;
10468 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
10469 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
10471 if (optimize
&& GET_CODE (ad
) == PLUS
)
10472 temp
= simplify_binary_operation (PLUS
, Pmode
,
10473 XEXP (ad
, 0), XEXP (ad
, 1));
10475 new_rtx
= temp
? temp
: ad
;
10478 && GET_CODE (new_rtx
) == PLUS
10479 && GET_CODE (XEXP (new_rtx
, 0)) == REG
10480 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
10482 offset
= INTVAL (XEXP ((new_rtx
), 1));
10484 /* Choose rounding direction. Round up if we are >= halfway. */
10485 if ((offset
& mask
) >= ((mask
+ 1) / 2))
10486 newoffset
= (offset
& ~mask
) + mask
+ 1;
10488 newoffset
= offset
& ~mask
;
10490 /* Ensure that long displacements are aligned. */
10492 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
10493 || (TARGET_64BIT
&& (mode
) == DImode
)))
10494 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
10496 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
10498 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
10499 GEN_INT (newoffset
));
10500 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
10501 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
10502 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10503 opnum
, (enum reload_type
) type
);
10511 /* Output address vector. */
10514 pa_output_addr_vec (rtx lab
, rtx body
)
10516 int idx
, vlen
= XVECLEN (body
, 0);
10518 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10520 fputs ("\t.begin_brtab\n", asm_out_file
);
10521 for (idx
= 0; idx
< vlen
; idx
++)
10523 ASM_OUTPUT_ADDR_VEC_ELT
10524 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
10527 fputs ("\t.end_brtab\n", asm_out_file
);
10530 /* Output address difference vector. */
10533 pa_output_addr_diff_vec (rtx lab
, rtx body
)
10535 rtx base
= XEXP (XEXP (body
, 0), 0);
10536 int idx
, vlen
= XVECLEN (body
, 1);
10538 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10540 fputs ("\t.begin_brtab\n", asm_out_file
);
10541 for (idx
= 0; idx
< vlen
; idx
++)
10543 ASM_OUTPUT_ADDR_DIFF_ELT
10546 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
10547 CODE_LABEL_NUMBER (base
));
10550 fputs ("\t.end_brtab\n", asm_out_file
);
10553 /* This is a helper function for the other atomic operations. This function
10554 emits a loop that contains SEQ that iterates until a compare-and-swap
10555 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10556 a set of instructions that takes a value from OLD_REG as an input and
10557 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10558 set to the current contents of MEM. After SEQ, a compare-and-swap will
10559 attempt to update MEM with NEW_REG. The function returns true when the
10560 loop was generated successfully. */
10563 pa_expand_compare_and_swap_loop (rtx mem
, rtx old_reg
, rtx new_reg
, rtx seq
)
10565 machine_mode mode
= GET_MODE (mem
);
10566 rtx_code_label
*label
;
10567 rtx cmp_reg
, success
, oldval
;
10569 /* The loop we want to generate looks like
10575 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10579 Note that we only do the plain load from memory once. Subsequent
10580 iterations use the value loaded by the compare-and-swap pattern. */
10582 label
= gen_label_rtx ();
10583 cmp_reg
= gen_reg_rtx (mode
);
10585 emit_move_insn (cmp_reg
, mem
);
10586 emit_label (label
);
10587 emit_move_insn (old_reg
, cmp_reg
);
10591 success
= NULL_RTX
;
10593 if (!expand_atomic_compare_and_swap (&success
, &oldval
, mem
, old_reg
,
10594 new_reg
, false, MEMMODEL_SYNC_SEQ_CST
,
10598 if (oldval
!= cmp_reg
)
10599 emit_move_insn (cmp_reg
, oldval
);
10601 /* Mark this jump predicted not taken. */
10602 emit_cmp_and_jump_insns (success
, const0_rtx
, EQ
, const0_rtx
,
10603 GET_MODE (success
), 1, label
, 0);
10607 /* This function tries to implement an atomic exchange operation using a
10608 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10609 *MEM are returned, using TARGET if possible. No memory model is required
10610 since a compare_and_swap loop is seq-cst. */
10613 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target
, rtx mem
, rtx val
)
10615 machine_mode mode
= GET_MODE (mem
);
10617 if (can_compare_and_swap_p (mode
, true))
10619 if (!target
|| !register_operand (target
, mode
))
10620 target
= gen_reg_rtx (mode
);
10621 if (pa_expand_compare_and_swap_loop (mem
, target
, val
, NULL_RTX
))