/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
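/* Worked example (illustrative note, not from the upstream sources):
   for a DFmode FP operation whose result feeds a DFmode fpstore, both
   modes are 8 bytes wide, so pa_fpstore_bypass_p returns nonzero and
   the scheduler may use the forwarding bypass declared in pa.md.  If
   the producer computed an SFmode value instead, the sizes (4 vs. 8)
   differ and no bypass applies.  */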
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, enum machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					enum machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
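/* Usage example (illustrative, not from the upstream sources): with
   -mfixed-range=fr4-fr31 the loop above sets fixed_regs[] and
   call_used_regs[] for every register in that range; since that covers
   all of the floating point registers checked afterwards,
   MASK_DISABLE_FPREGS is turned on as well.  Several ranges may be
   given, separated by commas.  */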
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
	  && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
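/* Illustrative examples (not from the upstream sources):
     5          -- fits the signed 14-bit immediate of ldo.
     0x12345800 -- low 11 bits are zero, so a single ldil suffices.
     0x00f00000 -- a contiguous run of ones, so zdepi can build it.
   A constant such as 0x12345678 satisfies none of the three tests and
   needs a two-instruction ldil/ldo sequence instead.  */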
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
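/* Worked example (illustrative): ldil sets the left 21 bits of a word.
   For ival = 0x12345800, ival & 0x7ff == 0 and bit 31 is clear, so the
   masked value X above is 0 and the test succeeds.  For
   ival = 0x90000000 with a 64-bit HOST_WIDE_INT, X == 0x80000000,
   which matches neither 0 nor the sign-extended pattern, so the value
   is rejected: it would change sign when extended to 64 bits.  */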
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
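/* Worked example (illustrative): for x = 0x00f00000, lsb_mask is
   0x00100000, so t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1)
   == 0x00100000, a power of two -- the value is a sign-extended 5-bit
   field shifted into place, and zdepi can generate it.  For x = 0x123
   the same computation yields t = 0x13, which is not a power of two,
   so the value is rejected.  */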
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
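/* The trick shared by the two functions above (illustrative note):
   mask & -mask isolates the lowest set bit, and adding it collapses a
   contiguous run of ones into a single carry bit.  The result is zero
   or a power of two exactly when the value was one contiguous run,
   e.g. 0x38 + 0x08 = 0x40 (accepted) but 0x05 + 0x01 = 0x06
   (rejected).  pa_and_mask_p complements MASK first so the same test
   applies to a run of zeros.  */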
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
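/* Summary (illustrative note): each TLS model above maps to a pa.md
   pattern pair -- tgd_load[_pic] plus a __tls_get_addr call for
   global-dynamic, tld_load[_pic] plus tld_offset_load for
   local-dynamic, tp_load with tie_load[_pic] for initial-exec, and
   tp_load with tle_load for local-exec.  Only the first two models
   require a function call at run time.  */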
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
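/* Worked example of the rounding transform above (illustrative
   numbers): for memory (X + 100000) in an integer mode, the mask is
   0x3fff; 100000 & 0x3fff == 0x6a0, below half the boundary, so Y
   rounds down to 0x18000 (98304).  Z = X + 98304 is computed once,
   and the reference becomes memory (Z + 1696), whose 14-bit
   displacement CSE can then share among neighboring accesses.  */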
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
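/* Illustrative reading (not from the upstream sources): a copy between
   a general register and an FP register costs 16 because it has to go
   through memory (a store followed by a reload of the same stack
   slot), an FP-to-SAR copy costs 18, and any move out of SHIFT_REGS
   costs 0x100, which effectively keeps the allocator from ever reading
   the SAR back.  */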
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
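/* Example reading of the table above (illustrative, and assuming the
   usual PA multiply strategies): a DImode multiply on a 32-bit target
   has factor = 8/4 = 2, so it is costed at 2 * 2 * COSTS_N_INSNS (8)
   when the PA 1.1 FPU multiply is usable (FP regs enabled, not soft
   float) and 2 * 2 * COSTS_N_INSNS (20) for the millicode fallback.  */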
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}
1561 /* Emit insns to move operands[1] into operands[0].
1563 Return 1 if we have written out everything that needs to be done to
1564 do the move. Otherwise, return 0 and the caller will emit the move
1567 Note SCRATCH_REG may not be in the proper mode depending on how it
1568 will be used. This routine is responsible for creating a new copy
1569 of SCRATCH_REG in the proper mode. */
1572 pa_emit_move_sequence (rtx
*operands
, enum machine_mode mode
, rtx scratch_reg
)
1574 register rtx operand0
= operands
[0];
1575 register rtx operand1
= operands
[1];
1578 /* We can only handle indexed addresses in the destination operand
1579 of floating point stores. Thus, we need to break out indexed
1580 addresses from the destination operand. */
1581 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1583 gcc_assert (can_create_pseudo_p ());
1585 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1586 operand0
= replace_equiv_address (operand0
, tem
);
1589 /* On targets with non-equivalent space registers, break out unscaled
1590 indexed addresses from the source operand before the final CSE.
1591 We have to do this because the REG_POINTER flag is not correctly
1592 carried through various optimization passes and CSE may substitute
1593 a pseudo without the pointer set for one with the pointer set. As
1594 a result, we loose various opportunities to create insns with
1595 unscaled indexed addresses. */
1596 if (!TARGET_NO_SPACE_REGS
1597 && !cse_not_expected
1598 && GET_CODE (operand1
) == MEM
1599 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1600 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1601 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1603 = replace_equiv_address (operand1
,
1604 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1607 && reload_in_progress
&& GET_CODE (operand0
) == REG
1608 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1609 operand0
= reg_equiv_mem (REGNO (operand0
));
1610 else if (scratch_reg
1611 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1612 && GET_CODE (SUBREG_REG (operand0
)) == REG
1613 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1615 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1616 the code which tracks sets/uses for delete_output_reload. */
1617 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1618 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1619 SUBREG_BYTE (operand0
));
1620 operand0
= alter_subreg (&temp
, true);
1624 && reload_in_progress
&& GET_CODE (operand1
) == REG
1625 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1626 operand1
= reg_equiv_mem (REGNO (operand1
));
1627 else if (scratch_reg
1628 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1629 && GET_CODE (SUBREG_REG (operand1
)) == REG
1630 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1632 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1633 the code which tracks sets/uses for delete_output_reload. */
1634 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1635 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1636 SUBREG_BYTE (operand1
));
1637 operand1
= alter_subreg (&temp
, true);
1640 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1641 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1642 != XEXP (operand0
, 0)))
1643 operand0
= replace_equiv_address (operand0
, tem
);
1645 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1646 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1647 != XEXP (operand1
, 0)))
1648 operand1
= replace_equiv_address (operand1
, tem
);
1650 /* Handle secondary reloads for loads/stores of FP registers from
1651 REG+D addresses where D does not fit in 5 or 14 bits, including
1652 (subreg (mem (addr))) cases. */
1654 && fp_reg_operand (operand0
, mode
)
1655 && ((GET_CODE (operand1
) == MEM
1656 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4 ? SFmode
: DFmode
),
1657 XEXP (operand1
, 0)))
1658 || ((GET_CODE (operand1
) == SUBREG
1659 && GET_CODE (XEXP (operand1
, 0)) == MEM
1660 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1662 XEXP (XEXP (operand1
, 0), 0))))))
1664 if (GET_CODE (operand1
) == SUBREG
)
1665 operand1
= XEXP (operand1
, 0);
1667 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1668 it in WORD_MODE regardless of what mode it was originally given
1670 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1672 /* D might not fit in 14 bits either; for such cases load D into
1674 if (!memory_address_p (Pmode
, XEXP (operand1
, 0)))
1676 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1677 emit_move_insn (scratch_reg
,
1678 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
, 0)),
1680 XEXP (XEXP (operand1
, 0), 0),
1684 emit_move_insn (scratch_reg
, XEXP (operand1
, 0));
1685 emit_insn (gen_rtx_SET (VOIDmode
, operand0
,
1686 replace_equiv_address (operand1
, scratch_reg
)));
1689 else if (scratch_reg
1690 && fp_reg_operand (operand1
, mode
)
1691 && ((GET_CODE (operand0
) == MEM
1692 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1694 XEXP (operand0
, 0)))
1695 || ((GET_CODE (operand0
) == SUBREG
)
1696 && GET_CODE (XEXP (operand0
, 0)) == MEM
1697 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1699 XEXP (XEXP (operand0
, 0), 0)))))
1701 if (GET_CODE (operand0
) == SUBREG
)
1702 operand0
= XEXP (operand0
, 0);
1704 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1705 it in WORD_MODE regardless of what mode it was originally given
1707 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1709 /* D might not fit in 14 bits either; for such cases load D into
1711 if (!memory_address_p (Pmode
, XEXP (operand0
, 0)))
1713 emit_move_insn (scratch_reg
, XEXP (XEXP (operand0
, 0), 1));
1714 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0
,
1717 XEXP (XEXP (operand0
, 0),
1722 emit_move_insn (scratch_reg
, XEXP (operand0
, 0));
1723 emit_insn (gen_rtx_SET (VOIDmode
,
1724 replace_equiv_address (operand0
, scratch_reg
),
1728 /* Handle secondary reloads for loads of FP registers from constant
1729 expressions by forcing the constant into memory.
1731 Use scratch_reg to hold the address of the memory location.
1733 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
1734 NO_REGS when presented with a const_int and a register class
1735 containing only FP registers. Doing so unfortunately creates
1736 more problems than it solves. Fix this for 2.5. */
1737 else if (scratch_reg
1738 && CONSTANT_P (operand1
)
1739 && fp_reg_operand (operand0
, mode
))
1741 rtx const_mem
, xoperands
[2];
1743 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1744 it in WORD_MODE regardless of what mode it was originally given
1746 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1748 /* Force the constant into memory and put the address of the
1749 memory location into scratch_reg. */
1750 const_mem
= force_const_mem (mode
, operand1
);
1751 xoperands
[0] = scratch_reg
;
1752 xoperands
[1] = XEXP (const_mem
, 0);
1753 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1755 /* Now load the destination register. */
1756 emit_insn (gen_rtx_SET (mode
, operand0
,
1757 replace_equiv_address (const_mem
, scratch_reg
)));
1760 /* Handle secondary reloads for SAR. These occur when trying to load
1761 the SAR from memory or a constant. */
1762 else if (scratch_reg
1763 && GET_CODE (operand0
) == REG
1764 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1765 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1766 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1768 /* D might not fit in 14 bits either; for such cases load D into
1770 if (GET_CODE (operand1
) == MEM
1771 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1773 /* We are reloading the address into the scratch register, so we
1774 want to make sure the scratch register is a full register. */
1775 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1777 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1778 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1781 XEXP (XEXP (operand1
, 0),
1785 /* Now we are going to load the scratch register from memory,
1786 we want to load it in the same width as the original MEM,
1787 which must be the same as the width of the ultimate destination,
1789 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1791 emit_move_insn (scratch_reg
,
1792 replace_equiv_address (operand1
, scratch_reg
));
1796 /* We want to load the scratch register using the same mode as
1797 the ultimate destination. */
1798 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1800 emit_move_insn (scratch_reg
, operand1
);
1803 /* And emit the insn to set the ultimate destination. We know that
1804 the scratch register has the same mode as the destination at this
1806 emit_move_insn (operand0
, scratch_reg
);
1809 /* Handle the most common case: storing into a register. */
1810 else if (register_operand (operand0
, mode
))
1812 /* Legitimize TLS symbol references. This happens for references
1813 that aren't a legitimate constant. */
1814 if (PA_SYMBOL_REF_TLS_P (operand1
))
1815 operand1
= legitimize_tls_address (operand1
);
1817 if (register_operand (operand1
, mode
)
1818 || (GET_CODE (operand1
) == CONST_INT
1819 && pa_cint_ok_for_move (INTVAL (operand1
)))
1820 || (operand1
== CONST0_RTX (mode
))
1821 || (GET_CODE (operand1
) == HIGH
1822 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1823 /* Only `general_operands' can come here, so MEM is ok. */
1824 || GET_CODE (operand1
) == MEM
)
1826 /* Various sets are created during RTL generation which don't
1827 have the REG_POINTER flag correctly set. After the CSE pass,
1828 instruction recognition can fail if we don't consistently
1829 set this flag when performing register copies. This should
1830 also improve the opportunities for creating insns that use
1831 unscaled indexing. */
1832 if (REG_P (operand0
) && REG_P (operand1
))
1834 if (REG_POINTER (operand1
)
1835 && !REG_POINTER (operand0
)
1836 && !HARD_REGISTER_P (operand0
))
1837 copy_reg_pointer (operand0
, operand1
);
1840 /* When MEMs are broken out, the REG_POINTER flag doesn't
1841 get set. In some cases, we can set the REG_POINTER flag
1842 from the declaration for the MEM. */
1843 if (REG_P (operand0
)
1844 && GET_CODE (operand1
) == MEM
1845 && !REG_POINTER (operand0
))
1847 tree decl
= MEM_EXPR (operand1
);
1849 /* Set the register pointer flag and register alignment
1850 if the declaration for this memory reference is a
1856 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1858 if (TREE_CODE (decl
) == COMPONENT_REF
)
1859 decl
= TREE_OPERAND (decl
, 1);
1861 type
= TREE_TYPE (decl
);
1862 type
= strip_array_types (type
);
1864 if (POINTER_TYPE_P (type
))
1868 type
= TREE_TYPE (type
);
1869 /* Using TYPE_ALIGN_OK is rather conservative as
1870 only the ada frontend actually sets it. */
1871 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1873 mark_reg_pointer (operand0
, align
);
1878 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1882 else if (GET_CODE (operand0
) == MEM
)
1884 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1885 && !(reload_in_progress
|| reload_completed
))
1887 rtx temp
= gen_reg_rtx (DFmode
);
1889 emit_insn (gen_rtx_SET (VOIDmode
, temp
, operand1
));
1890 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, temp
));
1893 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1895 /* Run this case quickly. */
1896 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1899 if (! (reload_in_progress
|| reload_completed
))
1901 operands
[0] = validize_mem (operand0
);
1902 operands
[1] = operand1
= force_reg (mode
, operand1
);
1906 /* Simplify the source if we need to.
1907 Note we do have to handle function labels here, even though we do
1908 not consider them legitimate constants. Loop optimizations can
1909 call the emit_move_xxx with one as a source. */
1910 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1911 || function_label_operand (operand1
, VOIDmode
)
1912 || (GET_CODE (operand1
) == HIGH
1913 && symbolic_operand (XEXP (operand1
, 0), mode
)))
1917 if (GET_CODE (operand1
) == HIGH
)
1920 operand1
= XEXP (operand1
, 0);
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
					  VOIDmode))
	      || function_label_operand (operand1, VOIDmode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      pa_emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      pa_emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }
	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  pa_emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);

	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);
	    }
	  return 1;
	}
      else if (pa_tls_referenced_p (operand1))
	{
	  rtx tmp = operand1;
	  rtx addend = NULL;

	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	    {
	      addend = XEXP (XEXP (tmp, 0), 1);
	      tmp = XEXP (XEXP (tmp, 0), 0);
	    }

	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
	  tmp = legitimize_tls_address (tmp);
	  if (addend)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, addend);
	      tmp = force_operand (tmp, operands[0]);
	    }
	  operands[1] = tmp;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || !pa_cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx insn, temp;
	  rtx op1 = operand1;
	  HOST_WIDE_INT value = 0;
	  HOST_WIDE_INT insv = 0;
	  int insert = 0;

	  if (GET_CODE (operand1) == CONST_INT)
	    value = INTVAL (operand1);

	  if (TARGET_64BIT
	      && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (value != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
		  insert = 1;
		  value = nval;
		  operand1 = GEN_INT (nval);
		}
	    }
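	  /* A worked example of the extraction above, assuming a 64-bit
	     HOST_WIDE_INT: with value = 0x123456789, the low 32 bits are
	     0x23456789 with bit 31 clear, so nval = 0x23456789 != value;
	     we load the sign-extended low part and set
	     insv = value >> 32 = 1 for the later insertion.  With
	     value = 0xffffffff87654321, sign extending the low 32 bits
	     reproduces the original value (nval == value), so no
	     insertion step is needed.  */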
	  if (reload_in_progress || reload_completed)
	    temp = scratch_reg ? scratch_reg : operand0;
	  else
	    temp = gen_reg_rtx (mode);
	  /* We don't directly split DImode constants on 32-bit targets
	     because PLUS uses an 11-bit immediate and the insn sequence
	     generated is not as efficient as the one using HIGH/LO_SUM.  */
	  if (GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
	      && !insert)
	    {
	      /* Directly break constant into high and low parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;

		  low = value - high;
		}

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }
	  insn = emit_move_insn (operands[0], operands[1]);

	  /* Now insert the most significant 32 bits of the value
	     into the register.  When we don't have a second register
	     available, it could take up to nine instructions to load
	     a 64-bit integer constant.  Prior to reload, we force
	     constants that would take more than three instructions
	     to load to the constant pool.  During and after reload,
	     we have to handle all possible values.  */
	  if (insert)
	    {
	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
		 register and the value to be inserted is outside the
		 range that can be loaded with three depdi instructions.  */
	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
		{
		  operand1 = GEN_INT (insv);

		  emit_insn (gen_rtx_SET (VOIDmode, temp,
					  gen_rtx_HIGH (mode, operand1)));
		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
		  emit_insn (gen_insv (operand0, GEN_INT (32),
				       const0_rtx, temp));
		}
	      else
		{
		  int len = 5, pos = 27;

		  /* Insert the bits using the depdi instruction.  */
		  while (pos >= 0)
		    {
		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
		      HOST_WIDE_INT sign = v5 < 0;

		      /* Left extend the insertion.  */
		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
		      while (pos > 0 && (insv & 1) == sign)
			{
			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
			  pos -= 1;
			}

		      emit_insn (gen_insv (operand0, GEN_INT (len),
					   GEN_INT (pos), GEN_INT (v5)));

		      len = pos > 0 && pos < 5 ? pos : 5;
		      pos -= len;
		    }
		}
	    }
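	  /* Rough cost sketch for the loop above: the upper 32 bits need
	     at most seven 5-bit deposits, and the inner while skips runs
	     of bits that match the sign of the current group, so common
	     values need far fewer.  Together with the initial
	     two-instruction load, this is the source of the
	     nine-instruction bound mentioned above.  */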
	  set_unique_reg_note (insn, REG_EQUAL, op1);

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
pa_reloc_needed (tree exp)
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
      break;

    CASE_CONVERT:
    case NON_LVALUE_EXPR:
      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	tree value;
	unsigned HOST_WIDE_INT ix;

	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
	  if (value)
	    reloc |= pa_reloc_needed (value);
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
pa_singlemove_string (rtx *operands)
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      gcc_assert (GET_MODE (operands[1]) == SFmode);

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (pa_zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32 - lsb; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len, maxlen;

  maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < maxlen; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < maxlen - lsb; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Extend length if host is narrow and IMM is negative.  */
      if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
	len += 32;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
pa_output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  rtx xoperands[4];
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			       xoperands);
	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	    }
	  else
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
			       xoperands);
	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
	    }
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
			    operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return pa_singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
      return pa_singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (pa_singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (pa_singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
const char *
pa_output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
	output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
	output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
	 arbitrary address here including pre/post increment/decrement.

	 so avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG);
  return addr;
}
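/* For example, for ADDR = (plus (reg 26) (const_int 8)) the loop
   discards the constant term and returns (reg 26); bumping that
   register by 4 makes the original address refer to the second
   word.  */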
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

const char *
pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("ldd,ma 8(%1),%3", operands);
	output_asm_insn ("ldd,ma 8(%1),%6", operands);
	output_asm_insn ("std,ma %3,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%2,.-12", operands);
	output_asm_insn ("std,ma %6,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("ldd 0(%1),%6", operands);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %3,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%2,.-12", operands);
	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("ldw 0(%1),%6", operands);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%2,.-12", operands);
	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("ldb 0(%1),%6", operands);
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %6,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%2,.-12", operands);
	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  {
	    output_asm_insn ("ldb 0(%1),%3", operands);
	    output_asm_insn ("stb %3,0(%0)", operands);
	  }
	return "";

      default:
	gcc_unreachable ();
    }
}
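/* For example, with n_bytes = 11 and align = 4, the case 4 code emits
   the word-copying loop for the first 8 bytes and then, since
   11 % 8 = 3, a residual ldw/stby,e pair that stores just the 3
   leftover bytes (operands[4] holds 11 % 4 = 3 for the stby).  */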
/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_movmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than four bytes at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns += 2;

      if ((n_bytes % align) != 0)
	n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
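/* For example, n_bytes = 11 with align = 4 gives the 6-insn loop plus
   2 insns for the sub-word residual (11 % 8 = 3, which is less than 4,
   so only the second adjustment applies): 8 insns, i.e. a length of
   32 bytes.  */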
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */

const char *
pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("std,ma %%r0,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%1,.-4", operands);
	output_asm_insn ("std,ma %%r0,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%1,.-4", operands);
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%1,.-4", operands);
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %%r0,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%1,.-4", operands);
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  output_asm_insn ("stb %%r0,0(%0)", operands);

	return "";

      default:
	gcc_unreachable ();
    }
}
/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as emit_block_move, except that we
   count insns rather than emit them.  */

static int
compute_clrmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic loop.  */
  n_insns = 4;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns++;

      if ((n_bytes % align) != 0)
	n_insns++;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
const char *
pa_output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < 32; ls0++)
	if ((mask & (1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < 32; ls1++)
	if ((mask & (1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < 32; ms0++)
	if ((mask & (1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == 32);

      if (ls1 == 32)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "{extru|extrw,u} %1,31,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 31 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "{depi|depwi} 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
pa_output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);

      if (ls1 == HOST_BITS_PER_WIDE_INT)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "extrd,u %1,63,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 63 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "depdi 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
const char *
pa_output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
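/* For example, mask = 0x00f0 sets bits 4-7 (bs0 = 4, bs1 = 8), so
   p = 31 - 4 = 27 and len = 4, and the template becomes
   "depi -1,27,4,%0": deposit four 1 bits into that field.  */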
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
pa_output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
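/* For an aligned word-sized reference to a function foo, this hook
   therefore emits "\t.word\tP%foo" (or .dword on 64-bit targets); the
   P% prefix requests a plabel (function descriptor) reference rather
   than the raw code address.  */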
/* Output an ascii string.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
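/* As an illustration of the escaping above, the input bytes
   a " b \n come out as a\"b\x0a inside the .STRING directive:
   quote and backslash are backslash-escaped, anything outside the
   printable ASCII range becomes a two-digit \xnn escape, and a new
   .STRING line is started whenever the current one nears the
   assembler's input line limit.  */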
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx insn;
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP, then we've found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction, bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}

      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp, next;

	  /* Ignore anything that isn't an INSN.  */
	  if (GET_CODE (insn) != INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (GET_CODE (next) == JUMP_INSN
		  || GET_CODE (next) == CALL_INSN
		  || GET_CODE (next) == CODE_LABEL)
		break;

	      /* As does another fcmp insn.  */
	      if (GET_CODE (next) == INSN
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next
	      && GET_CODE (next) == JUMP_INSN)
	    {
	      rtx pattern = PATTERN (next);

	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;
}
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments	(optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
      SP-56		arg word 5
      SP-52		arg word 4

   Fixed arguments	(must be allocated; may remain unused)

      SP-48		arg word 3
      SP-44		arg word 2
      SP-40		arg word 1
      SP-36		arg word 0

   Frame Marker

      SP-32		External Data Pointer (DP)
      SP-28		External sr4
      SP-24		External/stub RP (RP')
      SP-20		Current RP
      SP-16		Static Link
      SP-12		Clean up
      SP-8		Calling Stub RP (RP'')
      SP-4		Previous SP

   Top of Frame

      SP-0		Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

    Top of Frame

       SP (FP')		Previous FP
       SP + 4		Alignment filler (sigh)
       SP + 8		Space for locals reserved here.
       .
       .
       .
       SP + n		All call saved register used.
       .
       .
       .
       SP + o		All call saved fp registers used.
       .
       .
       .
       SP + p (SP')	points to next available address.

*/
/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, tmpreg,
				     gen_rtx_PLUS (Pmode, basereg, delta)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (word_mode,
						gen_rtx_PLUS (word_mode,
							      basereg,
							      delta)),
				   src));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
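/* For example, store_reg (3, 20000, STACK_POINTER_REGNUM) on a 32-bit
   target falls into the final arm: 20000 does not fit in a 14-bit
   signed displacement, so %r1 is loaded with the HIGH part of the
   offset relative to %sp and the store goes through a LO_SUM address,
   leaving the high part in %r1 as the comment above notes.  */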
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (Pmode,
					    gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (VOIDmode, tmpreg,
				   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
HOST_WIDE_INT
pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in pa_expand_prologue and pa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!crtl->is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
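/* A quick example of the arithmetic, assuming a 32-bit target where
   UNITS_PER_WORD is 4, STARTING_FRAME_OFFSET is 8 and the preferred
   stack boundary is 64 bytes: a non-leaf function with 10 bytes of
   locals and no saved registers rounds the locals to 12, adds the
   frame pointer slot + filler (20), adds the 32-byte fixed frame
   marker (52), and the final rounding yields a 64-byte frame.  */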
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* pa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (crtl->is_leaf)
    fputs (",NO_CALLS", file);
  else
    fputs (",CALLS", file);
  if (rp_saved)
    fputs (",SAVE_RP", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  remove_useless_addtr_insns (0);
}
3779 pa_expand_prologue (void)
3781 int merge_sp_adjust_with_store
= 0;
3782 HOST_WIDE_INT size
= get_frame_size ();
3783 HOST_WIDE_INT offset
;
3791 /* Compute total size for frame pointer, filler, locals and rounding to
3792 the next word boundary. Similar code appears in pa_compute_frame_size
3793 and must be changed in tandem with this code. */
3794 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3795 if (local_fsize
|| frame_pointer_needed
)
3796 local_fsize
+= STARTING_FRAME_OFFSET
;
3798 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3799 if (flag_stack_usage_info
)
3800 current_function_static_stack_size
= actual_fsize
;
3802 /* Compute a few things we will use often. */
3803 tmpreg
= gen_rtx_REG (word_mode
, 1);
3805 /* Save RP first. The calling conventions manual states RP will
3806 always be stored into the caller's frame at sp - 20 or sp - 16
3807 depending on which ABI is in use. */
3808 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3810 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3816 /* Allocate the local frame and set up the frame pointer if needed. */
3817 if (actual_fsize
!= 0)
3819 if (frame_pointer_needed
)
3821 /* Copy the old frame pointer temporarily into %r1. Set up the
3822 new stack pointer, then store away the saved old frame pointer
3823 into the stack at sp and at the same time update the stack
3824 pointer by actual_fsize bytes. Two versions, first
3825 handles small (<8k) frames. The second handles large (>=8k)
3827 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3829 RTX_FRAME_RELATED_P (insn
) = 1;
3831 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3833 RTX_FRAME_RELATED_P (insn
) = 1;
3835 if (VAL_14_BITS_P (actual_fsize
))
3836 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3839 /* It is incorrect to store the saved frame pointer at *sp,
3840 then increment sp (writes beyond the current stack boundary).
3842 So instead use stwm to store at *sp and post-increment the
3843 stack pointer as an atomic operation. Then increment sp to
3844 finish allocating the new frame. */
3845 HOST_WIDE_INT adjust1
= 8192 - 64;
3846 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3848 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3849 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3853 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3854 we need to store the previous stack pointer (frame pointer)
3855 into the frame marker on targets that use the HP unwind
3856 library. This allows the HP unwind library to be used to
3857 unwind GCC frames. However, we are not fully compatible
3858 with the HP library because our frame layout differs from
3859 that specified in the HP runtime specification.
3861 We don't want a frame note on this instruction as the frame
3862 marker moves during dynamic stack allocation.
3864 This instruction also serves as a blockage to prevent
3865 register spills from being scheduled before the stack
3866 pointer is raised. This is necessary as we store
3867 registers using the frame pointer as a base register,
3868 and the frame pointer is set before sp is raised. */
3869 if (TARGET_HPUX_UNWIND_LIBRARY
)
3871 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3872 GEN_INT (TARGET_64BIT
? -8 : -4));
3874 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3875 hard_frame_pointer_rtx
);
3878 emit_insn (gen_blockage ());
3880 /* no frame pointer needed. */
3883 /* In some cases we can perform the first callee register save
3884 and allocating the stack frame at the same time. If so, just
3885 make a note of it and defer allocating the frame until saving
3886 the callee registers. */
3887 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3888 merge_sp_adjust_with_store
= 1;
3889 /* Can not optimize. Adjust the stack frame by actual_fsize
3892 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3897 /* Normal register save.
3899 Do not save the frame pointer in the frame_pointer_needed case. It
3900 was done earlier. */
3901 if (frame_pointer_needed
)
3903 offset
= local_fsize
;
3905 /* Saving the EH return data registers in the frame is the simplest
3906 way to get the frame unwind information emitted. We put them
3907 just before the general registers. */
3908 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3910 unsigned int i
, regno
;
3914 regno
= EH_RETURN_DATA_REGNO (i
);
3915 if (regno
== INVALID_REGNUM
)
3918 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
3919 offset
+= UNITS_PER_WORD
;
3923 for (i
= 18; i
>= 4; i
--)
3924 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3926 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
3927 offset
+= UNITS_PER_WORD
;
3930 /* Account for %r3 which is saved in a special place. */
3933 /* No frame pointer needed. */
3936 offset
= local_fsize
- actual_fsize
;
3938 /* Saving the EH return data registers in the frame is the simplest
3939 way to get the frame unwind information emitted. */
3940 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3942 unsigned int i
, regno
;
3946 regno
= EH_RETURN_DATA_REGNO (i
);
3947 if (regno
== INVALID_REGNUM
)
3950 /* If merge_sp_adjust_with_store is nonzero, then we can
3951 optimize the first save. */
3952 if (merge_sp_adjust_with_store
)
3954 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
3955 merge_sp_adjust_with_store
= 0;
3958 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
3959 offset
+= UNITS_PER_WORD
;
3963 for (i
= 18; i
>= 3; i
--)
3964 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3966 /* If merge_sp_adjust_with_store is nonzero, then we can
3967 optimize the first GR save. */
3968 if (merge_sp_adjust_with_store
)
3970 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
3971 merge_sp_adjust_with_store
= 0;
3974 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
3975 offset
+= UNITS_PER_WORD
;
3979 /* If we wanted to merge the SP adjustment with a GR save, but we never
3980 did any GR saves, then just emit the adjustment here. */
3981 if (merge_sp_adjust_with_store
)
3982 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3986 /* The hppa calling conventions say that %r19, the pic offset
3987 register, is saved at sp - 32 (in this function's frame)
3988 when generating PIC code. FIXME: What is the correct thing
3989 to do for functions which make no calls and allocate no
3990 frame? Do we need to allocate a frame, or can we just omit
3991 the save? For now we'll just omit the save.
3993 We don't want a note on this insn as the frame marker can
3994 move if there is a dynamic stack allocation. */
3995 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
3997 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
3999 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4003 /* Align pointer properly (doubleword boundary). */
4004 offset
= (offset
+ 7) & ~7;
  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
	  base = hard_frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (df_regs_ever_live_p (i)
	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	    {
	      rtx addr, insn, reg;
	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (Pmode, base,
							    offset));
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SET (VOIDmode, mem, reg));
		    }
		  else
		    {
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SEQUENCE (VOIDmode, vec));
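		      /* Note: the single DFmode store above is described
			 to the unwinder as two word-sized SFmode stores,
			 at offset and offset + 4, covering the left and
			 right halves of the register pair i and i + 1 as
			 the 32-bit runtime views them.  */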
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */

static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;

  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
	{
	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
	  src = gen_rtx_MEM (word_mode, tmpreg);
	}
      else
	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  emit_move_insn (dest, src);
}
/* Update the total code bytes output to the text section.  */

static void
update_total_code_bytes (unsigned int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      unsigned int old_total = total_code_bytes;

      total_code_bytes += nbytes;
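      /* total_code_bytes is unsigned, so if the addition above wraps
	 around, the new total compares less than the old one; the check
	 below then pins the running total at UINT_MAX rather than
	 restarting from a small value.  */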
      /* Be prepared to handle overflows.  */
      if (old_total > total_code_bytes)
	total_code_bytes = UINT_MAX;
    }
}
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx insn = get_last_insn ();

  last_address = 0;

  /* pa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    {
      fputs ("\tnop\n", file);
      last_address += 4;
    }

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We are done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (INSN_ADDRESSES_SET_P ())
    {
      insn = get_last_nonnote_insn ();
      last_address += INSN_ADDRESSES (INSN_UID (insn));
      if (INSN_P (insn))
	last_address += insn_default_length (insn);
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
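      /* (x + n - 1) & ~(n - 1) rounds x up to the next multiple of the
	 power-of-two n, here the function boundary in bytes.  */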
    }
  else
    last_address = UINT_MAX;

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
void
pa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	  {
	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
			       stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
bool
pa_can_use_return_insn (void)
{
  if (!reload_completed)
    return false;

  if (frame_pointer_needed)
    return false;

  if (df_regs_ever_live_p (2))
    return false;

  if (crtl->profile)
    return false;

  return pa_compute_frame_size (get_frame_size (), 0) == 0;
}

rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif


/* Vector of funcdef numbers.  */
static vec<int> funcdef_nos;

/* Output deferred profile counters.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;

  if (funcdef_nos.is_empty ())
    return;

  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));

  for (i = 0; funcdef_nos.iterate (i, &n); i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  funcdef_nos.release ();
}
void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx label_rtx = gen_label_rtx ();
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
				  GEN_INT (64)));

  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  /* The address of the function is loaded into %r25 with an instruction-
     relative sequence that avoids the use of relocations.  The sequence
     is split so that the load_offset_label_address instruction can
     occupy the delay slot of the call to _mcount.  */
  if (TARGET_PA_20)
    emit_insn (gen_lcla2 (reg, label_rtx));
  else
    emit_insn (gen_lcla1 (reg, label_rtx));

  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
					    reg, begin_label_rtx, label_rtx));

#if !NO_DEFERRED_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];

    funcdef_nos.safe_push (label_no);
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));

    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);

    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					     gen_rtx_SYMBOL_REF (Pmode,
								 "_mcount")),
				GEN_INT (TARGET_64BIT ? 24 : 12)));

    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else

  call_insn =
    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					   gen_rtx_SYMBOL_REF (Pmode,
							       "_mcount")),
			      GEN_INT (TARGET_64BIT ? 16 : 8)));

#endif

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);
}
/* Fetch the return address for the frame COUNT steps up from
   the current frame, after the prologue.  FRAMEADDR is the
   frame pointer of the COUNT frame.

   We want to ignore any export stub remnants here.  To handle this,
   we examine the code at the return address, and if it is an export
   stub, we return a memory rtx for the stub return address stored
   at frame - 24.

   The value returned is used in two different ways:

	1. To find a function's caller.

	2. To change the return address for a function.

   This function handles most instances of case 1; however, it will
   fail if there are two levels of stubs to execute on the return
   path.  The only way I believe that can happen is if the return value
   needs a parameter relocation, which never happens for C code.

   This function handles most instances of case 2; however, it will
   fail if we did not originally have stub code on the return path
   but will need stub code on the new return path.  This can happen if
   the caller & callee are both in the main program, but the new
   return location is in a shared library.  */

rtx
pa_return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* The instruction stream at the return address of a PA1.X export stub is:

     0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
     0x004010a1 | stub+12:  ldsid (sr0,rp),r1
     0x00011820 | stub+16:  mtsp r1,sr0
     0xe0400002 | stub+20:  be,n 0(sr0,rp)

     0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.

     The instruction stream at the return address of a PA2.0 export stub is:

     0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
     0xe840d002 | stub+12:  bve,n (rp)

     0xe840d002 must be specified as -398405630 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.  */

  HOST_WIDE_INT insns[4];
  int i, len;

  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  if (TARGET_PA_20)
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = -398405630;
      len = 2;
    }
  else
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = 0x004010a1;
      insns[2] = 0x00011820;
      insns[3] = -532676606;
      len = 4;
    }
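  /* The negative table entries are just the stub opcodes reinterpreted
     as signed 32-bit values: 0xe0400002 - 2^32 = -532676606 and
     0xe840d002 - 2^32 = -398405630.  */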
  /* Check the instruction stream at the normal return address for the
     export stub.  If it is an export stub, then our return address is
     really in -24[frameaddr].  */

  for (i = 0; i < len; i++)
    {
      rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
      rtx op1 = GEN_INT (insns[i]);
      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
    }

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (Pmode, frameaddr,
							      -24))));

  emit_label (label);

  return saved_rp;
}
void
pa_emit_bcond_fp (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[0]);
  rtx operand0 = operands[1];
  rtx operand1 = operands[2];
  rtx label = operands[3];

  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
			  gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));

  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode,
						     gen_rtx_fmt_ee (NE,
							      VOIDmode,
							      gen_rtx_REG (CCFPmode, 0),
							      const0_rtx),
						     gen_rtx_LABEL_REF (VOIDmode, label),
						     pc_rtx)));
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip, also do not adjust any
     true dependencies as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
    return cost;

  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */

      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is any of the sources
		     (or destination) of the arithmetic operation.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is any of the sources
		     (or destination) of the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is the destination of the
		     arithmetic operation.

		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.  We also
		     pay the penalty if the second insn is a fpload.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is also the target of
		     the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}
/* Adjust scheduling priorities.  We use this to try and keep addil
   and the next use of %r1 close together.  */
static int
pa_adjust_priority (rtx insn, int priority)
{
  rtx set = single_set (insn);
  rtx src, dest;
  if (set)
    {
      src = SET_SRC (set);
      dest = SET_DEST (set);
      if (GET_CODE (src) == LO_SUM
	  && symbolic_operand (XEXP (src, 1), VOIDmode)
	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
	priority >>= 3;

      else if (GET_CODE (src) == MEM
	       && GET_CODE (XEXP (src, 0)) == LO_SUM
	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
	priority >>= 1;

      else if (GET_CODE (dest) == MEM
	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
	priority >>= 3;
    }
  return priority;
}
/* The 700 can only issue a single insn at a time.
   The 7XXX processors can issue two insns at a time.
   The 8000 can issue 4 insns at a time.  */
static int
pa_issue_rate (void)
{
  switch (pa_cpu)
    {
    case PROCESSOR_700:		return 1;
    case PROCESSOR_7100:	return 2;
    case PROCESSOR_7100LC:	return 2;
    case PROCESSOR_7200:	return 2;
    case PROCESSOR_7300:	return 2;
    case PROCESSOR_8000:	return 4;

    default:
      gcc_unreachable ();
    }
}
/* Return any length plus adjustment needed by INSN which already has
   its length computed as LENGTH.   Return LENGTH if no adjustment is
   necessary.

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
int
pa_adjust_insn_length (rtx insn, int length)
{
  rtx pat = PATTERN (insn);

  /* If length is negative or undefined, provide initial length.  */
  if ((unsigned int) length >= INT_MAX)
    {
      if (GET_CODE (pat) == SEQUENCE)
	insn = XVECEXP (pat, 0, 0);

      switch (get_attr_type (insn))
	{
	case TYPE_MILLI:
	  length = pa_attr_length_millicode_call (insn);
	  break;
	case TYPE_CALL:
	  length = pa_attr_length_call (insn, 0);
	  break;
	case TYPE_SIBCALL:
	  length = pa_attr_length_call (insn, 1);
	  break;
	case TYPE_DYNCALL:
	  length = pa_attr_length_indirect_call (insn);
	  break;
	case TYPE_SH_FUNC_ADRS:
	  length = pa_attr_length_millicode_call (insn) + 20;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Jumps inside switch tables which have unfilled delay slots need
     adjustment.  */
  if (GET_CODE (insn) == JUMP_INSN
      && GET_CODE (pat) == PARALLEL
      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
    length += 4;
  /* Block move pattern.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    length += compute_movmem_length (insn) - 4;
  /* Block clear pattern.  */
  else if (GET_CODE (insn) == INSN
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    length += compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && JUMP_LABEL (insn) != NULL_RTX
	  && ! forward_branch_p (insn))
	length += 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	length += 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	length += 4;
    }
  return length;
}
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  if (code == '@'
      || code == '#'
      || code == '*'
      || code == '^')
    return true;

  return false;
}
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
pa_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;
    case 'A':
      {
	rtx xoperands[2];

	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	pa_output_global_address (file, xoperands[1], 0);
	fprintf (file, "(%s)", reg_names[REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs ("!?<=", file);  break;
	case UNLT:
	  fputs ("!?<", file);  break;
	case UNGE:
	  fputs ("!?>=", file);  break;
	case UNGT:
	  fputs ("!?>", file);  break;
	case UNEQ:
	  fputs ("!?=", file);  break;
	case UNORDERED:
	  fputs ("!?", file);  break;
	case ORDERED:
	  fputs ("?", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'k':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'O':
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
    case 'I':
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x", file);
	    }
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      pa_output_global_address (file, x, 0);
      return;
    case 'H':
      pa_output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      {
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names[REGNO (x)], file);
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  fputs ("R", file);
	  return;
	}
      if (FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names[REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names[REGNO (base)]);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names[REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names[REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names[REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* Because the REG_POINTER flag can get lost during reload,
		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
		 index and base registers in the combined move patterns.  */
	      rtx base = XEXP (XEXP (x, 0), 1);
	      rtx index = XEXP (XEXP (x, 0), 0);

	      fprintf (file, "%s(%s)",
		       reg_names[REGNO (index)], reg_names[REGNO (base)]);
	    }
	  else
	    output_address (XEXP (x, 0));
	  break;
	default:
	  output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */

void
pa_output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
/* Output boilerplate text to appear at the beginning of the file.
   There are several possible versions.  */
#define aputs(x) fputs(x, asm_out_file)

static void
pa_file_start_level (void)
{
  if (TARGET_64BIT)
    aputs ("\t.LEVEL 2.0w\n");
  else if (TARGET_PA_20)
    aputs ("\t.LEVEL 2.0\n");
  else if (TARGET_PA_11)
    aputs ("\t.LEVEL 1.1\n");
  else
    aputs ("\t.LEVEL 1.0\n");
}

static void
pa_file_start_space (int sortspace)
{
  aputs ("\t.SPACE $PRIVATE$");
  if (sortspace)
    aputs (",SORT=16");
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
  if (flag_tm)
    aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
  aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
	 "\n\t.SPACE $TEXT$");
  if (sortspace)
    aputs (",SORT=8");
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
}

static void
pa_file_start_file (int want_version)
{
  if (write_symbols != NO_DEBUG)
    {
      output_file_directive (asm_out_file, main_input_filename);
      if (want_version)
	aputs ("\t.version\t\"01.01\"\n");
    }
}

static void
pa_file_start_mcount (const char *aswhat)
{
  if (profile_flag)
    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
}

static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}

static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
	 "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}

static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}

static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}

static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
#undef aputs
/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
pa_get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      if (deferred_plabels == 0)
	deferred_plabels = ggc_alloc_deferred_plabel ();
      else
	deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
					  deferred_plabels,
					  n_deferred_plabels + 1);

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
static void
output_deferred_plabels (void)
{
  size_t i;

  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a 4 byte boundary
     before outputting the deferred plabels.  */
  if (n_deferred_plabels)
    {
      switch_to_section (flag_pic ? data_section : readonly_data_section);
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "L",
		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol,
			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
    }
}
/* Initialize optabs to point to emulation routines.  */

static void
pa_init_libfuncs (void)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      set_conv_libfunc (sfix_optab, SImode, TFmode,
			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
				     : "_U_Qfcnvfxt_quad_to_sgl");
      set_conv_libfunc (sfix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_dbl");
      set_conv_libfunc (ufix_optab, SImode, TFmode,
			"_U_Qfcnvfxt_quad_to_usgl");
      set_conv_libfunc (ufix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_udbl");

      set_conv_libfunc (sfloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_sgl_to_quad");
      set_conv_libfunc (sfloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_dbl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_usgl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_udbl_to_quad");
    }

  if (TARGET_SYNC_LIBCALL)
    init_sync_libfuncs (UNITS_PER_WORD);
}
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

static void
import_milli (enum millicodes code)
{
  char str[sizeof (import_string)];

  if (!imported[(int) code])
    {
      imported[(int) code] = 1;
      strcpy (str, import_string);
      strncpy (str + MILLI_START, milli_names[(int) code], 4);
      output_asm_insn (str, 0);
    }
}
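/* For example, the first use of mulI copies ".IMPORT $$....,MILLICODE"
   and overwrites the four dots (which begin at offset MILLI_START == 10)
   with "mulI", emitting ".IMPORT $$mulI,MILLICODE".  */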
/* The register constraints have put the operands and return value in
   the proper registers.  */

const char *
pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
{
  import_milli (mulI);
  return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
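/* That is, magic millicodes exist for the divisors 3, 5, 6, 7, 9, 10,
   12, 14 and 15.  */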
/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];

int
pa_emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && pa_magic_milli[INTVAL (operands[2])])
    {
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx_PARALLEL
	 (VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
const char *
pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     millicode routines.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      static char buf[100];
      divisor = INTVAL (operands[0]);
      if (!div_milli[divisor][unsignedp])
	{
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
    }
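  /* For example, the first signed division by a constant 7 in a file
     emits ".IMPORT $$divU_7,MILLICODE" or ".IMPORT $$divI_7,MILLICODE"
     as appropriate and then a millicode call to $$divI_7; later
     divisions by 7 skip the import.  */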
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return pa_output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return pa_output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
	}
    }
}
/* Output a $$rem millicode to do mod.  */

const char *
pa_output_mod_insn (int unsignedp, rtx insn)
{
  if (unsignedp)
    {
      import_milli (remU);
      return pa_output_millicode_call (insn,
				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
    }
  else
    {
      import_milli (remI);
      return pa_output_millicode_call (insn,
				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
    }
}
void
pa_output_arg_descriptor (rtx call_insn)
{
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  */
  if (flag_pic
      && (mode == SImode || mode == DImode)
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
		    : CODE_FOR_reload_indi_r1);
      return NO_REGS;
    }

  /* Secondary reloads of symbolic operands require %r1 as a scratch
     register when we're generating PIC code and when the operand isn't
     readonly.  */
  if (pa_symbolic_expression_p (x))
    {
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
	{
	  gcc_assert (mode == SImode || mode == DImode);
	  sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
			: CODE_FOR_reload_indi_r1);
	  return NO_REGS;
	}
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* In order to allow 14-bit displacements in integer loads and stores,
     we need to prevent reload from generating out of range integer mode
     loads and stores to the floating point registers.  Previously, we
     used to call for a secondary reload and have pa_emit_move_sequence()
     fix the instruction sequence.  However, reload occasionally wouldn't
     generate the reload and we would end up with an invalid REG+D memory
     address.  So, now we use an intermediate general register for most
     memory loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && GET_MODE_CLASS (mode) == MODE_INT
      && FP_REG_CLASS_P (rclass))
    {
      /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
	 the secondary reload needed for a pseudo.  It never passes a
	 REG+D address.  */
      if (GET_CODE (x) == MEM)
	{
	  x = XEXP (x, 0);

	  /* We don't need an intermediate for indexed and LO_SUM DLT
	     memory addresses.  When INT14_OK_STRICT is true, it might
	     appear that we could directly allow register indirect
	     memory addresses.  However, this doesn't work because we
	     don't support SUBREGs in floating-point register copies
	     and reload doesn't tell us when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x)
	      || IS_LO_SUM_DLT_ADDR_P (x))
	    return NO_REGS;

	  /* Otherwise, we need an intermediate general register.  */
	  return GENERAL_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = (in_p
		    ? direct_optab_handler (reload_in_optab, mode)
		    : direct_optab_handler (reload_out_optab, mode));
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
	{
	  sri->icode = (in_p
			? direct_optab_handler (reload_in_optab, mode)
			: direct_optab_handler (reload_out_optab, mode));
	  return NO_REGS;
	}

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
	return GENERAL_REGS;
    }

  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
   is only marked as live on entry by df-scan when it is a fixed
   register.  It isn't a fixed register in the 64-bit runtime,
   so we need to mark it here.  */

static void
pa_extra_live_on_entry (bitmap regs)
{
  if (TARGET_64BIT)
    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
}

/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
   to prevent it from being deleted.  */

rtx
pa_eh_return_handler_rtx (void)
{
  rtx tmp;

  tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
  tmp = gen_rtx_MEM (word_mode, tmp);
  tmp->volatil = 1;
  return tmp;
}
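/* Note: the -20 and -16 displacements above are the same frame slots the
   prologue and epilogue use for the saved return pointer (see ret_off in
   pa_expand_epilogue), so the EH handler address takes the place of the
   saved RP.  */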
/* In the 32-bit runtime, arguments larger than eight bytes are passed
   by invisible reference.  As a GCC extension, we also pass anything
   with a zero or variable size by reference.

   The 64-bit runtime does not describe passing any types by invisible
   reference.  The internals of GCC can't currently handle passing
   empty structures, and zero or variable length arrays when they are
   not passed entirely on the stack or by reference.  Thus, as a GCC
   extension, we pass these types by reference.  The HP compiler doesn't
   support these types, so hopefully there shouldn't be any compatibility
   issues.  This may have to be revisited when HP releases a C99 compiler
   or updates the ABI.  */

static bool
pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
		      enum machine_mode mode, const_tree type,
		      bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;

  if (type)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  if (TARGET_64BIT)
    return size <= 0;
  else
    return size <= 0 || size > 8;
}
enum direction
pa_function_arg_padding (enum machine_mode mode, const_tree type)
{
  if (mode == BLKmode
      || (TARGET_64BIT
	  && type
	  && (AGGREGATE_TYPE_P (type)
	      || TREE_CODE (type) == COMPLEX_TYPE
	      || TREE_CODE (type) == VECTOR_TYPE)))
    {
      /* Return none if justification is not required.  */
      if (type
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
	return none;

      /* The directions set here are ignored when a BLKmode argument larger
	 than a word is placed in a register.  Different code is used for
	 the stack and registers.  This makes it difficult to have a
	 consistent data representation for both the stack and registers.
	 For both runtimes, the justification and padding for arguments on
	 the stack and in registers should be identical.  */
      if (TARGET_64BIT)
	/* The 64-bit runtime specifies left justification for aggregates.  */
	return upward;
      else
	/* The 32-bit runtime architecture specifies right justification.
	   When the argument is passed on the stack, the argument is padded
	   with garbage on the left.  The HP compiler pads with zeros.  */
	return downward;
    }

  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  int argadj = ((!stdarg_p (fntype))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
      else
	offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (Pmode,
						    arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));
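      /* This lays out the incoming argument registers at fixed slots
	 below the arg pointer: %r26 at ap-64, %r25 at ap-56, ...,
	 %r19 at ap-8.  */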
      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (Pmode, arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    crtl->args.internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			   gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
	{
	  type = ptr;
	  ptr = build_pointer_type (type);
	}
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = fold_build_pointer_plus (valist, u);

      /* Align to 4 or 8 byte boundary depending on argument size.  */

      u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
      t = fold_convert (valist_type, t);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      ofs = (8 - size) % 4;
      if (ofs != 0)
	t = fold_build_pointer_plus_hwi (t, ofs);
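      /* Worked example: for a 1-byte argument, the pointer drops by 1,
	 rounds down to a 4-byte boundary, then gains (8 - 1) % 4 == 3,
	 so the value ends up right-justified (big-endian) within its
	 word.  */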
      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
	t = build_va_arg_indirect_ref (t);

      return t;
    }
}
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.

   Currently, TImode is not valid as the HP 64-bit runtime documentation
   doesn't document the alignment and calling conventions for this type.
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */

static bool
pa_scalar_mode_supported_p (enum machine_mode mode)
{
  int precision = GET_MODE_PRECISION (mode);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_PARTIAL_INT:
    case MODE_INT:
      if (precision == CHAR_TYPE_SIZE)
	return true;
      if (precision == SHORT_TYPE_SIZE)
	return true;
      if (precision == INT_TYPE_SIZE)
	return true;
      if (precision == LONG_TYPE_SIZE)
	return true;
      if (precision == LONG_LONG_TYPE_SIZE)
	return true;
      return false;

    case MODE_FLOAT:
      if (precision == FLOAT_TYPE_SIZE)
	return true;
      if (precision == DOUBLE_TYPE_SIZE)
	return true;
      if (precision == LONG_DOUBLE_TYPE_SIZE)
	return true;
      return false;

    case MODE_DECIMAL_FLOAT:
      return false;

    default:
      gcc_unreachable ();
    }
}
/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches into the delay slot.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx insn)
{
  rtx jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (jump_insn == insn)
	return TRUE;

      /* We can't rely on the length of asms.  So, we return FALSE when
	 the branch is followed by an asm.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
	  || get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx insn)
{
  rtx jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
	return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	    || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx insn)
{
  rtx jump_insn = next_active_insn (JUMP_LABEL (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
	break;
      if (get_attr_length (insn) == 4
	  && jump_insn == next_active_insn (insn))
	return TRUE;
      if (get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

const char *
pa_output_cbranch (rtx *operands, int negated, rtx insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
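/* For instance, with useskip the case-4 template in pa_output_cbranch
   can emit something like "comclr,< %r26,%r25,%r0" (a sketch; the
   actual operands come from %2 and %r1): when the condition holds, the
   comparison nullifies the single following insn instead of branching
   over it, avoiding the mis-predicted-branch penalty on fall through.  */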
/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
pa_output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the callers frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
	}
      else
	{
	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	}
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      return "ldw -12(%%r30),%%r1";
    }
}
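/* Putting the pieces of pa_output_lbranch together, the 32-bit non-PIC
   sequence is, schematically (a sketch, with DEST standing in for the
   branch target):

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'DEST,%r1
	be R'DEST(%sr4,%r1)	; inter-space branch to DEST
	ldw -20(%r30),%r1	; restore %r1 in the delay slot

   which accounts for the 16-byte length quoted above; the portable
   runtime and PIC forms add the extra address-materialization insns.  */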
/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{extrs,|extrw,s,}");
	else
	  strcpy (buf, "bb,");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	    || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, " %0,%1,1,%%r0");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%3%#");
	    else
	      strcat (buf, ",n %0,%1,%3");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%2%#");
	    else
	      strcat (buf, ",n %0,%1,%2");
	  }
	else if (! nullify && negated)
	  strcat (buf, " %0,%1,%3");
	else if (! nullify && ! negated)
	  strcat (buf, " %0,%1,%2");
	break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
	    else
	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, " %0,%1,%3%#");
	    else
	      strcat (buf, " %0,%1,%2%#");
	  }
	else
	  {
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    else
	      strcpy (buf, "{extrs,|extrw,s,}");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	    else if (nullify && ! negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	    else if (negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	    else
	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "bb,");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n %0,%1,.+%4");
	else
	  strcat (buf, " %0,%1,.+%4");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
	       int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{vextrs,|extrw,s,}");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	    || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	    else
	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	  }
	else if (! nullify && negated)
	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
	else if (! nullify && ! negated)
	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
	break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	    else
	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	  }
	else
	  {
	    strcpy (buf, "{vextrs,|extrw,s,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	    else if (nullify && ! negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	    else if (negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	    else
	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
	else
	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */

const char *
pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
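/* As a concrete instance of alternative 0 above, a counted loop whose
   counter lives in a general register typically decrements and branches
   in a single insn, e.g. "addib,<> -1,%r3,L$loop" (a sketch; the
   completer comes from %C2 and the register and label from the
   pattern's operands), leaving the delay slot free for loop work.  */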
/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */

const char *
pa_output_movb (rtx *operands, rtx insn, int which_alternative,
		int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
/* Copy any FP arguments in INSN into integer registers.  */

static void
copy_fp_args (rtx insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  /* Copy the FP register into an integer register via memory.  */
	  if (arg_mode == SFmode)
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	  else
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	}
    }
}
/* Compute length of the FP argument copy sequence for INSN.  */

static int
length_fp_args (rtx insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    length += 8;
	  else
	    length += 12;
	}
    }

  return length;
}
/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   over estimate the length than to under estimate it.  */

int
pa_attr_length_millicode_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
	return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
	return 8;

      if (TARGET_LONG_ABS_CALL && !flag_pic)
	return 12;

      return 24;
    }
}
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  int distance;
  rtx seq_insn;
  rtx xoperands[3];

  xoperands[0] = call_dest;
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  output_asm_insn ("b,l .+8,%%r1", xoperands);

	  if (TARGET_GAS)
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      xoperands[1] = gen_label_rtx ();
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }

	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

	  if (TARGET_SOM || !TARGET_GAS)
	    {
	      /* The HP assembler can generate relocations for the
		 difference of two symbols.  GAS can do this for a
		 millicode symbol but not an arbitrary external
		 symbol when generating SOM output.  */
	      xoperands[1] = gen_label_rtx ();
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
			       xoperands);
	    }

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
	{
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (xoperands[1]));
	}
      else
	/* ??? This branch may not reach its target.  */
	output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("nop\n\tb,n %0", xoperands);

  /* Delete the jump.  */
  SET_INSN_DELETED (NEXT_INSN (insn));

  return "";
}
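/* The pc-relative millicode sequences above share one shape; for the
   GAS PIC variant it comes out roughly as (a sketch of the emitted
   assembly, with DEST standing in for the millicode symbol):

	bl .+8,%r1			; put the pc into %r1
	addi 16,%r1,%r31		; compute the return point
	addil L'DEST-$PIC_pcrel$0+8,%r1
	ldo R'DEST-$PIC_pcrel$0+12(%r1),%r1
	bv %r0(%r1)			; jump via %r1

   The +8/+12 adjustments compensate for where the bl placed the pc
   relative to the addil/ldo pair.  */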
/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (GET_CODE (insn) == CALL_INSN);

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
	   || (TARGET_GAS && !TARGET_SOM
	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	    length += 8;
	}
    }

  return length;
}
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call (rtx insn, rtx call_dest, int sibcall)
{
  int delay_insn_deleted = 0;
  int delay_slot_filled = 0;
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[2];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* If this isn't a sibcall, we put the load of %r27 into the
	     delay slot.  We can't do this in a sibcall as we don't
	     have a second call-clobbered scratch register available.  */
	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall)
	    {
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      delay_insn_deleted = 1;
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

	  if (sibcall)
	    {
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
	      output_asm_insn ("bve (%%r1)", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      delay_slot_filled = 1;
	    }
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM
		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocating.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      delay_insn_deleted = 1;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  delay_slot_filled = 1;
		}
	    }
	  else
	    {
	      if (TARGET_LONG_PIC_SDIFF_CALL)
		{
		  /* The HP assembler and linker can handle relocations
		     for the difference of two symbols.  The HP assembler
		     recognizes the sequence as a pc-relative call and
		     the linker provides stubs when needed.  */
		  xoperands[1] = gen_label_rtx ();
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
		  targetm.asm_out.internal_label (asm_out_file, "L",
						  CODE_LABEL_NUMBER (xoperands[1]));
		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
		}
	      else if (TARGET_GAS && !TARGET_SOM
		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
		{
		  /* GAS currently can't generate the relocations that
		     are needed for the SOM linker under HP-UX using this
		     sequence.  The GNU linker doesn't generate the stubs
		     that are needed for external calls on TARGET_ELF32
		     with this sequence.  For now, we have to use a
		     longer plabel sequence when using GAS.  */
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
				   xoperands);
		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
				   xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
				       xoperands);
		    }

		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  delay_slot_filled = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      delay_slot_filled = 1;
		    }
		}
	    }
	}
    }

  if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0
      || delay_insn_deleted
      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* A sibcall should never have a branch in the delay slot.  */
  gcc_assert (!sibcall);

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
    {
      /* See if the return address can be adjusted.  Use the containing
	 sequence insn's address.  This would break the regular call/return
	 relationship assumed by the table based eh unwinder, so only do that
	 if the call is not possibly throwing.  */
      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance)
	  && !(can_throw_internal (insn) || can_throw_external (insn)))
	{
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (xoperands[1]));
	}
      else
	output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    output_asm_insn ("b,n %0", xoperands);

  /* Delete the jump.  */
  SET_INSN_DELETED (NEXT_INSN (insn));

  return "";
}
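/* A note on the plabel test in the inline $$dyncall sequence above:
   "bb,>=,n %r1,30,.+16" branches over the next three insns when bit 30
   of the function pointer is clear, i.e. when %r1 already holds a plain
   code address.  When the bit is set, the pointer designates a plabel
   descriptor, so the low two bits are masked off with "depi 0,31,2,%r1"
   and the real target address and new %r19 are loaded from it.  (This
   reading follows PA bit-branch semantics; the bit numbering counts
   from the most significant bit.)  */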
/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
pa_attr_length_indirect_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS
      || (!TARGET_PORTABLE_RUNTIME
	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
	      || distance < MAX_PCREL17F_OFFSET)))
    return 8;

  if (flag_pic)
    return 24;

  if (TARGET_PORTABLE_RUNTIME)
    return 20;

  /* Out of reach, can use ble.  */
  return 12;
}

const char *
pa_output_indirect_call (rtx insn, rtx call_dest)
{
  rtx xoperands[1];

  if (TARGET_64BIT)
    {
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  if (pa_attr_length_indirect_call (insn) == 8)
    {
      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
	 variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
      else
	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (pa_attr_length_indirect_call (insn) == 12)
    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";

  /* Long millicode call for portable runtime.  */
  if (pa_attr_length_indirect_call (insn) == 20)
    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = NULL_RTX;
  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
  if (TARGET_SOM || !TARGET_GAS)
    {
      xoperands[0] = gen_label_rtx ();
      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (xoperands[0]));
      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
    }
  else
    {
      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
		       xoperands);
    }
  output_asm_insn ("blr %%r0,%%r2", xoperands);
  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
  return "";
}
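/* All of the $$dyncall variants above rely on the HP convention for
   indirect calls: the (possibly plabel) function pointer is passed in
   %r22, and $$dyncall resolves it and transfers control, leaving the
   return address in %r31 (hence the "copy %r31,%r2" in the short
   forms).  The fast variant skips $$dyncall entirely and branches
   through %r22 directly, which is only safe when function pointers are
   known to be plain code addresses, as on kernels and level 0
   systems.  */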
/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */

void
pa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}
static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
	pa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}
/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */

int
pa_is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
	  && function_label_operand (XEXP (op, 0), VOIDmode)
	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
}
8214 /* Output assembly code for a thunk to FUNCTION. */
8217 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8218 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8221 static unsigned int current_thunk_number
;
8222 int val_14
= VAL_14_BITS_P (delta
);
8223 unsigned int old_last_address
= last_address
, nbytes
= 0;
8227 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8228 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8229 xoperands
[2] = GEN_INT (delta
);
8231 ASM_OUTPUT_LABEL (file
, XSTR (xoperands
[1], 0));
8232 fprintf (file
, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8234 /* Output the thunk. We know that the function is in the same
8235 translation unit (i.e., the same space) as the thunk, and that
8236 thunks are output after their method. Thus, we don't need an
8237 external branch to reach the function. With SOM and GAS,
8238 functions and thunks are effectively in different sections.
8239 Thus, we can always use a IA-relative branch and the linker
8240 will add a long branch stub if necessary.
8242 However, we have to be careful when generating PIC code on the
8243 SOM port to ensure that the sequence does not transfer to an
8244 import stub for the target function as this could clobber the
8245 return value saved at SP-24. This would also apply to the
8246 32-bit linux port if the multi-space model is implemented. */
8247 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8248 && !(flag_pic
&& TREE_PUBLIC (function
))
8249 && (TARGET_GAS
|| last_address
< 262132))
8250 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8251 && ((targetm_common
.have_named_sections
8252 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8253 /* The GNU 64-bit linker has rather poor stub management.
8254 So, we use a long branch from thunks that aren't in
8255 the same section as the target function. */
8257 && (DECL_SECTION_NAME (thunk_fndecl
)
8258 != DECL_SECTION_NAME (function
)))
8259 || ((DECL_SECTION_NAME (thunk_fndecl
)
8260 == DECL_SECTION_NAME (function
))
8261 && last_address
< 262132)))
8262 || (targetm_common
.have_named_sections
8263 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8264 && DECL_SECTION_NAME (function
) == NULL
8265 && last_address
< 262132)
8266 || (!targetm_common
.have_named_sections
8267 && last_address
< 262132))))
8270 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8272 output_asm_insn ("b %0", xoperands
);
8276 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8281 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8285 else if (TARGET_64BIT
)
8287 /* We only have one call-clobbered scratch register, so we can't
8288 make use of the delay slot if delta doesn't fit in 14 bits. */
8291 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8292 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8295 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8299 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8300 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8304 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8305 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8310 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8311 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8316 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8320 else if (TARGET_PORTABLE_RUNTIME
)
8322 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8323 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8326 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8328 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8332 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8337 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* The function is accessible from outside this module.  The only
         way to avoid an import stub between the thunk and function is to
         call the function directly with an indirect sequence similar to
         that used by $$dyncall.  This is possible because $$dyncall acts
         as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          nbytes += 4;
        }

      if (TARGET_PA_20)
        {
          output_asm_insn ("bve (%%r22)", xoperands);
          nbytes += 36;
        }
      else if (TARGET_NO_SPACE_REGS)
        {
          output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
          nbytes += 36;
        }
      else
        {
          output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
          output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
          output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
          nbytes += 44;
        }

      if (val_14)
        output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
        output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);

      if (TARGET_SOM || !TARGET_GAS)
        {
          output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
        }

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 24;
        }
    }
  else
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 12;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 16;
        }
    }
  fprintf (file, "\t.EXIT\n\t.PROCEND\n");

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
         debug information.  Forget that we are in this subspace to ensure
         that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
            & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
}
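
/* As a worked illustration of the rounding above (the numbers are only
   an example): with a FUNCTION_BOUNDARY of 32 bits the alignment is 4
   bytes, so nbytes = 22 becomes (22 + 4 - 1) & ~(4 - 1) = 24.  In
   general, (n + a - 1) & ~(a - 1) rounds n up to the next multiple of
   the power-of-two alignment a.  */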
/* Only direct calls to static functions are allowed to be sibling (tail)
   call optimized.

   This restriction is necessary because some linker generated stubs will
   store return pointers into rp' in some cases which might clobber a
   live value already in rp'.

   In a sibcall the current function and the target function share stack
   space.  Thus if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */

static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
     single subspace mode and the call is not indirect.  As far as I know,
     there is no operating system support for the multiple subspace mode.
     It might be possible to support indirect calls if we didn't use
     $$dyncall (see the indirect sequence generated in pa_output_call).  */
  if (TARGET_ELF32)
    return (decl != NULL_TREE);

  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return (decl && !TREE_PUBLIC (decl));
}
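
/* For illustration only (the label is invented and the exact assembly
   depends on the target options): a sibcall to a local function can be
   emitted as a plain branch that reuses the caller's frame and return
   pointer,

       b,n local_target

   whereas a normal call must use a branch-and-link such as
   "bl target,%r2" that stores a new return pointer in rp'.  */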
/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */

static bool
pa_commutative_p (const_rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
          && (TARGET_NO_SPACE_REGS
              || (outer_code != UNKNOWN && outer_code != MEM)
              || GET_CODE (x) != PLUS));
}
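
/* Example of the problem this avoids (illustrative): in an address such
   as (mem (plus (reg A) (reg B))), the implicit space register is
   selected from the base operand, so (plus (reg B) (reg A)) may select
   a different space register even though the integer sum is identical.
   Hence PLUS may only be treated as commutative when it is provably not
   part of a memory address.  */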
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
int
pa_fmpyaddoperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
      || rtx_equal_p (operands[3], operands[1])
      || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply cannot feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}
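
/* Shape of the combined insn for illustration (modes elided; this is a
   sketch, not the pattern itself -- see the fmpyadd patterns in pa.md):

       (parallel [(set (op 0) (mult (op 1) (op 2)))
                  (set (op 3) (plus (op 4) (op 5)))])

   The checks above guarantee that the two halves are independent, so
   both operations can issue as a single fmpyadd instruction.  */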
#if !defined(USE_COLLECT2)
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}

static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
#endif
/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.  */

void
pa_asm_output_aligned_bss (FILE *stream,
                           const char *name,
                           unsigned HOST_WIDE_INT size,
                           unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
}
/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */

void
pa_asm_output_aligned_common (FILE *stream,
                              const char *name,
                              unsigned HOST_WIDE_INT size,
                              unsigned int align)
{
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
               "for global common data.  Using %u",
               align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
           MAX (size, align / BITS_PER_UNIT));
}
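
/* Worked example (illustrative): a 2-byte object that needs 8-byte
   alignment (align = 64 bits) is emitted as ".comm 8" rather than
   ".comm 2"; rounding NUM_BYTES up this way is the only handle we have
   on the alignment the SOM linker gives to common storage.  */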
/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
                             const char *name,
                             unsigned HOST_WIDE_INT size,
                             unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
}
/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.  */
int
pa_fmpysuboperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a
     commutative operation, so operands[4] must be the same as
     operands[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply cannot feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
      || rtx_equal_p (operands[3], operands[1])
      || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}
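
/* The constants 2, 4 and 8 correspond to the sh1add, sh2add and sh3add
   instruction forms.  For example (register choices are only
   illustrative),

       sh2add %r26,%r25,%r28   ; %r28 = (%r26 << 2) + %r25

   computes a 4-scaled index plus base in a single instruction.  */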
/* Return TRUE if INSN branches forward.  */

static bool
forward_branch_p (rtx insn)
{
  rtx lab = JUMP_LABEL (insn);

  /* The INSN must have a jump label.  */
  gcc_assert (lab != NULL_RTX);

  if (INSN_ADDRESSES_SET_P ())
    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));

  while (insn)
    {
      if (insn == lab)
        return true;
      else
        insn = NEXT_INSN (insn);
    }

  return false;
}
/* Return 1 if INSN is in the delay slot of a call instruction.  */
int
pa_jump_in_call_delay (rtx insn)
{
  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  if (PREV_INSN (insn)
      && PREV_INSN (PREV_INSN (insn))
      && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
    {
      rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));

      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
              && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
    }
  else
    return 0;
}
/* Output an unconditional move and branch insn.  */

const char *
pa_output_parallel_movb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        {
          /* Nothing in the delay slot, fake it by putting the combined
             insn (the copy or add) in the delay slot of a bl.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "b %2\n\tldi %1,%0";
          else
            return "b %2\n\tcopy %1,%0";
        }
      else
        {
          /* Something in the delay slot, but we've got a long branch.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "ldi %1,%0\n\tb %2";
          else
            return "copy %1,%0\n\tb %2";
        }
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return pa_output_lbranch (operands[2], insn, 1);
}
/* Output an unconditional add and branch insn.  */

const char *
pa_output_parallel_addb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      /* Nothing in the delay slot, fake it by putting the combined
         insn (the copy or add) in the delay slot of a bl.  */
      if (dbr_sequence_length () == 0)
        return "b %3\n\tadd%I1 %1,%0,%0";
      /* Something in the delay slot, but we've got a long branch.  */
      else
        return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return pa_output_lbranch (operands[3], insn, 1);
}
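
/* Winning case for illustration (register and label names invented):
   the short form above assembles to something like

       addb,tr %r4,%r26,L$0017   ; %r26 += %r4, then branch to L$0017

   i.e. the add and the always-taken (",tr" = true condition) branch
   issue as a single insn.  */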
/* Return nonzero if INSN (a jump insn) immediately follows a call
   to a named function.  This is used to avoid filling the delay slot
   of the jump since it can usually be eliminated by modifying RP in
   the delay slot of the call.  */

int
pa_following_call (rtx insn)
{
  if (! TARGET_JUMP_IN_DELAY)
    return 0;

  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
           && get_attr_type (insn) != TYPE_DYNCALL)
          || (GET_CODE (insn) == INSN
              && GET_CODE (PATTERN (insn)) != SEQUENCE
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER
              && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- e.g. each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by using insns with the attribute
   type TYPE_BTABLE_BRANCH.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes, first it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */
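
/* Sketch of the transformation (labels are invented): a table of
   addresses such as

       L$tbl:  .word L$1
               .word L$2

   is rewritten into real branch instructions

       L$tbl:  b L$1
                nop
               b L$2
                nop

   one per entry, each preceded by its own label and followed by a
   barrier, so reorg can later fill the delay slots like those of any
   other branch.  */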
static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          rtx pattern, tmp, location, label;
          unsigned int length, i;

          /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Emit marker for the beginning of the branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);

          pattern = PATTERN (insn);
          location = PREV_INSN (insn);
          length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

          for (i = 0; i < length; i++)
            {
              /* Emit a label before each jump to keep jump.c from
                 removing this code.  */
              tmp = gen_label_rtx ();
              LABEL_NUSES (tmp) = 1;
              emit_label_after (tmp, location);
              location = NEXT_INSN (location);

              if (GET_CODE (pattern) == ADDR_VEC)
                label = XEXP (XVECEXP (pattern, 0, i), 0);
              else
                label = XEXP (XVECEXP (pattern, 1, i), 0);

              tmp = gen_short_jump (label);

              /* Emit the jump itself.  */
              tmp = emit_jump_insn_after (tmp, location);
              JUMP_LABEL (tmp) = label;
              LABEL_NUSES (label)++;
              location = NEXT_INSN (location);

              /* Emit a BARRIER after the jump.  */
              emit_barrier_after (location);
              location = NEXT_INSN (location);
            }

          /* Emit marker for the end of the branch table.  */
          emit_insn_before (gen_end_brtab (), location);
          location = NEXT_INSN (location);
          emit_barrier_after (location);

          /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
          delete_insn (insn);
        }
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
         markers disables output of the branch table to readonly memory,
         and any alignment directives that might be needed.  Possibly,
         the begin_brtab insn should be output before the label for the
         table.  This doesn't matter at the moment since the tables are
         always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          /* Find an ADDR_VEC insn.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Now generate markers for the beginning and end of the
             branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);
          emit_insn_after (gen_end_brtab (), insn);
        }
    }
}
/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform a FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two ints or an int and a register, nullify
      the following instruction and zero some other register.  This
      is more difficult to use as it's harder to find an insn which
      will generate a comclr than finding something like an unconditional
      branch.  (conditional moves & long branches create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is an insn away).  These are difficult to use due to the
      branch length restrictions.  */
static void
pa_combine_instructions (void)
{
  rtx anchor, new_rtx;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new_rtx = make_insn_raw (new_rtx);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
           && GET_CODE (anchor) != JUMP_INSN
           && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }

              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backwards scan try forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  0),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx_PARALLEL
                                (VOIDmode,
                                 gen_rtvec (2, PATTERN (anchor),
                                            PATTERN (floater))),
                                anchor);

              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;
              /* Emit the new_jump instruction and delete the old anchor.  */
              temp
                = emit_jump_insn_before (gen_rtx_PARALLEL
                                         (VOIDmode,
                                          gen_rtvec (2, PATTERN (anchor),
                                                     PATTERN (floater))),
                                         anchor);

              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}
static int
pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  if (insn_code_number < 0
      || (extract_insn (new_rtx), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */

int
pa_insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}
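
/* Example of what this enables (the immediate is invented): reorg may
   move the setup of an argument register into the delay slot of a
   millicode call,

       bl $$mulI,%r31
        ldi 42,%r26      ; argument set "after" the branch

   which is safe precisely because the call's references to its argument
   registers are treated as delayed until after the branch.  */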
/* Promote the return value, but not the arguments.  */

static enum machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          enum machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (enum machine_mode mode,
                  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}
/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
                         const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = FUNCTION_ARG_SIZE (mode, type);

  cum->nargs_prototype--;
  cum->words += (arg_size
                 + ((cum->words & 01)
                    && type != NULL_TREE
                    && arg_size > 1));
}
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
                 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  bool singleword = (type
                     ? (integer_zerop (TYPE_SIZE (type))
                        || !TREE_CONSTANT (TYPE_SIZE (type))
                        || int_size_in_bytes (type) <= UNITS_PER_WORD)
                     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
                      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
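
/* Worked example (illustrative): with max_arg_words = 8, cum->words = 6
   and a four-word argument, the argument neither fits fully in registers
   (6 + 4 > 8) nor starts on the stack (6 < 8), so
   (8 - 6 - 0) * 8 = 16 bytes are passed in registers and the remaining
   16 bytes are passed on the stack.  */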
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
                             "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     hang.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc can
   not be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
/* Worker function for TARGET_RETURN_IN_MEMORY.  */

static bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
}
#endif
/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                             enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}


/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
        fputs ("\tdepi 0,31,2,%r21\n", f);
      else
        fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
        {
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
                                                            r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
}
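
/* Resulting 32-bit trampoline layout, summarized for reference (the
   offsets are the ones used above):

       0..35   instruction template copied from pa_asm_trampoline_template
       36      function address
       40      static chain value
       44      trampoline address  \ together these two words form the
       48      global pointer %r19 / plabel used for the indirect call

   The 64-bit layout instead places the plabel words at the start of
   the trampoline.  */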
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
  return addr;
}
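
/* Note on the constant 46 (our reading of the code, stated as an
   assumption): the plabel itself sits at offset 44; the extra 2
   appears to set the low-order plabel-marker bits in the function
   pointer, matching the "bb,>=,n %r21,30" test in the trampoline
   template above.  */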

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}
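
/* Informal sketch of the RTL handled above (editorial note): a PIC
   reference through the DLT has the form

     (lo_sum (reg) (unspec [(symbol_ref X)] UNSPEC_DLTIND14R))

   and is rewritten back to

     (mem (symbol_ref X))

   so that consumers such as debug output see the underlying symbol.  */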

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
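
/* Offset convention, with illustrative numbers (editorial note): the
   PA stack grows upward, so for a hypothetical 128-byte frame the
   elimination of HARD_FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM
   yields -128, and a reference to FP+8 is rewritten as
   SP + (-128 + 8).  */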

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
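
/* Register-numbering note (inferred from the port's register layout,
   not original commentary): regs 56 .. FP_REG_LAST correspond to
   %fr16-%fr31, which do not exist on PA 1.0, while the odd regs
   33 .. 55 are the 32-bit right halves of %fr4-%fr15, which are only
   separately addressable on PA 1.1 and later.  */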

/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}
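
/* Usage note (editorial): with this hook, a floating literal such as
   1.5q is given TFmode, i.e. a 128-bit constant, when the HP-UX long
   double library is in use; returning VOIDmode for other suffixes
   selects the default handling.  */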

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in the text section if the target doesn't have
     named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL_TREE
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}
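
/* Illustrative case (editorial): a GNU C nested function

     void outer (void) { void inner (void) { ... }  ...  }

   has a FUNCTION_DECL context and no explicit section name, so the
   check above places inner in outer's section rather than letting
   -ffunction-sections split the two apart.  */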

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLEs.  We also reject CONST_INTs
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTs.  We may also be able
   to handle CONST_DOUBLEs.  */

static bool
pa_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (PA_SYMBOL_REF_TLS_P (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (INTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}
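
/* Rough cost intuition for the CONST_INT test above (editorial
   gloss): a constant accepted by pa_cint_ok_for_move can be
   materialized in one or two instructions (e.g. an ldil/ldo pair),
   while a general 64-bit immediate may need five or more, so
   anything beyond the three-instruction budget described in the
   comment is better loaded from memory.  */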

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}
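
/* For example (illustrative declaration, editorial note): a decl
   placed with

     int x __attribute__ ((section (".data.rel.ro"))) = 1;

   would otherwise be emitted into a read-only section; because
   function labels on this port live in the constant pool and can be
   dragged into the same section, the section is forced writable and
   RELRO to avoid a section type conflict.  */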