/* Subroutines for insn-output.cc for HPPA.
   Copyright (C) 1992-2024 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static tree pa_builtin_decl (unsigned, bool);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool,
				     code_helper = ERROR_MARK);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL pa_builtin_decl

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV pa_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* 64-bit target is always PIE.  */
  if (TARGET_64BIT)
    flag_pie = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Disable -fstack-protector to suppress warning.  */
  flag_stack_protect = 0;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  /* FPU builtins.  */
  PA_BUILTIN_GET_FPSR,
  PA_BUILTIN_SET_FPSR,

  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static GTY(()) enum insn_code pa_builtins_icode[(int) PA_BUILTIN_max];

/* Add a PA builtin function with NAME, ICODE, CODE and TYPE.  Return the
   function decl or NULL_TREE if the builtin was not added.  */

static tree
def_builtin (const char *name, enum insn_code icode, enum pa_builtins code,
	     tree type)
{
  tree t
    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);

  if (t)
    {
      pa_builtins[code] = t;
      pa_builtins_icode[code] = icode;
    }

  return t;
}

/* Create builtin functions for FPU instructions.  */

static void
pa_fpu_init_builtins (void)
{
  tree ftype;

  ftype = build_function_type_list (unsigned_type_node, 0);
  def_builtin ("__builtin_get_fpsr", CODE_FOR_get_fpsr,
	       PA_BUILTIN_GET_FPSR, ftype);
  ftype = build_function_type_list (void_type_node, unsigned_type_node, 0);
  def_builtin ("__builtin_set_fpsr", CODE_FOR_set_fpsr,
	       PA_BUILTIN_SET_FPSR, ftype);
}

static void
pa_init_builtins (void)
{
  if (!TARGET_SOFT_FLOAT)
    pa_fpu_init_builtins ();

#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

/* Implement TARGET_BUILTIN_DECL.  */

static tree
pa_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= PA_BUILTIN_max)
    return error_mark_node;
  return pa_builtins[code];
}

static rtx
pa_expand_builtin_1 (tree exp, rtx target,
		     rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode tmode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum pa_builtins code
    = (enum pa_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = pa_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_GET_FPSR:
    case PA_BUILTIN_SET_FPSR:
      return pa_expand_builtin_1 (exp, target, subtarget, mode, ignore);

    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
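
/* For instance, ldo covers 14-bit signed immediates such as -2048,
   ldil covers values like 0x12345000 whose low 11 bits are clear, and
   zdepi covers deposit patterns such as the contiguous run 0x00fe0000;
   a constant like 0x12345678 matches none of these and therefore needs
   more than one instruction.  */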

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
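
/* As a worked example: 0x12345000 masks to zero (low 11 bits clear,
   bit 31 clear) and the sign-extended negative value 0xfffffffff2345000
   masks to (unsigned HOST_WIDE_INT) -1 << 31, so both are accepted;
   0x12345678 leaves 0x678 behind and is rejected.  */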

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
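
/* To see the trick in action: for x = 0x7f0 (ones in bits 4..10),
   lsb_mask = 0x10 and t = ((0x7f0 >> 4) + 0x10) & ~0xf = 0x80, a power
   of two, so the value is accepted.  For x = 0x101 (two isolated bits),
   t = 0x11, which is not a power of two, so it is rejected.  */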

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
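
/* For example, mask = 0x3f (pattern 0....01....1) is accepted: adding
   the lowest set bit of ~mask wraps the complement around to zero.
   mask = 0x5 is rejected because ~mask = 0x...fa becomes 0x...fc,
   which still has more than one bit set.  */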

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
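
/* In other words, MASK must be a single contiguous run of ones:
   0xff0 + 0x10 = 0x1000 has one bit set and is accepted, whereas
   0x5 + 0x1 = 0x6 has two bits set and is rejected.  */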

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
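
/* Concretely, (ashift (reg) (const_int 2)) and (mult (reg) (const_int 4))
   denote the same scaled address and are both accepted, while
   (mult (reg) (const_int 6)) is rejected because 6 is not one of the
   supported scale factors 2, 4 and 8.  */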

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));
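
   For example, with a MODE_INT reference (mask 0x3fff) and
   <large int> = 0x7000: 0x7000 & 0x3fff = 0x3000, which is at least
   halfway to the next boundary, so Y = 0x8000, Z = X + 0x8000, and we
   emit memory (Z + -0x1000), where the 14-bit displacement -0x1000
   is cheap to encode.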

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most MODE_FLOAT references will use FP
   registers and most MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
1679 /* Compute a (partial) cost for rtx X. Return true if the complete
1680 cost has been computed, and false if subexpressions should be
1681 scanned. In either case, *TOTAL contains the cost result. */
1683 static bool
1684 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1685 int opno ATTRIBUTE_UNUSED,
1686 int *total, bool speed)
1688 int code = GET_CODE (x);
1690 switch (code)
1692 case CONST_INT:
1693 if (outer_code == SET)
1694 *total = COSTS_N_INSNS (1);
1695 else if (INTVAL (x) == 0)
1696 *total = 0;
1697 else if (INT_14_BITS (x))
1698 *total = 1;
1699 else
1700 *total = 2;
1701 return true;
1703 case HIGH:
1704 *total = 2;
1705 return true;
1707 case CONST:
1708 case LABEL_REF:
1709 case SYMBOL_REF:
1710 *total = 4;
1711 return true;
1713 case CONST_DOUBLE:
1714 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1715 && outer_code != SET)
1716 *total = 0;
1717 else
1718 *total = 8;
1719 return true;
1721 case MULT:
1722 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1724 *total = COSTS_N_INSNS (3);
1726 else if (mode == DImode)
1728 if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1729 *total = COSTS_N_INSNS (25);
1730 else
1731 *total = COSTS_N_INSNS (80);
1733 else
1735 if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1736 *total = COSTS_N_INSNS (8);
1737 else
1738 *total = COSTS_N_INSNS (20);
1740 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1742 case DIV:
1743 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1745 *total = COSTS_N_INSNS (14);
1746 return false;
1748 /* FALLTHRU */
1750 case UDIV:
1751 case MOD:
1752 case UMOD:
1753 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1754 if (mode == DImode)
1755 *total = COSTS_N_INSNS (240);
1756 else
1757 *total = COSTS_N_INSNS (60);
1758 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1760 case PLUS: /* this includes shNadd insns */
1761 case MINUS:
1762 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1763 *total = COSTS_N_INSNS (3);
1764 else if (mode == DImode)
1766 if (TARGET_64BIT)
1768 *total = COSTS_N_INSNS (1);
1769 /* Handle shladd,l instructions. */
1770 if (hppa_rtx_costs_shadd_p (x))
1771 return true;
1773 else
1774 *total = COSTS_N_INSNS (2);
1776 else
1778 *total = COSTS_N_INSNS (1);
1779 /* Handle shNadd instructions. */
1780 if (hppa_rtx_costs_shadd_p (x))
1781 return true;
1783 return REG_P (XEXP (x, 0))
1784 && (REG_P (XEXP (x, 1))
1785 || CONST_INT_P (XEXP (x, 1)));
1787 case ASHIFT:
1788 if (mode == DImode)
1790 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1792 if (TARGET_64BIT)
1793 *total = COSTS_N_INSNS (1);
1794 else
1795 *total = COSTS_N_INSNS (2);
1796 return true;
1798 else if (TARGET_64BIT)
1799 *total = COSTS_N_INSNS (3);
1800 else if (speed)
1801 *total = COSTS_N_INSNS (13);
1802 else
1803 *total = COSTS_N_INSNS (18);
1805 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1807 if (TARGET_64BIT)
1808 *total = COSTS_N_INSNS (2);
1809 else
1810 *total = COSTS_N_INSNS (1);
1811 return true;
1813 else if (TARGET_64BIT)
1814 *total = COSTS_N_INSNS (4);
1815 else
1816 *total = COSTS_N_INSNS (2);
1817 return REG_P (XEXP (x, 0))
1818 && (REG_P (XEXP (x, 1))
1819 || CONST_INT_P (XEXP (x, 1)));
1821 case ASHIFTRT:
1822 if (mode == DImode)
1824 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1826 if (TARGET_64BIT)
1827 *total = COSTS_N_INSNS (1);
1828 else
1829 *total = COSTS_N_INSNS (2);
1830 return true;
1832 else if (TARGET_64BIT)
1833 *total = COSTS_N_INSNS (3);
1834 else if (speed)
1835 *total = COSTS_N_INSNS (14);
1836 else
1837 *total = COSTS_N_INSNS (19);
1839 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1841 if (TARGET_64BIT)
1842 *total = COSTS_N_INSNS (2);
1843 else
1844 *total = COSTS_N_INSNS (1);
1845 return true;
1847 else if (TARGET_64BIT)
1848 *total = COSTS_N_INSNS (4);
1849 else
1850 *total = COSTS_N_INSNS (2);
1851 return REG_P (XEXP (x, 0))
1852 && (REG_P (XEXP (x, 1))
1853 || CONST_INT_P (XEXP (x, 1)));
1855 case LSHIFTRT:
1856 if (mode == DImode)
1858 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1860 if (TARGET_64BIT)
1861 *total = COSTS_N_INSNS (1);
1862 else
1863 *total = COSTS_N_INSNS (2);
1864 return true;
1866 else if (TARGET_64BIT)
1867 *total = COSTS_N_INSNS (2);
1868 else if (speed)
1869 *total = COSTS_N_INSNS (12);
1870 else
1871 *total = COSTS_N_INSNS (15);
1873 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1875 *total = COSTS_N_INSNS (1);
1876 return true;
1878 else if (TARGET_64BIT)
1879 *total = COSTS_N_INSNS (3);
1880 else
1881 *total = COSTS_N_INSNS (2);
1882 return REG_P (XEXP (x, 0))
1883 && (REG_P (XEXP (x, 1))
1884 || CONST_INT_P (XEXP (x, 1)));
1886 default:
1887 return false;
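/* Worked example (illustration only): with the MULT case above, a
   query for (mult:SI (reg:SI 3) (reg:SI 4)) yields COSTS_N_INSNS (8)
   on PA 1.1 with hardware multiply support and COSTS_N_INSNS (20)
   when a libcall must be used instead.  */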
1891 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1892 new rtx with the correct mode. */
1893 static inline rtx
1894 force_mode (machine_mode mode, rtx orig)
1896 if (mode == GET_MODE (orig))
1897 return orig;
1899 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1901 return gen_rtx_REG (mode, REGNO (orig));
1904 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1906 static bool
1907 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1909 return tls_referenced_p (x);
1912 /* Emit insns to move operands[1] into operands[0].
1914 Return 1 if we have written out everything that needs to be done to
1915 do the move. Otherwise, return 0 and the caller will emit the move
1916 normally.
1918 Note SCRATCH_REG may not be in the proper mode depending on how it
1919 will be used. This routine is responsible for creating a new copy
1920 of SCRATCH_REG in the proper mode. */
1923 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1925 rtx operand0 = operands[0];
1926 rtx operand1 = operands[1];
1927 rtx tem;
1929 /* We can only handle indexed addresses in the destination operand
1930 of floating point stores. Thus, we need to break out indexed
1931 addresses from the destination operand. */
1932 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1934 gcc_assert (can_create_pseudo_p ());
1936 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1937 operand0 = replace_equiv_address (operand0, tem);
1940 /* On targets with non-equivalent space registers, break out unscaled
1941 indexed addresses from the source operand before the final CSE.
1942 We have to do this because the REG_POINTER flag is not correctly
1943 carried through various optimization passes and CSE may substitute
1944 a pseudo without the pointer set for one with the pointer set. As
1945 a result, we lose various opportunities to create insns with
1946 unscaled indexed addresses. */
1947 if (!TARGET_NO_SPACE_REGS
1948 && !cse_not_expected
1949 && GET_CODE (operand1) == MEM
1950 && GET_CODE (XEXP (operand1, 0)) == PLUS
1951 && REG_P (XEXP (XEXP (operand1, 0), 0))
1952 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1953 operand1
1954 = replace_equiv_address (operand1,
1955 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1957 if (scratch_reg
1958 && reload_in_progress && GET_CODE (operand0) == REG
1959 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1960 operand0 = reg_equiv_mem (REGNO (operand0));
1961 else if (scratch_reg
1962 && reload_in_progress && GET_CODE (operand0) == SUBREG
1963 && GET_CODE (SUBREG_REG (operand0)) == REG
1964 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1966 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1967 the code which tracks sets/uses for delete_output_reload. */
1968 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1969 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1970 SUBREG_BYTE (operand0));
1971 operand0 = alter_subreg (&temp, true);
1974 if (scratch_reg
1975 && reload_in_progress && GET_CODE (operand1) == REG
1976 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1977 operand1 = reg_equiv_mem (REGNO (operand1));
1978 else if (scratch_reg
1979 && reload_in_progress && GET_CODE (operand1) == SUBREG
1980 && GET_CODE (SUBREG_REG (operand1)) == REG
1981 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1983 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1984 the code which tracks sets/uses for delete_output_reload. */
1985 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1986 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1987 SUBREG_BYTE (operand1));
1988 operand1 = alter_subreg (&temp, true);
1991 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1992 && ((tem = find_replacement (&XEXP (operand0, 0)))
1993 != XEXP (operand0, 0)))
1994 operand0 = replace_equiv_address (operand0, tem);
1996 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1997 && ((tem = find_replacement (&XEXP (operand1, 0)))
1998 != XEXP (operand1, 0)))
1999 operand1 = replace_equiv_address (operand1, tem);
2001 /* Handle secondary reloads for loads/stores of FP registers from
2002 REG+D addresses where D does not fit in 5 or 14 bits, including
2003 (subreg (mem (addr))) cases, and reloads for other unsupported
2004 memory operands. */
2005 if (scratch_reg
2006 && FP_REG_P (operand0)
2007 && (MEM_P (operand1)
2008 || (GET_CODE (operand1) == SUBREG
2009 && MEM_P (XEXP (operand1, 0)))))
2011 rtx op1 = operand1;
2013 if (GET_CODE (op1) == SUBREG)
2014 op1 = XEXP (op1, 0);
2016 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
2018 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
2019 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
2021 /* SCRATCH_REG will hold an address and maybe the actual data.
2022 We want it in WORD_MODE regardless of what mode it was
2023 originally given to us. */
2024 scratch_reg = force_mode (word_mode, scratch_reg);
2026 /* D might not fit in 14 bits either; for such cases load D
2027 into scratch reg. */
2028 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
2030 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
2031 emit_move_insn (scratch_reg,
2032 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
2033 Pmode,
2034 XEXP (XEXP (op1, 0), 0),
2035 scratch_reg));
2037 else
2038 emit_move_insn (scratch_reg, XEXP (op1, 0));
2039 op1 = replace_equiv_address (op1, scratch_reg);
2042 else if (((TARGET_ELF32 || !TARGET_PA_20)
2043 && symbolic_memory_operand (op1, VOIDmode))
2044 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
2045 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
2047 /* Load memory address into SCRATCH_REG. */
2048 scratch_reg = force_mode (word_mode, scratch_reg);
2049 emit_move_insn (scratch_reg, XEXP (op1, 0));
2050 op1 = replace_equiv_address (op1, scratch_reg);
2052 emit_insn (gen_rtx_SET (operand0, op1));
2053 return 1;
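/* Worked example (illustration only; register numbers assumed): a
   reload of

     (set (reg:SF fr22) (mem:SF (plus (reg:SI 3) (const_int 16384))))

   has a displacement that fits in neither 5 nor 14 bits, so the code
   above first loads 16384 into SCRATCH_REG, adds in the base register,
   and rewrites the MEM to address through SCRATCH_REG before emitting
   the FP load.  */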
2055 else if (scratch_reg
2056 && FP_REG_P (operand1)
2057 && (MEM_P (operand0)
2058 || (GET_CODE (operand0) == SUBREG
2059 && MEM_P (XEXP (operand0, 0)))))
2061 rtx op0 = operand0;
2063 if (GET_CODE (op0) == SUBREG)
2064 op0 = XEXP (op0, 0);
2066 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
2068 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
2069 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
2071 /* SCRATCH_REG will hold an address and maybe the actual data.
2072 We want it in WORD_MODE regardless of what mode it was
2073 originally given to us. */
2074 scratch_reg = force_mode (word_mode, scratch_reg);
2076 /* D might not fit in 14 bits either; for such cases load D
2077 into scratch reg. */
2078 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
2080 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
2081 emit_move_insn (scratch_reg,
2082 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
2083 Pmode,
2084 XEXP (XEXP (op0, 0), 0),
2085 scratch_reg));
2087 else
2088 emit_move_insn (scratch_reg, XEXP (op0, 0));
2089 op0 = replace_equiv_address (op0, scratch_reg);
2092 else if (((TARGET_ELF32 || !TARGET_PA_20)
2093 && symbolic_memory_operand (op0, VOIDmode))
2094 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
2095 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
2097 /* Load memory address into SCRATCH_REG. */
2098 scratch_reg = force_mode (word_mode, scratch_reg);
2099 emit_move_insn (scratch_reg, XEXP (op0, 0));
2100 op0 = replace_equiv_address (op0, scratch_reg);
2102 emit_insn (gen_rtx_SET (op0, operand1));
2103 return 1;
2105 /* Handle secondary reloads for loads of FP registers from constant
2106 expressions by forcing the constant into memory. For the most part,
2107 this is only necessary for SImode and DImode.
2109 Use scratch_reg to hold the address of the memory location. */
2110 else if (scratch_reg
2111 && CONSTANT_P (operand1)
2112 && FP_REG_P (operand0))
2114 rtx const_mem, xoperands[2];
2116 if (operand1 == CONST0_RTX (mode))
2118 emit_insn (gen_rtx_SET (operand0, operand1));
2119 return 1;
2122 /* SCRATCH_REG will hold an address and maybe the actual data. We want
2123 it in WORD_MODE regardless of what mode it was originally given
2124 to us. */
2125 scratch_reg = force_mode (word_mode, scratch_reg);
2127 /* Force the constant into memory and put the address of the
2128 memory location into scratch_reg. */
2129 const_mem = force_const_mem (mode, operand1);
2130 xoperands[0] = scratch_reg;
2131 xoperands[1] = XEXP (const_mem, 0);
2132 pa_emit_move_sequence (xoperands, Pmode, 0);
2134 /* Now load the destination register. */
2135 emit_insn (gen_rtx_SET (operand0,
2136 replace_equiv_address (const_mem, scratch_reg)));
2137 return 1;
2139 /* Handle secondary reloads for SAR. These occur when trying to load
2140 the SAR from memory or a constant. */
2141 else if (scratch_reg
2142 && GET_CODE (operand0) == REG
2143 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2144 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2145 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2147 /* D might not fit in 14 bits either; for such cases load D into
2148 scratch reg. */
2149 if (GET_CODE (operand1) == MEM
2150 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2152 /* We are reloading the address into the scratch register, so we
2153 want to make sure the scratch register is a full register. */
2154 scratch_reg = force_mode (word_mode, scratch_reg);
2156 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2157 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2158 0)),
2159 Pmode,
2160 XEXP (XEXP (operand1, 0),
2161 0),
2162 scratch_reg));
2164 /* Now we are going to load the scratch register from memory;
2165 we want to load it in the same width as the original MEM,
2166 which must be the same as the width of the ultimate destination,
2167 OPERAND0. */
2168 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2170 emit_move_insn (scratch_reg,
2171 replace_equiv_address (operand1, scratch_reg));
2173 else
2175 /* We want to load the scratch register using the same mode as
2176 the ultimate destination. */
2177 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2179 emit_move_insn (scratch_reg, operand1);
2182 /* And emit the insn to set the ultimate destination. We know that
2183 the scratch register has the same mode as the destination at this
2184 point. */
2185 emit_move_insn (operand0, scratch_reg);
2186 return 1;
2189 /* Handle the most common case: storing into a register. */
2190 if (register_operand (operand0, mode))
2192 /* Legitimize TLS symbol references. This happens for references
2193 that aren't legitimate constants. */
2194 if (PA_SYMBOL_REF_TLS_P (operand1))
2195 operand1 = legitimize_tls_address (operand1);
2197 if (register_operand (operand1, mode)
2198 || (GET_CODE (operand1) == CONST_INT
2199 && pa_cint_ok_for_move (UINTVAL (operand1)))
2200 || (operand1 == CONST0_RTX (mode))
2201 || (GET_CODE (operand1) == HIGH
2202 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2203 /* Only `general_operands' can come here, so MEM is ok. */
2204 || GET_CODE (operand1) == MEM)
2206 /* Various sets are created during RTL generation which don't
2207 have the REG_POINTER flag correctly set. After the CSE pass,
2208 instruction recognition can fail if we don't consistently
2209 set this flag when performing register copies. This should
2210 also improve the opportunities for creating insns that use
2211 unscaled indexing. */
2212 if (REG_P (operand0) && REG_P (operand1))
2214 if (REG_POINTER (operand1)
2215 && !REG_POINTER (operand0)
2216 && !HARD_REGISTER_P (operand0))
2217 copy_reg_pointer (operand0, operand1);
2220 /* When MEMs are broken out, the REG_POINTER flag doesn't
2221 get set. In some cases, we can set the REG_POINTER flag
2222 from the declaration for the MEM. */
2223 if (REG_P (operand0)
2224 && GET_CODE (operand1) == MEM
2225 && !REG_POINTER (operand0))
2227 tree decl = MEM_EXPR (operand1);
2229 /* Set the register pointer flag and register alignment
2230 if the declaration for this memory reference is a
2231 pointer type. */
2232 if (decl)
2234 tree type;
2236 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2237 tree operand 1. */
2238 if (TREE_CODE (decl) == COMPONENT_REF)
2239 decl = TREE_OPERAND (decl, 1);
2241 type = TREE_TYPE (decl);
2242 type = strip_array_types (type);
2244 if (POINTER_TYPE_P (type))
2245 mark_reg_pointer (operand0, BITS_PER_UNIT);
2249 emit_insn (gen_rtx_SET (operand0, operand1));
2250 return 1;
2253 else if (GET_CODE (operand0) == MEM)
2255 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2256 && !(reload_in_progress || reload_completed))
2258 rtx temp = gen_reg_rtx (DFmode);
2260 emit_insn (gen_rtx_SET (temp, operand1));
2261 emit_insn (gen_rtx_SET (operand0, temp));
2262 return 1;
2264 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2266 /* Run this case quickly. */
2267 emit_insn (gen_rtx_SET (operand0, operand1));
2268 return 1;
2270 if (! (reload_in_progress || reload_completed))
2272 operands[0] = validize_mem (operand0);
2273 operands[1] = operand1 = force_reg (mode, operand1);
2277 /* Simplify the source if we need to.
2278 Note we do have to handle function labels here, even though we do
2279 not consider them legitimate constants. Loop optimizations can
2280 call the emit_move_xxx routines with one as a source. */
2281 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2282 || (GET_CODE (operand1) == HIGH
2283 && symbolic_operand (XEXP (operand1, 0), mode))
2284 || function_label_operand (operand1, VOIDmode)
2285 || tls_referenced_p (operand1))
2287 int ishighonly = 0;
2289 if (GET_CODE (operand1) == HIGH)
2291 ishighonly = 1;
2292 operand1 = XEXP (operand1, 0);
2294 if (symbolic_operand (operand1, mode))
2296 /* Argh. The assembler and linker can't handle arithmetic
2297 involving plabels.
2299 So we force the plabel into memory, load operand0 from
2300 the memory location, then add in the constant part. */
2301 if ((GET_CODE (operand1) == CONST
2302 && GET_CODE (XEXP (operand1, 0)) == PLUS
2303 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2304 VOIDmode))
2305 || function_label_operand (operand1, VOIDmode))
2307 rtx temp, const_part;
2309 /* Figure out what (if any) scratch register to use. */
2310 if (reload_in_progress || reload_completed)
2312 scratch_reg = scratch_reg ? scratch_reg : operand0;
2313 /* SCRATCH_REG will hold an address and maybe the actual
2314 data. We want it in WORD_MODE regardless of what mode it
2315 was originally given to us. */
2316 scratch_reg = force_mode (word_mode, scratch_reg);
2318 else if (flag_pic)
2319 scratch_reg = gen_reg_rtx (Pmode);
2321 if (GET_CODE (operand1) == CONST)
2323 /* Save away the constant part of the expression. */
2324 const_part = XEXP (XEXP (operand1, 0), 1);
2325 gcc_assert (GET_CODE (const_part) == CONST_INT);
2327 /* Force the function label into memory. */
2328 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2330 else
2332 /* No constant part. */
2333 const_part = NULL_RTX;
2335 /* Force the function label into memory. */
2336 temp = force_const_mem (mode, operand1);
2340 /* Get the address of the memory location. PIC-ify it if
2341 necessary. */
2342 temp = XEXP (temp, 0);
2343 if (flag_pic)
2344 temp = legitimize_pic_address (temp, mode, scratch_reg);
2346 /* Put the address of the memory location into our destination
2347 register. */
2348 operands[1] = temp;
2349 pa_emit_move_sequence (operands, mode, scratch_reg);
2351 /* Now load from the memory location into our destination
2352 register. */
2353 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2354 pa_emit_move_sequence (operands, mode, scratch_reg);
2356 /* And add back in the constant part. */
2357 if (const_part != NULL_RTX)
2358 expand_inc (operand0, const_part);
2360 return 1;
2363 if (flag_pic)
2365 rtx_insn *insn;
2366 rtx temp;
2368 if (reload_in_progress || reload_completed)
2370 temp = scratch_reg ? scratch_reg : operand0;
2371 /* TEMP will hold an address and maybe the actual
2372 data. We want it in WORD_MODE regardless of what mode it
2373 was originally given to us. */
2374 temp = force_mode (word_mode, temp);
2376 else
2377 temp = gen_reg_rtx (Pmode);
2379 /* Force (const (plus (symbol) (const_int))) to memory
2380 if the const_int will not fit in 14 bits. Although
2381 this requires a relocation, the instruction sequence
2382 needed to load the value is shorter. */
2383 if (GET_CODE (operand1) == CONST
2384 && GET_CODE (XEXP (operand1, 0)) == PLUS
2385 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2386 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2388 rtx x, m = force_const_mem (mode, operand1);
2390 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2391 x = replace_equiv_address (m, x);
2392 insn = emit_move_insn (operand0, x);
2394 else
2396 operands[1] = legitimize_pic_address (operand1, mode, temp);
2397 if (REG_P (operand0) && REG_P (operands[1]))
2398 copy_reg_pointer (operand0, operands[1]);
2399 insn = emit_move_insn (operand0, operands[1]);
2402 /* Put a REG_EQUAL note on this insn. */
2403 set_unique_reg_note (insn, REG_EQUAL, operand1);
2405 /* On the HPPA, references to data space are supposed to use dp,
2406 register 27, but showing it in the RTL inhibits various cse
2407 and loop optimizations. */
2408 else
2410 rtx temp, set;
2412 if (reload_in_progress || reload_completed)
2414 temp = scratch_reg ? scratch_reg : operand0;
2415 /* TEMP will hold an address and maybe the actual
2416 data. We want it in WORD_MODE regardless of what mode it
2417 was originally given to us. */
2418 temp = force_mode (word_mode, temp);
2420 else
2421 temp = gen_reg_rtx (mode);
2423 /* Loading a SYMBOL_REF into a register makes that register
2424 safe to be used as the base in an indexed address.
2426 Don't mark hard registers though. That loses. */
2427 if (GET_CODE (operand0) == REG
2428 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2429 mark_reg_pointer (operand0, BITS_PER_UNIT);
2430 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2431 mark_reg_pointer (temp, BITS_PER_UNIT);
2433 if (ishighonly)
2434 set = gen_rtx_SET (operand0, temp);
2435 else
2436 set = gen_rtx_SET (operand0,
2437 gen_rtx_LO_SUM (mode, temp, operand1));
2439 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2440 emit_insn (set);
2443 return 1;
2445 else if (tls_referenced_p (operand1))
2447 rtx tmp = operand1;
2448 rtx addend = NULL;
2450 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2452 addend = XEXP (XEXP (tmp, 0), 1);
2453 tmp = XEXP (XEXP (tmp, 0), 0);
2456 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2457 tmp = legitimize_tls_address (tmp);
2458 if (addend)
2460 tmp = gen_rtx_PLUS (mode, tmp, addend);
2461 tmp = force_operand (tmp, operands[0]);
2463 operands[1] = tmp;
2465 else if (GET_CODE (operand1) != CONST_INT
2466 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2468 rtx temp;
2469 rtx_insn *insn;
2470 rtx op1 = operand1;
2471 HOST_WIDE_INT value = 0;
2472 HOST_WIDE_INT insv = 0;
2473 int insert = 0;
2475 if (GET_CODE (operand1) == CONST_INT)
2476 value = INTVAL (operand1);
2478 if (TARGET_64BIT
2479 && GET_CODE (operand1) == CONST_INT
2480 && HOST_BITS_PER_WIDE_INT > 32
2481 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2483 HOST_WIDE_INT nval;
2485 /* Extract the low order 32 bits of the value and sign extend.
2486 If the new value is the same as the original value, we can
2488 use the original value as-is. If the new value is
2488 different, we use it and insert the most-significant 32-bits
2489 of the original value into the final result. */
2490 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2491 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2492 if (value != nval)
2494 #if HOST_BITS_PER_WIDE_INT > 32
2495 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2496 #endif
2497 insert = 1;
2498 value = nval;
2499 operand1 = GEN_INT (nval);
2503 if (reload_in_progress || reload_completed)
2504 temp = scratch_reg ? scratch_reg : operand0;
2505 else
2506 temp = gen_reg_rtx (mode);
2508 /* We don't directly split DImode constants on 32-bit targets
2509 because PLUS uses an 11-bit immediate and the insn sequence
2510 generated is not as efficient as the one using HIGH/LO_SUM. */
2511 if (GET_CODE (operand1) == CONST_INT
2512 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2513 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2514 && !insert)
2516 /* Directly break constant into high and low parts. This
2517 provides better optimization opportunities because various
2518 passes recognize constants split with PLUS but not LO_SUM.
2519 We use a 14-bit signed low part except when the addition
2520 of 0x4000 to the high part might change the sign of the
2521 high part. */
2522 HOST_WIDE_INT low = value & 0x3fff;
2523 HOST_WIDE_INT high = value & ~ 0x3fff;
2525 if (low >= 0x2000)
2527 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2528 high += 0x2000;
2529 else
2530 high += 0x4000;
2533 low = value - high;
2535 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2536 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
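/* Worked examples (illustration only): for VALUE 0x12345678, low is
   0x1678 and high is 0x12344000, and low is below 0x2000, so no
   adjustment is needed.  For VALUE 0x12347fff, low starts as 0x3fff,
   so high is bumped to 0x12348000 and low becomes -1, keeping the
   low part within the signed 14-bit range.  */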
2538 else
2540 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2541 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2544 insn = emit_move_insn (operands[0], operands[1]);
2546 /* Now insert the most significant 32 bits of the value
2547 into the register. When we don't have a second register
2548 available, it could take up to nine instructions to load
2549 a 64-bit integer constant. Prior to reload, we force
2550 constants that would take more than three instructions
2551 to load to the constant pool. During and after reload,
2552 we have to handle all possible values. */
2553 if (insert)
2555 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2556 register and the value to be inserted is outside the
2557 range that can be loaded with three depdi instructions. */
2558 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2560 operand1 = GEN_INT (insv);
2562 emit_insn (gen_rtx_SET (temp,
2563 gen_rtx_HIGH (mode, operand1)));
2564 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2565 if (mode == DImode)
2566 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2567 const0_rtx, temp));
2568 else
2569 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2570 const0_rtx, temp));
2572 else
2574 int len = 5, pos = 27;
2576 /* Insert the bits using the depdi instruction. */
2577 while (pos >= 0)
2579 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2580 HOST_WIDE_INT sign = v5 < 0;
2582 /* Left extend the insertion. */
2583 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2584 while (pos > 0 && (insv & 1) == sign)
2586 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2587 len += 1;
2588 pos -= 1;
2591 if (mode == DImode)
2592 insn = emit_insn (gen_insvdi (operand0,
2593 GEN_INT (len),
2594 GEN_INT (pos),
2595 GEN_INT (v5)));
2596 else
2597 insn = emit_insn (gen_insvsi (operand0,
2598 GEN_INT (len),
2599 GEN_INT (pos),
2600 GEN_INT (v5)));
2602 len = pos > 0 && pos < 5 ? pos : 5;
2603 pos -= len;
2608 set_unique_reg_note (insn, REG_EQUAL, op1);
2610 return 1;
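/* Worked example (illustration only): on a 64-bit target, loading
   0x100000005 splits into the sign-extended low word (5), which is
   loaded first, and the upper word (1), which the code above then
   deposits into bits 32-63 of the destination.  */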
2613 /* Now have insn-emit do whatever it normally does. */
2614 return 0;
2617 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2618 it will need a link/runtime reloc). */
2621 pa_reloc_needed (tree exp)
2623 int reloc = 0;
2625 switch (TREE_CODE (exp))
2627 case ADDR_EXPR:
2628 return 1;
2630 case POINTER_PLUS_EXPR:
2631 case PLUS_EXPR:
2632 case MINUS_EXPR:
2633 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2634 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2635 break;
2637 CASE_CONVERT:
2638 case NON_LVALUE_EXPR:
2639 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2640 break;
2642 case CONSTRUCTOR:
2644 tree value;
2645 unsigned HOST_WIDE_INT ix;
2647 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2648 if (value)
2649 reloc |= pa_reloc_needed (value);
2651 break;
2653 case ERROR_MARK:
2654 break;
2656 default:
2657 break;
2659 return reloc;
2663 /* Return the best assembler insn template
2664 for moving operands[1] into operands[0] as a fullword. */
2665 const char *
2666 pa_singlemove_string (rtx *operands)
2668 HOST_WIDE_INT intval;
2670 if (GET_CODE (operands[0]) == MEM)
2671 return "stw %r1,%0";
2672 if (GET_CODE (operands[1]) == MEM)
2673 return "ldw %1,%0";
2674 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2676 long i;
2678 gcc_assert (GET_MODE (operands[1]) == SFmode);
2680 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2681 bit pattern. */
2682 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2684 operands[1] = GEN_INT (i);
2685 /* Fall through to CONST_INT case. */
2687 if (GET_CODE (operands[1]) == CONST_INT)
2689 intval = INTVAL (operands[1]);
2691 if (VAL_14_BITS_P (intval))
2692 return "ldi %1,%0";
2693 else if ((intval & 0x7ff) == 0)
2694 return "ldil L'%1,%0";
2695 else if (pa_zdepi_cint_p (intval))
2696 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2697 else
2698 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2700 return "copy %1,%0";
2704 /* Compute position (in OP[1]) and width (in OP[2])
2705 useful for copying IMM to a register using the zdepi
2706 instructions. Store the immediate value to insert in OP[0]. */
2707 static void
2708 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2710 int lsb, len;
2712 /* Find the least significant set bit in IMM. */
2713 for (lsb = 0; lsb < 32; lsb++)
2715 if ((imm & 1) != 0)
2716 break;
2717 imm >>= 1;
2720 /* Choose variants based on *sign* of the 5-bit field. */
2721 if ((imm & 0x10) == 0)
2722 len = (lsb <= 28) ? 4 : 32 - lsb;
2723 else
2725 /* Find the width of the bitstring in IMM. */
2726 for (len = 5; len < 32 - lsb; len++)
2728 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2729 break;
2732 /* Sign extend IMM as a 5-bit value. */
2733 imm = (imm & 0xf) - 0x10;
2736 op[0] = imm;
2737 op[1] = 31 - lsb;
2738 op[2] = len;
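/* Worked example (illustration only): for IMM 0x00ff0000, the least
   significant set bit is 16 and the bitstring is 8 bits wide; the
   5-bit field sign-extends to -1, giving OP[0] = -1, OP[1] = 15 and
   OP[2] = 8, i.e. "zdepi -1,15,8" regenerates 0x00ff0000.  */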
2741 /* Compute position (in OP[1]) and width (in OP[2])
2742 useful for copying IMM to a register using the depdi,z
2743 instructions. Store the immediate value to insert in OP[0]. */
2745 static void
2746 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2748 int lsb, len, maxlen;
2750 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2752 /* Find the least significant set bit in IMM. */
2753 for (lsb = 0; lsb < maxlen; lsb++)
2755 if ((imm & 1) != 0)
2756 break;
2757 imm >>= 1;
2760 /* Choose variants based on *sign* of the 5-bit field. */
2761 if ((imm & 0x10) == 0)
2762 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2763 else
2765 /* Find the width of the bitstring in IMM. */
2766 for (len = 5; len < maxlen - lsb; len++)
2768 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2769 break;
2772 /* Extend length if host is narrow and IMM is negative. */
2773 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2774 len += 32;
2776 /* Sign extend IMM as a 5-bit value. */
2777 imm = (imm & 0xf) - 0x10;
2780 op[0] = imm;
2781 op[1] = 63 - lsb;
2782 op[2] = len;
2785 /* Output assembler code to perform a doubleword move insn
2786 with operands OPERANDS. */
2788 const char *
2789 pa_output_move_double (rtx *operands)
2791 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2792 rtx latehalf[2];
2793 rtx addreg0 = 0, addreg1 = 0;
2794 int highonly = 0;
2796 /* First classify both operands. */
2798 if (REG_P (operands[0]))
2799 optype0 = REGOP;
2800 else if (offsettable_memref_p (operands[0]))
2801 optype0 = OFFSOP;
2802 else if (GET_CODE (operands[0]) == MEM)
2803 optype0 = MEMOP;
2804 else
2805 optype0 = RNDOP;
2807 if (REG_P (operands[1]))
2808 optype1 = REGOP;
2809 else if (CONSTANT_P (operands[1]))
2810 optype1 = CNSTOP;
2811 else if (offsettable_memref_p (operands[1]))
2812 optype1 = OFFSOP;
2813 else if (GET_CODE (operands[1]) == MEM)
2814 optype1 = MEMOP;
2815 else
2816 optype1 = RNDOP;
2818 /* Check for the cases that the operand constraints are not
2819 supposed to allow. */
2820 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2822 /* Handle copies between general and floating registers. */
2824 if (optype0 == REGOP && optype1 == REGOP
2825 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2827 if (FP_REG_P (operands[0]))
2829 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2830 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2831 return "{fldds|fldd} -16(%%sp),%0";
2833 else
2835 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2836 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2837 return "{ldws|ldw} -12(%%sp),%R0";
2841 /* Handle auto decrementing and incrementing loads and stores
2842 specifically, since the structure of the function doesn't work
2843 for them without major modification. Do it better when we teach
2844 this port about the general inc/dec addressing of the PA.
2845 (This was written by tege. Chide him if it doesn't work.) */
2847 if (optype0 == MEMOP)
2849 /* We have to output the address syntax ourselves, since print_operand
2850 doesn't deal with the addresses we want to use. Fix this later. */
2852 rtx addr = XEXP (operands[0], 0);
2853 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2855 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2857 operands[0] = XEXP (addr, 0);
2858 gcc_assert (GET_CODE (operands[1]) == REG
2859 && GET_CODE (operands[0]) == REG);
2861 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2863 /* No overlap between high target register and address
2864 register. (We do this in a non-obvious way to
2865 save a register file writeback) */
2866 if (GET_CODE (addr) == POST_INC)
2867 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2868 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2870 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2872 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2874 operands[0] = XEXP (addr, 0);
2875 gcc_assert (GET_CODE (operands[1]) == REG
2876 && GET_CODE (operands[0]) == REG);
2878 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2879 /* No overlap between high target register and address
2880 register. (We do this in a non-obvious way to save a
2881 register file writeback) */
2882 if (GET_CODE (addr) == PRE_INC)
2883 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2884 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2887 if (optype1 == MEMOP)
2889 /* We have to output the address syntax ourselves, since print_operand
2890 doesn't deal with the addresses we want to use. Fix this later. */
2892 rtx addr = XEXP (operands[1], 0);
2893 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2895 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2897 operands[1] = XEXP (addr, 0);
2898 gcc_assert (GET_CODE (operands[0]) == REG
2899 && GET_CODE (operands[1]) == REG);
2901 if (!reg_overlap_mentioned_p (high_reg, addr))
2903 /* No overlap between high target register and address
2904 register. (We do this in a non-obvious way to
2905 save a register file writeback) */
2906 if (GET_CODE (addr) == POST_INC)
2907 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2908 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2910 else
2912 /* This is an undefined situation. We should load into the
2913 address register *and* update that register. Probably
2914 we don't need to handle this at all. */
2915 if (GET_CODE (addr) == POST_INC)
2916 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2917 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2920 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2922 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2924 operands[1] = XEXP (addr, 0);
2925 gcc_assert (GET_CODE (operands[0]) == REG
2926 && GET_CODE (operands[1]) == REG);
2928 if (!reg_overlap_mentioned_p (high_reg, addr))
2930 /* No overlap between high target register and address
2931 register. (We do this in a non-obvious way to
2932 save a register file writeback) */
2933 if (GET_CODE (addr) == PRE_INC)
2934 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2935 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2937 else
2939 /* This is an undefined situation. We should load into the
2940 address register *and* update that register. Probably
2941 we don't need to handle this at all. */
2942 if (GET_CODE (addr) == PRE_INC)
2943 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2944 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2947 else if (GET_CODE (addr) == PLUS
2948 && GET_CODE (XEXP (addr, 0)) == MULT)
2950 rtx xoperands[4];
2952 /* Load address into left half of destination register. */
2953 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2954 xoperands[1] = XEXP (addr, 1);
2955 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2956 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2957 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2958 xoperands);
2959 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2961 else if (GET_CODE (addr) == PLUS
2962 && REG_P (XEXP (addr, 0))
2963 && REG_P (XEXP (addr, 1)))
2965 rtx xoperands[3];
2967 /* Load address into left half of destination register. */
2968 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2969 xoperands[1] = XEXP (addr, 0);
2970 xoperands[2] = XEXP (addr, 1);
2971 output_asm_insn ("{addl|add,l} %1,%2,%0",
2972 xoperands);
2973 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2977 /* If an operand is an unoffsettable memory ref, find a register
2978 we can increment temporarily to make it refer to the second word. */
2980 if (optype0 == MEMOP)
2981 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2983 if (optype1 == MEMOP)
2984 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2986 /* Ok, we can do one word at a time.
2987 Normally we do the low-numbered word first.
2989 In either case, set up in LATEHALF the operands to use
2990 for the high-numbered word and in some cases alter the
2991 operands in OPERANDS to be suitable for the low-numbered word. */
2993 if (optype0 == REGOP)
2994 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2995 else if (optype0 == OFFSOP)
2996 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2997 else
2998 latehalf[0] = operands[0];
3000 if (optype1 == REGOP)
3001 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
3002 else if (optype1 == OFFSOP)
3003 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
3004 else if (optype1 == CNSTOP)
3006 if (GET_CODE (operands[1]) == HIGH)
3008 operands[1] = XEXP (operands[1], 0);
3009 highonly = 1;
3011 split_double (operands[1], &operands[1], &latehalf[1]);
3013 else
3014 latehalf[1] = operands[1];
3016 /* If the first move would clobber the source of the second one,
3017 do them in the other order.
3019 This can happen in two cases:
3021 mem -> register where the first half of the destination register
3022 is the same register used in the memory's address. Reload
3023 can create such insns.
3025 mem in this case will be either register indirect or register
3026 indirect plus a valid offset.
3028 register -> register move where REGNO(dst) == REGNO(src + 1)
3029 someone (Tim/Tege?) claimed this can happen for parameter loads.
3031 Handle mem -> register case first. */
3032 if (optype0 == REGOP
3033 && (optype1 == MEMOP || optype1 == OFFSOP)
3034 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
3036 /* Do the late half first. */
3037 if (addreg1)
3038 output_asm_insn ("ldo 4(%0),%0", &addreg1);
3039 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3041 /* Then clobber. */
3042 if (addreg1)
3043 output_asm_insn ("ldo -4(%0),%0", &addreg1);
3044 return pa_singlemove_string (operands);
3047 /* Now handle register -> register case. */
3048 if (optype0 == REGOP && optype1 == REGOP
3049 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
3051 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3052 return pa_singlemove_string (operands);
3055 /* Normal case: do the two words, low-numbered first. */
3057 output_asm_insn (pa_singlemove_string (operands), operands);
3059 /* Make any unoffsettable addresses point at high-numbered word. */
3060 if (addreg0)
3061 output_asm_insn ("ldo 4(%0),%0", &addreg0);
3062 if (addreg1)
3063 output_asm_insn ("ldo 4(%0),%0", &addreg1);
3065 /* Do high-numbered word. */
3066 if (highonly)
3067 output_asm_insn ("ldil L'%1,%0", latehalf);
3068 else
3069 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3071 /* Undo the adds we just did. */
3072 if (addreg0)
3073 output_asm_insn ("ldo -4(%0),%0", &addreg0);
3074 if (addreg1)
3075 output_asm_insn ("ldo -4(%0),%0", &addreg1);
3077 return "";
3080 const char *
3081 pa_output_fp_move_double (rtx *operands)
3083 if (FP_REG_P (operands[0]))
3085 if (FP_REG_P (operands[1])
3086 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
3087 output_asm_insn ("fcpy,dbl %f1,%0", operands);
3088 else
3089 output_asm_insn ("fldd%F1 %1,%0", operands);
3091 else if (FP_REG_P (operands[1]))
3093 output_asm_insn ("fstd%F0 %1,%0", operands);
3095 else
3097 rtx xoperands[2];
3099 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
3101 /* This is a pain. You have to be prepared to deal with an
3102 arbitrary address here including pre/post increment/decrement.
3104 So avoid this in the MD. */
3105 gcc_assert (GET_CODE (operands[0]) == REG);
3107 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
3108 xoperands[0] = operands[0];
3109 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
3111 return "";
3114 /* Return a REG that occurs in ADDR with coefficient 1.
3115 ADDR can be effectively incremented by incrementing REG. */
3117 static rtx
3118 find_addr_reg (rtx addr)
3120 while (GET_CODE (addr) == PLUS)
3122 if (GET_CODE (XEXP (addr, 0)) == REG)
3123 addr = XEXP (addr, 0);
3124 else if (GET_CODE (XEXP (addr, 1)) == REG)
3125 addr = XEXP (addr, 1);
3126 else if (CONSTANT_P (XEXP (addr, 0)))
3127 addr = XEXP (addr, 1);
3128 else if (CONSTANT_P (XEXP (addr, 1)))
3129 addr = XEXP (addr, 0);
3130 else
3131 gcc_unreachable ();
3133 gcc_assert (GET_CODE (addr) == REG);
3134 return addr;
3137 /* Emit code to perform a block move.
3139 OPERANDS[0] is the destination pointer as a REG, clobbered.
3140 OPERANDS[1] is the source pointer as a REG, clobbered.
3141 OPERANDS[2] is a register for temporary storage.
3142 OPERANDS[3] is a register for temporary storage.
3143 OPERANDS[4] is the size as a CONST_INT.
3144 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3145 OPERANDS[6] is another temporary register. */
3147 const char *
3148 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3150 int align = INTVAL (operands[5]);
3151 unsigned long n_bytes = INTVAL (operands[4]);
3153 /* We can't move more than a word at a time because the PA has
3154 no integer move insns longer than a word. (Could use fp mem ops?) */
3155 if (align > (TARGET_64BIT ? 8 : 4))
3156 align = (TARGET_64BIT ? 8 : 4);
3158 /* Note that we know each loop below will execute at least twice
3159 (else we would have open-coded the copy). */
3160 switch (align)
3162 case 8:
3163 /* Pre-adjust the loop counter. */
3164 operands[4] = GEN_INT (n_bytes - 16);
3165 output_asm_insn ("ldi %4,%2", operands);
3167 /* Copying loop. */
3168 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3169 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3170 output_asm_insn ("std,ma %3,8(%0)", operands);
3171 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3172 output_asm_insn ("std,ma %6,8(%0)", operands);
3174 /* Handle the residual. There could be up to 15 bytes of
3175 residual to copy! */
3176 if (n_bytes % 16 != 0)
3178 operands[4] = GEN_INT (n_bytes % 8);
3179 if (n_bytes % 16 >= 8)
3180 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3181 if (n_bytes % 8 != 0)
3182 output_asm_insn ("ldd 0(%1),%6", operands);
3183 if (n_bytes % 16 >= 8)
3184 output_asm_insn ("std,ma %3,8(%0)", operands);
3185 if (n_bytes % 8 != 0)
3186 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3188 return "";
3190 case 4:
3191 /* Pre-adjust the loop counter. */
3192 operands[4] = GEN_INT (n_bytes - 8);
3193 output_asm_insn ("ldi %4,%2", operands);
3195 /* Copying loop. */
3196 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3197 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3198 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3199 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3200 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3202 /* Handle the residual. There could be up to 7 bytes of
3203 residual to copy! */
3204 if (n_bytes % 8 != 0)
3206 operands[4] = GEN_INT (n_bytes % 4);
3207 if (n_bytes % 8 >= 4)
3208 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3209 if (n_bytes % 4 != 0)
3210 output_asm_insn ("ldw 0(%1),%6", operands);
3211 if (n_bytes % 8 >= 4)
3212 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3213 if (n_bytes % 4 != 0)
3214 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3216 return "";
3218 case 2:
3219 /* Pre-adjust the loop counter. */
3220 operands[4] = GEN_INT (n_bytes - 4);
3221 output_asm_insn ("ldi %4,%2", operands);
3223 /* Copying loop. */
3224 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3225 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3226 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3227 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3228 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3230 /* Handle the residual. */
3231 if (n_bytes % 4 != 0)
3233 if (n_bytes % 4 >= 2)
3234 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3235 if (n_bytes % 2 != 0)
3236 output_asm_insn ("ldb 0(%1),%6", operands);
3237 if (n_bytes % 4 >= 2)
3238 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3239 if (n_bytes % 2 != 0)
3240 output_asm_insn ("stb %6,0(%0)", operands);
3242 return "";
3244 case 1:
3245 /* Pre-adjust the loop counter. */
3246 operands[4] = GEN_INT (n_bytes - 2);
3247 output_asm_insn ("ldi %4,%2", operands);
3249 /* Copying loop. */
3250 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3251 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3252 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3253 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3254 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3256 /* Handle the residual. */
3257 if (n_bytes % 2 != 0)
3259 output_asm_insn ("ldb 0(%1),%3", operands);
3260 output_asm_insn ("stb %3,0(%0)", operands);
3262 return "";
3264 default:
3265 gcc_unreachable ();
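/* Worked example (illustration only): for ALIGN 4 and N_BYTES 23, the
   loop counter starts at 15 and the loop copies 16 bytes; the residual
   code then moves one more word (since 23 % 8 >= 4) and finishes with
   a three-byte "stby,e" store (since 23 % 4 == 3).  */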
3269 /* Count the number of insns necessary to handle this block move.
3271 Basic structure is the same as emit_block_move, except that we
3272 count insns rather than emit them. */
3274 static int
3275 compute_cpymem_length (rtx_insn *insn)
3277 rtx pat = PATTERN (insn);
3278 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3279 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3280 unsigned int n_insns = 0;
3282 /* We can't move more than a word at a time because the PA has
3283 no integer move insns longer than a word. (Could use fp mem ops?) */
3284 if (align > (TARGET_64BIT ? 8 : 4))
3285 align = (TARGET_64BIT ? 8 : 4);
3287 /* The basic copying loop. */
3288 n_insns = 6;
3290 /* Residuals. */
3291 if (n_bytes % (2 * align) != 0)
3293 if ((n_bytes % (2 * align)) >= align)
3294 n_insns += 2;
3296 if ((n_bytes % align) != 0)
3297 n_insns += 2;
3300 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3301 return n_insns * 4;
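/* Worked example (illustration only): with ALIGN 4 and N_BYTES 23, the
   residual adds 2 insns for the extra word (23 % 8 >= 4) and 2 more
   for the trailing bytes (23 % 4 != 0), so the returned length is
   (6 + 2 + 2) * 4 = 40 bytes.  */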
3304 /* Emit code to perform a block clear.
3306 OPERANDS[0] is the destination pointer as a REG, clobbered.
3307 OPERANDS[1] is a register for temporary storage.
3308 OPERANDS[2] is the size as a CONST_INT.
3309 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3311 const char *
3312 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3314 int align = INTVAL (operands[3]);
3315 unsigned long n_bytes = INTVAL (operands[2]);
3317 /* We can't clear more than a word at a time because the PA has
3318 no integer move insns longer than a word. */
3319 if (align > (TARGET_64BIT ? 8 : 4))
3320 align = (TARGET_64BIT ? 8 : 4);
3322 /* Note that we know each loop below will execute at least twice
3323 (else we would have open-coded the clear). */
3324 switch (align)
3326 case 8:
3327 /* Pre-adjust the loop counter. */
3328 operands[2] = GEN_INT (n_bytes - 16);
3329 output_asm_insn ("ldi %2,%1", operands);
3331 /* Loop. */
3332 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3333 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3334 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3336 /* Handle the residual. There could be up to 15 bytes of
3337 residual to clear! */
3338 if (n_bytes % 16 != 0)
3340 operands[2] = GEN_INT (n_bytes % 8);
3341 if (n_bytes % 16 >= 8)
3342 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3343 if (n_bytes % 8 != 0)
3344 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3346 return "";
3348 case 4:
3349 /* Pre-adjust the loop counter. */
3350 operands[2] = GEN_INT (n_bytes - 8);
3351 output_asm_insn ("ldi %2,%1", operands);
3353 /* Loop. */
3354 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3355 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3356 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3358 /* Handle the residual. There could be up to 7 bytes of
3359 residual to clear! */
3360 if (n_bytes % 8 != 0)
3362 operands[2] = GEN_INT (n_bytes % 4);
3363 if (n_bytes % 8 >= 4)
3364 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3365 if (n_bytes % 4 != 0)
3366 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3368 return "";
3370 case 2:
3371 /* Pre-adjust the loop counter. */
3372 operands[2] = GEN_INT (n_bytes - 4);
3373 output_asm_insn ("ldi %2,%1", operands);
3375 /* Loop. */
3376 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3377 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3378 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3380 /* Handle the residual. */
3381 if (n_bytes % 4 != 0)
3383 if (n_bytes % 4 >= 2)
3384 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3385 if (n_bytes % 2 != 0)
3386 output_asm_insn ("stb %%r0,0(%0)", operands);
3388 return "";
3390 case 1:
3391 /* Pre-adjust the loop counter. */
3392 operands[2] = GEN_INT (n_bytes - 2);
3393 output_asm_insn ("ldi %2,%1", operands);
3395 /* Loop. */
3396 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3397 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3398 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3400 /* Handle the residual. */
3401 if (n_bytes % 2 != 0)
3402 output_asm_insn ("stb %%r0,0(%0)", operands);
3404 return "";
3406 default:
3407 gcc_unreachable ();
3411 /* Count the number of insns necessary to handle this block clear.
3413 Basic structure is the same as emit_block_move, except that we
3414 count insns rather than emit them. */
3416 static int
3417 compute_clrmem_length (rtx_insn *insn)
3419 rtx pat = PATTERN (insn);
3420 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3421 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3422 unsigned int n_insns = 0;
3424 /* We can't clear more than a word at a time because the PA has
3425 no integer move insns longer than a word. */
3426 if (align > (TARGET_64BIT ? 8 : 4))
3427 align = (TARGET_64BIT ? 8 : 4);
3429 /* The basic loop. */
3430 n_insns = 4;
3432 /* Residuals. */
3433 if (n_bytes % (2 * align) != 0)
3435 if ((n_bytes % (2 * align)) >= align)
3436 n_insns++;
3438 if ((n_bytes % align) != 0)
3439 n_insns++;
3442 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3443 return n_insns * 4;
3447 const char *
3448 pa_output_and (rtx *operands)
3450 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3452 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3453 int ls0, ls1, ms0, p, len;
3455 for (ls0 = 0; ls0 < 32; ls0++)
3456 if ((mask & (1 << ls0)) == 0)
3457 break;
3459 for (ls1 = ls0; ls1 < 32; ls1++)
3460 if ((mask & (1 << ls1)) != 0)
3461 break;
3463 for (ms0 = ls1; ms0 < 32; ms0++)
3464 if ((mask & (1 << ms0)) == 0)
3465 break;
3467 gcc_assert (ms0 == 32);
3469 if (ls1 == 32)
3471 len = ls0;
3473 gcc_assert (len);
3475 operands[2] = GEN_INT (len);
3476 return "{extru|extrw,u} %1,31,%2,%0";
3478 else
3480 /* We could use this `depi' for the case above as well, but `depi'
3481 requires one more register file access than an `extru'. */
3483 p = 31 - ls0;
3484 len = ls1 - ls0;
3486 operands[2] = GEN_INT (p);
3487 operands[3] = GEN_INT (len);
3488 return "{depi|depwi} 0,%2,%3,%0";
3491 else
3492 return "and %1,%2,%0";
3495 /* Return a string to perform a bitwise-and of operands[1] with operands[2],
3496 storing the result in operands[0]. */
3497 const char *
3498 pa_output_64bit_and (rtx *operands)
3500 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3502 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3503 int ls0, ls1, ms0, p, len;
3505 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3506 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3507 break;
3509 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3510 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3511 break;
3513 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3514 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3515 break;
3517 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3519 if (ls1 == HOST_BITS_PER_WIDE_INT)
3521 len = ls0;
3523 gcc_assert (len);
3525 operands[2] = GEN_INT (len);
3526 return "extrd,u %1,63,%2,%0";
3528 else
3530 /* We could use `depdi' for the case above as well, but `depdi'
3531 requires one more register file access than an `extrd,u'. */
3533 p = 63 - ls0;
3534 len = ls1 - ls0;
3536 operands[2] = GEN_INT (p);
3537 operands[3] = GEN_INT (len);
3538 return "depdi 0,%2,%3,%0";
3541 else
3542 return "and %1,%2,%0";
3545 const char *
3546 pa_output_ior (rtx *operands)
3548 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3549 int bs0, bs1, p, len;
3551 if (INTVAL (operands[2]) == 0)
3552 return "copy %1,%0";
3554 for (bs0 = 0; bs0 < 32; bs0++)
3555 if ((mask & (1 << bs0)) != 0)
3556 break;
3558 for (bs1 = bs0; bs1 < 32; bs1++)
3559 if ((mask & (1 << bs1)) == 0)
3560 break;
3562 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3564 p = 31 - bs0;
3565 len = bs1 - bs0;
3567 operands[2] = GEN_INT (p);
3568 operands[3] = GEN_INT (len);
3569 return "{depi|depwi} -1,%2,%3,%0";
3572 /* Return a string to perform a bitwise inclusive-or of operands[1]
3573 with operands[2], storing the result in operands[0]. */
3574 const char *
3575 pa_output_64bit_ior (rtx *operands)
3577 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3578 int bs0, bs1, p, len;
3580 if (INTVAL (operands[2]) == 0)
3581 return "copy %1,%0";
3583 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3584 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3585 break;
3587 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3588 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3589 break;
3591 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3592 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3594 p = 63 - bs0;
3595 len = bs1 - bs0;
3597 operands[2] = GEN_INT (p);
3598 operands[3] = GEN_INT (len);
3599 return "depdi -1,%2,%3,%0";
3602 /* Target hook for assembling integer objects. This code handles
3603 aligned SI and DI integers specially since function references
3604 must be preceded by P%. */
3606 static bool
3607 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3609 bool result;
3610 tree decl = NULL;
3612 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3613 assemble_external and set the SYMBOL_REF_DECL to NULL before
3614 calling output_addr_const. Otherwise, it may call assemble_external
3615 in the midst of outputting the assembler code for the SYMBOL_REF.
3616 We restore the SYMBOL_REF_DECL after the output is done. */
3617 if (GET_CODE (x) == SYMBOL_REF)
3619 decl = SYMBOL_REF_DECL (x);
3620 if (decl)
3622 assemble_external (decl);
3623 SET_SYMBOL_REF_DECL (x, NULL);
3627 if (size == UNITS_PER_WORD
3628 && aligned_p
3629 && function_label_operand (x, VOIDmode))
3631 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3633 /* We don't want an OPD when generating fast indirect calls. */
3634 if (!TARGET_FAST_INDIRECT_CALLS)
3635 fputs ("P%", asm_out_file);
3637 output_addr_const (asm_out_file, x);
3638 fputc ('\n', asm_out_file);
3639 result = true;
3641 else
3642 result = default_assemble_integer (x, size, aligned_p);
3644 if (decl)
3645 SET_SYMBOL_REF_DECL (x, decl);
3647 return result;
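/* Illustration (assumed typical output): a word-sized, aligned
   reference to a function `foo' is emitted as "\t.word\tP%foo", so the
   linker substitutes a function descriptor (plabel) rather than the
   raw code address.  */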
3650 /* Output an ascii string. */
3651 void
3652 pa_output_ascii (FILE *file, const char *p, int size)
3654 int i;
3655 int chars_output;
3656 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3658 /* The HP assembler can only take strings of 256 characters at one
3659 time. This is a limitation on input line length, *not* the
3660 length of the string. Sigh. Even worse, it seems that the
3661 restriction is in number of input characters (see \xnn &
3662 \whatever). So we have to do this very carefully. */
3664 fputs ("\t.STRING \"", file);
3666 chars_output = 0;
3667 for (i = 0; i < size; i += 4)
3669 int co = 0;
3670 int io = 0;
3671 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3673 unsigned int c = (unsigned char) p[i + io];
3675 if (c == '\"' || c == '\\')
3676 partial_output[co++] = '\\';
3677 if (c >= ' ' && c < 0177)
3678 partial_output[co++] = c;
3679 else
3681 unsigned int hexd;
3682 partial_output[co++] = '\\';
3683 partial_output[co++] = 'x';
3684 hexd = c / 16 - 0 + '0';
3685 if (hexd > '9')
3686 hexd -= '9' - 'a' + 1;
3687 partial_output[co++] = hexd;
3688 hexd = c % 16 - 0 + '0';
3689 if (hexd > '9')
3690 hexd -= '9' - 'a' + 1;
3691 partial_output[co++] = hexd;
3694 if (chars_output + co > 243)
3696 fputs ("\"\n\t.STRING \"", file);
3697 chars_output = 0;
3699 fwrite (partial_output, 1, (size_t) co, file);
3700 chars_output += co;
3701 co = 0;
3703 fputs ("\"\n", file);
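/* Worked example (illustration only): pa_output_ascii (file, "a\"b", 3)
   emits

	.STRING "a\"b"

   with the embedded quote escaped; unprintable bytes would instead be
   emitted in \xNN form.  */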
3706 /* Try to rewrite floating point comparisons & branches to avoid
3707 useless add,tr insns.
3709 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3710 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3711 first attempt to remove useless add,tr insns. It is zero
3712 for the second pass as reorg sometimes leaves bogus REG_DEAD
3713 notes lying around.
3715 When CHECK_NOTES is zero we can only eliminate add,tr insns
3716 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3717 instructions. */
3718 static void
3719 remove_useless_addtr_insns (int check_notes)
3721 rtx_insn *insn;
3722 static int pass = 0;
3724 /* This is fairly cheap, so always run it when optimizing. */
3725 if (optimize > 0)
3727 int fcmp_count = 0;
3728 int fbranch_count = 0;
3730 /* Walk all the insns in this function looking for fcmp & fbranch
3731 instructions. Keep track of how many of each we find. */
3732 for (insn = get_insns (); insn; insn = next_insn (insn))
3734 rtx tmp;
3736 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3737 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3738 continue;
3740 tmp = PATTERN (insn);
3742 /* It must be a set. */
3743 if (GET_CODE (tmp) != SET)
3744 continue;
3746 /* If the destination is CCFP, then we've found an fcmp insn. */
3747 tmp = SET_DEST (tmp);
3748 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3750 fcmp_count++;
3751 continue;
3754 tmp = PATTERN (insn);
3755 /* If this is an fbranch instruction, bump the fbranch counter. */
3756 if (GET_CODE (tmp) == SET
3757 && SET_DEST (tmp) == pc_rtx
3758 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3759 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3760 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3761 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3763 fbranch_count++;
3764 continue;
3769 /* Find all floating point compare + branch insns. If possible,
3770 reverse the comparison & the branch to avoid add,tr insns. */
3771 for (insn = get_insns (); insn; insn = next_insn (insn))
3773 rtx tmp;
3774 rtx_insn *next;
3776 /* Ignore anything that isn't an INSN. */
3777 if (! NONJUMP_INSN_P (insn))
3778 continue;
3780 tmp = PATTERN (insn);
3782 /* It must be a set. */
3783 if (GET_CODE (tmp) != SET)
3784 continue;
3786 /* The destination must be CCFP, which is register zero. */
3787 tmp = SET_DEST (tmp);
3788 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3789 continue;
3791 /* INSN should be a set of CCFP.
3793 See if the result of this insn is used in a reversed FP
3794 conditional branch. If so, reverse our condition and
3795 the branch. Doing so avoids useless add,tr insns. */
3796 next = next_insn (insn);
3797 while (next)
3799 /* Jumps, calls and labels stop our search. */
3800 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3801 break;
3803 /* As does another fcmp insn. */
3804 if (NONJUMP_INSN_P (next)
3805 && GET_CODE (PATTERN (next)) == SET
3806 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3807 && REGNO (SET_DEST (PATTERN (next))) == 0)
3808 break;
3810 next = next_insn (next);
3813 /* Is NEXT_INSN a branch? */
3814 if (next && JUMP_P (next))
3816 rtx pattern = PATTERN (next);
3818 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3819 and CCFP dies, then reverse our conditional and the branch
3820 to avoid the add,tr. */
3821 if (GET_CODE (pattern) == SET
3822 && SET_DEST (pattern) == pc_rtx
3823 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3824 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3825 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3826 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3827 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3828 && (fcmp_count == fbranch_count
3829 || (check_notes
3830 && find_regno_note (next, REG_DEAD, 0))))
3832 /* Reverse the branch. */
3833 tmp = XEXP (SET_SRC (pattern), 1);
3834 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3835 XEXP (SET_SRC (pattern), 2) = tmp;
3836 INSN_CODE (next) = -1;
3838 /* Reverse our condition. */
3839 tmp = PATTERN (insn);
3840 PUT_CODE (XEXP (tmp, 1),
3841 (reverse_condition_maybe_unordered
3842 (GET_CODE (XEXP (tmp, 1)))));
3848 pass = !pass;
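/* A sketch of the win (assumed output; the real templates are the
   fbranch patterns in pa.md): a branch on the reverse of the tested
   FP condition comes out as

	fcmp,... ; ftest ; add,tr %r0,%r0,%r0 ; b,n .L2

   where the add,tr unconditionally nullifies its successor to invert
   the sense of the ftest, while a branch on the condition itself
   needs only

	fcmp,... ; ftest ; b,n .L2

   Reversing the fcmp condition (via reverse_condition_maybe_unordered,
   so NaNs stay correct) and swapping the branch arms turns the first
   form into the second, one instruction shorter.  */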
3852 /* You may have trouble believing this, but this is the 32 bit HP-PA
3853 stack layout. Wow.
3855 Offset Contents
3857 Variable arguments (optional; any number may be allocated)
3859 SP-(4*(N+9)) arg word N
3861 SP-56 arg word 5
3862 SP-52 arg word 4
3864 Fixed arguments (must be allocated; may remain unused)
3866 SP-48 arg word 3
3867 SP-44 arg word 2
3868 SP-40 arg word 1
3869 SP-36 arg word 0
3871 Frame Marker
3873 SP-32 External Data Pointer (DP)
3874 SP-28 External sr4
3875 SP-24 External/stub RP (RP')
3876 SP-20 Current RP
3877 SP-16 Static Link
3878 SP-12 Clean up
3879 SP-8 Calling Stub RP (RP'')
3880 SP-4 Previous SP
3882 Top of Frame
3884 SP-0 Stack Pointer (points to next available address)
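   As a check on the variable-argument area: arg word N lives at
   SP-(4*(N+9)), so arg word 4 is at SP-(4*13) = SP-52 and arg word 5
   at SP-56, matching the entries above.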
3888 /* This function saves registers as follows. Registers marked with ' are
3889 this function's registers (as opposed to the previous function's).
3890 If a frame_pointer isn't needed, r4 is saved as a general register;
3891 the space for the frame pointer is still allocated, though, to keep
3892 things simple.
3895 Top of Frame
3897 SP (FP') Previous FP
3898 SP + 4 Alignment filler (sigh)
3899 SP + 8 Space for locals reserved here.
3903 SP + n All call saved registers used.
3907 SP + o All call saved fp registers used.
3911 SP + p (SP') points to next available address.
3915 /* Global variables set by output_function_prologue(). */
3916 /* Size of frame. Need to know this to emit return insns from
3917 leaf procedures. */
3918 static HOST_WIDE_INT actual_fsize, local_fsize;
3919 static int save_fregs;
3921 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3922 Handle case where DISP > 8k by using the add_high_const patterns.
3924 Note that in the DISP > 8k case, we will leave the high part of the
3925 address in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3927 static void
3928 store_reg (int reg, HOST_WIDE_INT disp, int base)
3930 rtx dest, src, basereg;
3931 rtx_insn *insn;
3933 src = gen_rtx_REG (word_mode, reg);
3934 basereg = gen_rtx_REG (Pmode, base);
3935 if (VAL_14_BITS_P (disp))
3937 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3938 insn = emit_move_insn (dest, src);
3940 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3942 rtx delta = GEN_INT (disp);
3943 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3945 emit_move_insn (tmpreg, delta);
3946 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3947 if (DO_FRAME_NOTES)
3949 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3950 gen_rtx_SET (tmpreg,
3951 gen_rtx_PLUS (Pmode, basereg, delta)));
3952 RTX_FRAME_RELATED_P (insn) = 1;
3954 dest = gen_rtx_MEM (word_mode, tmpreg);
3955 insn = emit_move_insn (dest, src);
3957 else
3959 rtx delta = GEN_INT (disp);
3960 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3961 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3963 emit_move_insn (tmpreg, high);
3964 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3965 insn = emit_move_insn (dest, src);
3966 if (DO_FRAME_NOTES)
3967 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3968 gen_rtx_SET (gen_rtx_MEM (word_mode,
3969 gen_rtx_PLUS (word_mode,
3970 basereg,
3971 delta)),
3972 src));
3975 if (DO_FRAME_NOTES)
3976 RTX_FRAME_RELATED_P (insn) = 1;
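/* A sketch of the assembler this typically produces for 32-bit code
   (assumed output, for illustration only):

     store_reg (4, 16, 30):		stw %r4,16(%r30)

     store_reg (4, 0x12340, 30):	addil L'0x12340,%r30
					stw %r4,R'0x12340(%r1)

   The second form is the DISP > 8k case; the high part of the address
   is left behind in %r1 as noted above.  */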
3979 /* Emit RTL to store REG at the memory location specified by BASE and then
3980 add MOD to BASE. MOD must be <= 8k. */
3982 static void
3983 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3985 rtx basereg, srcreg, delta;
3986 rtx_insn *insn;
3988 gcc_assert (VAL_14_BITS_P (mod));
3990 basereg = gen_rtx_REG (Pmode, base);
3991 srcreg = gen_rtx_REG (word_mode, reg);
3992 delta = GEN_INT (mod);
3994 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3995 if (DO_FRAME_NOTES)
3997 RTX_FRAME_RELATED_P (insn) = 1;
3999 /* RTX_FRAME_RELATED_P must be set on each frame related set
4000 in a parallel with more than one element. */
4001 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
4002 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
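/* For example (assumed output), store_reg_modify (30, 3, 64) on
   32-bit code becomes

	stwm %r3,64(%r30)

   which stores %r3 at the old %r30 and bumps %r30 by 64 in a single
   instruction.  */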
4006 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
4007 where DISP > 8k by using the add_high_const patterns. NOTE indicates
4008 whether to add a frame note or not.
4010 In the DISP > 8k case, we leave the high part of the address in %r1.
4011 There is code in pa_expand_{prologue,epilogue} that knows about this. */
4013 static void
4014 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
4016 rtx_insn *insn;
4018 if (VAL_14_BITS_P (disp))
4020 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4021 plus_constant (Pmode,
4022 gen_rtx_REG (Pmode, base), disp));
4024 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4026 rtx basereg = gen_rtx_REG (Pmode, base);
4027 rtx delta = GEN_INT (disp);
4028 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4030 emit_move_insn (tmpreg, delta);
4031 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4032 gen_rtx_PLUS (Pmode, tmpreg, basereg));
4033 if (DO_FRAME_NOTES)
4034 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4035 gen_rtx_SET (tmpreg,
4036 gen_rtx_PLUS (Pmode, basereg, delta)));
4038 else
4040 rtx basereg = gen_rtx_REG (Pmode, base);
4041 rtx delta = GEN_INT (disp);
4042 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4044 emit_move_insn (tmpreg,
4045 gen_rtx_PLUS (Pmode, basereg,
4046 gen_rtx_HIGH (Pmode, delta)));
4047 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4048 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4051 if (DO_FRAME_NOTES && note)
4052 RTX_FRAME_RELATED_P (insn) = 1;
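/* Typical assembler for the two common 32-bit cases (a hedged sketch):

     set_reg_plus_d (30, 30, 128, 0):	ldo 128(%r30),%r30

     set_reg_plus_d (30, 30, 0x4000, 0):	addil L'0x4000,%r30
						ldo R'0x4000(%r1),%r30

   mirroring the VAL_14_BITS_P split above.  */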
4055 HOST_WIDE_INT
4056 pa_compute_frame_size (poly_int64 size, int *fregs_live)
4058 int freg_saved = 0;
4059 int i, j;
4061 /* The code in pa_expand_prologue and pa_expand_epilogue must
4062 be consistent with the rounding and size calculation done here.
4063 Change them at the same time. */
4065 /* We do our own stack alignment. First, round the size of the
4066 stack locals up to a word boundary. */
4067 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4069 /* Space for previous frame pointer + filler. If any frame is
4070 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
4071 waste some space here for the sake of HP compatibility. The
4072 first slot is only used when the frame pointer is needed. */
4073 if (size || frame_pointer_needed)
4074 size += pa_starting_frame_offset ();
4076 /* If the current function calls __builtin_eh_return, then we need
4077 to allocate stack space for registers that will hold data for
4078 the exception handler. */
4079 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4081 unsigned int i;
4083 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
4084 continue;
4085 size += i * UNITS_PER_WORD;
4088 /* Account for space used by the callee general register saves. */
4089 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
4090 if (df_regs_ever_live_p (i))
4091 size += UNITS_PER_WORD;
4093 /* Account for space used by the callee floating point register saves. */
4094 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4095 if (df_regs_ever_live_p (i)
4096 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4098 freg_saved = 1;
4100 /* We always save both halves of the FP register, so always
4101 increment the frame size by 8 bytes. */
4102 size += 8;
4105 /* If any of the floating registers are saved, account for the
4106 alignment needed for the floating point register save block. */
4107 if (freg_saved)
4109 size = (size + 7) & ~7;
4110 if (fregs_live)
4111 *fregs_live = 1;
4114 /* The various ABIs include space for the outgoing parameters in the
4115 size of the current function's stack frame. We don't need to align
4116 for the outgoing arguments as their alignment is set by the final
4117 rounding for the frame as a whole. */
4118 size += crtl->outgoing_args_size;
4120 /* Allocate space for the fixed frame marker. This space must be
4121 allocated for any function that makes calls or allocates
4122 stack space. */
4123 if (!crtl->is_leaf || size)
4124 size += TARGET_64BIT ? 48 : 32;
4126 /* Finally, round to the preferred stack boundary. */
4127 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
4128 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
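/* Worked example (illustrative; assumes a 32-bit target where
   pa_starting_frame_offset () is 8 and the preferred stack boundary
   is 64 bytes): 100 bytes of locals round up to 100; the frame
   pointer slot and filler add 8 (108); two callee GR saves add 8
   (116); the fixed frame marker adds 32 (148); and the final rounding
   yields a frame of 192 bytes.  */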
4131 /* Output function label, and associated .PROC and .CALLINFO statements. */
4133 void
4134 pa_output_function_label (FILE *file)
4136 /* The function's label and associated .PROC must never be
4137 separated and must be output *after* any profiling declarations
4138 to avoid changing spaces/subspaces within a procedure. */
4139 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
4140 ASM_OUTPUT_FUNCTION_LABEL (file, name, current_function_decl);
4141 fputs ("\t.PROC\n", file);
4143 /* pa_expand_prologue does the dirty work now. We just need
4144 to output the assembler directives which denote the start
4145 of a function. */
4146 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4147 if (crtl->is_leaf)
4148 fputs (",NO_CALLS", file);
4149 else
4150 fputs (",CALLS", file);
4151 if (rp_saved)
4152 fputs (",SAVE_RP", file);
4154 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4155 at the beginning of the frame and that it is used as the frame
4156 pointer for the frame. We do this because our current frame
4157 layout doesn't conform to that specified in the HP runtime
4158 documentation and we need a way to indicate to programs such as
4159 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4160 isn't used by HP compilers but is supported by the assembler.
4161 However, SAVE_SP is supposed to indicate that the previous stack
4162 pointer has been saved in the frame marker. */
4163 if (frame_pointer_needed)
4164 fputs (",SAVE_SP", file);
4166 /* Pass on information about the number of callee register saves
4167 performed in the prologue.
4169 The compiler is supposed to pass the highest register number
4170 saved, the assembler then has to adjust that number before
4171 entering it into the unwind descriptor (to account for any
4172 caller saved registers with lower register numbers than the
4173 first callee saved register). */
4174 if (gr_saved)
4175 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4177 if (fr_saved)
4178 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4180 fputs ("\n\t.ENTRY\n", file);
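/* For a non-leaf function with a 128-byte frame that saves RP, %r3 and
   four other general registers, the directives above would come out
   roughly as (illustrative):

	foo:
		.PROC
		.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=7
		.ENTRY

   with ENTRY_GR=7 encoding gr_saved (5) plus the bias of 2.  */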
4183 /* Output function prologue. */
4185 static void
4186 pa_output_function_prologue (FILE *file)
4188 pa_output_function_label (file);
4189 remove_useless_addtr_insns (0);
4192 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4194 static void
4195 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4197 remove_useless_addtr_insns (0);
4200 void
4201 pa_expand_prologue (void)
4203 int merge_sp_adjust_with_store = 0;
4204 HOST_WIDE_INT size = get_frame_size ();
4205 HOST_WIDE_INT offset;
4206 int i;
4207 rtx tmpreg;
4208 rtx_insn *insn;
4210 gr_saved = 0;
4211 fr_saved = 0;
4212 save_fregs = 0;
4214 /* Compute total size for frame pointer, filler, locals and rounding to
4215 the next word boundary. Similar code appears in pa_compute_frame_size
4216 and must be changed in tandem with this code. */
4217 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4218 if (local_fsize || frame_pointer_needed)
4219 local_fsize += pa_starting_frame_offset ();
4221 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4222 if (flag_stack_usage_info)
4223 current_function_static_stack_size = actual_fsize;
4225 /* Compute a few things we will use often. */
4226 tmpreg = gen_rtx_REG (word_mode, 1);
4228 /* Save RP first. The calling conventions manual states RP will
4229 always be stored into the caller's frame at sp - 20 or sp - 16
4230 depending on which ABI is in use. */
4231 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4233 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4234 rp_saved = true;
4236 else
4237 rp_saved = false;
4239 /* Allocate the local frame and set up the frame pointer if needed. */
4240 if (actual_fsize != 0)
4242 if (frame_pointer_needed)
4244 /* Copy the old frame pointer temporarily into %r1. Set up the
4245 new stack pointer, then store away the saved old frame pointer
4246 into the stack at sp and at the same time update the stack
4247 pointer by actual_fsize bytes.  Two versions: the first
4248 handles small (<8k) frames, the second handles large (>=8k)
4249 frames. */
4250 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4251 if (DO_FRAME_NOTES)
4252 RTX_FRAME_RELATED_P (insn) = 1;
4254 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4255 if (DO_FRAME_NOTES)
4256 RTX_FRAME_RELATED_P (insn) = 1;
4258 if (VAL_14_BITS_P (actual_fsize))
4259 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4260 else
4262 /* It is incorrect to store the saved frame pointer at *sp,
4263 then increment sp (writes beyond the current stack boundary).
4265 So instead use stwm to store at *sp and post-increment the
4266 stack pointer as an atomic operation. Then increment sp to
4267 finish allocating the new frame. */
4268 HOST_WIDE_INT adjust1 = 8192 - 64;
4269 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4271 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4272 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4273 adjust2, 1);
4276 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4277 we need to store the previous stack pointer (frame pointer)
4278 into the frame marker on targets that use the HP unwind
4279 library. This allows the HP unwind library to be used to
4280 unwind GCC frames. However, we are not fully compatible
4281 with the HP library because our frame layout differs from
4282 that specified in the HP runtime specification.
4284 We don't want a frame note on this instruction as the frame
4285 marker moves during dynamic stack allocation.
4287 This instruction also serves as a blockage to prevent
4288 register spills from being scheduled before the stack
4289 pointer is raised. This is necessary as we store
4290 registers using the frame pointer as a base register,
4291 and the frame pointer is set before sp is raised. */
4292 if (TARGET_HPUX_UNWIND_LIBRARY)
4294 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4295 GEN_INT (TARGET_64BIT ? -8 : -4));
4297 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4298 hard_frame_pointer_rtx);
4300 else
4301 emit_insn (gen_blockage ());
4303 /* No frame pointer needed. */
4304 else
4306 /* In some cases we can perform the first callee register save
4307 and allocating the stack frame at the same time. If so, just
4308 make a note of it and defer allocating the frame until saving
4309 the callee registers. */
4310 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4311 merge_sp_adjust_with_store = 1;
4312 /* Cannot optimize. Adjust the stack frame by actual_fsize
4313 bytes. */
4314 else
4315 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4316 actual_fsize, 1);
4320 /* Normal register save.
4322 Do not save the frame pointer in the frame_pointer_needed case. It
4323 was done earlier. */
4324 if (frame_pointer_needed)
4326 offset = local_fsize;
4328 /* Saving the EH return data registers in the frame is the simplest
4329 way to get the frame unwind information emitted. We put them
4330 just before the general registers. */
4331 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4333 unsigned int i, regno;
4335 for (i = 0; ; ++i)
4337 regno = EH_RETURN_DATA_REGNO (i);
4338 if (regno == INVALID_REGNUM)
4339 break;
4341 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4342 offset += UNITS_PER_WORD;
4346 for (i = 18; i >= 4; i--)
4347 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4349 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4350 offset += UNITS_PER_WORD;
4351 gr_saved++;
4353 /* Account for %r3 which is saved in a special place. */
4354 gr_saved++;
4356 /* No frame pointer needed. */
4357 else
4359 offset = local_fsize - actual_fsize;
4361 /* Saving the EH return data registers in the frame is the simplest
4362 way to get the frame unwind information emitted. */
4363 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4365 unsigned int i, regno;
4367 for (i = 0; ; ++i)
4369 regno = EH_RETURN_DATA_REGNO (i);
4370 if (regno == INVALID_REGNUM)
4371 break;
4373 /* If merge_sp_adjust_with_store is nonzero, then we can
4374 optimize the first save. */
4375 if (merge_sp_adjust_with_store)
4377 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4378 merge_sp_adjust_with_store = 0;
4380 else
4381 store_reg (regno, offset, STACK_POINTER_REGNUM);
4382 offset += UNITS_PER_WORD;
4386 for (i = 18; i >= 3; i--)
4387 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4389 /* If merge_sp_adjust_with_store is nonzero, then we can
4390 optimize the first GR save. */
4391 if (merge_sp_adjust_with_store)
4393 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4394 merge_sp_adjust_with_store = 0;
4396 else
4397 store_reg (i, offset, STACK_POINTER_REGNUM);
4398 offset += UNITS_PER_WORD;
4399 gr_saved++;
4402 /* If we wanted to merge the SP adjustment with a GR save, but we never
4403 did any GR saves, then just emit the adjustment here. */
4404 if (merge_sp_adjust_with_store)
4405 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4406 actual_fsize, 1);
4409 /* The hppa calling conventions say that %r19, the pic offset
4410 register, is saved at sp - 32 (in this function's frame)
4411 when generating PIC code. FIXME: What is the correct thing
4412 to do for functions which make no calls and allocate no
4413 frame? Do we need to allocate a frame, or can we just omit
4414 the save? For now we'll just omit the save.
4416 We don't want a note on this insn as the frame marker can
4417 move if there is a dynamic stack allocation. */
4418 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4420 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4422 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4426 /* Align pointer properly (doubleword boundary). */
4427 offset = (offset + 7) & ~7;
4429 /* Floating point register store. */
4430 if (save_fregs)
4432 rtx base;
4434 /* First get the frame or stack pointer to the start of the FP register
4435 save area. */
4436 if (frame_pointer_needed)
4438 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4439 base = hard_frame_pointer_rtx;
4441 else
4443 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4444 base = stack_pointer_rtx;
4447 /* Now actually save the FP registers. */
4448 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4450 if (df_regs_ever_live_p (i)
4451 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4453 rtx addr, reg;
4454 rtx_insn *insn;
4455 addr = gen_rtx_MEM (DFmode,
4456 gen_rtx_POST_INC (word_mode, tmpreg));
4457 reg = gen_rtx_REG (DFmode, i);
4458 insn = emit_move_insn (addr, reg);
4459 if (DO_FRAME_NOTES)
4461 RTX_FRAME_RELATED_P (insn) = 1;
4462 if (TARGET_64BIT)
4464 rtx mem = gen_rtx_MEM (DFmode,
4465 plus_constant (Pmode, base,
4466 offset));
4467 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4468 gen_rtx_SET (mem, reg));
4470 else
4472 rtx meml = gen_rtx_MEM (SFmode,
4473 plus_constant (Pmode, base,
4474 offset));
4475 rtx memr = gen_rtx_MEM (SFmode,
4476 plus_constant (Pmode, base,
4477 offset + 4));
4478 rtx regl = gen_rtx_REG (SFmode, i);
4479 rtx regr = gen_rtx_REG (SFmode, i + 1);
4480 rtx setl = gen_rtx_SET (meml, regl);
4481 rtx setr = gen_rtx_SET (memr, regr);
4482 rtvec vec;
4484 RTX_FRAME_RELATED_P (setl) = 1;
4485 RTX_FRAME_RELATED_P (setr) = 1;
4486 vec = gen_rtvec (2, setl, setr);
4487 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4488 gen_rtx_SEQUENCE (VOIDmode, vec));
4491 offset += GET_MODE_SIZE (DFmode);
4492 fr_saved++;
4498 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4499 Handle case where DISP > 8k by using the add_high_const patterns. */
4501 static void
4502 load_reg (int reg, HOST_WIDE_INT disp, int base)
4504 rtx dest = gen_rtx_REG (word_mode, reg);
4505 rtx basereg = gen_rtx_REG (Pmode, base);
4506 rtx src;
4508 if (VAL_14_BITS_P (disp))
4509 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4510 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4512 rtx delta = GEN_INT (disp);
4513 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4515 emit_move_insn (tmpreg, delta);
4516 if (TARGET_DISABLE_INDEXING)
4518 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4519 src = gen_rtx_MEM (word_mode, tmpreg);
4521 else
4522 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4524 else
4526 rtx delta = GEN_INT (disp);
4527 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4528 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4530 emit_move_insn (tmpreg, high);
4531 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4534 emit_move_insn (dest, src);
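/* Mirror image of store_reg; e.g. (assumed output) load_reg (2, -20, 30)
   becomes

	ldw -20(%r30),%r2

   while an out-of-range displacement goes through addil/%r1 exactly as
   in the store case.  */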
4537 /* Update the total code bytes output to the text section. */
4539 static void
4540 update_total_code_bytes (unsigned int nbytes)
4542 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4543 && !IN_NAMED_SECTION_P (cfun->decl))
4545 unsigned int old_total = total_code_bytes;
4547 total_code_bytes += nbytes;
4549 /* Be prepared to handle overflows. */
4550 if (old_total > total_code_bytes)
4551 total_code_bytes = UINT_MAX;
4555 /* This function generates the assembly code for function exit.
4556 Args are as for output_function_prologue ().
4558 The function epilogue should not depend on the current stack
4559 pointer! It should use the frame pointer only. This is mandatory
4560 because of alloca; we also take advantage of it to omit stack
4561 adjustments before returning. */
4563 static void
4564 pa_output_function_epilogue (FILE *file)
4566 rtx_insn *insn = get_last_insn ();
4567 bool extra_nop;
4569 /* pa_expand_epilogue does the dirty work now. We just need
4570 to output the assembler directives which denote the end
4571 of a function.
4573 To make debuggers happy, emit a nop if the epilogue was completely
4574 eliminated due to a volatile call as the last insn in the
4575 current function. That way the return address (in %r2) will
4576 always point to a valid instruction in the current function. */
4578 /* Get the last real insn. */
4579 if (NOTE_P (insn))
4580 insn = prev_real_insn (insn);
4582 /* If it is a sequence, then look inside. */
4583 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4584 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4586 /* If insn is a CALL_INSN, then it must be a call to a volatile
4587 function (otherwise there would be epilogue insns). */
4588 if (insn && CALL_P (insn))
4590 fputs ("\tnop\n", file);
4591 extra_nop = true;
4593 else
4594 extra_nop = false;
4596 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4598 if (TARGET_SOM && TARGET_GAS)
4600 /* We are done with this subspace except possibly for some additional
4601 debug information. Forget that we are in this subspace to ensure
4602 that the next function is output in its own subspace. */
4603 in_section = NULL;
4604 cfun->machine->in_nsubspa = 2;
4607 /* Thunks do their own insn accounting. */
4608 if (cfun->is_thunk)
4609 return;
4611 if (INSN_ADDRESSES_SET_P ())
4613 last_address = extra_nop ? 4 : 0;
4614 insn = get_last_nonnote_insn ();
4615 if (insn)
4617 last_address += INSN_ADDRESSES (INSN_UID (insn));
4618 if (INSN_P (insn))
4619 last_address += insn_default_length (insn);
4621 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4622 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4624 else
4625 last_address = UINT_MAX;
4627 /* Finally, update the total number of code bytes output so far. */
4628 update_total_code_bytes (last_address);
4631 void
4632 pa_expand_epilogue (void)
4634 rtx tmpreg;
4635 HOST_WIDE_INT offset;
4636 HOST_WIDE_INT ret_off = 0;
4637 int i;
4638 int merge_sp_adjust_with_load = 0;
4640 /* We will use this often. */
4641 tmpreg = gen_rtx_REG (word_mode, 1);
4643 /* Try to restore RP early to avoid load/use interlocks when
4644 RP gets used in the return (bv) instruction. This appears to still
4645 be necessary even when we schedule the prologue and epilogue. */
4646 if (rp_saved)
4648 ret_off = TARGET_64BIT ? -16 : -20;
4649 if (frame_pointer_needed)
4651 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4652 ret_off = 0;
4654 else
4656 /* No frame pointer, and stack is smaller than 8k. */
4657 if (VAL_14_BITS_P (ret_off - actual_fsize))
4659 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4660 ret_off = 0;
4665 /* General register restores. */
4666 if (frame_pointer_needed)
4668 offset = local_fsize;
4670 /* If the current function calls __builtin_eh_return, then we need
4671 to restore the saved EH data registers. */
4672 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4674 unsigned int i, regno;
4676 for (i = 0; ; ++i)
4678 regno = EH_RETURN_DATA_REGNO (i);
4679 if (regno == INVALID_REGNUM)
4680 break;
4682 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4683 offset += UNITS_PER_WORD;
4687 for (i = 18; i >= 4; i--)
4688 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4690 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4691 offset += UNITS_PER_WORD;
4694 else
4696 offset = local_fsize - actual_fsize;
4698 /* If the current function calls __builtin_eh_return, then we need
4699 to restore the saved EH data registers. */
4700 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4702 unsigned int i, regno;
4704 for (i = 0; ; ++i)
4706 regno = EH_RETURN_DATA_REGNO (i);
4707 if (regno == INVALID_REGNUM)
4708 break;
4710 /* Only for the first load.
4711 merge_sp_adjust_with_load holds the register load
4712 with which we will merge the sp adjustment. */
4713 if (merge_sp_adjust_with_load == 0
4714 && local_fsize == 0
4715 && VAL_14_BITS_P (-actual_fsize))
4716 merge_sp_adjust_with_load = regno;
4717 else
4718 load_reg (regno, offset, STACK_POINTER_REGNUM);
4719 offset += UNITS_PER_WORD;
4723 for (i = 18; i >= 3; i--)
4725 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4727 /* Only for the first load.
4728 merge_sp_adjust_with_load holds the register load
4729 with which we will merge the sp adjustment. */
4730 if (merge_sp_adjust_with_load == 0
4731 && local_fsize == 0
4732 && VAL_14_BITS_P (-actual_fsize))
4733 merge_sp_adjust_with_load = i;
4734 else
4735 load_reg (i, offset, STACK_POINTER_REGNUM);
4736 offset += UNITS_PER_WORD;
4741 /* Align pointer properly (doubleword boundary). */
4742 offset = (offset + 7) & ~7;
4744 /* FP register restores. */
4745 if (save_fregs)
4747 /* Adjust the register to index off of. */
4748 if (frame_pointer_needed)
4749 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4750 else
4751 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4753 /* Actually do the restores now. */
4754 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4755 if (df_regs_ever_live_p (i)
4756 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4758 rtx src = gen_rtx_MEM (DFmode,
4759 gen_rtx_POST_INC (word_mode, tmpreg));
4760 rtx dest = gen_rtx_REG (DFmode, i);
4761 emit_move_insn (dest, src);
4765 /* Emit a blockage insn here to keep these insns from being moved to
4766 an earlier spot in the epilogue, or into the main instruction stream.
4768 This is necessary as we must not cut the stack back before all the
4769 restores are finished. */
4770 emit_insn (gen_blockage ());
4772 /* Reset stack pointer (and possibly frame pointer). The stack
4773 pointer is initially set to fp + 64 to avoid a race condition. */
4774 if (frame_pointer_needed)
4776 rtx delta = GEN_INT (-64);
4778 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4779 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4780 stack_pointer_rtx, delta));
4782 /* If we were deferring a callee register restore, do it now. */
4783 else if (merge_sp_adjust_with_load)
4785 rtx delta = GEN_INT (-actual_fsize);
4786 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4788 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4790 else if (actual_fsize != 0)
4791 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4792 - actual_fsize, 0);
4794 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4795 frame greater than 8k), do so now. */
4796 if (ret_off != 0)
4797 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4799 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4801 rtx sa = EH_RETURN_STACKADJ_RTX;
4803 emit_insn (gen_blockage ());
4804 emit_insn (TARGET_64BIT
4805 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4806 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4810 bool
4811 pa_can_use_return_insn (void)
4813 if (!reload_completed)
4814 return false;
4816 if (frame_pointer_needed)
4817 return false;
4819 if (df_regs_ever_live_p (2))
4820 return false;
4822 if (crtl->profile)
4823 return false;
4825 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4829 hppa_pic_save_rtx (void)
4831 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4834 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4835 #define NO_DEFERRED_PROFILE_COUNTERS 0
4836 #endif
4839 /* Vector of funcdef numbers. */
4840 static vec<int> funcdef_nos;
4842 /* Output deferred profile counters. */
4843 static void
4844 output_deferred_profile_counters (void)
4846 unsigned int i;
4847 int align, n;
4849 if (funcdef_nos.is_empty ())
4850 return;
4852 switch_to_section (data_section);
4853 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4854 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4856 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4858 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4859 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4862 funcdef_nos.release ();
4865 void
4866 hppa_profile_hook (int label_no)
4868 rtx_code_label *label_rtx = gen_label_rtx ();
4869 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4870 rtx arg_bytes, begin_label_rtx, mcount, sym;
4871 rtx_insn *call_insn;
4872 char begin_label_name[16];
4873 bool use_mcount_pcrel_call;
4875 /* Set up call destination. */
4876 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4877 pa_encode_label (sym);
4878 mcount = gen_rtx_MEM (Pmode, sym);
4880 /* If we can reach _mcount with a pc-relative call, we can optimize
4881 loading the address of the current function. This requires linker
4882 long branch stub support. */
4883 if (!TARGET_PORTABLE_RUNTIME
4884 && !TARGET_LONG_CALLS
4885 && (TARGET_SOM || flag_function_sections))
4886 use_mcount_pcrel_call = TRUE;
4887 else
4888 use_mcount_pcrel_call = FALSE;
4890 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4891 label_no);
4892 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4894 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4896 if (!use_mcount_pcrel_call)
4898 /* The address of the function is loaded into %r25 with an instruction-
4899 relative sequence that avoids the use of relocations. We use SImode
4900 for the address of the function in both 32 and 64-bit code to avoid
4901 having to provide DImode versions of the lcla2 pattern. */
4902 if (TARGET_PA_20)
4903 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4904 else
4905 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4908 if (!NO_DEFERRED_PROFILE_COUNTERS)
4910 rtx count_label_rtx, addr, r24;
4911 char count_label_name[16];
4913 funcdef_nos.safe_push (label_no);
4914 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4915 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4916 ggc_strdup (count_label_name));
4918 addr = force_reg (Pmode, count_label_rtx);
4919 r24 = gen_rtx_REG (Pmode, 24);
4920 emit_move_insn (r24, addr);
4922 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4923 if (use_mcount_pcrel_call)
4924 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4925 begin_label_rtx));
4926 else
4927 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4929 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4931 else
4933 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4934 if (use_mcount_pcrel_call)
4935 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4936 begin_label_rtx));
4937 else
4938 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4941 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4942 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4944 /* Indicate the _mcount call cannot throw, nor will it execute a
4945 non-local goto. */
4946 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4948 /* Allocate space for fixed arguments. */
4949 if (reg_parm_stack_space > crtl->outgoing_args_size)
4950 crtl->outgoing_args_size = reg_parm_stack_space;
4953 /* Fetch the return address for the frame COUNT steps up from
4954 the current frame, after the prologue. FRAMEADDR is the
4955 frame pointer of the COUNT frame.
4957 We want to ignore any export stub remnants here. To handle this,
4958 we examine the code at the return address, and if it is an export
4959 stub, we return a memory rtx for the stub return address stored
4960 at frame-24.
4962 The value returned is used in two different ways:
4964 1. To find a function's caller.
4966 2. To change the return address for a function.
4968 This function handles most instances of case 1; however, it will
4969 fail if there are two levels of stubs to execute on the return
4970 path. The only way I believe that can happen is if the return value
4971 needs a parameter relocation, which never happens for C code.
4973 This function handles most instances of case 2; however, it will
4974 fail if we did not originally have stub code on the return path
4975 but will need stub code on the new return path. This can happen if
4976 the caller & callee are both in the main program, but the new
4977 return location is in a shared library. */
4980 pa_return_addr_rtx (int count, rtx frameaddr)
4982 rtx label;
4983 rtx rp;
4984 rtx saved_rp;
4985 rtx ins;
4987 /* The instruction stream at the return address of a PA1.X export stub is:
4989 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4990 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4991 0x00011820 | stub+16: mtsp r1,sr0
4992 0xe0400002 | stub+20: be,n 0(sr0,rp)
4994 0xe0400002 must be specified as -532676606 so that it won't be
4995 rejected as an invalid immediate operand on 64-bit hosts.
4997 The instruction stream at the return address of a PA2.0 export stub is:
4999 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
5000 0xe840d002 | stub+12: bve,n (rp)
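      Likewise, 0xe840d002 must be specified as -398405630; that is the
      value used below as insns[1] when TARGET_PA_20 is set.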
5003 HOST_WIDE_INT insns[4];
5004 int i, len;
5006 if (count != 0)
5007 return NULL_RTX;
5009 rp = get_hard_reg_initial_val (Pmode, 2);
5011 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
5012 return rp;
5014 /* If there is no export stub then just use the value saved from
5015 the return pointer register. */
5017 saved_rp = gen_reg_rtx (Pmode);
5018 emit_move_insn (saved_rp, rp);
5020 /* Get pointer to the instruction stream. We have to mask out the
5021 privilege level from the two low order bits of the return address
5022 pointer here so that ins will point to the start of the first
5023 instruction that would have been executed if we returned. */
5024 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
5025 label = gen_label_rtx ();
5027 if (TARGET_PA_20)
5029 insns[0] = 0x4bc23fd1;
5030 insns[1] = -398405630;
5031 len = 2;
5033 else
5035 insns[0] = 0x4bc23fd1;
5036 insns[1] = 0x004010a1;
5037 insns[2] = 0x00011820;
5038 insns[3] = -532676606;
5039 len = 4;
5042 /* Check the instruction stream at the normal return address for the
5043 export stub. If it is an export stub, then our return address is
5044 really in -24[frameaddr]. */
5046 for (i = 0; i < len; i++)
5048 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
5049 rtx op1 = GEN_INT (insns[i]);
5050 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
5053 /* Here we know that our return address points to an export
5054 stub. We don't want to return the address of the export stub,
5055 but rather the return address of the export stub. That return
5056 address is stored at -24[frameaddr]. */
5058 emit_move_insn (saved_rp,
5059 gen_rtx_MEM (Pmode,
5060 memory_address (Pmode,
5061 plus_constant (Pmode, frameaddr,
5062 -24))));
5064 emit_label (label);
5066 return saved_rp;
5069 void
5070 pa_emit_bcond_fp (rtx operands[])
5072 enum rtx_code code = GET_CODE (operands[0]);
5073 rtx operand0 = operands[1];
5074 rtx operand1 = operands[2];
5075 rtx label = operands[3];
5077 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
5078 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
5080 emit_jump_insn (gen_rtx_SET (pc_rtx,
5081 gen_rtx_IF_THEN_ELSE (VOIDmode,
5082 gen_rtx_fmt_ee (NE,
5083 VOIDmode,
5084 gen_rtx_REG (CCFPmode, 0),
5085 const0_rtx),
5086 gen_rtx_LABEL_REF (VOIDmode, label),
5087 pc_rtx)));
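/* Schematically, the two insns emitted above are

     (set (reg:CCFP 0) (code:CCFP operand0 operand1))
     (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
			     (label_ref label)
			     (pc)))

   i.e. an fcmp followed by a branch on the FP condition; the pass in
   remove_useless_addtr_insns above looks for exactly this pair.  */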
5091 /* Adjust the cost of a scheduling dependency. Return the new cost of
5092 the dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
5094 static int
5095 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
5096 unsigned int)
5098 enum attr_type attr_type;
5100 /* Don't adjust costs for a pa8000 chip, also do not adjust any
5101 true dependencies as they are described with bypasses now. */
5102 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
5103 return cost;
5105 if (! recog_memoized (insn))
5106 return 0;
5108 attr_type = get_attr_type (insn);
5110 switch (dep_type)
5112 case REG_DEP_ANTI:
5113 /* Anti dependency; DEP_INSN reads a register that INSN writes some
5114 cycles later. */
5116 if (attr_type == TYPE_FPLOAD)
5118 rtx pat = PATTERN (insn);
5119 rtx dep_pat = PATTERN (dep_insn);
5120 if (GET_CODE (pat) == PARALLEL)
5122 /* This happens for the fldXs,mb patterns. */
5123 pat = XVECEXP (pat, 0, 0);
5125 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5126 /* If this happens, we have to extend this to schedule
5127 optimally. Return 0 for now. */
5128 return 0;
5130 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5132 if (! recog_memoized (dep_insn))
5133 return 0;
5134 switch (get_attr_type (dep_insn))
5136 case TYPE_FPALU:
5137 case TYPE_FPMULSGL:
5138 case TYPE_FPMULDBL:
5139 case TYPE_FPDIVSGL:
5140 case TYPE_FPDIVDBL:
5141 case TYPE_FPSQRTSGL:
5142 case TYPE_FPSQRTDBL:
5143 /* A fpload can't be issued until one cycle before a
5144 preceding arithmetic operation has finished if
5145 the target of the fpload is any of the sources
5146 (or destination) of the arithmetic operation. */
5147 return insn_default_latency (dep_insn) - 1;
5149 default:
5150 return 0;
5154 else if (attr_type == TYPE_FPALU)
5156 rtx pat = PATTERN (insn);
5157 rtx dep_pat = PATTERN (dep_insn);
5158 if (GET_CODE (pat) == PARALLEL)
5160 /* This happens for the fldXs,mb patterns. */
5161 pat = XVECEXP (pat, 0, 0);
5163 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5164 /* If this happens, we have to extend this to schedule
5165 optimally. Return 0 for now. */
5166 return 0;
5168 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5170 if (! recog_memoized (dep_insn))
5171 return 0;
5172 switch (get_attr_type (dep_insn))
5174 case TYPE_FPDIVSGL:
5175 case TYPE_FPDIVDBL:
5176 case TYPE_FPSQRTSGL:
5177 case TYPE_FPSQRTDBL:
5178 /* An ALU flop can't be issued until two cycles before a
5179 preceding divide or sqrt operation has finished if
5180 the target of the ALU flop is any of the sources
5181 (or destination) of the divide or sqrt operation. */
5182 return insn_default_latency (dep_insn) - 2;
5184 default:
5185 return 0;
5190 /* For other anti dependencies, the cost is 0. */
5191 return 0;
5193 case REG_DEP_OUTPUT:
5194 /* Output dependency; DEP_INSN writes a register that INSN writes some
5195 cycles later. */
5196 if (attr_type == TYPE_FPLOAD)
5198 rtx pat = PATTERN (insn);
5199 rtx dep_pat = PATTERN (dep_insn);
5200 if (GET_CODE (pat) == PARALLEL)
5202 /* This happens for the fldXs,mb patterns. */
5203 pat = XVECEXP (pat, 0, 0);
5205 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5206 /* If this happens, we have to extend this to schedule
5207 optimally. Return 0 for now. */
5208 return 0;
5210 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5212 if (! recog_memoized (dep_insn))
5213 return 0;
5214 switch (get_attr_type (dep_insn))
5216 case TYPE_FPALU:
5217 case TYPE_FPMULSGL:
5218 case TYPE_FPMULDBL:
5219 case TYPE_FPDIVSGL:
5220 case TYPE_FPDIVDBL:
5221 case TYPE_FPSQRTSGL:
5222 case TYPE_FPSQRTDBL:
5223 /* A fpload can't be issued until one cycle before a
5224 preceding arithmetic operation has finished if
5225 the target of the fpload is the destination of the
5226 arithmetic operation.
5228 Exception: For PA7100LC, PA7200 and PA7300, the cost
5229 is 3 cycles, unless they bundle together. We also
5230 pay the penalty if the second insn is a fpload. */
5231 return insn_default_latency (dep_insn) - 1;
5233 default:
5234 return 0;
5238 else if (attr_type == TYPE_FPALU)
5240 rtx pat = PATTERN (insn);
5241 rtx dep_pat = PATTERN (dep_insn);
5242 if (GET_CODE (pat) == PARALLEL)
5244 /* This happens for the fldXs,mb patterns. */
5245 pat = XVECEXP (pat, 0, 0);
5247 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5248 /* If this happens, we have to extend this to schedule
5249 optimally. Return 0 for now. */
5250 return 0;
5252 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5254 if (! recog_memoized (dep_insn))
5255 return 0;
5256 switch (get_attr_type (dep_insn))
5258 case TYPE_FPDIVSGL:
5259 case TYPE_FPDIVDBL:
5260 case TYPE_FPSQRTSGL:
5261 case TYPE_FPSQRTDBL:
5262 /* An ALU flop can't be issued until two cycles before a
5263 preceding divide or sqrt operation has finished if
5264 the target of the ALU flop is also the target of
5265 the divide or sqrt operation. */
5266 return insn_default_latency (dep_insn) - 2;
5268 default:
5269 return 0;
5274 /* For other output dependencies, the cost is 0. */
5275 return 0;
5277 default:
5278 gcc_unreachable ();
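/* Worked example of the anti-dependency case (hedged; latencies come
   from the scheduling descriptions, not from constants here): if a
   floating-point add has a default latency of 3 cycles and a following
   fpload targets one of the add's operands, the adjustment above
   returns 3 - 1 = 2, so the load may issue one cycle before the add
   retires.  */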
5282 /* The 700 can only issue a single insn at a time.
5283 The 7XXX processors can issue two insns at a time.
5284 The 8000 can issue 4 insns at a time. */
5285 static int
5286 pa_issue_rate (void)
5288 switch (pa_cpu)
5290 case PROCESSOR_700: return 1;
5291 case PROCESSOR_7100: return 2;
5292 case PROCESSOR_7100LC: return 2;
5293 case PROCESSOR_7200: return 2;
5294 case PROCESSOR_7300: return 2;
5295 case PROCESSOR_8000: return 4;
5297 default:
5298 gcc_unreachable ();
5304 /* Return the length of INSN plus any adjustment needed, where the
5305 length of INSN has already been computed as LENGTH. Return LENGTH
5306 if no adjustment is necessary.
5308 Also compute the length of an inline block move here as it is too
5309 complicated to express as a length attribute in pa.md. */
5311 pa_adjust_insn_length (rtx_insn *insn, int length)
5313 rtx pat = PATTERN (insn);
5315 /* If length is negative or undefined, provide initial length. */
5316 if ((unsigned int) length >= INT_MAX)
5318 if (GET_CODE (pat) == SEQUENCE)
5319 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5321 switch (get_attr_type (insn))
5323 case TYPE_MILLI:
5324 length = pa_attr_length_millicode_call (insn);
5325 break;
5326 case TYPE_CALL:
5327 length = pa_attr_length_call (insn, 0);
5328 break;
5329 case TYPE_SIBCALL:
5330 length = pa_attr_length_call (insn, 1);
5331 break;
5332 case TYPE_DYNCALL:
5333 length = pa_attr_length_indirect_call (insn);
5334 break;
5335 case TYPE_SH_FUNC_ADRS:
5336 length = pa_attr_length_millicode_call (insn) + 20;
5337 break;
5338 default:
5339 gcc_unreachable ();
5343 /* Block move pattern. */
5344 if (NONJUMP_INSN_P (insn)
5345 && GET_CODE (pat) == PARALLEL
5346 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5347 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5348 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5349 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5350 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5351 length += compute_cpymem_length (insn) - 4;
5352 /* Block clear pattern. */
5353 else if (NONJUMP_INSN_P (insn)
5354 && GET_CODE (pat) == PARALLEL
5355 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5356 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5357 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5358 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5359 length += compute_clrmem_length (insn) - 4;
5360 /* Conditional branch with an unfilled delay slot. */
5361 else if (JUMP_P (insn) && ! simplejump_p (insn))
5363 /* Adjust a short backwards conditional with an unfilled delay slot. */
5364 if (GET_CODE (pat) == SET
5365 && length == 4
5366 && JUMP_LABEL (insn) != NULL_RTX
5367 && ! forward_branch_p (insn))
5368 length += 4;
5369 else if (GET_CODE (pat) == PARALLEL
5370 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5371 && length == 4)
5372 length += 4;
5373 /* Adjust dbra insn with short backwards conditional branch with
5374 unfilled delay slot -- only for case where counter is in a
5375 general register. */
5376 else if (GET_CODE (pat) == PARALLEL
5377 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5378 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5379 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5380 && length == 4
5381 && ! forward_branch_p (insn))
5382 length += 4;
5384 return length;
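/* For instance, the code above counts a 4-byte short backward
   conditional branch with an unfilled delay slot as 8 bytes, allowing
   for the nop that will occupy its delay slot.  */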
5387 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5389 static bool
5390 pa_print_operand_punct_valid_p (unsigned char code)
5392 if (code == '@'
5393 || code == '#'
5394 || code == '*'
5395 || code == '^')
5396 return true;
5398 return false;
5401 /* Print operand X (an rtx) in assembler syntax to file FILE.
5402 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5403 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5405 void
5406 pa_print_operand (FILE *file, rtx x, int code)
5408 switch (code)
5410 case '#':
5411 /* Output a 'nop' if there's nothing for the delay slot. */
5412 if (dbr_sequence_length () == 0)
5413 fputs ("\n\tnop", file);
5414 return;
5415 case '*':
5416 /* Output a nullification completer if there's nothing for the
5417 delay slot or nullification is requested. */
5418 if (dbr_sequence_length () == 0
5419 || (final_sequence
5420 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5421 fputs (",n", file);
5422 return;
5423 case 'R':
5424 /* Print out the second register name of a register pair.
5425 I.e., R (6) => 7. */
5426 fputs (reg_names[REGNO (x) + 1], file);
5427 return;
5428 case 'r':
5429 /* A register or zero. */
5430 if (x == const0_rtx
5431 || (x == CONST0_RTX (DFmode))
5432 || (x == CONST0_RTX (SFmode)))
5434 fputs ("%r0", file);
5435 return;
5437 else
5438 break;
5439 case 'f':
5440 /* A register or zero (floating point). */
5441 if (x == const0_rtx
5442 || (x == CONST0_RTX (DFmode))
5443 || (x == CONST0_RTX (SFmode)))
5445 fputs ("%fr0", file);
5446 return;
5448 else
5449 break;
5450 case 'A':
5452 rtx xoperands[2];
5454 xoperands[0] = XEXP (XEXP (x, 0), 0);
5455 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5456 pa_output_global_address (file, xoperands[1], 0);
5457 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5458 return;
5461 case 'C': /* Plain (C)ondition */
5462 case 'X':
5463 switch (GET_CODE (x))
5465 case EQ:
5466 fputs ("=", file); break;
5467 case NE:
5468 fputs ("<>", file); break;
5469 case GT:
5470 fputs (">", file); break;
5471 case GE:
5472 fputs (">=", file); break;
5473 case GEU:
5474 fputs (">>=", file); break;
5475 case GTU:
5476 fputs (">>", file); break;
5477 case LT:
5478 fputs ("<", file); break;
5479 case LE:
5480 fputs ("<=", file); break;
5481 case LEU:
5482 fputs ("<<=", file); break;
5483 case LTU:
5484 fputs ("<<", file); break;
5485 default:
5486 gcc_unreachable ();
5488 return;
5489 case 'N': /* Condition, (N)egated */
5490 switch (GET_CODE (x))
5492 case EQ:
5493 fputs ("<>", file); break;
5494 case NE:
5495 fputs ("=", file); break;
5496 case GT:
5497 fputs ("<=", file); break;
5498 case GE:
5499 fputs ("<", file); break;
5500 case GEU:
5501 fputs ("<<", file); break;
5502 case GTU:
5503 fputs ("<<=", file); break;
5504 case LT:
5505 fputs (">=", file); break;
5506 case LE:
5507 fputs (">", file); break;
5508 case LEU:
5509 fputs (">>", file); break;
5510 case LTU:
5511 fputs (">>=", file); break;
5512 default:
5513 gcc_unreachable ();
5515 return;
5516 /* For floating point comparisons. Note that the output
5517 predicates are the complement of the desired mode. The
5518 conditions for GT, GE, LT, LE and LTGT cause an invalid
5519 operation exception if the result is unordered and this
5520 exception is enabled in the floating-point status register. */
5521 case 'Y':
5522 switch (GET_CODE (x))
5524 case EQ:
5525 fputs ("!=", file); break;
5526 case NE:
5527 fputs ("=", file); break;
5528 case GT:
5529 fputs ("!>", file); break;
5530 case GE:
5531 fputs ("!>=", file); break;
5532 case LT:
5533 fputs ("!<", file); break;
5534 case LE:
5535 fputs ("!<=", file); break;
5536 case LTGT:
5537 fputs ("!<>", file); break;
5538 case UNLE:
5539 fputs ("!?<=", file); break;
5540 case UNLT:
5541 fputs ("!?<", file); break;
5542 case UNGE:
5543 fputs ("!?>=", file); break;
5544 case UNGT:
5545 fputs ("!?>", file); break;
5546 case UNEQ:
5547 fputs ("!?=", file); break;
5548 case UNORDERED:
5549 fputs ("!?", file); break;
5550 case ORDERED:
5551 fputs ("?", file); break;
5552 default:
5553 gcc_unreachable ();
5555 return;
5556 case 'S': /* Condition, operands are (S)wapped. */
5557 switch (GET_CODE (x))
5559 case EQ:
5560 fputs ("=", file); break;
5561 case NE:
5562 fputs ("<>", file); break;
5563 case GT:
5564 fputs ("<", file); break;
5565 case GE:
5566 fputs ("<=", file); break;
5567 case GEU:
5568 fputs ("<<=", file); break;
5569 case GTU:
5570 fputs ("<<", file); break;
5571 case LT:
5572 fputs (">", file); break;
5573 case LE:
5574 fputs (">=", file); break;
5575 case LEU:
5576 fputs (">>=", file); break;
5577 case LTU:
5578 fputs (">>", file); break;
5579 default:
5580 gcc_unreachable ();
5582 return;
5583 case 'B': /* Condition, (B)oth swapped and negate. */
5584 switch (GET_CODE (x))
5586 case EQ:
5587 fputs ("<>", file); break;
5588 case NE:
5589 fputs ("=", file); break;
5590 case GT:
5591 fputs (">=", file); break;
5592 case GE:
5593 fputs (">", file); break;
5594 case GEU:
5595 fputs (">>", file); break;
5596 case GTU:
5597 fputs (">>=", file); break;
5598 case LT:
5599 fputs ("<=", file); break;
5600 case LE:
5601 fputs ("<", file); break;
5602 case LEU:
5603 fputs ("<<", file); break;
5604 case LTU:
5605 fputs ("<<=", file); break;
5606 default:
5607 gcc_unreachable ();
5609 return;
5610 case 'k':
5611 gcc_assert (GET_CODE (x) == CONST_INT);
5612 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5613 return;
5614 case 'Q':
5615 gcc_assert (GET_CODE (x) == CONST_INT);
5616 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5617 return;
5618 case 'L':
5619 gcc_assert (GET_CODE (x) == CONST_INT);
5620 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5621 return;
5622 case 'o':
5623 gcc_assert (GET_CODE (x) == CONST_INT
5624 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5625 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5626 return;
5627 case 'O':
5628 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5629 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5630 return;
5631 case 'p':
5632 gcc_assert (GET_CODE (x) == CONST_INT);
5633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5634 return;
5635 case 'P':
5636 gcc_assert (GET_CODE (x) == CONST_INT);
5637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5638 return;
5639 case 'I':
5640 if (GET_CODE (x) == CONST_INT)
5641 fputs ("i", file);
5642 return;
5643 case 'M':
5644 case 'F':
5645 switch (GET_CODE (XEXP (x, 0)))
5647 case PRE_DEC:
5648 case PRE_INC:
5649 if (ASSEMBLER_DIALECT == 0)
5650 fputs ("s,mb", file);
5651 else
5652 fputs (",mb", file);
5653 break;
5654 case POST_DEC:
5655 case POST_INC:
5656 if (ASSEMBLER_DIALECT == 0)
5657 fputs ("s,ma", file);
5658 else
5659 fputs (",ma", file);
5660 break;
5661 case PLUS:
5662 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5663 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5665 if (ASSEMBLER_DIALECT == 0)
5666 fputs ("x", file);
5668 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5669 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5671 if (ASSEMBLER_DIALECT == 0)
5672 fputs ("x,s", file);
5673 else
5674 fputs (",s", file);
5676 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5677 fputs ("s", file);
5678 break;
5679 default:
5680 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5681 fputs ("s", file);
5682 break;
5684 return;
5685 case 'G':
5686 pa_output_global_address (file, x, 0);
5687 return;
5688 case 'H':
5689 pa_output_global_address (file, x, 1);
5690 return;
5691 case 0: /* Don't do anything special */
5692 break;
5693 case 'Z':
5695 unsigned op[3];
5696 compute_zdepwi_operands (INTVAL (x), op);
5697 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5698 return;
5700 case 'z':
5702 unsigned op[3];
5703 compute_zdepdi_operands (INTVAL (x), op);
5704 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5705 return;
5707 case 'c':
5708 /* We can get here from a .vtable_inherit due to our
5709 CONSTANT_ADDRESS_P rejecting perfectly good constant
5710 addresses. */
5711 break;
5712 default:
5713 gcc_unreachable ();
5715 if (GET_CODE (x) == REG)
5717 fputs (reg_names [REGNO (x)], file);
5718 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5720 fputs ("R", file);
5721 return;
5723 if (FP_REG_P (x)
5724 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5725 && (REGNO (x) & 1) == 0)
5726 fputs ("L", file);
5728 else if (GET_CODE (x) == MEM)
5730 int size = GET_MODE_SIZE (GET_MODE (x));
5731 rtx base = NULL_RTX;
5732 switch (GET_CODE (XEXP (x, 0)))
5734 case PRE_DEC:
5735 case POST_DEC:
5736 base = XEXP (XEXP (x, 0), 0);
5737 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5738 break;
5739 case PRE_INC:
5740 case POST_INC:
5741 base = XEXP (XEXP (x, 0), 0);
5742 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5743 break;
5744 case PLUS:
5745 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5746 fprintf (file, "%s(%s)",
5747 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5748 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5749 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5750 fprintf (file, "%s(%s)",
5751 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5752 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5753 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5754 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5756 /* Because the REG_POINTER flag can get lost during reload,
5757 pa_legitimate_address_p canonicalizes the order of the
5758 index and base registers in the combined move patterns. */
5759 rtx base = XEXP (XEXP (x, 0), 1);
5760 rtx index = XEXP (XEXP (x, 0), 0);
5762 fprintf (file, "%s(%s)",
5763 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5765 else
5766 output_address (GET_MODE (x), XEXP (x, 0));
5767 break;
5768 default:
5769 output_address (GET_MODE (x), XEXP (x, 0));
5770 break;
5773 else
5774 output_addr_const (file, x);
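/* As an example of the condition letters above: for the RTL comparison
   (gt x y), %C prints ">", %N (negated) prints "<=", %S (swapped)
   prints "<", and %B (both) prints ">=".  */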
5777 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5779 void
5780 pa_output_global_address (FILE *file, rtx x, int round_constant)
5783 /* Imagine (high (const (plus ...))). */
5784 if (GET_CODE (x) == HIGH)
5785 x = XEXP (x, 0);
5787 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5788 output_addr_const (file, x);
5789 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5791 output_addr_const (file, x);
5792 fputs ("-$global$", file);
5794 else if (GET_CODE (x) == CONST)
5796 const char *sep = "";
5797 int offset = 0; /* assembler wants -$global$ at end */
5798 rtx base = NULL_RTX;
5800 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5802 case LABEL_REF:
5803 case SYMBOL_REF:
5804 base = XEXP (XEXP (x, 0), 0);
5805 output_addr_const (file, base);
5806 break;
5807 case CONST_INT:
5808 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5809 break;
5810 default:
5811 gcc_unreachable ();
5814 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5816 case LABEL_REF:
5817 case SYMBOL_REF:
5818 base = XEXP (XEXP (x, 0), 1);
5819 output_addr_const (file, base);
5820 break;
5821 case CONST_INT:
5822 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5823 break;
5824 default:
5825 gcc_unreachable ();
5828 /* How bogus. The compiler is apparently responsible for
5829 rounding the constant if it uses an LR field selector.
5831 The linker and/or assembler seem a better place since
5832 they have to do this kind of thing already.
5834 If we fail to do this, HP's optimizing linker may eliminate
5835 an addil, but not update the ldw/stw/ldo instruction that
5836 uses the result of the addil. */
5837 if (round_constant)
5838 offset = ((offset + 0x1000) & ~0x1fff);
5840 switch (GET_CODE (XEXP (x, 0)))
5842 case PLUS:
5843 if (offset < 0)
5845 offset = -offset;
5846 sep = "-";
5848 else
5849 sep = "+";
5850 break;
5852 case MINUS:
5853 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5854 sep = "-";
5855 break;
5857 default:
5858 gcc_unreachable ();
5861 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5862 fputs ("-$global$", file);
5863 if (offset)
5864 fprintf (file, "%s%d", sep, offset);
5866 else
5867 output_addr_const (file, x);
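/* For example (a sketch): with x = (const (plus (symbol_ref "foo")
 (const_int 4))) and foo in a writable section, non-PIC code prints
 "foo-$global$+4", giving the assembler the $global$-relative offset
 used for data addressing. */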
5870 /* Output boilerplate text to appear at the beginning of the file.
5871 There are several possible versions. */
5872 #define aputs(x) fputs(x, asm_out_file)
5873 static inline void
5874 pa_file_start_level (void)
5876 if (TARGET_64BIT)
5877 aputs ("\t.LEVEL 2.0w\n");
5878 else if (TARGET_PA_20)
5879 aputs ("\t.LEVEL 2.0\n");
5880 else if (TARGET_PA_11)
5881 aputs ("\t.LEVEL 1.1\n");
5882 else
5883 aputs ("\t.LEVEL 1.0\n");
5886 static inline void
5887 pa_file_start_space (int sortspace)
5889 aputs ("\t.SPACE $PRIVATE$");
5890 if (sortspace)
5891 aputs (",SORT=16");
5892 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5893 if (flag_tm)
5894 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5895 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5896 "\n\t.SPACE $TEXT$");
5897 if (sortspace)
5898 aputs (",SORT=8");
5899 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5900 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5903 static inline void
5904 pa_file_start_file (int want_version)
5906 if (write_symbols != NO_DEBUG)
5908 output_file_directive (asm_out_file, main_input_filename);
5909 if (want_version)
5910 aputs ("\t.version\t\"01.01\"\n");
5914 static inline void
5915 pa_file_start_mcount (const char *aswhat)
5917 if (profile_flag)
5918 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
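/* With profiling enabled, the SOM start-file hook below passes "CODE",
 so the directive emitted is ".IMPORT _mcount,CODE"; the ELF hook
 passes "ENTRY" instead. */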
5921 static void
5922 pa_elf_file_start (void)
5924 pa_file_start_level ();
5925 pa_file_start_mcount ("ENTRY");
5926 pa_file_start_file (0);
5929 static void
5930 pa_som_file_start (void)
5932 pa_file_start_level ();
5933 pa_file_start_space (0);
5934 aputs ("\t.IMPORT $global$,DATA\n"
5935 "\t.IMPORT $$dyncall,MILLICODE\n");
5936 pa_file_start_mcount ("CODE");
5937 pa_file_start_file (0);
5940 static void
5941 pa_linux_file_start (void)
5943 pa_file_start_file (1);
5944 pa_file_start_level ();
5945 pa_file_start_mcount ("CODE");
5948 static void
5949 pa_hpux64_gas_file_start (void)
5951 pa_file_start_level ();
5952 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5953 if (profile_flag)
5954 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5955 #endif
5956 pa_file_start_file (1);
5959 static void
5960 pa_hpux64_hpas_file_start (void)
5962 pa_file_start_level ();
5963 pa_file_start_space (1);
5964 pa_file_start_mcount ("CODE");
5965 pa_file_start_file (0);
5967 #undef aputs
5969 /* Search the deferred plabel list for SYMBOL and return its internal
5970 label. If an entry for SYMBOL is not found, a new entry is created. */
5973 pa_get_deferred_plabel (rtx symbol)
5975 const char *fname = XSTR (symbol, 0);
5976 size_t i;
5978 /* See if we have already put this function on the list of deferred
5979 plabels. This list is generally small, so a linear search is not
5980 too ugly. If it proves too slow, replace it with something faster. */
5981 for (i = 0; i < n_deferred_plabels; i++)
5982 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5983 break;
5985 /* If the deferred plabel list is empty, or this entry was not found
5986 on the list, create a new entry on the list. */
5987 if (deferred_plabels == NULL || i == n_deferred_plabels)
5989 tree id;
5991 if (deferred_plabels == 0)
5992 deferred_plabels = ggc_alloc<deferred_plabel> ();
5993 else
5994 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5995 deferred_plabels,
5996 n_deferred_plabels + 1);
5998 i = n_deferred_plabels++;
5999 deferred_plabels[i].internal_label = gen_label_rtx ();
6000 deferred_plabels[i].symbol = symbol;
6002 /* Gross. We have just implicitly taken the address of this
6003 function. Mark it in the same manner as assemble_name. */
6004 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
6005 if (id)
6006 mark_referenced (id);
6009 return deferred_plabels[i].internal_label;
6012 static void
6013 output_deferred_plabels (void)
6015 size_t i;
6017 /* If we have some deferred plabels, then we need to switch into the
6018 data or readonly data section, and align it to a 4 byte boundary
6019 before outputting the deferred plabels. */
6020 if (n_deferred_plabels)
6022 switch_to_section (flag_pic ? data_section : readonly_data_section);
6023 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
6026 /* Now output the deferred plabels. */
6027 for (i = 0; i < n_deferred_plabels; i++)
6029 targetm.asm_out.internal_label (asm_out_file, "L",
6030 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
6031 assemble_integer (deferred_plabels[i].symbol,
6032 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
6036 /* Initialize optabs to point to emulation routines. */
6038 static void
6039 pa_init_libfuncs (void)
6041 if (HPUX_LONG_DOUBLE_LIBRARY)
6043 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
6044 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
6045 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
6046 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
6047 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
6048 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
6049 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
6050 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
6051 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
6053 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
6054 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
6055 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
6056 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
6057 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
6058 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
6059 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
6061 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
6062 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
6063 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
6064 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
6066 set_conv_libfunc (sfix_optab, SImode, TFmode,
6067 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
6068 : "_U_Qfcnvfxt_quad_to_sgl");
6069 set_conv_libfunc (sfix_optab, DImode, TFmode,
6070 "_U_Qfcnvfxt_quad_to_dbl");
6071 set_conv_libfunc (ufix_optab, SImode, TFmode,
6072 "_U_Qfcnvfxt_quad_to_usgl");
6073 set_conv_libfunc (ufix_optab, DImode, TFmode,
6074 "_U_Qfcnvfxt_quad_to_udbl");
6076 set_conv_libfunc (sfloat_optab, TFmode, SImode,
6077 "_U_Qfcnvxf_sgl_to_quad");
6078 set_conv_libfunc (sfloat_optab, TFmode, DImode,
6079 "_U_Qfcnvxf_dbl_to_quad");
6080 set_conv_libfunc (ufloat_optab, TFmode, SImode,
6081 "_U_Qfcnvxf_usgl_to_quad");
6082 set_conv_libfunc (ufloat_optab, TFmode, DImode,
6083 "_U_Qfcnvxf_udbl_to_quad");
6086 if (TARGET_SYNC_LIBCALLS)
6087 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
6090 /* HP's millicode routines mean something special to the assembler.
6091 Keep track of which ones we have used. */
6093 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
6094 static void import_milli (enum millicodes);
6095 static char imported[(int) end1000];
6096 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
6097 static const char import_string[] = ".IMPORT $$....,MILLICODE";
6098 #define MILLI_START 10
6100 static void
6101 import_milli (enum millicodes code)
6103 char str[sizeof (import_string)];
6105 if (!imported[(int) code])
6107 imported[(int) code] = 1;
6108 strcpy (str, import_string);
6109 memcpy (str + MILLI_START, milli_names[(int) code], 4);
6110 output_asm_insn (str, 0);
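/* The first use of a routine patches the "...." placeholder in
 import_string; e.g. the initial mulI import emits
 ".IMPORT $$mulI,MILLICODE", and later uses emit nothing. */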
6114 /* The register constraints have put the operands and return value in
6115 the proper registers. */
6117 const char *
6118 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
6120 import_milli (mulI);
6121 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
6124 /* Emit the rtl for doing a division by a constant. */
6126 /* Do magic division millicodes exist for this value? */
6127 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
6129 /* We'll use an array to keep track of the magic millicodes and
6130 whether or not we've used them already. [n][0] is signed, [n][1] is
6131 unsigned. */
6133 static int div_milli[16][2];
6136 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
6138 if (GET_CODE (operands[2]) == CONST_INT
6139 && INTVAL (operands[2]) > 0
6140 && INTVAL (operands[2]) < 16
6141 && pa_magic_milli[INTVAL (operands[2])])
6143 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6145 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6146 emit
6147 (gen_rtx_PARALLEL
6148 (VOIDmode,
6149 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6150 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6151 SImode,
6152 gen_rtx_REG (SImode, 26),
6153 operands[2])),
6154 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6155 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6156 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6157 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6158 gen_rtx_CLOBBER (VOIDmode, ret))));
6159 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6160 return 1;
6162 return 0;
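/* A sketch of the convention encoded above: the dividend is moved into
 %r26, the quotient appears in %r29, and the PARALLEL clobbers %r25,
 %r26, the scratch operands, and the millicode return pointer (%r31,
 or %r2 for TARGET_64BIT). */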
6165 const char *
6166 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6168 int divisor;
6170 /* If the divisor is a constant, try to use one of the special
6171 opcodes. */
6172 if (GET_CODE (operands[0]) == CONST_INT)
6174 static char buf[100];
6175 divisor = INTVAL (operands[0]);
6176 if (!div_milli[divisor][unsignedp])
6178 div_milli[divisor][unsignedp] = 1;
6179 if (unsignedp)
6180 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6181 else
6182 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6184 if (unsignedp)
6186 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6187 INTVAL (operands[0]));
6188 return pa_output_millicode_call (insn,
6189 gen_rtx_SYMBOL_REF (SImode, buf));
6191 else
6193 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6194 INTVAL (operands[0]));
6195 return pa_output_millicode_call (insn,
6196 gen_rtx_SYMBOL_REF (SImode, buf));
6199 /* Divisor isn't a special constant. */
6200 else
6202 if (unsignedp)
6204 import_milli (divU);
6205 return pa_output_millicode_call (insn,
6206 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6208 else
6210 import_milli (divI);
6211 return pa_output_millicode_call (insn,
6212 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
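/* For example (a sketch): an unsigned divide by 10 first emits
 ".IMPORT $$divU_10,MILLICODE" once per compilation unit, then a
 millicode call to $$divU_10; a divisor without a magic millicode
 falls back to the generic $$divU routine. */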
6217 /* Output a $$rem millicode to do mod. */
6219 const char *
6220 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6222 if (unsignedp)
6224 import_milli (remU);
6225 return pa_output_millicode_call (insn,
6226 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6228 else
6230 import_milli (remI);
6231 return pa_output_millicode_call (insn,
6232 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6236 void
6237 pa_output_arg_descriptor (rtx_insn *call_insn)
6239 const char *arg_regs[4];
6240 machine_mode arg_mode;
6241 rtx link;
6242 int i, output_flag = 0;
6243 int regno;
6245 /* We neither need nor want argument location descriptors for the
6246 64bit runtime environment or the ELF32 environment. */
6247 if (TARGET_64BIT || TARGET_ELF32)
6248 return;
6250 for (i = 0; i < 4; i++)
6251 arg_regs[i] = 0;
6253 /* Specify explicitly that no argument relocations should take place
6254 if using the portable runtime calling conventions. */
6255 if (TARGET_PORTABLE_RUNTIME)
6257 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6258 asm_out_file);
6259 return;
6262 gcc_assert (CALL_P (call_insn));
6263 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6264 link; link = XEXP (link, 1))
6266 rtx use = XEXP (link, 0);
6268 if (! (GET_CODE (use) == USE
6269 && GET_CODE (XEXP (use, 0)) == REG
6270 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6271 continue;
6273 arg_mode = GET_MODE (XEXP (use, 0));
6274 regno = REGNO (XEXP (use, 0));
6275 if (regno >= 23 && regno <= 26)
6277 arg_regs[26 - regno] = "GR";
6278 if (arg_mode == DImode)
6279 arg_regs[25 - regno] = "GR";
6281 else if (regno >= 32 && regno <= 39)
6283 if (arg_mode == SFmode)
6284 arg_regs[(regno - 32) / 2] = "FR";
6285 else
6287 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6288 arg_regs[(regno - 34) / 2] = "FR";
6289 arg_regs[(regno - 34) / 2 + 1] = "FU";
6290 #else
6291 arg_regs[(regno - 34) / 2] = "FU";
6292 arg_regs[(regno - 34) / 2 + 1] = "FR";
6293 #endif
6297 fputs ("\t.CALL ", asm_out_file);
6298 for (i = 0; i < 4; i++)
6300 if (arg_regs[i])
6302 if (output_flag++)
6303 fputc (',', asm_out_file);
6304 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6307 fputc ('\n', asm_out_file);
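/* For example (a sketch): a call passing two word-sized integers in
 %r26 and %r25 produces "\t.CALL ARGW0=GR,ARGW1=GR", while a double
 in a floating-point register pair contributes an "FR"/"FU" word pair,
 ordered per HP_FP_ARG_DESCRIPTOR_REVERSED. */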
6310 /* Inform reload about cases where moving X with a mode MODE to or from
6311 a register in RCLASS requires an extra scratch or immediate register.
6312 Return the class needed for the immediate register. */
6314 static reg_class_t
6315 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6316 machine_mode mode, secondary_reload_info *sri)
6318 int regno;
6319 enum reg_class rclass = (enum reg_class) rclass_i;
6321 /* Handle the easy stuff first. */
6322 if (rclass == R1_REGS)
6323 return NO_REGS;
6325 if (REG_P (x))
6327 regno = REGNO (x);
6328 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6329 return NO_REGS;
6331 else
6332 regno = -1;
6334 /* If we have something like (mem (mem (...))), we can safely assume the
6335 inner MEM will end up in a general register after reloading, so there's
6336 no need for a secondary reload. */
6337 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6338 return NO_REGS;
6340 /* Trying to load a constant into a FP register during PIC code
6341 generation requires %r1 as a scratch register. For float modes,
6342 the only legitimate constant is CONST0_RTX. However, there are
6343 a few patterns that accept constant double operands. */
6344 if (flag_pic
6345 && FP_REG_CLASS_P (rclass)
6346 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6348 switch (mode)
6350 case E_SImode:
6351 sri->icode = CODE_FOR_reload_insi_r1;
6352 break;
6354 case E_DImode:
6355 sri->icode = CODE_FOR_reload_indi_r1;
6356 break;
6358 case E_SFmode:
6359 sri->icode = CODE_FOR_reload_insf_r1;
6360 break;
6362 case E_DFmode:
6363 sri->icode = CODE_FOR_reload_indf_r1;
6364 break;
6366 default:
6367 gcc_unreachable ();
6369 return NO_REGS;
6372 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6373 register when we're generating PIC code or when the operand isn't
6374 readonly. */
6375 if (pa_symbolic_expression_p (x))
6377 if (GET_CODE (x) == HIGH)
6378 x = XEXP (x, 0);
6380 if (flag_pic || !read_only_operand (x, VOIDmode))
6382 switch (mode)
6384 case E_SImode:
6385 sri->icode = CODE_FOR_reload_insi_r1;
6386 break;
6388 case E_DImode:
6389 sri->icode = CODE_FOR_reload_indi_r1;
6390 break;
6392 default:
6393 gcc_unreachable ();
6395 return NO_REGS;
6399 /* Profiling showed the PA port spends about 1.3% of its compilation
6400 time in true_regnum from calls inside pa_secondary_reload_class. */
6401 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6402 regno = true_regnum (x);
6404 /* Handle reloads for floating point loads and stores. */
6405 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6406 && FP_REG_CLASS_P (rclass))
6408 if (MEM_P (x))
6410 x = XEXP (x, 0);
6412 /* We don't need a secondary reload for indexed memory addresses.
6414 When INT14_OK_STRICT is true, it might appear that we could
6415 directly allow register indirect memory addresses. However,
6416 this doesn't work because we don't support SUBREGs in
6417 floating-point register copies and reload doesn't tell us
6418 when it's going to use a SUBREG. */
6419 if (IS_INDEX_ADDR_P (x))
6420 return NO_REGS;
6423 /* Request a secondary reload with a general scratch register
6424 for everything else. ??? Could symbolic operands be handled
6425 directly when generating non-pic PA 2.0 code? */
6426 sri->icode = (in_p
6427 ? direct_optab_handler (reload_in_optab, mode)
6428 : direct_optab_handler (reload_out_optab, mode));
6429 return NO_REGS;
6432 /* A SAR<->FP register copy requires an intermediate general register
6433 and secondary memory. We need a secondary reload with a general
6434 scratch register for spills. */
6435 if (rclass == SHIFT_REGS)
6437 /* Handle spill. */
6438 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6440 sri->icode = (in_p
6441 ? direct_optab_handler (reload_in_optab, mode)
6442 : direct_optab_handler (reload_out_optab, mode));
6443 return NO_REGS;
6446 /* Handle FP copy. */
6447 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6448 return GENERAL_REGS;
6451 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6452 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6453 && FP_REG_CLASS_P (rclass))
6454 return GENERAL_REGS;
6456 return NO_REGS;
6459 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6461 static bool
6462 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6463 reg_class_t class1 ATTRIBUTE_UNUSED,
6464 reg_class_t class2 ATTRIBUTE_UNUSED)
6466 #ifdef PA_SECONDARY_MEMORY_NEEDED
6467 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6468 #else
6469 return false;
6470 #endif
6473 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6474 is only marked as live on entry by df-scan when it is a fixed
6475 register. It isn't a fixed register in the 64-bit runtime,
6476 so we need to mark it here. */
6478 static void
6479 pa_extra_live_on_entry (bitmap regs)
6481 if (TARGET_64BIT)
6482 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6485 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6486 to prevent it from being deleted. */
6489 pa_eh_return_handler_rtx (void)
6491 rtx tmp;
6493 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6494 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6495 tmp = gen_rtx_MEM (word_mode, tmp);
6496 tmp->volatil = 1;
6497 return tmp;
6500 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6501 by invisible reference. As a GCC extension, we also pass anything
6502 with a zero or variable size by reference.
6504 The 64-bit runtime does not describe passing any types by invisible
6505 reference. The internals of GCC can't currently handle passing
6506 empty structures and zero- or variable-length arrays when they are
6507 not passed entirely on the stack or by reference. Thus, as a GCC
6508 extension, we pass these types by reference. The HP compiler doesn't
6509 support these types, so hopefully there shouldn't be any compatibility
6510 issues. This may have to be revisited when HP releases a C99 compiler
6511 or updates the ABI. */
6513 static bool
6514 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6516 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6517 if (TARGET_64BIT)
6518 return size <= 0;
6519 else
6520 return size <= 0 || size > 8;
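/* E.g., in the 32-bit runtime a 16-byte struct is passed by invisible
 reference while an 8-byte struct is passed by value; in the 64-bit
 runtime only zero- and variable-sized objects go by reference. */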
6523 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6525 static pad_direction
6526 pa_function_arg_padding (machine_mode mode, const_tree type)
6528 if (mode == BLKmode
6529 || (TARGET_64BIT
6530 && type
6531 && (AGGREGATE_TYPE_P (type)
6532 || TREE_CODE (type) == COMPLEX_TYPE
6533 || VECTOR_TYPE_P (type))))
6535 /* Return PAD_NONE if justification is not required. */
6536 if (type
6537 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6538 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6539 return PAD_NONE;
6541 /* The directions set here are ignored when a BLKmode argument larger
6542 than a word is placed in a register. Different code is used for
6543 the stack and registers. This makes it difficult to have a
6544 consistent data representation for both the stack and registers.
6545 For both runtimes, the justification and padding for arguments on
6546 the stack and in registers should be identical. */
6547 if (TARGET_64BIT)
6548 /* The 64-bit runtime specifies left justification for aggregates. */
6549 return PAD_UPWARD;
6550 else
6551 /* The 32-bit runtime architecture specifies right justification.
6552 When the argument is passed on the stack, the argument is padded
6553 with garbage on the left. The HP compiler pads with zeros. */
6554 return PAD_DOWNWARD;
6557 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6558 return PAD_DOWNWARD;
6559 else
6560 return PAD_NONE;
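/* A sketch: a 3-byte BLKmode argument in the 32-bit runtime yields
 PAD_DOWNWARD (right justification within its word), whereas the same
 argument in the 64-bit runtime yields PAD_UPWARD. */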
6564 /* Do what is necessary for `va_start'. We look at the current function
6565 to determine if stdargs or varargs is used and fill in an initial
6566 va_list. A pointer to this constructor is returned. */
6568 static rtx
6569 hppa_builtin_saveregs (void)
6571 rtx offset, dest;
6572 tree fntype = TREE_TYPE (current_function_decl);
6573 int argadj = ((!stdarg_p (fntype))
6574 ? UNITS_PER_WORD : 0);
6576 if (argadj)
6577 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6578 else
6579 offset = crtl->args.arg_offset_rtx;
6581 if (TARGET_64BIT)
6583 int i, off;
6585 /* Adjust for varargs/stdarg differences. */
6586 if (argadj)
6587 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6588 else
6589 offset = crtl->args.arg_offset_rtx;
6591 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6592 from the incoming arg pointer and growing to larger addresses. */
6593 for (i = 26, off = -64; i >= 19; i--, off += 8)
6594 emit_move_insn (gen_rtx_MEM (word_mode,
6595 plus_constant (Pmode,
6596 arg_pointer_rtx, off)),
6597 gen_rtx_REG (word_mode, i));
6599 /* The incoming args pointer points just beyond the flushback area;
6600 normally this is not a serious concern. However, when we are doing
6601 varargs/stdargs we want to make the arg pointer point to the start
6602 of the incoming argument area. */
6603 emit_move_insn (virtual_incoming_args_rtx,
6604 plus_constant (Pmode, arg_pointer_rtx, -64));
6606 /* Now return a pointer to the first anonymous argument. */
6607 return copy_to_reg (expand_binop (Pmode, add_optab,
6608 virtual_incoming_args_rtx,
6609 offset, 0, 0, OPTAB_LIB_WIDEN));
6612 /* Store general registers on the stack. */
6613 dest = gen_rtx_MEM (BLKmode,
6614 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6615 -16));
6616 set_mem_alias_set (dest, get_varargs_alias_set ());
6617 set_mem_align (dest, BITS_PER_WORD);
6618 move_block_from_reg (23, dest, 4);
6620 /* move_block_from_reg will emit code to store the argument registers
6621 individually as scalar stores.
6623 However, other insns may later load from the same addresses for
6624 a structure load (passing a struct to a varargs routine).
6626 The alias code assumes that such aliasing can never happen, so we
6627 have to keep memory referencing insns from moving up beyond the
6628 last argument register store. So we emit a blockage insn here. */
6629 emit_insn (gen_blockage ());
6631 return copy_to_reg (expand_binop (Pmode, add_optab,
6632 crtl->args.internal_arg_pointer,
6633 offset, 0, 0, OPTAB_LIB_WIDEN));
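/* A sketch of the 64-bit layout produced above: %r26 is stored at
 offset -64 from the incoming arg pointer, %r25 at -56, ..., %r19 at
 -8, so the flushed registers abut the stack argument area. */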
6636 static void
6637 hppa_va_start (tree valist, rtx nextarg)
6639 nextarg = expand_builtin_saveregs ();
6640 std_expand_builtin_va_start (valist, nextarg);
6643 static tree
6644 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6645 gimple_seq *post_p)
6647 if (TARGET_64BIT)
6649 /* Args grow upward. We can use the generic routines. */
6650 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6652 else /* !TARGET_64BIT */
6654 tree ptr = build_pointer_type (type);
6655 tree valist_type;
6656 tree t, u;
6657 unsigned int size, ofs;
6658 bool indirect;
6660 indirect = pass_va_arg_by_reference (type);
6661 if (indirect)
6663 type = ptr;
6664 ptr = build_pointer_type (type);
6666 size = int_size_in_bytes (type);
6667 valist_type = TREE_TYPE (valist);
6669 /* Args grow down. Not handled by generic routines. */
6671 u = fold_convert (sizetype, size_in_bytes (type));
6672 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6673 t = fold_build_pointer_plus (valist, u);
6675 /* Align to 4 or 8 byte boundary depending on argument size. */
6677 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6678 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6679 t = fold_convert (valist_type, t);
6681 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6683 ofs = (8 - size) % 4;
6684 if (ofs != 0)
6685 t = fold_build_pointer_plus_hwi (t, ofs);
6687 t = fold_convert (ptr, t);
6688 t = build_va_arg_indirect_ref (t);
6690 if (indirect)
6691 t = build_va_arg_indirect_ref (t);
6693 return t;
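/* A worked example (a sketch) for !TARGET_64BIT: fetching a 2-byte
 short decrements the pointer by 2, rounds it down to a 4-byte
 boundary, then adds (8 - 2) % 4 = 2, so the value is read
 right-justified from its 4-byte slot. */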
6697 /* True if MODE is valid for the target. By "valid", we mean able to
6698 be manipulated in non-trivial ways. In particular, this means all
6699 the arithmetic is supported. */
6701 static bool
6702 pa_scalar_mode_supported_p (scalar_mode mode)
6704 int precision = GET_MODE_PRECISION (mode);
6706 if (TARGET_64BIT && mode == TImode)
6707 return true;
6709 switch (GET_MODE_CLASS (mode))
6711 case MODE_PARTIAL_INT:
6712 case MODE_INT:
6713 if (precision == CHAR_TYPE_SIZE)
6714 return true;
6715 if (precision == SHORT_TYPE_SIZE)
6716 return true;
6717 if (precision == INT_TYPE_SIZE)
6718 return true;
6719 if (precision == LONG_TYPE_SIZE)
6720 return true;
6721 if (precision == LONG_LONG_TYPE_SIZE)
6722 return true;
6723 return false;
6725 case MODE_FLOAT:
6726 if (precision == FLOAT_TYPE_SIZE)
6727 return true;
6728 if (precision == DOUBLE_TYPE_SIZE)
6729 return true;
6730 if (precision == LONG_DOUBLE_TYPE_SIZE)
6731 return true;
6732 return false;
6734 case MODE_DECIMAL_FLOAT:
6735 return false;
6737 default:
6738 gcc_unreachable ();
6742 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6743 it branches into the delay slot. Otherwise, return FALSE. */
6745 static bool
6746 branch_to_delay_slot_p (rtx_insn *insn)
6748 rtx_insn *jump_insn;
6750 if (dbr_sequence_length ())
6751 return FALSE;
6753 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6754 while (insn)
6756 insn = next_active_insn (insn);
6757 if (jump_insn == insn)
6758 return TRUE;
6760 /* We can't rely on the length of asms. So, we return FALSE when
6761 the branch is followed by an asm. */
6762 if (!insn
6763 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6764 || asm_noperands (PATTERN (insn)) >= 0
6765 || get_attr_length (insn) > 0)
6766 break;
6769 return FALSE;
6772 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6774 This occurs when INSN has an unfilled delay slot and is followed
6775 by an asm. Disaster can occur if the asm is empty and the jump
6776 branches into the delay slot. So, we add a nop in the delay slot
6777 when this occurs. */
6779 static bool
6780 branch_needs_nop_p (rtx_insn *insn)
6782 rtx_insn *jump_insn;
6784 if (dbr_sequence_length ())
6785 return FALSE;
6787 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6788 while (insn)
6790 insn = next_active_insn (insn);
6791 if (!insn || jump_insn == insn)
6792 return TRUE;
6794 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6795 || asm_noperands (PATTERN (insn)) >= 0)
6796 && get_attr_length (insn) > 0)
6797 break;
6800 return FALSE;
6803 /* Return TRUE if INSN, a forward jump insn, can use nullification
6804 to skip the following instruction. This avoids an extra cycle due
6805 to a mis-predicted branch when we fall through. */
6807 static bool
6808 use_skip_p (rtx_insn *insn)
6810 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6812 while (insn)
6814 insn = next_active_insn (insn);
6816 /* We can't rely on the length of asms, so we can't skip asms. */
6817 if (!insn
6818 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6819 || asm_noperands (PATTERN (insn)) >= 0)
6820 break;
6821 if (get_attr_length (insn) == 4
6822 && jump_insn == next_active_insn (insn))
6823 return TRUE;
6824 if (get_attr_length (insn) > 0)
6825 break;
6828 return FALSE;
6831 /* This routine handles all the normal conditional branch sequences we
6832 might need to generate. It handles compare immediate vs compare
6833 register, nullification of delay slots, varying length branches,
6834 negated branches, and all combinations of the above. It returns the
6835 output appropriate to emit the branch corresponding to all given
6836 parameters. */
6838 const char *
6839 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6841 static char buf[100];
6842 bool useskip;
6843 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6844 int length = get_attr_length (insn);
6845 int xdelay;
6847 /* A conditional branch to the following instruction (e.g. the delay slot)
6848 is asking for a disaster. This can happen when not optimizing and
6849 when jump optimization fails.
6851 While it is usually safe to emit nothing, this can fail if the
6852 preceding instruction is a nullified branch with an empty delay
6853 slot and the same branch target as this branch. We could check
6854 for this but jump optimization should eliminate nop jumps. It
6855 is always safe to emit a nop. */
6856 if (branch_to_delay_slot_p (insn))
6857 return "nop";
6859 /* The doubleword form of the cmpib instruction doesn't have the LEU
6860 and GTU conditions while the cmpb instruction does. Since we accept
6861 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6862 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6863 operands[2] = gen_rtx_REG (DImode, 0);
6864 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6865 operands[1] = gen_rtx_REG (DImode, 0);
6867 /* If this is a long branch with its delay slot unfilled, set `nullify'
6868 as it can nullify the delay slot and save a nop. */
6869 if (length == 8 && dbr_sequence_length () == 0)
6870 nullify = 1;
6872 /* If this is a short forward conditional branch which did not get
6873 its delay slot filled, the delay slot can still be nullified. */
6874 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6875 nullify = forward_branch_p (insn);
6877 /* A forward branch over a single nullified insn can be done with a
6878 comclr instruction. This avoids a single cycle penalty due to
6879 a mis-predicted branch if we fall through (branch not taken). */
6880 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6882 switch (length)
6884 /* All short conditional branches except backwards with an unfilled
6885 delay slot. */
6886 case 4:
6887 if (useskip)
6888 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6889 else
6890 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6891 if (GET_MODE (operands[1]) == DImode)
6892 strcat (buf, "*");
6893 if (negated)
6894 strcat (buf, "%B3");
6895 else
6896 strcat (buf, "%S3");
6897 if (useskip)
6898 strcat (buf, " %2,%r1,%%r0");
6899 else if (nullify)
6901 if (branch_needs_nop_p (insn))
6902 strcat (buf, ",n %2,%r1,%0%#");
6903 else
6904 strcat (buf, ",n %2,%r1,%0");
6906 else
6907 strcat (buf, " %2,%r1,%0");
6908 break;
6910 /* All long conditionals. Note a short backward branch with an
6911 unfilled delay slot is treated just like a long backward branch
6912 with an unfilled delay slot. */
6913 case 8:
6914 /* Handle weird backwards branch with a filled delay slot
6915 which is nullified. */
6916 if (dbr_sequence_length () != 0
6917 && ! forward_branch_p (insn)
6918 && nullify)
6920 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6921 if (GET_MODE (operands[1]) == DImode)
6922 strcat (buf, "*");
6923 if (negated)
6924 strcat (buf, "%S3");
6925 else
6926 strcat (buf, "%B3");
6927 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6929 /* Handle short backwards branch with an unfilled delay slot.
6930 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6931 taken and untaken branches. */
6932 else if (dbr_sequence_length () == 0
6933 && ! forward_branch_p (insn)
6934 && INSN_ADDRESSES_SET_P ()
6935 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6936 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6938 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6939 if (GET_MODE (operands[1]) == DImode)
6940 strcat (buf, "*");
6941 if (negated)
6942 strcat (buf, "%B3 %2,%r1,%0%#");
6943 else
6944 strcat (buf, "%S3 %2,%r1,%0%#");
6946 else
6948 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6949 if (GET_MODE (operands[1]) == DImode)
6950 strcat (buf, "*");
6951 if (negated)
6952 strcat (buf, "%S3");
6953 else
6954 strcat (buf, "%B3");
6955 if (nullify)
6956 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6957 else
6958 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6960 break;
6962 default:
6963 /* The reversed conditional branch must branch over one additional
6964 instruction if the delay slot is filled and needs to be extracted
6965 by pa_output_lbranch. If the delay slot is empty or this is a
6966 nullified forward branch, the instruction after the reversed
6967 condition branch must be nullified. */
6968 if (dbr_sequence_length () == 0
6969 || (nullify && forward_branch_p (insn)))
6971 nullify = 1;
6972 xdelay = 0;
6973 operands[4] = GEN_INT (length);
6975 else
6977 xdelay = 1;
6978 operands[4] = GEN_INT (length + 4);
6981 /* Create a reversed conditional branch which branches around
6982 the following insns. */
6983 if (GET_MODE (operands[1]) != DImode)
6985 if (nullify)
6987 if (negated)
6988 strcpy (buf,
6989 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6990 else
6991 strcpy (buf,
6992 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6994 else
6996 if (negated)
6997 strcpy (buf,
6998 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6999 else
7000 strcpy (buf,
7001 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
7004 else
7006 if (nullify)
7008 if (negated)
7009 strcpy (buf,
7010 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
7011 else
7012 strcpy (buf,
7013 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
7015 else
7017 if (negated)
7018 strcpy (buf,
7019 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
7020 else
7021 strcpy (buf,
7022 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
7026 output_asm_insn (buf, operands);
7027 return pa_output_lbranch (operands[0], insn, xdelay);
7029 return buf;
7032 /* Output a PIC pc-relative instruction sequence to load the address of
7033 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
7034 or a code label. OPERANDS[1] specifies the register to use to load
7035 the program counter. OPERANDS[3] may be used for label generation.
7036 The sequence is always three instructions in length. The program
7037 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
7038 Register %r1 is clobbered. */
7040 static void
7041 pa_output_pic_pcrel_sequence (rtx *operands)
7043 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
7044 if (TARGET_PA_20)
7046 /* We can use mfia to determine the current program counter. */
7047 if (TARGET_SOM || !TARGET_GAS)
7049 operands[3] = gen_label_rtx ();
7050 targetm.asm_out.internal_label (asm_out_file, "L",
7051 CODE_LABEL_NUMBER (operands[3]));
7052 output_asm_insn ("mfia %1", operands);
7053 output_asm_insn ("addil L'%0-%l3,%1", operands);
7054 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
7056 else
7058 output_asm_insn ("mfia %1", operands);
7059 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
7060 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
7063 else
7065 /* We need to use a branch to determine the current program counter. */
7066 output_asm_insn ("{bl|b,l} .+8,%1", operands);
7067 if (TARGET_SOM || !TARGET_GAS)
7069 operands[3] = gen_label_rtx ();
7070 output_asm_insn ("addil L'%0-%l3,%1", operands);
7071 targetm.asm_out.internal_label (asm_out_file, "L",
7072 CODE_LABEL_NUMBER (operands[3]));
7073 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
7075 else
7077 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
7078 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
7083 /* This routine handles output of long unconditional branches that
7084 exceed the maximum range of a simple branch instruction. Since
7085 we don't have a register available for the branch, we save register
7086 %r1 in the frame marker, load the branch destination DEST into %r1,
7087 execute the branch, and restore %r1 in the delay slot of the branch.
7089 Since long branches may have an insn in the delay slot and the
7090 delay slot is used to restore %r1, we in general need to extract
7091 this insn and execute it before the branch. However, to facilitate
7092 use of this function by conditional branches, we also provide an
7093 option to not extract the delay insn so that it will be emitted
7094 after the long branch. So, if there is an insn in the delay slot,
7095 it is extracted if XDELAY is nonzero.
7097 The lengths of the various long-branch sequences are 20, 16 and 24
7098 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
7100 const char *
7101 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
7103 rtx xoperands[4];
7105 xoperands[0] = dest;
7107 /* First, free up the delay slot. */
7108 if (xdelay && dbr_sequence_length () != 0)
7110 /* We can't handle a jump in the delay slot. */
7111 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
7113 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7114 optimize, 0, NULL);
7116 /* Now delete the delay insn. */
7117 SET_INSN_DELETED (NEXT_INSN (insn));
7120 /* Output an insn to save %r1. The runtime documentation doesn't
7121 specify whether the "Clean Up" slot in the caller's frame can
7122 be clobbered by the callee. It isn't copied by HP's builtin
7123 alloca, so this suggests that it can be clobbered if necessary.
7124 The "Static Link" location is copied by HP builtin alloca, so
7125 we avoid using it. Using the cleanup slot might be a problem
7126 if we have to interoperate with languages that pass cleanup
7127 information. However, it should be possible to handle these
7128 situations with GCC's asm feature.
7130 The "Current RP" slot is reserved for the called procedure, so
7131 we try to use it when we don't have a frame of our own. It's
7132 rather unlikely that we won't have a frame when we need to emit
7133 a very long branch.
7135 Really the way to go long term is a register scavenger; goto
7136 the target of the jump and find a register which we can use
7137 as a scratch to hold the value in %r1. Then, we wouldn't have
7138 to free up the delay slot or clobber a slot that may be needed
7139 for other purposes. */
7140 if (TARGET_64BIT)
7142 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7143 /* Use the return pointer slot in the frame marker. */
7144 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7145 else
7146 /* Use the slot at -40 in the frame marker since HP builtin
7147 alloca doesn't copy it. */
7148 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7150 else
7152 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7153 /* Use the return pointer slot in the frame marker. */
7154 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7155 else
7156 /* Use the "Clean Up" slot in the frame marker. In GCC,
7157 the only other use of this location is for copying a
7158 floating point double argument from a floating-point
7159 register to two general registers. The copy is done
7160 as an "atomic" operation when outputting a call, so it
7161 won't interfere with our using the location here. */
7162 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7165 if (TARGET_PORTABLE_RUNTIME)
7167 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7168 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7169 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7171 else if (flag_pic)
7173 xoperands[1] = gen_rtx_REG (Pmode, 1);
7174 xoperands[2] = xoperands[1];
7175 pa_output_pic_pcrel_sequence (xoperands);
7176 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7178 else
7179 /* Now output a very long branch to the original target. */
7180 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7182 /* Now restore the value of %r1 in the delay slot. */
7183 if (TARGET_64BIT)
7185 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7186 return "ldd -16(%%r30),%%r1";
7187 else
7188 return "ldd -40(%%r30),%%r1";
7190 else
7192 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7193 return "ldw -20(%%r30),%%r1";
7194 else
7195 return "ldw -12(%%r30),%%r1";
7199 /* This routine handles all the branch-on-bit conditional branch sequences we
7200 might need to generate. It handles nullification of delay slots,
7201 varying length branches, negated branches and all combinations of the
7202 above. It returns the appropriate output template to emit the branch. */
7204 const char *
7205 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7207 static char buf[100];
7208 bool useskip;
7209 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7210 int length = get_attr_length (insn);
7211 int xdelay;
7213 /* A conditional branch to the following instruction (e.g. the delay slot) is
7214 asking for a disaster. I do not think this can happen as this pattern
7215 is only used when optimizing; jump optimization should eliminate the
7216 jump. But be prepared just in case. */
7218 if (branch_to_delay_slot_p (insn))
7219 return "nop";
7221 /* If this is a long branch with its delay slot unfilled, set `nullify'
7222 as it can nullify the delay slot and save a nop. */
7223 if (length == 8 && dbr_sequence_length () == 0)
7224 nullify = 1;
7226 /* If this is a short forward conditional branch which did not get
7227 its delay slot filled, the delay slot can still be nullified. */
7228 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7229 nullify = forward_branch_p (insn);
7231 /* A forward branch over a single nullified insn can be done with an
7232 extrs instruction. This avoids a single cycle penalty due to
7233 a mis-predicted branch if we fall through (branch not taken). */
7234 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7236 switch (length)
7239 /* All short conditional branches except backwards with an unfilled
7240 delay slot. */
7241 case 4:
7242 if (useskip)
7243 strcpy (buf, "{extrs,|extrw,s,}");
7244 else
7245 strcpy (buf, "bb,");
7246 if (useskip && GET_MODE (operands[0]) == DImode)
7247 strcpy (buf, "extrd,s,*");
7248 else if (GET_MODE (operands[0]) == DImode)
7249 strcpy (buf, "bb,*");
7250 if ((which == 0 && negated)
7251 || (which == 1 && ! negated))
7252 strcat (buf, ">=");
7253 else
7254 strcat (buf, "<");
7255 if (useskip)
7256 strcat (buf, " %0,%1,1,%%r0");
7257 else if (nullify && negated)
7259 if (branch_needs_nop_p (insn))
7260 strcat (buf, ",n %0,%1,%3%#");
7261 else
7262 strcat (buf, ",n %0,%1,%3");
7264 else if (nullify && ! negated)
7266 if (branch_needs_nop_p (insn))
7267 strcat (buf, ",n %0,%1,%2%#");
7268 else
7269 strcat (buf, ",n %0,%1,%2");
7271 else if (! nullify && negated)
7272 strcat (buf, " %0,%1,%3");
7273 else if (! nullify && ! negated)
7274 strcat (buf, " %0,%1,%2");
7275 break;
7277 /* All long conditionals. Note a short backward branch with an
7278 unfilled delay slot is treated just like a long backward branch
7279 with an unfilled delay slot. */
7280 case 8:
7281 /* Handle weird backwards branch with a filled delay slot
7282 which is nullified. */
7283 if (dbr_sequence_length () != 0
7284 && ! forward_branch_p (insn)
7285 && nullify)
7287 strcpy (buf, "bb,");
7288 if (GET_MODE (operands[0]) == DImode)
7289 strcat (buf, "*");
7290 if ((which == 0 && negated)
7291 || (which == 1 && ! negated))
7292 strcat (buf, "<");
7293 else
7294 strcat (buf, ">=");
7295 if (negated)
7296 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7297 else
7298 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7300 /* Handle short backwards branch with an unfilled delay slot.
7301 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7302 taken and untaken branches. */
7303 else if (dbr_sequence_length () == 0
7304 && ! forward_branch_p (insn)
7305 && INSN_ADDRESSES_SET_P ()
7306 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7307 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7309 strcpy (buf, "bb,");
7310 if (GET_MODE (operands[0]) == DImode)
7311 strcat (buf, "*");
7312 if ((which == 0 && negated)
7313 || (which == 1 && ! negated))
7314 strcat (buf, ">=");
7315 else
7316 strcat (buf, "<");
7317 if (negated)
7318 strcat (buf, " %0,%1,%3%#");
7319 else
7320 strcat (buf, " %0,%1,%2%#");
7322 else
7324 if (GET_MODE (operands[0]) == DImode)
7325 strcpy (buf, "extrd,s,*");
7326 else
7327 strcpy (buf, "{extrs,|extrw,s,}");
7328 if ((which == 0 && negated)
7329 || (which == 1 && ! negated))
7330 strcat (buf, "<");
7331 else
7332 strcat (buf, ">=");
7333 if (nullify && negated)
7334 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7335 else if (nullify && ! negated)
7336 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7337 else if (negated)
7338 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7339 else
7340 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7342 break;
7344 default:
7345 /* The reversed conditional branch must branch over one additional
7346 instruction if the delay slot is filled and needs to be extracted
7347 by pa_output_lbranch. If the delay slot is empty or this is a
7348 nullified forward branch, the instruction after the reversed
7349 condition branch must be nullified. */
7350 if (dbr_sequence_length () == 0
7351 || (nullify && forward_branch_p (insn)))
7353 nullify = 1;
7354 xdelay = 0;
7355 operands[4] = GEN_INT (length);
7357 else
7359 xdelay = 1;
7360 operands[4] = GEN_INT (length + 4);
7363 if (GET_MODE (operands[0]) == DImode)
7364 strcpy (buf, "bb,*");
7365 else
7366 strcpy (buf, "bb,");
7367 if ((which == 0 && negated)
7368 || (which == 1 && !negated))
7369 strcat (buf, "<");
7370 else
7371 strcat (buf, ">=");
7372 if (nullify)
7373 strcat (buf, ",n %0,%1,.+%4");
7374 else
7375 strcat (buf, " %0,%1,.+%4");
7376 output_asm_insn (buf, operands);
7377 return pa_output_lbranch (negated ? operands[3] : operands[2],
7378 insn, xdelay);
7380 return buf;
7383 /* This routine handles all the branch-on-variable-bit conditional branch
7384 sequences we might need to generate. It handles nullification of delay
7385 slots, varying length branches, negated branches and all combinations
7386 of the above. It returns the appropriate output template to emit the
7387 branch. */
7389 const char *
7390 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7391 int which)
7393 static char buf[100];
7394 bool useskip;
7395 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7396 int length = get_attr_length (insn);
7397 int xdelay;
7399 /* A conditional branch to the following instruction (e.g. the delay slot) is
7400 asking for a disaster. I do not think this can happen as this pattern
7401 is only used when optimizing; jump optimization should eliminate the
7402 jump. But be prepared just in case. */
7404 if (branch_to_delay_slot_p (insn))
7405 return "nop";
7407 /* If this is a long branch with its delay slot unfilled, set `nullify'
7408 as it can nullify the delay slot and save a nop. */
7409 if (length == 8 && dbr_sequence_length () == 0)
7410 nullify = 1;
7412 /* If this is a short forward conditional branch which did not get
7413 its delay slot filled, the delay slot can still be nullified. */
7414 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7415 nullify = forward_branch_p (insn);
7417 /* A forward branch over a single nullified insn can be done with an
7418 extrs instruction. This avoids a single cycle penalty due to
7419 a mis-predicted branch if we fall through (branch not taken). */
7420 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7422 switch (length)
7425 /* All short conditional branches except backwards with an unfilled
7426 delay slot. */
7427 case 4:
7428 if (useskip)
7429 strcpy (buf, "{vextrs,|extrw,s,}");
7430 else
7431 strcpy (buf, "{bvb,|bb,}");
7432 if (useskip && GET_MODE (operands[0]) == DImode)
7433 strcpy (buf, "extrd,s,*");
7434 else if (GET_MODE (operands[0]) == DImode)
7435 strcpy (buf, "bb,*");
7436 if ((which == 0 && negated)
7437 || (which == 1 && ! negated))
7438 strcat (buf, ">=");
7439 else
7440 strcat (buf, "<");
7441 if (useskip)
7442 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7443 else if (nullify && negated)
7445 if (branch_needs_nop_p (insn))
7446 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7447 else
7448 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7450 else if (nullify && ! negated)
7452 if (branch_needs_nop_p (insn))
7453 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7454 else
7455 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7457 else if (! nullify && negated)
7458 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7459 else if (! nullify && ! negated)
7460 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7461 break;
7463 /* All long conditionals. Note a short backward branch with an
7464 unfilled delay slot is treated just like a long backward branch
7465 with an unfilled delay slot. */
7466 case 8:
7467 /* Handle weird backwards branch with a filled delay slot
7468 which is nullified. */
7469 if (dbr_sequence_length () != 0
7470 && ! forward_branch_p (insn)
7471 && nullify)
7473 strcpy (buf, "{bvb,|bb,}");
7474 if (GET_MODE (operands[0]) == DImode)
7475 strcat (buf, "*");
7476 if ((which == 0 && negated)
7477 || (which == 1 && ! negated))
7478 strcat (buf, "<");
7479 else
7480 strcat (buf, ">=");
7481 if (negated)
7482 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7483 else
7484 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7486 /* Handle short backwards branch with an unfilled delay slot.
7487 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7488 taken and untaken branches. */
7489 else if (dbr_sequence_length () == 0
7490 && ! forward_branch_p (insn)
7491 && INSN_ADDRESSES_SET_P ()
7492 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7493 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7495 strcpy (buf, "{bvb,|bb,}");
7496 if (GET_MODE (operands[0]) == DImode)
7497 strcat (buf, "*");
7498 if ((which == 0 && negated)
7499 || (which == 1 && ! negated))
7500 strcat (buf, ">=");
7501 else
7502 strcat (buf, "<");
7503 if (negated)
7504 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7505 else
7506 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7508 else
7510 strcpy (buf, "{vextrs,|extrw,s,}");
7511 if (GET_MODE (operands[0]) == DImode)
7512 strcpy (buf, "extrd,s,*");
7513 if ((which == 0 && negated)
7514 || (which == 1 && ! negated))
7515 strcat (buf, "<");
7516 else
7517 strcat (buf, ">=");
7518 if (nullify && negated)
7519 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7520 else if (nullify && ! negated)
7521 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7522 else if (negated)
7523 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7524 else
7525 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7527 break;
7529 default:
7530 /* The reversed conditional branch must branch over one additional
7531 instruction if the delay slot is filled and needs to be extracted
7532 by pa_output_lbranch. If the delay slot is empty or this is a
7533 nullified forward branch, the instruction after the reversed
7534 condition branch must be nullified. */
7535 if (dbr_sequence_length () == 0
7536 || (nullify && forward_branch_p (insn)))
7538 nullify = 1;
7539 xdelay = 0;
7540 operands[4] = GEN_INT (length);
7542 else
7544 xdelay = 1;
7545 operands[4] = GEN_INT (length + 4);
7548 if (GET_MODE (operands[0]) == DImode)
7549 strcpy (buf, "bb,*");
7550 else
7551 strcpy (buf, "{bvb,|bb,}");
7552 if ((which == 0 && negated)
7553 || (which == 1 && !negated))
7554 strcat (buf, "<");
7555 else
7556 strcat (buf, ">=");
7557 if (nullify)
7558 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7559 else
7560 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7561 output_asm_insn (buf, operands);
7562 return pa_output_lbranch (negated ? operands[3] : operands[2],
7563 insn, xdelay);
7565 return buf;
7568 /* Return the output template for emitting a dbra type insn.
7570 Note it may perform some output operations on its own before
7571 returning the final output string. */
7572 const char *
7573 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7575 int length = get_attr_length (insn);
7577 /* A conditional branch to the following instruction (e.g. the delay slot) is
7578 asking for a disaster. Be prepared! */
7580 if (branch_to_delay_slot_p (insn))
7582 if (which_alternative == 0)
7583 return "ldo %1(%0),%0";
7584 else if (which_alternative == 1)
7586 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7587 output_asm_insn ("ldw -16(%%r30),%4", operands);
7588 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7589 return "{fldws|fldw} -16(%%r30),%0";
7591 else
7593 output_asm_insn ("ldw %0,%4", operands);
7594 return "ldo %1(%4),%4\n\tstw %4,%0";
7598 if (which_alternative == 0)
7600 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7601 int xdelay;
7603 /* If this is a long branch with its delay slot unfilled, set `nullify'
7604 as it can nullify the delay slot and save a nop. */
7605 if (length == 8 && dbr_sequence_length () == 0)
7606 nullify = 1;
7608 /* If this is a short forward conditional branch which did not get
7609 its delay slot filled, the delay slot can still be nullified. */
7610 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7611 nullify = forward_branch_p (insn);
7613 switch (length)
7615 case 4:
7616 if (nullify)
7618 if (branch_needs_nop_p (insn))
7619 return "addib,%C2,n %1,%0,%3%#";
7620 else
7621 return "addib,%C2,n %1,%0,%3";
7623 else
7624 return "addib,%C2 %1,%0,%3";
7626 case 8:
7627 /* Handle weird backwards branch with a filled delay slot
7628 which is nullified. */
7629 if (dbr_sequence_length () != 0
7630 && ! forward_branch_p (insn)
7631 && nullify)
7632 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7633 /* Handle short backwards branch with an unfilled delay slot.
7634 Using an addb;nop rather than addi;bl saves 1 cycle for both
7635 taken and untaken branches. */
7636 else if (dbr_sequence_length () == 0
7637 && ! forward_branch_p (insn)
7638 && INSN_ADDRESSES_SET_P ()
7639 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7640 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7641 return "addib,%C2 %1,%0,%3%#";
7643 /* Handle normal cases. */
7644 if (nullify)
7645 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7646 else
7647 return "addi,%N2 %1,%0,%0\n\tb %3";
7649 default:
7650 /* The reversed conditional branch must branch over one additional
7651 instruction if the delay slot is filled and needs to be extracted
7652 by pa_output_lbranch. If the delay slot is empty or this is a
7653 nullified forward branch, the instruction after the reversed
7654 condition branch must be nullified. */
7655 if (dbr_sequence_length () == 0
7656 || (nullify && forward_branch_p (insn)))
7658 nullify = 1;
7659 xdelay = 0;
7660 operands[4] = GEN_INT (length);
7662 else
7664 xdelay = 1;
7665 operands[4] = GEN_INT (length + 4);
7668 if (nullify)
7669 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7670 else
7671 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7673 return pa_output_lbranch (operands[3], insn, xdelay);
7677 /* Deal with gross reload from FP register case. */
7678 else if (which_alternative == 1)
7680 /* Move loop counter from FP register to MEM then into a GR,
7681 increment the GR, store the GR into MEM, and finally reload
7682 the FP register from MEM from within the branch's delay slot. */
7683 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7684 operands);
7685 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7686 if (length == 24)
7687 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7688 else if (length == 28)
7689 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7690 else
7692 operands[5] = GEN_INT (length - 16);
7693 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7694 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7695 return pa_output_lbranch (operands[3], insn, 0);
7698 /* Deal with gross reload from memory case. */
7699 else
7701 /* Reload loop counter from memory, the store back to memory
7702 happens in the branch's delay slot. */
7703 output_asm_insn ("ldw %0,%4", operands);
7704 if (length == 12)
7705 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7706 else if (length == 16)
7707 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7708 else
7710 operands[5] = GEN_INT (length - 4);
7711 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7712 return pa_output_lbranch (operands[3], insn, 0);
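/* For illustration (hypothetical registers and label): the short form
   handled by alternative 0 below copies and branches in one insn, e.g.

       movb,= %r4,%r19,L$0003

   which copies %r4 into %r19 and branches to L$0003 if the copied
   value is zero.  */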
7717 /* Return the output template for emitting a movb type insn.
7719 Note it may perform some output operations on its own before
7720 returning the final output string. */
7721 const char *
7722 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7723 int reverse_comparison)
7725 int length = get_attr_length (insn);
7727 /* A conditional branch to the following instruction (i.e., the delay slot) is
7728 asking for a disaster. Be prepared! */
7730 if (branch_to_delay_slot_p (insn))
7732 if (which_alternative == 0)
7733 return "copy %1,%0";
7734 else if (which_alternative == 1)
7736 output_asm_insn ("stw %1,-16(%%r30)", operands);
7737 return "{fldws|fldw} -16(%%r30),%0";
7739 else if (which_alternative == 2)
7740 return "stw %1,%0";
7741 else
7742 return "mtsar %r1";
7745 /* Support the second variant. */
7746 if (reverse_comparison)
7747 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7749 if (which_alternative == 0)
7751 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7752 int xdelay;
7754 /* If this is a long branch with its delay slot unfilled, set `nullify'
7755 as it can nullify the delay slot and save a nop. */
7756 if (length == 8 && dbr_sequence_length () == 0)
7757 nullify = 1;
7759 /* If this is a short forward conditional branch which did not get
7760 its delay slot filled, the delay slot can still be nullified. */
7761 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7762 nullify = forward_branch_p (insn);
7764 switch (length)
7766 case 4:
7767 if (nullify)
7769 if (branch_needs_nop_p (insn))
7770 return "movb,%C2,n %1,%0,%3%#";
7771 else
7772 return "movb,%C2,n %1,%0,%3";
7774 else
7775 return "movb,%C2 %1,%0,%3";
7777 case 8:
7778 /* Handle weird backwards branch with a filled delay slot
7779 which is nullified. */
7780 if (dbr_sequence_length () != 0
7781 && ! forward_branch_p (insn)
7782 && nullify)
7783 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7785 /* Handle short backwards branch with an unfilled delay slot.
7786 Using a movb;nop rather than or;bl saves 1 cycle for both
7787 taken and untaken branches. */
7788 else if (dbr_sequence_length () == 0
7789 && ! forward_branch_p (insn)
7790 && INSN_ADDRESSES_SET_P ()
7791 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7792 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7793 return "movb,%C2 %1,%0,%3%#";
7794 /* Handle normal cases. */
7795 if (nullify)
7796 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7797 else
7798 return "or,%N2 %1,%%r0,%0\n\tb %3";
7800 default:
7801 /* The reversed conditional branch must branch over one additional
7802 instruction if the delay slot is filled and needs to be extracted
7803 by pa_output_lbranch. If the delay slot is empty or this is a
7804 nullified forward branch, the instruction after the reversed
7805 conditional branch must be nullified. */
7806 if (dbr_sequence_length () == 0
7807 || (nullify && forward_branch_p (insn)))
7809 nullify = 1;
7810 xdelay = 0;
7811 operands[4] = GEN_INT (length);
7813 else
7815 xdelay = 1;
7816 operands[4] = GEN_INT (length + 4);
7819 if (nullify)
7820 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7821 else
7822 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7824 return pa_output_lbranch (operands[3], insn, xdelay);
7827 /* Deal with gross reload for FP destination register case. */
7828 else if (which_alternative == 1)
7830 /* Move source register to MEM, perform the branch test, then
7831 finally load the FP register from MEM from within the branch's
7832 delay slot. */
7833 output_asm_insn ("stw %1,-16(%%r30)", operands);
7834 if (length == 12)
7835 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7836 else if (length == 16)
7837 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7838 else
7840 operands[4] = GEN_INT (length - 4);
7841 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7842 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7843 return pa_output_lbranch (operands[3], insn, 0);
7846 /* Deal with gross reload from memory case. */
7847 else if (which_alternative == 2)
7849 /* Reload loop counter from memory, the store back to memory
7850 happens in the branch's delay slot. */
7851 if (length == 8)
7852 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7853 else if (length == 12)
7854 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7855 else
7857 operands[4] = GEN_INT (length);
7858 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7859 operands);
7860 return pa_output_lbranch (operands[3], insn, 0);
7863 /* Handle SAR as a destination. */
7864 else
7866 if (length == 8)
7867 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7868 else if (length == 12)
7869 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7870 else
7872 operands[4] = GEN_INT (length);
7873 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7874 operands);
7875 return pa_output_lbranch (operands[3], insn, 0);
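/* The FP-to-integer copies below go through memory since the PA has no
   direct move between the two register files.  For a hypothetical
   SFmode argument in %fr4, the emitted pair is roughly

       fstws %fr4,-16(%sr0,%r30)
       ldw -16(%sr0,%r30),%r26

   i.e. spill the FP argument register to a scratch stack slot and
   reload it into the matching general argument register.  */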
7880 /* Copy any FP arguments in INSN into integer registers. */
7881 static void
7882 copy_fp_args (rtx_insn *insn)
7884 rtx link;
7885 rtx xoperands[2];
7887 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7889 int arg_mode, regno;
7890 rtx use = XEXP (link, 0);
7892 if (! (GET_CODE (use) == USE
7893 && GET_CODE (XEXP (use, 0)) == REG
7894 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7895 continue;
7897 arg_mode = GET_MODE (XEXP (use, 0));
7898 regno = REGNO (XEXP (use, 0));
7900 /* Is it a floating point register? */
7901 if (regno >= 32 && regno <= 39)
7903 /* Copy the FP register into an integer register via memory. */
7904 if (arg_mode == SFmode)
7906 xoperands[0] = XEXP (use, 0);
7907 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7908 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7909 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7911 else
7913 xoperands[0] = XEXP (use, 0);
7914 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7915 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7916 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7917 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7923 /* Compute length of the FP argument copy sequence for INSN. */
7924 static int
7925 length_fp_args (rtx_insn *insn)
7927 int length = 0;
7928 rtx link;
7930 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7932 int arg_mode, regno;
7933 rtx use = XEXP (link, 0);
7935 if (! (GET_CODE (use) == USE
7936 && GET_CODE (XEXP (use, 0)) == REG
7937 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7938 continue;
7940 arg_mode = GET_MODE (XEXP (use, 0));
7941 regno = REGNO (XEXP (use, 0));
7943 /* Is it a floating point register? */
7944 if (regno >= 32 && regno <= 39)
7946 if (arg_mode == SFmode)
7947 length += 8;
7948 else
7949 length += 12;
7953 return length;
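/* A note on the distance estimates used below: TOTAL is zero when the
   current function is in a named section, and the sum is computed in
   unsigned arithmetic so that a wraparound collapses DISTANCE to the
   maximum value, i.e. "assume the branch is out of reach".  The length
   returned is then the shortest sequence guaranteed to reach.  */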
7956 /* Return the attribute length for the millicode call instruction INSN.
7957 The length must match the code generated by pa_output_millicode_call.
7958 We include the delay slot in the returned length as it is better to
7959 overestimate the length than to underestimate it. */
7961 int
7962 pa_attr_length_millicode_call (rtx_insn *insn)
7964 unsigned long distance = -1;
7965 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7967 if (INSN_ADDRESSES_SET_P ())
7969 distance = (total + insn_current_reference_address (insn));
7970 if (distance < total)
7971 distance = -1;
7974 if (TARGET_64BIT)
7976 if (!TARGET_LONG_CALLS && distance < 7600000)
7977 return 8;
7979 return 20;
7981 else if (TARGET_PORTABLE_RUNTIME)
7982 return 24;
7983 else
7985 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7986 return 8;
7988 if (!flag_pic)
7989 return 12;
7991 return 24;
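/* For illustration (hypothetical millicode routine): in the common
   8-byte case below, a millicode call such as $$mulI reduces to a
   plain pc-relative branch and link,

       bl $$mulI,%r31

   with the millicode return pointer in %r31 (%r2 on TARGET_64BIT);
   only the out-of-reach cases need the longer absolute or pic-relative
   sequences.  */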
7995 /* INSN is a function call.
7997 CALL_DEST is the routine we are calling. */
7999 const char *
8000 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
8002 int attr_length = get_attr_length (insn);
8003 int seq_length = dbr_sequence_length ();
8004 rtx xoperands[4];
8006 xoperands[0] = call_dest;
8008 /* Handle the common case where we are sure that the branch will
8009 reach the beginning of the $CODE$ subspace. The within-reach
8010 form of the $$sh_func_adrs call has a length of 28. Because it
8011 has an attribute type of sh_func_adrs, it never has a nonzero
8012 sequence length (i.e., the delay slot is never filled). */
8013 if (!TARGET_LONG_CALLS
8014 && (attr_length == 8
8015 || (attr_length == 28
8016 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
8018 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
8019 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8021 else
8023 if (TARGET_64BIT)
8025 /* It might seem that one insn could be saved by accessing
8026 the millicode function using the linkage table. However,
8027 this doesn't work in shared libraries and other dynamically
8028 loaded objects. Using a pc-relative sequence also avoids
8029 problems related to the implicit use of the gp register. */
8030 xoperands[1] = gen_rtx_REG (Pmode, 1);
8031 xoperands[2] = xoperands[1];
8032 pa_output_pic_pcrel_sequence (xoperands);
8033 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8035 else if (TARGET_PORTABLE_RUNTIME)
8037 /* Pure portable runtime doesn't allow be/ble; we also don't
8038 have PIC support in the assembler/linker, so this sequence
8039 is needed. */
8041 /* Get the address of our target into %r1. */
8042 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8043 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
8045 /* Get our return address into %r31. */
8046 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
8047 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
8049 /* Jump to our target address in %r1. */
8050 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8052 else if (!flag_pic)
8054 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8055 if (TARGET_PA_20)
8056 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
8057 else
8058 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8060 else
8062 xoperands[1] = gen_rtx_REG (Pmode, 31);
8063 xoperands[2] = gen_rtx_REG (Pmode, 1);
8064 pa_output_pic_pcrel_sequence (xoperands);
8066 /* Adjust return address. */
8067 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
8069 /* Jump to our target address in %r1. */
8070 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8074 if (seq_length == 0)
8075 output_asm_insn ("nop", xoperands);
8077 return "";
8080 /* Return the attribute length of the call instruction INSN. The SIBCALL
8081 flag indicates whether INSN is a regular call or a sibling call. The
8082 length returned must be longer than the code actually generated by
8083 pa_output_call. Since branch shortening is done before delay branch
8084 sequencing, there is no way to determine whether or not the delay
8085 slot will be filled during branch shortening. Even when the delay
8086 slot is filled, we may have to add a nop if the delay slot contains
8087 a branch that can't reach its target. Thus, we always have to include
8088 the delay slot in the length estimate. This used to be done in
8089 pa_adjust_insn_length but we do it here now as some sequences always
8090 fill the delay slot and we can save four bytes in the estimate for
8091 these sequences. */
8093 int
8094 pa_attr_length_call (rtx_insn *insn, int sibcall)
8096 int local_call;
8097 rtx call, call_dest;
8098 tree call_decl;
8099 int length = 0;
8100 rtx pat = PATTERN (insn);
8101 unsigned long distance = -1;
8103 gcc_assert (CALL_P (insn));
8105 if (INSN_ADDRESSES_SET_P ())
8107 unsigned long total;
8109 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8110 distance = (total + insn_current_reference_address (insn));
8111 if (distance < total)
8112 distance = -1;
8115 gcc_assert (GET_CODE (pat) == PARALLEL);
8117 /* Get the call rtx. */
8118 call = XVECEXP (pat, 0, 0);
8119 if (GET_CODE (call) == SET)
8120 call = SET_SRC (call);
8122 gcc_assert (GET_CODE (call) == CALL);
8124 /* Determine if this is a local call. */
8125 call_dest = XEXP (XEXP (call, 0), 0);
8126 call_decl = SYMBOL_REF_DECL (call_dest);
8127 local_call = call_decl && targetm.binds_local_p (call_decl);
8129 /* pc-relative branch. */
8130 if (!TARGET_LONG_CALLS
8131 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
8132 || distance < MAX_PCREL17F_OFFSET))
8133 length += 8;
8135 /* 64-bit plabel sequence. */
8136 else if (TARGET_64BIT && !local_call)
8137 length += 24;
8139 /* non-pic long absolute branch sequence. */
8140 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8141 length += 12;
8143 /* long pc-relative branch sequence. */
8144 else if (TARGET_LONG_PIC_SDIFF_CALL
8145 || (TARGET_GAS && !TARGET_SOM && local_call))
8147 length += 20;
8149 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8150 length += 8;
8153 /* 32-bit plabel sequence. */
8154 else
8156 length += 32;
8158 if (TARGET_SOM)
8159 length += length_fp_args (insn);
8161 if (flag_pic)
8162 length += 4;
8164 if (!TARGET_PA_20)
8166 if (!sibcall)
8167 length += 8;
8169 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8170 length += 8;
8174 return length;
8177 /* INSN is a function call.
8179 CALL_DEST is the routine we are calling. */
8181 const char *
8182 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8184 int seq_length = dbr_sequence_length ();
8185 tree call_decl = SYMBOL_REF_DECL (call_dest);
8186 int local_call = call_decl && targetm.binds_local_p (call_decl);
8187 rtx xoperands[4];
8189 xoperands[0] = call_dest;
8191 /* Handle the common case where we're sure that the branch will reach
8192 the beginning of the "$CODE$" subspace. This is the beginning of
8193 the current function if we are in a named section. */
8194 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8196 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8197 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8199 else
8201 if (TARGET_64BIT && !local_call)
8203 /* ??? As far as I can tell, the HP linker doesn't support the
8204 long pc-relative sequence described in the 64-bit runtime
8205 architecture. So, we use a slightly longer indirect call. */
8206 xoperands[0] = pa_get_deferred_plabel (call_dest);
8207 xoperands[1] = gen_label_rtx ();
8209 /* Put the load of %r27 into the delay slot. We don't need to
8210 do anything when generating fast indirect calls. */
8211 if (seq_length != 0)
8213 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8214 optimize, 0, NULL);
8216 /* Now delete the delay insn. */
8217 SET_INSN_DELETED (NEXT_INSN (insn));
8220 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8221 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8222 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8223 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8224 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8225 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8226 seq_length = 1;
8228 else
8230 int indirect_call = 0;
8232 /* Emit a long call. There are several different sequences
8233 of increasing length and complexity. In most cases,
8234 they don't allow an instruction in the delay slot. */
8235 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8236 && !TARGET_LONG_PIC_SDIFF_CALL
8237 && !(TARGET_GAS && !TARGET_SOM && local_call)
8238 && !TARGET_64BIT)
8239 indirect_call = 1;
8241 if (seq_length != 0
8242 && !sibcall
8243 && (!TARGET_PA_20
8244 || indirect_call
8245 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8247 /* A non-jump insn in the delay slot. By definition we can
8248 emit this insn before the call (and in fact before argument
8249 relocating). */
8250 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8251 NULL);
8253 /* Now delete the delay insn. */
8254 SET_INSN_DELETED (NEXT_INSN (insn));
8255 seq_length = 0;
8258 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8260 /* This is the best sequence for making long calls in
8261 non-pic code. Unfortunately, GNU ld doesn't provide
8262 the stub needed for external calls, and GAS's support
8263 for this with the SOM linker is buggy. It is safe
8264 to use this for local calls. */
8265 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8266 if (sibcall)
8267 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8268 else
8270 if (TARGET_PA_20)
8271 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8272 xoperands);
8273 else
8274 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8276 output_asm_insn ("copy %%r31,%%r2", xoperands);
8277 seq_length = 1;
8280 else
8282 /* The HP assembler and linker can handle relocations for
8283 the difference of two symbols. The HP assembler
8284 recognizes the sequence as a pc-relative call and
8285 the linker provides stubs when needed. */
8287 /* GAS currently can't generate the relocations that
8288 are needed for the SOM linker under HP-UX using this
8289 sequence. The GNU linker doesn't generate the stubs
8290 that are needed for external calls on TARGET_ELF32
8291 with this sequence. For now, we have to use a longer
8292 plabel sequence when using GAS for non-local calls. */
8293 if (TARGET_LONG_PIC_SDIFF_CALL
8294 || (TARGET_GAS && !TARGET_SOM && local_call))
8296 xoperands[1] = gen_rtx_REG (Pmode, 1);
8297 xoperands[2] = xoperands[1];
8298 pa_output_pic_pcrel_sequence (xoperands);
8300 else
8302 /* Emit a long plabel-based call sequence. This is
8303 essentially an inline implementation of $$dyncall.
8304 We don't actually try to call $$dyncall as this is
8305 as difficult as calling the function itself. */
8306 xoperands[0] = pa_get_deferred_plabel (call_dest);
8307 xoperands[1] = gen_label_rtx ();
8309 /* Since the call is indirect, FP arguments in registers
8310 need to be copied to the general registers. Then, the
8311 argument relocation stub will copy them back. */
8312 if (TARGET_SOM)
8313 copy_fp_args (insn);
8315 if (flag_pic)
8317 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8318 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8319 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8321 else
8323 output_asm_insn ("addil LR'%0-$global$,%%r27",
8324 xoperands);
8325 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8326 xoperands);
8329 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8330 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8331 /* Should this be an ordered load to ensure the target
8332 address is loaded before the global pointer? */
8333 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8334 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8336 if (!sibcall && !TARGET_PA_20)
8338 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8339 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8340 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8341 else
8342 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8346 if (TARGET_PA_20)
8348 if (sibcall)
8349 output_asm_insn ("bve (%%r1)", xoperands);
8350 else
8352 if (indirect_call)
8354 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8355 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8356 seq_length = 1;
8358 else
8359 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8362 else
8364 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8365 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8366 xoperands);
8368 if (sibcall)
8370 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8371 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8372 else
8373 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8375 else
8377 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8378 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8379 else
8380 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8382 if (indirect_call)
8383 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8384 else
8385 output_asm_insn ("copy %%r31,%%r2", xoperands);
8386 seq_length = 1;
8393 if (seq_length == 0)
8394 output_asm_insn ("nop", xoperands);
8396 return "";
8399 /* Return the attribute length of the indirect call instruction INSN.
8400 The length must match the code generated by pa_output_indirect_call.
8401 The returned length includes the delay slot. Currently, the delay
8402 slot of an indirect call sequence is not exposed and it is used by
8403 the sequence itself. */
8405 int
8406 pa_attr_length_indirect_call (rtx_insn *insn)
8408 unsigned long distance = -1;
8409 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8411 if (INSN_ADDRESSES_SET_P ())
8413 distance = (total + insn_current_reference_address (insn));
8414 if (distance < total)
8415 distance = -1;
8418 if (TARGET_64BIT)
8419 return 12;
8421 if (TARGET_FAST_INDIRECT_CALLS)
8422 return 8;
8424 if (TARGET_PORTABLE_RUNTIME)
8425 return 16;
8427 if (!TARGET_LONG_CALLS
8428 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8429 || distance < MAX_PCREL17F_OFFSET))
8430 return 8;
8432 /* Out of reach, can use ble. */
8433 if (!flag_pic)
8434 return 12;
8436 /* Inline versions of $$dyncall. */
8437 if (!optimize_size)
8439 if (TARGET_NO_SPACE_REGS)
8440 return 28;
8442 if (TARGET_PA_20)
8443 return 32;
8446 /* Long PIC pc-relative call. */
8447 return 20;
8450 const char *
8451 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8453 rtx xoperands[4];
8454 int length;
8456 if (TARGET_64BIT)
8458 xoperands[0] = call_dest;
8459 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8460 "bve,l (%%r2),%%r2\n\t"
8461 "ldd 24(%0),%%r27", xoperands);
8462 return "";
8465 /* First the special case for kernels, level 0 systems, etc. */
8466 if (TARGET_FAST_INDIRECT_CALLS)
8468 pa_output_arg_descriptor (insn);
8469 if (TARGET_PA_20)
8470 return "bve,l,n (%%r22),%%r2\n\tnop";
8471 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8474 if (TARGET_PORTABLE_RUNTIME)
8476 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8477 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8478 pa_output_arg_descriptor (insn);
8479 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8482 /* Now the normal case -- we can reach $$dyncall directly or
8483 we're sure that we can get there via a long-branch stub.
8485 No need to check target flags as the length uniquely identifies
8486 the remaining cases. */
8487 length = pa_attr_length_indirect_call (insn);
8488 if (length == 8)
8490 pa_output_arg_descriptor (insn);
8492 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8493 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8494 variant of the B,L instruction can't be used on the SOM target. */
8495 if (TARGET_PA_20 && !TARGET_SOM)
8496 return "b,l,n $$dyncall,%%r2\n\tnop";
8497 else
8498 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8501 /* Long millicode call, but we are not generating PIC or portable runtime
8502 code. */
8503 if (length == 12)
8505 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8506 pa_output_arg_descriptor (insn);
8507 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8510 /* The long PIC pc-relative call sequence is five instructions. So,
8511 let's use an inline version of $$dyncall when the calling sequence
8512 has a roughly similar number of instructions and we are not optimizing
8513 for size. We need two instructions to load the return pointer plus
8514 the $$dyncall implementation. */
8515 if (!optimize_size)
8517 if (TARGET_NO_SPACE_REGS)
8519 pa_output_arg_descriptor (insn);
8520 output_asm_insn ("bl .+8,%%r2\n\t"
8521 "ldo 20(%%r2),%%r2\n\t"
8522 "extru,<> %%r22,30,1,%%r0\n\t"
8523 "bv,n %%r0(%%r22)\n\t"
8524 "ldw -2(%%r22),%%r21\n\t"
8525 "bv %%r0(%%r21)\n\t"
8526 "ldw 2(%%r22),%%r19", xoperands);
8527 return "";
8529 if (TARGET_PA_20)
8531 pa_output_arg_descriptor (insn);
8532 output_asm_insn ("bl .+8,%%r2\n\t"
8533 "ldo 24(%%r2),%%r2\n\t"
8534 "stw %%r2,-24(%%sp)\n\t"
8535 "extru,<> %r22,30,1,%%r0\n\t"
8536 "bve,n (%%r22)\n\t"
8537 "ldw -2(%%r22),%%r21\n\t"
8538 "bve (%%r21)\n\t"
8539 "ldw 2(%%r22),%%r19", xoperands);
8540 return "";
8544 /* We need a long PIC call to $$dyncall. */
8545 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8546 xoperands[1] = gen_rtx_REG (Pmode, 2);
8547 xoperands[2] = gen_rtx_REG (Pmode, 1);
8548 pa_output_pic_pcrel_sequence (xoperands);
8549 pa_output_arg_descriptor (insn);
8550 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8553 /* In HPUX 8.0's shared library scheme, special relocations are needed
8554 for function labels if they might be passed to a function
8555 in a shared library (because shared libraries don't live in code
8556 space), and special magic is needed to construct their address. */
8558 void
8559 pa_encode_label (rtx sym)
8561 const char *str = XSTR (sym, 0);
8562 int len = strlen (str) + 1;
8563 char *newstr, *p;
8565 p = newstr = XALLOCAVEC (char, len + 1);
8566 *p++ = '@';
8567 strcpy (p, str);
8569 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8572 static void
8573 pa_encode_section_info (tree decl, rtx rtl, int first)
8575 int old_referenced = 0;
8577 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8578 old_referenced
8579 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8581 default_encode_section_info (decl, rtl, first);
8583 if (first && TEXT_SPACE_P (decl))
8585 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8586 if (TREE_CODE (decl) == FUNCTION_DECL)
8587 pa_encode_label (XEXP (rtl, 0));
8589 else if (old_referenced)
8590 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8593 /* This is sort of inverse to pa_encode_section_info. */
8595 static const char *
8596 pa_strip_name_encoding (const char *str)
8598 str += (*str == '@');
8599 str += (*str == '*');
8600 return str;
8603 /* Returns 1 if OP is a function label involved in a simple addition
8604 with a constant. Used to keep certain patterns from matching
8605 during instruction combination. */
8606 int
8607 pa_is_function_label_plus_const (rtx op)
8609 /* Strip off any CONST. */
8610 if (GET_CODE (op) == CONST)
8611 op = XEXP (op, 0);
8613 return (GET_CODE (op) == PLUS
8614 && function_label_operand (XEXP (op, 0), VOIDmode)
8615 && GET_CODE (XEXP (op, 1)) == CONST_INT);
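/* For illustration (hypothetical offset): in the simplest case below,
   a thunk with a 14-bit DELTA and no VCALL_OFFSET is just

       b function
       ldo delta(%r26),%r26

   adjusting the this pointer in %r26 from the branch's delay slot; the
   remaining cases wrap this pattern in long-call and vcall-offset
   sequences.  */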
8618 /* Output the assembler code for a thunk function. THUNK_DECL is the
8619 declaration for the thunk function itself, FUNCTION is the decl for
8620 the target function. DELTA is an immediate constant offset to be
8621 added to THIS. If VCALL_OFFSET is nonzero, the word at
8622 *(*this + vcall_offset) should be added to THIS. */
8624 static void
8625 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8626 HOST_WIDE_INT vcall_offset, tree function)
8628 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8629 static unsigned int current_thunk_number;
8630 int val_14 = VAL_14_BITS_P (delta);
8631 unsigned int old_last_address = last_address, nbytes = 0;
8632 char label[17];
8633 rtx xoperands[4];
8635 xoperands[0] = XEXP (DECL_RTL (function), 0);
8636 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8637 xoperands[2] = GEN_INT (delta);
8639 assemble_start_function (thunk_fndecl, fnname);
8640 final_start_function (emit_barrier (), file, 1);
8642 if (!vcall_offset)
8644 /* Output the thunk. We know that the function is in the same
8645 translation unit (i.e., the same space) as the thunk, and that
8646 thunks are output after their method. Thus, we don't need an
8647 external branch to reach the function. With SOM and GAS,
8648 functions and thunks are effectively in different sections.
8649 Thus, we can always use an IA-relative branch and the linker
8650 will add a long branch stub if necessary.
8652 However, we have to be careful when generating PIC code on the
8653 SOM port to ensure that the sequence does not transfer to an
8654 import stub for the target function as this could clobber the
8655 return value saved at SP-24. This would also apply to the
8656 32-bit linux port if the multi-space model is implemented. */
8657 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8658 && !(flag_pic && TREE_PUBLIC (function))
8659 && (TARGET_GAS || last_address < 262132))
8660 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8661 && ((targetm_common.have_named_sections
8662 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8663 /* The GNU 64-bit linker has rather poor stub management.
8664 So, we use a long branch from thunks that aren't in
8665 the same section as the target function. */
8666 && ((!TARGET_64BIT
8667 && (DECL_SECTION_NAME (thunk_fndecl)
8668 != DECL_SECTION_NAME (function)))
8669 || ((DECL_SECTION_NAME (thunk_fndecl)
8670 == DECL_SECTION_NAME (function))
8671 && last_address < 262132)))
8672 /* In this case, we need to be able to reach the start of
8673 the stub table even though the function is likely closer
8674 and can be jumped to directly. */
8675 || (targetm_common.have_named_sections
8676 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8677 && DECL_SECTION_NAME (function) == NULL
8678 && total_code_bytes < MAX_PCREL17F_OFFSET)
8679 /* Likewise. */
8680 || (!targetm_common.have_named_sections
8681 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8683 if (!val_14)
8684 output_asm_insn ("addil L'%2,%%r26", xoperands);
8686 output_asm_insn ("b %0", xoperands);
8688 if (val_14)
8690 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8691 nbytes += 8;
8693 else
8695 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8696 nbytes += 12;
8699 else if (TARGET_64BIT)
8701 rtx xop[4];
8703 /* We only have one call-clobbered scratch register, so we can't
8704 make use of the delay slot if delta doesn't fit in 14 bits. */
8705 if (!val_14)
8707 output_asm_insn ("addil L'%2,%%r26", xoperands);
8708 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8711 /* Load function address into %r1. */
8712 xop[0] = xoperands[0];
8713 xop[1] = gen_rtx_REG (Pmode, 1);
8714 xop[2] = xop[1];
8715 pa_output_pic_pcrel_sequence (xop);
8717 if (val_14)
8719 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8720 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8721 nbytes += 20;
8723 else
8725 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8726 nbytes += 24;
8729 else if (TARGET_PORTABLE_RUNTIME)
8731 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8732 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8734 if (!val_14)
8735 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8737 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8739 if (val_14)
8741 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8742 nbytes += 16;
8744 else
8746 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8747 nbytes += 20;
8750 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8752 /* The function is accessible from outside this module. The only
8753 way to avoid an import stub between the thunk and function is to
8754 call the function directly with an indirect sequence similar to
8755 that used by $$dyncall. This is possible because $$dyncall acts
8756 as the import stub in an indirect call. */
8757 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8758 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8759 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8760 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8761 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8762 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8763 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8764 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8765 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8767 if (!val_14)
8769 output_asm_insn ("addil L'%2,%%r26", xoperands);
8770 nbytes += 4;
8773 if (TARGET_PA_20)
8775 output_asm_insn ("bve (%%r22)", xoperands);
8776 nbytes += 36;
8778 else if (TARGET_NO_SPACE_REGS)
8780 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8781 nbytes += 36;
8783 else
8785 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8786 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8787 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8788 nbytes += 44;
8791 if (val_14)
8792 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8793 else
8794 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8796 else if (flag_pic)
8798 rtx xop[4];
8800 /* Load function address into %r22. */
8801 xop[0] = xoperands[0];
8802 xop[1] = gen_rtx_REG (Pmode, 1);
8803 xop[2] = gen_rtx_REG (Pmode, 22);
8804 pa_output_pic_pcrel_sequence (xop);
8806 if (!val_14)
8807 output_asm_insn ("addil L'%2,%%r26", xoperands);
8809 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8811 if (val_14)
8813 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8814 nbytes += 20;
8816 else
8818 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8819 nbytes += 24;
8822 else
8824 if (!val_14)
8825 output_asm_insn ("addil L'%2,%%r26", xoperands);
8827 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8828 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8830 if (val_14)
8832 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8833 nbytes += 12;
8835 else
8837 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8838 nbytes += 16;
8842 else
8844 rtx xop[4];
8846 /* Add DELTA to THIS. */
8847 if (val_14)
8849 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8850 nbytes += 4;
8852 else
8854 output_asm_insn ("addil L'%2,%%r26", xoperands);
8855 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8856 nbytes += 8;
8859 if (TARGET_64BIT)
8861 /* Load *(THIS + DELTA) to %r1. */
8862 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8864 val_14 = VAL_14_BITS_P (vcall_offset);
8865 xoperands[2] = GEN_INT (vcall_offset);
8867 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8868 if (val_14)
8870 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8871 nbytes += 8;
8873 else
8875 output_asm_insn ("addil L'%2,%%r1", xoperands);
8876 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8877 nbytes += 12;
8880 else
8882 /* Load *(THIS + DELTA) to %r1. */
8883 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8885 val_14 = VAL_14_BITS_P (vcall_offset);
8886 xoperands[2] = GEN_INT (vcall_offset);
8888 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8889 if (val_14)
8891 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8892 nbytes += 8;
8894 else
8896 output_asm_insn ("addil L'%2,%%r1", xoperands);
8897 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8898 nbytes += 12;
8902 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8903 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8904 && !(flag_pic && TREE_PUBLIC (function))
8905 && (TARGET_GAS || last_address < 262132))
8906 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8907 && ((targetm_common.have_named_sections
8908 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8909 /* The GNU 64-bit linker has rather poor stub management.
8910 So, we use a long branch from thunks that aren't in
8911 the same section as the target function. */
8912 && ((!TARGET_64BIT
8913 && (DECL_SECTION_NAME (thunk_fndecl)
8914 != DECL_SECTION_NAME (function)))
8915 || ((DECL_SECTION_NAME (thunk_fndecl)
8916 == DECL_SECTION_NAME (function))
8917 && last_address < 262132)))
8918 /* In this case, we need to be able to reach the start of
8919 the stub table even though the function is likely closer
8920 and can be jumped to directly. */
8921 || (targetm_common.have_named_sections
8922 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8923 && DECL_SECTION_NAME (function) == NULL
8924 && total_code_bytes < MAX_PCREL17F_OFFSET)
8925 /* Likewise. */
8926 || (!targetm_common.have_named_sections
8927 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8929 nbytes += 4;
8930 output_asm_insn ("b %0", xoperands);
8932 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8933 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8935 else if (TARGET_64BIT)
8937 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8938 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8940 /* Load function address into %r1. */
8941 nbytes += 16;
8942 xop[0] = xoperands[0];
8943 xop[1] = gen_rtx_REG (Pmode, 1);
8944 xop[2] = xop[1];
8945 pa_output_pic_pcrel_sequence (xop);
8947 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8949 else if (TARGET_PORTABLE_RUNTIME)
8951 /* Load function address into %r22. */
8952 nbytes += 12;
8953 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8954 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8956 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8958 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8959 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8961 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8963 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8964 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8966 /* The function is accessible from outside this module. The only
8967 way to avoid an import stub between the thunk and function is to
8968 call the function directly with an indirect sequence similar to
8969 that used by $$dyncall. This is possible because $$dyncall acts
8970 as the import stub in an indirect call. */
8971 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8972 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8973 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8974 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8975 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8976 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8977 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8978 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8979 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8981 if (TARGET_PA_20)
8983 output_asm_insn ("bve,n (%%r22)", xoperands);
8984 nbytes += 32;
8986 else if (TARGET_NO_SPACE_REGS)
8988 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8989 nbytes += 32;
8991 else
8993 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8994 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8995 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8996 nbytes += 40;
8999 else if (flag_pic)
9001 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9002 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
9004 /* Load function address into %r1. */
9005 nbytes += 16;
9006 xop[0] = xoperands[0];
9007 xop[1] = gen_rtx_REG (Pmode, 1);
9008 xop[2] = xop[1];
9009 pa_output_pic_pcrel_sequence (xop);
9011 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
9013 else
9015 /* Load function address into %r22. */
9016 nbytes += 8;
9017 output_asm_insn ("ldil L'%0,%%r22", xoperands);
9018 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
9020 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9021 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
9025 final_end_function ();
9027 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
9029 switch_to_section (data_section);
9030 output_asm_insn (".align 4", xoperands);
9031 ASM_OUTPUT_LABEL (file, label);
9032 output_asm_insn (".word P'%0", xoperands);
9035 current_thunk_number++;
9036 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
9037 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
9038 last_address += nbytes;
9039 if (old_last_address > last_address)
9040 last_address = UINT_MAX;
9041 update_total_code_bytes (nbytes);
9042 assemble_end_function (thunk_fndecl, fnname);
9045 /* Only direct calls to static functions are allowed to be sibling (tail)
9046 call optimized.
9048 This restriction is necessary because some linker-generated stubs will
9049 store return pointers into rp' in some cases which might clobber a
9050 live value already in rp'.
9052 In a sibcall the current function and the target function share stack
9053 space. Thus if the path to the current function and the path to the
9054 target function save a value in rp', they save the value into the
9055 same stack slot, which has undesirable consequences.
9057 Because of the deferred binding nature of shared libraries any function
9058 with external scope could be in a different load module and thus require
9059 rp' to be saved when calling that function. So sibcall optimizations
9060 can only be safe for static functions.
9062 Note that GCC never needs return value relocations, so we don't have to
9063 worry about static calls with return value relocations (which require
9064 saving rp').
9066 It is safe to perform a sibcall optimization when the target function
9067 will never return. */
9068 static bool
9069 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9071 /* Sibcalls are not ok because the arg pointer register is not a fixed
9072 register. This prevents the sibcall optimization from occurring. In
9073 addition, there are problems with stub placement using GNU ld. This
9074 is because a normal sibcall branch uses a 17-bit relocation while
9075 a regular call branch uses a 22-bit relocation. As a result, more
9076 care needs to be taken in the placement of long-branch stubs. */
9077 if (TARGET_64BIT)
9078 return false;
9080 if (TARGET_PORTABLE_RUNTIME)
9081 return false;
9083 /* Sibcalls are only ok within a translation unit. */
9084 return decl && targetm.binds_local_p (decl);
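/* For example (illustrative): in a MEM address the implicit space
   register is selected from the base operand, so "%r26 + %r25" and
   "%r25 + %r26" may resolve to different spaces.  Hence PLUS inside a
   MEM is treated as non-commutative below unless space registers are
   disabled.  */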
9087 /* ??? Addition is not commutative on the PA due to the weird implicit
9088 space register selection rules for memory addresses. Therefore, we
9089 don't consider a + b == b + a, as this might be inside a MEM. */
9090 static bool
9091 pa_commutative_p (const_rtx x, int outer_code)
9093 return (COMMUTATIVE_P (x)
9094 && (TARGET_NO_SPACE_REGS
9095 || (outer_code != UNKNOWN && outer_code != MEM)
9096 || GET_CODE (x) != PLUS));
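/* Illustrative operand layout for the checks below (hypothetical
   registers): a valid fmpyadd combination computes, in one insn,

       %fr8 = %fr4 * %fr5 (operands 0, 1, 2)
       %fr10 = %fr10 + %fr6 (operands 3, 4, 5)

   where the add's in/out register may not overlap any multiply operand
   and the multiply result may not feed the add.  */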
9099 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9100 use in fmpyadd instructions. */
9101 int
9102 pa_fmpyaddoperands (rtx *operands)
9104 machine_mode mode = GET_MODE (operands[0]);
9106 /* Must be a floating point mode. */
9107 if (mode != SFmode && mode != DFmode)
9108 return 0;
9110 /* All modes must be the same. */
9111 if (! (mode == GET_MODE (operands[1])
9112 && mode == GET_MODE (operands[2])
9113 && mode == GET_MODE (operands[3])
9114 && mode == GET_MODE (operands[4])
9115 && mode == GET_MODE (operands[5])))
9116 return 0;
9118 /* All operands must be registers. */
9119 if (! (GET_CODE (operands[1]) == REG
9120 && GET_CODE (operands[2]) == REG
9121 && GET_CODE (operands[3]) == REG
9122 && GET_CODE (operands[4]) == REG
9123 && GET_CODE (operands[5]) == REG))
9124 return 0;
9126 /* Only 2 real operands to the addition. One of the input operands must
9127 be the same as the output operand. */
9128 if (! rtx_equal_p (operands[3], operands[4])
9129 && ! rtx_equal_p (operands[3], operands[5]))
9130 return 0;
9132 /* Inout operand of add cannot conflict with any operands from multiply. */
9133 if (rtx_equal_p (operands[3], operands[0])
9134 || rtx_equal_p (operands[3], operands[1])
9135 || rtx_equal_p (operands[3], operands[2]))
9136 return 0;
9138 /* Multiply cannot feed into addition operands. */
9139 if (rtx_equal_p (operands[4], operands[0])
9140 || rtx_equal_p (operands[5], operands[0]))
9141 return 0;
9143 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
9144 if (mode == SFmode
9145 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9146 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9147 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9148 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9149 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9150 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9151 return 0;
9153 /* Passed. Operands are suitable for fmpyadd. */
9154 return 1;
9157 #if !defined(USE_COLLECT2)
9158 static void
9159 pa_asm_out_constructor (rtx symbol, int priority)
9161 if (!function_label_operand (symbol, VOIDmode))
9162 pa_encode_label (symbol);
9164 #ifdef CTORS_SECTION_ASM_OP
9165 default_ctor_section_asm_out_constructor (symbol, priority);
9166 #else
9167 # ifdef TARGET_ASM_NAMED_SECTION
9168 default_named_section_asm_out_constructor (symbol, priority);
9169 # else
9170 default_stabs_asm_out_constructor (symbol, priority);
9171 # endif
9172 #endif
9175 static void
9176 pa_asm_out_destructor (rtx symbol, int priority)
9178 if (!function_label_operand (symbol, VOIDmode))
9179 pa_encode_label (symbol);
9181 #ifdef DTORS_SECTION_ASM_OP
9182 default_dtor_section_asm_out_destructor (symbol, priority);
9183 #else
9184 # ifdef TARGET_ASM_NAMED_SECTION
9185 default_named_section_asm_out_destructor (symbol, priority);
9186 # else
9187 default_stabs_asm_out_destructor (symbol, priority);
9188 # endif
9189 #endif
9191 #endif
9193 /* This function places uninitialized global data in the bss section.
9194 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9195 function on the SOM port to prevent uninitialized global data from
9196 being placed in the data section. */
9198 void
9199 pa_asm_output_aligned_bss (FILE *stream,
9200 const char *name,
9201 unsigned HOST_WIDE_INT size,
9202 unsigned int align)
9204 switch_to_section (bss_section);
9206 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9207 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9208 #endif
9210 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9211 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9212 #endif
9214 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9215 ASM_OUTPUT_LABEL (stream, name);
9216 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9219 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9220 that doesn't allow the alignment of global common storage to be directly
9221 specified. The SOM linker aligns common storage based on the rounded
9222 value of the NUM_BYTES parameter in the .comm directive. It's not
9223 possible to use the .align directive as it doesn't affect the alignment
9224 of the label associated with a .comm directive. */
9226 void
9227 pa_asm_output_aligned_common (FILE *stream,
9228 const char *name,
9229 unsigned HOST_WIDE_INT size,
9230 unsigned int align)
9232 unsigned int max_common_align;
9234 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9235 if (align > max_common_align)
9237 /* Alignment exceeds maximum alignment for global common data. */
9238 align = max_common_align;
9241 switch_to_section (bss_section);
9243 assemble_name (stream, name);
9244 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9245 MAX (size, align / BITS_PER_UNIT));
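/* For example (hypothetical object): a 1-byte common object requiring
   8-byte alignment is emitted above as ".comm 8"; since the SOM linker
   derives the alignment from the rounded NUM_BYTES value, inflating
   NUM_BYTES is the only way to request the alignment.  */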
9248 /* We can't use .comm for local common storage as the SOM linker effectively
9249 treats the symbol as universal and uses the same storage for local symbols
9250 with the same name in different object files. The .block directive
9251 reserves an uninitialized block of storage. However, it's not common
9252 storage. Fortunately, GCC never requests common storage with the same
9253 name in any given translation unit. */
9255 void
9256 pa_asm_output_aligned_local (FILE *stream,
9257 const char *name,
9258 unsigned HOST_WIDE_INT size,
9259 unsigned int align)
9261 switch_to_section (bss_section);
9262 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9264 #ifdef LOCAL_ASM_OP
9265 fprintf (stream, "%s", LOCAL_ASM_OP);
9266 assemble_name (stream, name);
9267 fprintf (stream, "\n");
9268 #endif
9270 ASM_OUTPUT_LABEL (stream, name);
9271 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9274 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9275 use in fmpysub instructions. */
9276 int
9277 pa_fmpysuboperands (rtx *operands)
9279 machine_mode mode = GET_MODE (operands[0]);
9281 /* Must be a floating point mode. */
9282 if (mode != SFmode && mode != DFmode)
9283 return 0;
9285 /* All modes must be the same. */
9286 if (! (mode == GET_MODE (operands[1])
9287 && mode == GET_MODE (operands[2])
9288 && mode == GET_MODE (operands[3])
9289 && mode == GET_MODE (operands[4])
9290 && mode == GET_MODE (operands[5])))
9291 return 0;
9293 /* All operands must be registers. */
9294 if (! (GET_CODE (operands[1]) == REG
9295 && GET_CODE (operands[2]) == REG
9296 && GET_CODE (operands[3]) == REG
9297 && GET_CODE (operands[4]) == REG
9298 && GET_CODE (operands[5]) == REG))
9299 return 0;
9301 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9302 operation, so operands[4] must be the same as operands[3]. */
9303 if (! rtx_equal_p (operands[3], operands[4]))
9304 return 0;
9306 /* Multiply cannot feed into subtraction. */
9307 if (rtx_equal_p (operands[5], operands[0]))
9308 return 0;
9310 /* Inout operand of sub cannot conflict with any operands from multiply. */
9311 if (rtx_equal_p (operands[3], operands[0])
9312 || rtx_equal_p (operands[3], operands[1])
9313 || rtx_equal_p (operands[3], operands[2]))
9314 return 0;
9316 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
9317 if (mode == SFmode
9318 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9319 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9320 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9321 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9322 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9323 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9324 return 0;
9326 /* Passed. Operands are suitable for fmpysub. */
9327 return 1;
9330 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9331 constants for a MULT embedded inside a memory address. */
9332 int
9333 pa_mem_shadd_constant_p (int val)
9335 if (val == 2 || val == 4 || val == 8)
9336 return 1;
9337 else
9338 return 0;
9341 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9342 constants for shadd instructions. */
9343 int
9344 pa_shadd_constant_p (int val)
9346 if (val == 1 || val == 2 || val == 3)
9347 return 1;
9348 else
9349 return 0;
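/* The two ranges above are the same facility seen from two sides: a
   shadd shift count of 1, 2 or 3 matches a MULT factor of 2, 4 or 8
   inside an address.  For instance (hypothetical registers), indexing
   a 4-byte element array can use

       sh2add %r25,%r26,%r28

   which computes %r28 = (%r25 << 2) + %r26 in a single insn.  */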
9352 /* Return TRUE if INSN branches forward. */
9354 static bool
9355 forward_branch_p (rtx_insn *insn)
9357 rtx lab = JUMP_LABEL (insn);
9359 /* The INSN must have a jump label. */
9360 gcc_assert (lab != NULL_RTX);
9362 if (INSN_ADDRESSES_SET_P ())
9363 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9365 while (insn)
9367 if (insn == lab)
9368 return true;
9369 else
9370 insn = NEXT_INSN (insn);
9373 return false;
9376 /* Output an unconditional move and branch insn. */
9378 const char *
9379 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9381 int length = get_attr_length (insn);
9383 /* These are the cases in which we win. */
9384 if (length == 4)
9385 return "mov%I1b,tr %1,%0,%2";
9387 /* None of the following cases win, but they don't lose either. */
9388 if (length == 8)
9390 if (dbr_sequence_length () == 0)
9392 /* Nothing in the delay slot, fake it by putting the combined
9393 insn (the copy or add) in the delay slot of a bl. */
9394 if (GET_CODE (operands[1]) == CONST_INT)
9395 return "b %2\n\tldi %1,%0";
9396 else
9397 return "b %2\n\tcopy %1,%0";
9399 else
9401 /* Something in the delay slot, but we've got a long branch. */
9402 if (GET_CODE (operands[1]) == CONST_INT)
9403 return "ldi %1,%0\n\tb %2";
9404 else
9405 return "copy %1,%0\n\tb %2";
9409 if (GET_CODE (operands[1]) == CONST_INT)
9410 output_asm_insn ("ldi %1,%0", operands);
9411 else
9412 output_asm_insn ("copy %1,%0", operands);
9413 return pa_output_lbranch (operands[2], insn, 1);
9416 /* Output an unconditional add and branch insn. */
9418 const char *
9419 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9421 int length = get_attr_length (insn);
9423 /* To make life easy we want operand0 to be the shared input/output
9424 operand and operand1 to be the read-only operand. */
9425 if (operands[0] == operands[1])
9426 operands[1] = operands[2];
9428 /* These are the cases in which we win. */
9429 if (length == 4)
9430 return "add%I1b,tr %1,%0,%3";
9432 /* None of the following cases win, but they don't lose either. */
9433 if (length == 8)
9435 if (dbr_sequence_length () == 0)
9436 /* Nothing in the delay slot, fake it by putting the combined
9437 insn (the copy or add) in the delay slot of a bl. */
9438 return "b %3\n\tadd%I1 %1,%0,%0";
9439 else
9440 /* Something in the delay slot, but we've got a long branch. */
9441 return "add%I1 %1,%0,%0\n\tb %3";
9444 output_asm_insn ("add%I1 %1,%0,%0", operands);
9445 return pa_output_lbranch (operands[3], insn, 1);
9448 /* We use this hook to perform a PA-specific optimization which is difficult
9449 to do in earlier passes. */
9451 static void
9452 pa_reorg (void)
9454 remove_useless_addtr_insns (1);
9456 if (pa_cpu < PROCESSOR_8000)
9457 pa_combine_instructions ();
9460 /* The PA has a number of odd instructions which can perform multiple
9461 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9462 it may be profitable to combine two instructions into one instruction
9463 with two outputs. It's not profitable on PA2.0 machines because the
9464 two outputs would take two slots in the reorder buffers.
9466 This routine finds instructions which can be combined and combines
9467 them. We only support some of the potential combinations, and we
9468 only try common ways to find suitable instructions.
9470 * addb can add two registers or a register and a small integer
9471 and jump to a nearby (+-8k) location. Normally the jump to the
9472 nearby location is conditional on the result of the add, but by
9473 using the "true" condition we can make the jump unconditional.
9474 Thus addb can perform two independent operations in one insn.
9476 * movb is similar to addb in that it can perform a reg->reg
9477 or small immediate->reg copy and jump to a nearby (+-8k) location.
9479 * fmpyadd and fmpysub can perform a FP multiply and either an
9480 FP add or FP sub if the operands of the multiply and add/sub are
9481 independent (there are other minor restrictions). Note both
9482 the fmpy and fadd/fsub can in theory move to better spots according
9483 to data dependencies, but for now we require the fmpy stay at a
9484 fixed location.
9486 * Many of the memory operations can perform pre & post updates
9487 of index registers. GCC's pre/post increment/decrement addressing
9488 is far too simple to take advantage of all the possibilities. This
9489 pass may not be suitable since those insns may not be independent.
9491 * comclr can compare two ints or an int and a register, nullify
9492 the following instruction and zero some other register. This
9493 is more difficult to use as it's harder to find an insn which
9494 will generate a comclr than finding something like an unconditional
9495 branch. (conditional moves & long branches create comclr insns).
9497 * Most arithmetic operations can conditionally skip the next
9498 instruction. They can be viewed as "perform this operation
9499 and conditionally jump to this nearby location" (where nearby
9500 is an insns away). These are difficult to use due to the
9501 branch length restrictions. */
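/* For illustration of the addb case above (hypothetical registers and
   label): the two independent insns

       add %r25,%r26,%r26
       b L$0005

   can be replaced by the single

       addb,tr %r25,%r26,L$0005

   where the always-true condition makes the branch unconditional; see
   pa_output_parallel_addb.  */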
9503 static void
9504 pa_combine_instructions (void)
9506 rtx_insn *anchor;
9508 /* This can get expensive since the basic algorithm is O(n^2)
9509 (or worse). Only do it for -O2 or higher
9510 levels of optimization. */
9511 if (optimize < 2)
9512 return;
9514 /* Walk down the list of insns looking for "anchor" insns which
9515 may be combined with "floating" insns. As the name implies,
9516 "anchor" instructions don't move, while "floating" insns may
9517 move around. */
9518 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9519 rtx_insn *new_rtx = make_insn_raw (par);
9521 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9523 enum attr_pa_combine_type anchor_attr;
9524 enum attr_pa_combine_type floater_attr;
9526 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9527 Also ignore any special USE insns. */
9528 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9529 || GET_CODE (PATTERN (anchor)) == USE
9530 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9531 continue;
9533 anchor_attr = get_attr_pa_combine_type (anchor);
9534 /* See if anchor is an insn suitable for combination. */
9535 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9536 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9537 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9538 && ! forward_branch_p (anchor)))
9540 rtx_insn *floater;
9542 for (floater = PREV_INSN (anchor);
9543 floater;
9544 floater = PREV_INSN (floater))
9546 if (NOTE_P (floater)
9547 || (NONJUMP_INSN_P (floater)
9548 && (GET_CODE (PATTERN (floater)) == USE
9549 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9550 continue;
9552 /* Anything except a regular INSN will stop our search. */
9553 if (! NONJUMP_INSN_P (floater))
9555 floater = NULL;
9556 break;
9559 /* See if FLOATER is suitable for combination with the
9560 anchor. */
9561 floater_attr = get_attr_pa_combine_type (floater);
9562 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9563 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9564 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9565 && floater_attr == PA_COMBINE_TYPE_FMPY))
9567 /* If ANCHOR and FLOATER can be combined, then we're
9568 done with this pass. */
9569 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9570 SET_DEST (PATTERN (floater)),
9571 XEXP (SET_SRC (PATTERN (floater)), 0),
9572 XEXP (SET_SRC (PATTERN (floater)), 1)))
9573 break;
9576 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9577 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9579 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9581 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9582 SET_DEST (PATTERN (floater)),
9583 XEXP (SET_SRC (PATTERN (floater)), 0),
9584 XEXP (SET_SRC (PATTERN (floater)), 1)))
9585 break;
9587 else
9589 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9590 SET_DEST (PATTERN (floater)),
9591 SET_SRC (PATTERN (floater)),
9592 SET_SRC (PATTERN (floater))))
9593 break;
9598 /* If we didn't find anything on the backwards scan try forwards. */
9599 if (!floater
9600 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9601 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9603 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9605 if (NOTE_P (floater)
9606 || (NONJUMP_INSN_P (floater)
9607 && (GET_CODE (PATTERN (floater)) == USE
9608 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9610 continue;
9612 /* Anything except a regular INSN will stop our search. */
9613 if (! NONJUMP_INSN_P (floater))
9615 floater = NULL;
9616 break;
9619 /* See if FLOATER is suitable for combination with the
9620 anchor. */
9621 floater_attr = get_attr_pa_combine_type (floater);
9622 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9623 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9624 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9625 && floater_attr == PA_COMBINE_TYPE_FMPY))
9627 /* If ANCHOR and FLOATER can be combined, then we're
9628 done with this pass. */
9629 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9630 SET_DEST (PATTERN (floater)),
9631 XEXP (SET_SRC (PATTERN (floater)),
9633 XEXP (SET_SRC (PATTERN (floater)),
9634 1)))
9635 break;
9640 /* FLOATER will be nonzero if we found a suitable floating
9641 insn for combination with ANCHOR. */
9642 if (floater
9643 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9644 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9646 /* Emit the new instruction and delete the old anchor. */
9647 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9648 copy_rtx (PATTERN (floater)));
9649 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9650 emit_insn_before (temp, anchor);
9652 SET_INSN_DELETED (anchor);
9654 /* Emit a special USE insn for FLOATER, then delete
9655 the floating insn. */
9656 temp = copy_rtx (PATTERN (floater));
9657 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9658 delete_insn (floater);
9660 continue;
9662 else if (floater
9663 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9665 /* Emit the new_jump instruction and delete the old anchor. */
9666 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9667 copy_rtx (PATTERN (floater)));
9668 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9669 temp = emit_jump_insn_before (temp, anchor);
9671 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9672 SET_INSN_DELETED (anchor);
9674 /* Emit a special USE insn for FLOATER, then delete
9675 the floating insn. */
9676 temp = copy_rtx (PATTERN (floater));
9677 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9678 delete_insn (floater);
9679 continue;
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
                  int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
          !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */

int
pa_insn_refs_are_delayed (rtx_insn *insn)
{
  return ((NONJUMP_INSN_P (insn)
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}

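/* As an illustrative sketch (hypothetical code, not the exact sequence
   GCC emits), this lets reorg fill the delay slot of a millicode call
   with an insn that sets up one of its arguments:

        bl $$remI,%r31          ; millicode call, links through %r31
        ldi 10,%r26             ; delay slot: loads an argument register

   Because argument references are treated as delayed, the ldi in the
   delay slot still appears to execute before the callee reads %r26.  */
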
/* Promote the return value, but not the arguments.  */

static machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || VECTOR_TYPE_P (valtype))
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
        return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (valsize > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (SCALAR_FLOAT_TYPE_P (valtype)
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

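/* For example, a 12-byte aggregate returned on PA64 produces (sketch
   of the RTL, offsets in bytes):

     (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                    (expr_list (reg:DI 29) (const_int 8))])

   i.e., the first eight bytes in %r28 and the rest, left justified,
   in %r29.  */
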
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
                  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over argument ARG.  */

static void
pa_function_arg_advance (cumulative_args_t cum_v,
                         const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

  cum->nargs_prototype--;
  cum->words += (arg_size
                 + ((cum->words & 01)
                    && arg.type != NULL_TREE
                    && arg_size > 1));
}

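/* A worked example: with cum->words == 1, advancing over a DFmode
   argument (arg_size == 2, named with a non-null type) adds
   2 + 1 == 3, leaving cum->words == 4.  The extra word accounts for
   the odd padding slot skipped to keep the argument double-word
   aligned.  */
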
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (arg.end_marker_p ())
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);
  if (!arg_size)
    return NULL_RTX;

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || VECTOR_TYPE_P (type))))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || VECTOR_TYPE_P (type))))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}

/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
                     ? (integer_zerop (TYPE_SIZE (type))
                        || !TREE_CONSTANT (TYPE_SIZE (type))
                        || int_size_in_bytes (type) <= UNITS_PER_WORD)
                     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;
  int arg_size;

  if (!TARGET_64BIT)
    return 0;

  arg_size = pa_function_arg_size (arg.mode, arg.type);
  if (arg_size > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + arg_size <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}

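/* A worked example (64-bit only, max_arg_words == 8): a 32-byte
   aggregate (arg_size == 4) with cum->words == 6 gives offset == 0;
   since 6 + 4 > 8 but 6 < 8, the argument is split and
   (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes are passed in registers,
   with the remaining 16 bytes on the stack.  */
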
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.cc will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const char *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
                             "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols.  This is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
     This puts constant data not needing relocation into the $TEXT$ space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !(reloc & pa_reloc_rw_mask ()))
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp)
           && !(reloc & pa_reloc_rw_mask ()))
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

/* Implement pa_elf_select_rtx_section.  If X is a function label operand
   and the function is in a COMDAT group, place the plabel reference in the
   .data.rel.ro.local section.  The linker ignores references to symbols in
   discarded sections from this section.  */

static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
                           unsigned HOST_WIDE_INT align)
{
  if (function_label_operand (x, VOIDmode))
    {
      tree decl = SYMBOL_REF_DECL (x);

      if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
        return get_named_section (NULL, ".data.rel.ro.local", 1);
    }

  return default_elf_select_rtx_section (mode, x, align);
}

/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
    return 3;

  /* HP linker does not support global relocs in readonly memory.  */
  return TARGET_SOM ? 2 : 0;
}

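/* For illustration, under the common convention that bit 0 of the mask
   covers local relocations and bit 1 covers global relocations (an
   assumption about the generic section-selection code, not something
   defined here): a return of 3 forces anything needing a reloc into
   writable data, 2 only evicts globally-relocated data from readonly
   sections, and 0 lets relocated constants stay readonly.  */
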
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}

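/* For example, on the 32-bit ports an 8-byte structure is returned in
   registers, while a 12-byte structure, or any variable-sized type for
   which int_size_in_bytes returns -1, goes to memory.  On the 64-bit
   ports the register limit is 16 bytes instead of 8.  */
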
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
pa_can_change_mode_class (machine_mode from, machine_mode to,
                          reg_class_t rclass)
{
  if (from == to)
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return false;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
     except for DImode to SImode on the 64-bit target.  It is handled by
     register renaming in pa_print_operand.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return TARGET_64BIT && from == DImode && to == SImode;

  /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return false;

  return true;
}

/* Implement TARGET_MODES_TIEABLE_P.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
   TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

static bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from the text segment to a stack location
   and then patched by pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      if (TARGET_PA_20)
        {
          fputs ("\tmfia %r20\n", f);
          fputs ("\tldw 48(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 52(%r20),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          if (ASSEMBLER_DIALECT == 0)
            {
              fputs ("\tbl .+8,%r20\n", f);
              fputs ("\tdepi 0,31,2,%r20\n", f);
            }
          else
            {
              fputs ("\tb,l .+8,%r20\n", f);
              fputs ("\tdepwi 0,31,2,%r20\n", f);
            }
          fputs ("\tldw 40(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          if (ASSEMBLER_DIALECT == 0)
            fputs ("\tdepi 0,31,2,%r22\n", f);
          else
            fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 44(%r20),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r27\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\tldd 16(%r27),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 24(%r27),%r27\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 48.
   Move the static chain value to trampoline template at offset 52.
   Move the trampoline address to trampoline template at offset 56.
   Move r19 to trampoline template at offset 60.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
                                                            r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}

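/* A worked example of the flush range, assuming a hypothetical
   32-byte MIN_CACHELINE_SIZE and a 32-bit pre-2.0 trampoline
   (TRAMPOLINE_CODE_SIZE == 48) at address 0x7b033264:
   start_addr = 0x7b033264 & -32 == 0x7b033260 and
   end_addr = (0x7b033264 + 47) & -32 == 0x7b033280, so the fdc/fic
   loop touches only the two lines that actually contain code.  */
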
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 56.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x;

  if (GET_CODE (orig_x) == UNSPEC
      && XINT (orig_x, 1) == UNSPEC_TP)
    orig_x = XVECEXP (orig_x, 0, 0);

  x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
        return TFmode;
    }

  return VOIDmode;
}

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
                     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */

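/* For illustration, the indexed forms accepted below look like the
   following (register numbers are hypothetical; the base is the second
   operand and normally carries REG_POINTER before reload):

     (plus (reg %r25) (reg/f %r26))                      unscaled INDEX+BASE
     (plus (mult (reg %r25) (const_int 8)) (reg/f %r26)) scaled, DImode

   where the scale factor must equal the mode size of the access.  */
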
static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
                  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
           || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
          && REG_P (XEXP (x, 0))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
         so check it first.  */
      if (REG_P (XEXP (x, 1))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
                     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
        base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
               && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                          : REG_OK_FOR_BASE_P (XEXP (x, 0))))
        base = XEXP (x, 0), index = XEXP (x, 1);
      else
        return false;

      if (GET_CODE (index) == CONST_INT)
        {
          /* Short 5-bit displacements always okay.  */
          if (INT_5_BITS (index))
            return true;

          if (!base14_operand (index, mode))
            return false;

          /* Long 14-bit displacements always okay for these cases.  */
          if (INT14_OK_STRICT
              || reload_completed
              || mode == QImode
              || mode == HImode)
            return true;

          /* We have to limit displacements to those supported by
             both floating-point and integer accesses as reload can't
             fix invalid displacements.  See PR114288.  */
          return false;
        }

      if (!TARGET_DISABLE_INDEXING
          /* Only accept the "canonical" INDEX+BASE operand order
             on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              ? REG_P (index)
              : (base == XEXP (x, 1) && REG_P (index)
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (base))
                     || REG_POINTER (base))
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (index))
                     || !REG_POINTER (index))))
          && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
                     : REG_OK_FOR_INDEX_P (index))
          && borx_reg_operand (base, Pmode)
          && borx_reg_operand (index, Pmode))
        return true;

      if (!TARGET_DISABLE_INDEXING
          && GET_CODE (index) == MULT
          /* Only accept base operands with the REG_POINTER flag prior to
             reload on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              || (base == XEXP (x, 1)
                  && (reload_completed
                      || (reload_in_progress && HARD_REGISTER_P (base))
                      || REG_POINTER (base))))
          && REG_P (XEXP (index, 0))
          && GET_MODE (XEXP (index, 0)) == Pmode
          && MODE_OK_FOR_SCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
                     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
          && GET_CODE (XEXP (index, 1)) == CONST_INT
          && INTVAL (XEXP (index, 1))
             == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
          && borx_reg_operand (base, Pmode))
        return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
        y = SUBREG_REG (y);

      if (REG_P (y)
          && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
                     : REG_OK_FOR_BASE_P (y)))
        {
          y = XEXP (x, 1);

          /* Needed for -fPIC */
          if (mode == Pmode
              && GET_CODE (y) == UNSPEC)
            return true;

          /* Before reload, we need support for 14-bit floating
             point loads and stores, and associated relocations.  */
          if ((TARGET_ELF32 || !INT14_OK_STRICT)
              && !reload_completed
              && mode != QImode
              && mode != HImode)
            return false;

          if (CONSTANT_P (y))
            return true;
        }
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

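/* A worked numeric example: for SFmode with !INT14_OK_STRICT the mask
   is 0x1f, so for memory (X + 148) we have 148 & 0x1f == 20 >= 16 and
   round up to Y == 160.  Z = X + 160 is reloaded into a register and
   the access becomes memory (Z - 12); other references near X + 160
   can then reuse Z.  */
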
rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
                              int opnum, int type,
                              int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
          && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
                                      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~mask) + mask + 1;
      else
        newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
          && (GET_MODE_CLASS (mode) == MODE_FLOAT
              || (TARGET_64BIT && (mode) == DImode)))
        newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
        {
          temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
                               GEN_INT (newoffset));
          ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
          push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          return ad;
        }
    }

  return NULL_RTX;
}

/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  if (!TARGET_SOM)
    fputs ("\t.align 4\n", asm_out_file);
  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
        (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t, const function_arg_info &)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  */

int
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);

  /* The 64-bit runtime does not restrict the size of stack frames,
     but the gcc calling conventions limit argument sizes to 1G.  Our
     prologue/epilogue code limits frame sizes to just under 32 bits.
     1G is also the maximum frame size that can be handled by the HPUX
     unwind descriptor.  Since very large TYPE_SIZE_UNIT values can
     occur for (parallel:BLK []), we need to ignore large arguments
     passed by value.  */
  if (size >= (1 << (HOST_BITS_PER_INT - 2)))
    size = 0;
  return (int) CEIL (size, UNITS_PER_WORD);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
pa_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned PA_FE_INEXACT = 1;
  const unsigned PA_FE_UNDERFLOW = 2;
  const unsigned PA_FE_OVERFLOW = 4;
  const unsigned PA_FE_DIVBYZERO = 8;
  const unsigned PA_FE_INVALID = 16;
  const unsigned HOST_WIDE_INT PA_FE_ALL_EXCEPT = (PA_FE_INVALID
                                                   | PA_FE_DIVBYZERO
                                                   | PA_FE_OVERFLOW
                                                   | PA_FE_UNDERFLOW
                                                   | PA_FE_INEXACT);
  const unsigned HOST_WIDE_INT PA_FE_EXCEPT_SHIFT = 27;
  tree fenv_var, get_fpsr, set_fpsr, mask, ld_fenv, masked_fenv;
  tree hold_all, new_fenv_var, reload_fenv, restore_fnenv;
  tree get_fpsr_call, set_fpsr_call, update_call, atomic_feraiseexcept;

  if (TARGET_SOFT_FLOAT)
    return;

  /* Generate the equivalent of:
       unsigned int fenv_var;
       fenv_var = __builtin_get_fpsr ();

       unsigned int masked_fenv;
       masked_fenv = fenv_var & mask;

       __builtin_set_fpsr (masked_fenv);  */

  fenv_var = create_tmp_var_raw (unsigned_type_node);
  get_fpsr = pa_builtins[PA_BUILTIN_GET_FPSR];
  set_fpsr = pa_builtins[PA_BUILTIN_SET_FPSR];
  mask = build_int_cst (unsigned_type_node,
                        ~((PA_FE_ALL_EXCEPT << PA_FE_EXCEPT_SHIFT)
                          | PA_FE_ALL_EXCEPT));

  get_fpsr_call = build_call_expr (get_fpsr, 0);
  ld_fenv = build4 (TARGET_EXPR, unsigned_type_node,
                    fenv_var, get_fpsr_call,
                    NULL_TREE, NULL_TREE);
  masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
  hold_all = build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv);
  set_fpsr_call = build_call_expr (set_fpsr, 1, masked_fenv);
  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fpsr_call);

  /* Store the value of masked_fenv to clear the exceptions:
       __builtin_set_fpsr (masked_fenv);  */

  *clear = set_fpsr_call;

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_get_fpsr ();

       __builtin_set_fpsr (fenv_var);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node, new_fenv_var,
                        get_fpsr_call, NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_var);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
                                 fold_convert (integer_type_node,
                                               new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            reload_fenv, restore_fnenv), update_call);
}

#include "gt-pa.h"