hppa: Fix REG+D address support before reload
[official-gcc.git] / gcc / config / pa / pa.cc
/* Subroutines for insn-output.cc for HPPA.
   Copyright (C) 1992-2024 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static tree pa_builtin_decl (unsigned, bool);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
                                  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
                                     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool,
                                     code_helper = ERROR_MARK);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section (machine_mode, rtx,
                                           unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL pa_builtin_decl

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV pa_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
         specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
        dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
        dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* 64-bit target is always PIE.  */
  if (TARGET_64BIT)
    flag_pie = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "%<-freorder-blocks-and-partition%> does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Disable -fstack-protector to suppress warning.  */
  flag_stack_protect = 0;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  /* FPU builtins.  */
  PA_BUILTIN_GET_FPSR,
  PA_BUILTIN_SET_FPSR,

  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static GTY(()) enum insn_code pa_builtins_icode[(int) PA_BUILTIN_max];
/* Add a PA builtin function with NAME, ICODE, CODE and TYPE.  Return the
   function decl or NULL_TREE if the builtin was not added.  */

static tree
def_builtin (const char *name, enum insn_code icode, enum pa_builtins code,
             tree type)
{
  tree t
    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);

  if (t)
    {
      pa_builtins[code] = t;
      pa_builtins_icode[code] = icode;
    }

  return t;
}
/* Create builtin functions for FPU instructions.  */

static void
pa_fpu_init_builtins (void)
{
  tree ftype;

  ftype = build_function_type_list (unsigned_type_node, 0);
  def_builtin ("__builtin_get_fpsr", CODE_FOR_get_fpsr,
               PA_BUILTIN_GET_FPSR, ftype);
  ftype = build_function_type_list (void_type_node, unsigned_type_node, 0);
  def_builtin ("__builtin_set_fpsr", CODE_FOR_set_fpsr,
               PA_BUILTIN_SET_FPSR, ftype);
}
static void
pa_init_builtins (void)
{
  if (!TARGET_SOFT_FLOAT)
    pa_fpu_init_builtins ();

#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
/* Implement TARGET_BUILTIN_DECL.  */

static tree
pa_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= PA_BUILTIN_max)
    return error_mark_node;
  return pa_builtins[code];
}
static rtx
pa_expand_builtin_1 (tree exp, rtx target,
                     rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode tmode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum pa_builtins code
    = (enum pa_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = pa_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        op[0] = gen_reg_rtx (tmode);
      else
        op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
        return NULL_RTX;

      arg_count++;
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
                                                        insn_op->mode))
        op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_GET_FPSR:
    case PA_BUILTIN_SET_FPSR:
      return pa_expand_builtin_1 (exp, target, subtarget, mode, ignore);

    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
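
/* For example, 0x12345800 is accepted: its low 11 bits are zero and
   bit 31 is clear, so X above computes to 0.  0x12345c01 is rejected
   because 0x401 survives the 0x7ff mask, and 0x80000000 zero-extended
   to 64 bits is rejected because bits 31 and up are neither all zero
   nor all one, i.e. the value changes sign between 32 and 64 bits.  */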
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
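
/* For example, 0x1c00 (the 5-bit value 00111 deposited at bit 10) is
   accepted: lsb_mask is 0x400 and t computes to 0x400, a power of two.
   0x101 is rejected: lsb_mask is 1 and t computes to 0x11, which has
   two bits set, so no sign-extended 5-bit field can produce it.  */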
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
        0....01....1
        1....10....0
        1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
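
/* For example, MASK == 0xff is accepted: ~MASK is a single run of
   ones, so adding its lowest set bit carries all the way through and
   leaves at most one bit set.  MASK == 0x5 is rejected: ~MASK has two
   separate runs of ones, so a stray bit survives the carry and the
   power-of-two test fails.  */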
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
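
/* Here MASK itself must be one contiguous run of ones: 0x7f0 is
   accepted since 0x7f0 + 0x10 == 0x800, a power of two, while 0x707
   is rejected because the carry from the low run stops at the gap.  */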
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
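
/* For example, both (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) satisfy this predicate; each can be
   implemented with a sh2add instruction or a scaled index address.  */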
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1    Round up.
        else
          Y = (<large int> & ~mask)               Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
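
/* A worked example of the transformation above: for a word load at
   X + 0x7fff the mask is 0x3fff, and 0x7fff & 0x3fff == 0x3fff is
   >= 0x2000, so we round up to Y = 0x8000.  We emit Z = X + 0x8000
   and rewrite the access as memory (Z + -1), whose displacement
   trivially fits in 14 bits.  */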
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine cannot
         handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
            (Pmode, gen_rtx_PLUS (Pmode,
                                  gen_rtx_ASHIFT (Pmode, reg1,
                                                  GEN_INT (shift_val)),
                                  base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by shadd_const,
             it can instead be added to (reg).  This allows more scaled
             indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST are the cheapest addresses (cost 1),
   and addresses involving symbolic constants are more expensive.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
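
/* For example, (plus (ashift (reg) (const_int 3)) (reg)) and
   (plus (mult (reg) (const_int 8)) (reg)) both match; either form
   corresponds to a single sh3add instruction.  */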
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (outer_code == SET)
        *total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
        }
      else if (mode == DImode)
        {
          if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
            *total = COSTS_N_INSNS (25);
          else
            *total = COSTS_N_INSNS (80);
        }
      else
        {
          if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
            *total = COSTS_N_INSNS (8);
          else
            *total = COSTS_N_INSNS (20);
        }
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return false;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
        *total = COSTS_N_INSNS (240);
      else
        *total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (mode == DImode)
        {
          if (TARGET_64BIT)
            {
              *total = COSTS_N_INSNS (1);
              /* Handle shladd,l instructions.  */
              if (hppa_rtx_costs_shadd_p (x))
                return true;
            }
          else
            *total = COSTS_N_INSNS (2);
        }
      else
        {
          *total = COSTS_N_INSNS (1);
          /* Handle shNadd instructions.  */
          if (hppa_rtx_costs_shadd_p (x))
            return true;
        }
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));
    case ASHIFT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (3);
          else if (speed)
            *total = COSTS_N_INSNS (13);
          else
            *total = COSTS_N_INSNS (18);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (3);
          else if (speed)
            *total = COSTS_N_INSNS (14);
          else
            *total = COSTS_N_INSNS (19);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else if (speed)
            *total = COSTS_N_INSNS (12);
          else
            *total = COSTS_N_INSNS (15);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}
1891 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1892 new rtx with the correct mode. */
1893 static inline rtx
1894 force_mode (machine_mode mode, rtx orig)
1896 if (mode == GET_MODE (orig))
1897 return orig;
1899 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1901 return gen_rtx_REG (mode, REGNO (orig));
1904 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1906 static bool
1907 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1909 return tls_referenced_p (x);
1912 /* Emit insns to move operands[1] into operands[0].
1914 Return 1 if we have written out everything that needs to be done to
1915 do the move. Otherwise, return 0 and the caller will emit the move
1916 normally.
1918 Note SCRATCH_REG may not be in the proper mode depending on how it
1919 will be used. This routine is responsible for creating a new copy
1920 of SCRATCH_REG in the proper mode. */
1922 int
1923 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1925 rtx operand0 = operands[0];
1926 rtx operand1 = operands[1];
1927 rtx tem;
1929 /* We can only handle indexed addresses in the destination operand
1930 of floating point stores. Thus, we need to break out indexed
1931 addresses from the destination operand. */
1932 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1934 gcc_assert (can_create_pseudo_p ());
1936 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1937 operand0 = replace_equiv_address (operand0, tem);
1940 /* On targets with non-equivalent space registers, break out unscaled
1941 indexed addresses from the source operand before the final CSE.
1942 We have to do this because the REG_POINTER flag is not correctly
1943 carried through various optimization passes and CSE may substitute
1944 a pseudo without the pointer set for one with the pointer set. As
1945 a result, we lose various opportunities to create insns with
1946 unscaled indexed addresses. */
1947 if (!TARGET_NO_SPACE_REGS
1948 && !cse_not_expected
1949 && GET_CODE (operand1) == MEM
1950 && GET_CODE (XEXP (operand1, 0)) == PLUS
1951 && REG_P (XEXP (XEXP (operand1, 0), 0))
1952 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1953 operand1
1954 = replace_equiv_address (operand1,
1955 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1957 if (scratch_reg
1958 && reload_in_progress && GET_CODE (operand0) == REG
1959 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1960 operand0 = reg_equiv_mem (REGNO (operand0));
1961 else if (scratch_reg
1962 && reload_in_progress && GET_CODE (operand0) == SUBREG
1963 && GET_CODE (SUBREG_REG (operand0)) == REG
1964 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1966 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1967 the code which tracks sets/uses for delete_output_reload. */
1968 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1969 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1970 SUBREG_BYTE (operand0));
1971 operand0 = alter_subreg (&temp, true);
1974 if (scratch_reg
1975 && reload_in_progress && GET_CODE (operand1) == REG
1976 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1977 operand1 = reg_equiv_mem (REGNO (operand1));
1978 else if (scratch_reg
1979 && reload_in_progress && GET_CODE (operand1) == SUBREG
1980 && GET_CODE (SUBREG_REG (operand1)) == REG
1981 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1983 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1984 the code which tracks sets/uses for delete_output_reload. */
1985 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1986 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1987 SUBREG_BYTE (operand1));
1988 operand1 = alter_subreg (&temp, true);
1991 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1992 && ((tem = find_replacement (&XEXP (operand0, 0)))
1993 != XEXP (operand0, 0)))
1994 operand0 = replace_equiv_address (operand0, tem);
1996 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1997 && ((tem = find_replacement (&XEXP (operand1, 0)))
1998 != XEXP (operand1, 0)))
1999 operand1 = replace_equiv_address (operand1, tem);
2001 /* Handle secondary reloads for loads/stores of FP registers from
2002 REG+D addresses where D does not fit in 5 or 14 bits, including
2003 (subreg (mem (addr))) cases, and reloads for other unsupported
2004 memory operands. */
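/* A sketch of the two REG+D sub-cases handled below, assuming a DFmode
   access based on %r4: for 9000(%r4) the displacement fits in 14 bits
   but not 5, so when INT14_OK_STRICT is false the full address is
   computed into the scratch register (ldo 9000(%r4),scratch) and the
   access becomes fldd/fstd 0(scratch); for 20000(%r4) the displacement
   does not fit in 14 bits either, so 20000 is first loaded into the
   scratch register and added to %r4 before the access. */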
2005 if (scratch_reg
2006 && FP_REG_P (operand0)
2007 && (MEM_P (operand1)
2008 || (GET_CODE (operand1) == SUBREG
2009 && MEM_P (XEXP (operand1, 0)))))
2011 rtx op1 = operand1;
2013 if (GET_CODE (op1) == SUBREG)
2014 op1 = XEXP (op1, 0);
2016 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
2018 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
2019 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
2021 /* SCRATCH_REG will hold an address and maybe the actual data.
2022 We want it in WORD_MODE regardless of what mode it was
2023 originally given to us. */
2024 scratch_reg = force_mode (word_mode, scratch_reg);
2026 /* D might not fit in 14 bits either; for such cases load D
2027 into scratch reg. */
2028 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
2030 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
2031 emit_move_insn (scratch_reg,
2032 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
2033 Pmode,
2034 XEXP (XEXP (op1, 0), 0),
2035 scratch_reg));
2037 else
2038 emit_move_insn (scratch_reg, XEXP (op1, 0));
2039 op1 = replace_equiv_address (op1, scratch_reg);
2042 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
2043 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
2044 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
2046 /* Load memory address into SCRATCH_REG. */
2047 scratch_reg = force_mode (word_mode, scratch_reg);
2048 emit_move_insn (scratch_reg, XEXP (op1, 0));
2049 op1 = replace_equiv_address (op1, scratch_reg);
2051 emit_insn (gen_rtx_SET (operand0, op1));
2052 return 1;
2054 else if (scratch_reg
2055 && FP_REG_P (operand1)
2056 && (MEM_P (operand0)
2057 || (GET_CODE (operand0) == SUBREG
2058 && MEM_P (XEXP (operand0, 0)))))
2060 rtx op0 = operand0;
2062 if (GET_CODE (op0) == SUBREG)
2063 op0 = XEXP (op0, 0);
2065 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
2067 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
2068 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
2070 /* SCRATCH_REG will hold an address and maybe the actual data.
2071 We want it in WORD_MODE regardless of what mode it was
2072 originally given to us. */
2073 scratch_reg = force_mode (word_mode, scratch_reg);
2075 /* D might not fit in 14 bits either; for such cases load D
2076 into scratch reg. */
2077 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
2079 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
2080 emit_move_insn (scratch_reg,
2081 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
2082 Pmode,
2083 XEXP (XEXP (op0, 0), 0),
2084 scratch_reg));
2086 else
2087 emit_move_insn (scratch_reg, XEXP (op0, 0));
2088 op0 = replace_equiv_address (op0, scratch_reg);
2091 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
2092 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
2093 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
2095 /* Load memory address into SCRATCH_REG. */
2096 scratch_reg = force_mode (word_mode, scratch_reg);
2097 emit_move_insn (scratch_reg, XEXP (op0, 0));
2098 op0 = replace_equiv_address (op0, scratch_reg);
2100 emit_insn (gen_rtx_SET (op0, operand1));
2101 return 1;
2103 /* Handle secondary reloads for loads of FP registers from constant
2104 expressions by forcing the constant into memory. For the most part,
2105 this is only necessary for SImode and DImode.
2107 Use scratch_reg to hold the address of the memory location. */
2108 else if (scratch_reg
2109 && CONSTANT_P (operand1)
2110 && FP_REG_P (operand0))
2112 rtx const_mem, xoperands[2];
2114 if (operand1 == CONST0_RTX (mode))
2116 emit_insn (gen_rtx_SET (operand0, operand1));
2117 return 1;
2120 /* SCRATCH_REG will hold an address and maybe the actual data. We want
2121 it in WORD_MODE regardless of what mode it was originally given
2122 to us. */
2123 scratch_reg = force_mode (word_mode, scratch_reg);
2125 /* Force the constant into memory and put the address of the
2126 memory location into scratch_reg. */
2127 const_mem = force_const_mem (mode, operand1);
2128 xoperands[0] = scratch_reg;
2129 xoperands[1] = XEXP (const_mem, 0);
2130 pa_emit_move_sequence (xoperands, Pmode, 0);
2132 /* Now load the destination register. */
2133 emit_insn (gen_rtx_SET (operand0,
2134 replace_equiv_address (const_mem, scratch_reg)));
2135 return 1;
2137 /* Handle secondary reloads for SAR. These occur when trying to load
2138 the SAR from memory or a constant. */
2139 else if (scratch_reg
2140 && GET_CODE (operand0) == REG
2141 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2142 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2143 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2145 /* D might not fit in 14 bits either; for such cases load D into
2146 scratch reg. */
2147 if (GET_CODE (operand1) == MEM
2148 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2150 /* We are reloading the address into the scratch register, so we
2151 want to make sure the scratch register is a full register. */
2152 scratch_reg = force_mode (word_mode, scratch_reg);
2154 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2155 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2156 0)),
2157 Pmode,
2158 XEXP (XEXP (operand1, 0),
2159 0),
2160 scratch_reg));
2162 /* Now we are going to load the scratch register from memory;
2163 we want to load it in the same width as the original MEM,
2164 which must be the same as the width of the ultimate destination,
2165 OPERAND0. */
2166 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2168 emit_move_insn (scratch_reg,
2169 replace_equiv_address (operand1, scratch_reg));
2171 else
2173 /* We want to load the scratch register using the same mode as
2174 the ultimate destination. */
2175 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2177 emit_move_insn (scratch_reg, operand1);
2180 /* And emit the insn to set the ultimate destination. We know that
2181 the scratch register has the same mode as the destination at this
2182 point. */
2183 emit_move_insn (operand0, scratch_reg);
2184 return 1;
2187 /* Handle the most common case: storing into a register. */
2188 if (register_operand (operand0, mode))
2190 /* Legitimize TLS symbol references. This happens for references
2191 that aren't legitimate constants. */
2192 if (PA_SYMBOL_REF_TLS_P (operand1))
2193 operand1 = legitimize_tls_address (operand1);
2195 if (register_operand (operand1, mode)
2196 || (GET_CODE (operand1) == CONST_INT
2197 && pa_cint_ok_for_move (UINTVAL (operand1)))
2198 || (operand1 == CONST0_RTX (mode))
2199 || (GET_CODE (operand1) == HIGH
2200 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2201 /* Only `general_operands' can come here, so MEM is ok. */
2202 || GET_CODE (operand1) == MEM)
2204 /* Various sets are created during RTL generation which don't
2205 have the REG_POINTER flag correctly set. After the CSE pass,
2206 instruction recognition can fail if we don't consistently
2207 set this flag when performing register copies. This should
2208 also improve the opportunities for creating insns that use
2209 unscaled indexing. */
2210 if (REG_P (operand0) && REG_P (operand1))
2212 if (REG_POINTER (operand1)
2213 && !REG_POINTER (operand0)
2214 && !HARD_REGISTER_P (operand0))
2215 copy_reg_pointer (operand0, operand1);
2218 /* When MEMs are broken out, the REG_POINTER flag doesn't
2219 get set. In some cases, we can set the REG_POINTER flag
2220 from the declaration for the MEM. */
2221 if (REG_P (operand0)
2222 && GET_CODE (operand1) == MEM
2223 && !REG_POINTER (operand0))
2225 tree decl = MEM_EXPR (operand1);
2227 /* Set the register pointer flag and register alignment
2228 if the declaration for this memory reference is a
2229 pointer type. */
2230 if (decl)
2232 tree type;
2234 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2235 tree operand 1. */
2236 if (TREE_CODE (decl) == COMPONENT_REF)
2237 decl = TREE_OPERAND (decl, 1);
2239 type = TREE_TYPE (decl);
2240 type = strip_array_types (type);
2242 if (POINTER_TYPE_P (type))
2243 mark_reg_pointer (operand0, BITS_PER_UNIT);
2247 emit_insn (gen_rtx_SET (operand0, operand1));
2248 return 1;
2251 else if (GET_CODE (operand0) == MEM)
2253 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2254 && !(reload_in_progress || reload_completed))
2256 rtx temp = gen_reg_rtx (DFmode);
2258 emit_insn (gen_rtx_SET (temp, operand1));
2259 emit_insn (gen_rtx_SET (operand0, temp));
2260 return 1;
2262 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2264 /* Run this case quickly. */
2265 emit_insn (gen_rtx_SET (operand0, operand1));
2266 return 1;
2268 if (! (reload_in_progress || reload_completed))
2270 operands[0] = validize_mem (operand0);
2271 operands[1] = operand1 = force_reg (mode, operand1);
2275 /* Simplify the source if we need to.
2276 Note we do have to handle function labels here, even though we do
2277 not consider them legitimate constants. Loop optimizations can
2278 call the emit_move_xxx with one as a source. */
2279 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2280 || (GET_CODE (operand1) == HIGH
2281 && symbolic_operand (XEXP (operand1, 0), mode))
2282 || function_label_operand (operand1, VOIDmode)
2283 || tls_referenced_p (operand1))
2285 int ishighonly = 0;
2287 if (GET_CODE (operand1) == HIGH)
2289 ishighonly = 1;
2290 operand1 = XEXP (operand1, 0);
2292 if (symbolic_operand (operand1, mode))
2294 /* Argh. The assembler and linker can't handle arithmetic
2295 involving plabels.
2297 So we force the plabel into memory, load operand0 from
2298 the memory location, then add in the constant part. */
2299 if ((GET_CODE (operand1) == CONST
2300 && GET_CODE (XEXP (operand1, 0)) == PLUS
2301 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2302 VOIDmode))
2303 || function_label_operand (operand1, VOIDmode))
2305 rtx temp, const_part;
2307 /* Figure out what (if any) scratch register to use. */
2308 if (reload_in_progress || reload_completed)
2310 scratch_reg = scratch_reg ? scratch_reg : operand0;
2311 /* SCRATCH_REG will hold an address and maybe the actual
2312 data. We want it in WORD_MODE regardless of what mode it
2313 was originally given to us. */
2314 scratch_reg = force_mode (word_mode, scratch_reg);
2316 else if (flag_pic)
2317 scratch_reg = gen_reg_rtx (Pmode);
2319 if (GET_CODE (operand1) == CONST)
2321 /* Save away the constant part of the expression. */
2322 const_part = XEXP (XEXP (operand1, 0), 1);
2323 gcc_assert (GET_CODE (const_part) == CONST_INT);
2325 /* Force the function label into memory. */
2326 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2328 else
2330 /* No constant part. */
2331 const_part = NULL_RTX;
2333 /* Force the function label into memory. */
2334 temp = force_const_mem (mode, operand1);
2338 /* Get the address of the memory location. PIC-ify it if
2339 necessary. */
2340 temp = XEXP (temp, 0);
2341 if (flag_pic)
2342 temp = legitimize_pic_address (temp, mode, scratch_reg);
2344 /* Put the address of the memory location into our destination
2345 register. */
2346 operands[1] = temp;
2347 pa_emit_move_sequence (operands, mode, scratch_reg);
2349 /* Now load from the memory location into our destination
2350 register. */
2351 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2352 pa_emit_move_sequence (operands, mode, scratch_reg);
2354 /* And add back in the constant part. */
2355 if (const_part != NULL_RTX)
2356 expand_inc (operand0, const_part);
2358 return 1;
2361 if (flag_pic)
2363 rtx_insn *insn;
2364 rtx temp;
2366 if (reload_in_progress || reload_completed)
2368 temp = scratch_reg ? scratch_reg : operand0;
2369 /* TEMP will hold an address and maybe the actual
2370 data. We want it in WORD_MODE regardless of what mode it
2371 was originally given to us. */
2372 temp = force_mode (word_mode, temp);
2374 else
2375 temp = gen_reg_rtx (Pmode);
2377 /* Force (const (plus (symbol) (const_int))) to memory
2378 if the const_int will not fit in 14 bits. Although
2379 this requires a relocation, the instruction sequence
2380 needed to load the value is shorter. */
2381 if (GET_CODE (operand1) == CONST
2382 && GET_CODE (XEXP (operand1, 0)) == PLUS
2383 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2384 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2386 rtx x, m = force_const_mem (mode, operand1);
2388 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2389 x = replace_equiv_address (m, x);
2390 insn = emit_move_insn (operand0, x);
2392 else
2394 operands[1] = legitimize_pic_address (operand1, mode, temp);
2395 if (REG_P (operand0) && REG_P (operands[1]))
2396 copy_reg_pointer (operand0, operands[1]);
2397 insn = emit_move_insn (operand0, operands[1]);
2400 /* Put a REG_EQUAL note on this insn. */
2401 set_unique_reg_note (insn, REG_EQUAL, operand1);
2403 /* On the HPPA, references to data space are supposed to use dp,
2404 register 27, but showing it in the RTL inhibits various cse
2405 and loop optimizations. */
2406 else
2408 rtx temp, set;
2410 if (reload_in_progress || reload_completed)
2412 temp = scratch_reg ? scratch_reg : operand0;
2413 /* TEMP will hold an address and maybe the actual
2414 data. We want it in WORD_MODE regardless of what mode it
2415 was originally given to us. */
2416 temp = force_mode (word_mode, temp);
2418 else
2419 temp = gen_reg_rtx (mode);
2421 /* Loading a SYMBOL_REF into a register makes that register
2422 safe to be used as the base in an indexed address.
2424 Don't mark hard registers though. That loses. */
2425 if (GET_CODE (operand0) == REG
2426 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2427 mark_reg_pointer (operand0, BITS_PER_UNIT);
2428 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2429 mark_reg_pointer (temp, BITS_PER_UNIT);
2431 if (ishighonly)
2432 set = gen_rtx_SET (operand0, temp);
2433 else
2434 set = gen_rtx_SET (operand0,
2435 gen_rtx_LO_SUM (mode, temp, operand1));
2437 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2438 emit_insn (set);
2441 return 1;
2443 else if (tls_referenced_p (operand1))
2445 rtx tmp = operand1;
2446 rtx addend = NULL;
2448 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2450 addend = XEXP (XEXP (tmp, 0), 1);
2451 tmp = XEXP (XEXP (tmp, 0), 0);
2454 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2455 tmp = legitimize_tls_address (tmp);
2456 if (addend)
2458 tmp = gen_rtx_PLUS (mode, tmp, addend);
2459 tmp = force_operand (tmp, operands[0]);
2461 operands[1] = tmp;
2463 else if (GET_CODE (operand1) != CONST_INT
2464 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2466 rtx temp;
2467 rtx_insn *insn;
2468 rtx op1 = operand1;
2469 HOST_WIDE_INT value = 0;
2470 HOST_WIDE_INT insv = 0;
2471 int insert = 0;
2473 if (GET_CODE (operand1) == CONST_INT)
2474 value = INTVAL (operand1);
2476 if (TARGET_64BIT
2477 && GET_CODE (operand1) == CONST_INT
2478 && HOST_BITS_PER_WIDE_INT > 32
2479 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2481 HOST_WIDE_INT nval;
2483 /* Extract the low order 32 bits of the value and sign extend.
2484 If the new value is the same as the original value, we
2485 can use the original value as-is. If the new value is
2486 different, we use it and insert the most-significant 32-bits
2487 of the original value into the final result. */
2488 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2489 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2490 if (value != nval)
2492 #if HOST_BITS_PER_WIDE_INT > 32
2493 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2494 #endif
2495 insert = 1;
2496 value = nval;
2497 operand1 = GEN_INT (nval);
2501 if (reload_in_progress || reload_completed)
2502 temp = scratch_reg ? scratch_reg : operand0;
2503 else
2504 temp = gen_reg_rtx (mode);
2506 /* We don't directly split DImode constants on 32-bit targets
2507 because PLUS uses an 11-bit immediate and the insn sequence
2508 generated is not as efficient as the one using HIGH/LO_SUM. */
2509 if (GET_CODE (operand1) == CONST_INT
2510 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2511 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2512 && !insert)
2514 /* Directly break constant into high and low parts. This
2515 provides better optimization opportunities because various
2516 passes recognize constants split with PLUS but not LO_SUM.
2517 We use a 14-bit signed low part except when the addition
2518 of 0x4000 to the high part might change the sign of the
2519 high part. */
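/* Worked example of the split above: for VALUE 0x12345, LOW is
   initially 0x2345. Since that is >= 0x2000, HIGH becomes
   0x10000 + 0x4000 = 0x14000 and LOW is recomputed as
   0x12345 - 0x14000 = -7355, which fits in a 14-bit signed
   immediate, so the constant loads as HIGH plus a PLUS of LOW. */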
2520 HOST_WIDE_INT low = value & 0x3fff;
2521 HOST_WIDE_INT high = value & ~ 0x3fff;
2523 if (low >= 0x2000)
2525 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2526 high += 0x2000;
2527 else
2528 high += 0x4000;
2531 low = value - high;
2533 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2534 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2536 else
2538 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2539 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2542 insn = emit_move_insn (operands[0], operands[1]);
2544 /* Now insert the most significant 32 bits of the value
2545 into the register. When we don't have a second register
2546 available, it could take up to nine instructions to load
2547 a 64-bit integer constant. Prior to reload, we force
2548 constants that would take more than three instructions
2549 to load to the constant pool. During and after reload,
2550 we have to handle all possible values. */
2551 if (insert)
2553 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2554 register and the value to be inserted is outside the
2555 range that can be loaded with three depdi instructions. */
2556 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2558 operand1 = GEN_INT (insv);
2560 emit_insn (gen_rtx_SET (temp,
2561 gen_rtx_HIGH (mode, operand1)));
2562 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2563 if (mode == DImode)
2564 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2565 const0_rtx, temp));
2566 else
2567 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2568 const0_rtx, temp));
2570 else
2572 int len = 5, pos = 27;
2574 /* Insert the bits using the depdi instruction. */
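/* Loop sketch: each pass deposits V5, the sign-extended low five
   bits of INSV, as a field of LEN bits ending at position POS, then
   moves on to the next group. The inner while widens the deposit
   over any following run of bits equal to V5's sign, since
   sign-extending the 5-bit immediate reproduces such runs for
   free. */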
2575 while (pos >= 0)
2577 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2578 HOST_WIDE_INT sign = v5 < 0;
2580 /* Left extend the insertion. */
2581 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2582 while (pos > 0 && (insv & 1) == sign)
2584 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2585 len += 1;
2586 pos -= 1;
2589 if (mode == DImode)
2590 insn = emit_insn (gen_insvdi (operand0,
2591 GEN_INT (len),
2592 GEN_INT (pos),
2593 GEN_INT (v5)));
2594 else
2595 insn = emit_insn (gen_insvsi (operand0,
2596 GEN_INT (len),
2597 GEN_INT (pos),
2598 GEN_INT (v5)));
2600 len = pos > 0 && pos < 5 ? pos : 5;
2601 pos -= len;
2606 set_unique_reg_note (insn, REG_EQUAL, op1);
2608 return 1;
2611 /* Now have insn-emit do whatever it normally does. */
2612 return 0;
2615 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2616 it will need a link/runtime reloc). */
2618 int
2619 pa_reloc_needed (tree exp)
2621 int reloc = 0;
2623 switch (TREE_CODE (exp))
2625 case ADDR_EXPR:
2626 return 1;
2628 case POINTER_PLUS_EXPR:
2629 case PLUS_EXPR:
2630 case MINUS_EXPR:
2631 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2632 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2633 break;
2635 CASE_CONVERT:
2636 case NON_LVALUE_EXPR:
2637 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2638 break;
2640 case CONSTRUCTOR:
2642 tree value;
2643 unsigned HOST_WIDE_INT ix;
2645 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2646 if (value)
2647 reloc |= pa_reloc_needed (value);
2649 break;
2651 case ERROR_MARK:
2652 break;
2654 default:
2655 break;
2657 return reloc;
2661 /* Return the best assembler insn template
2662 for moving operands[1] into operands[0] as a fullword. */
2663 const char *
2664 pa_singlemove_string (rtx *operands)
2666 HOST_WIDE_INT intval;
2668 if (GET_CODE (operands[0]) == MEM)
2669 return "stw %r1,%0";
2670 if (GET_CODE (operands[1]) == MEM)
2671 return "ldw %1,%0";
2672 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2674 long i;
2676 gcc_assert (GET_MODE (operands[1]) == SFmode);
2678 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2679 bit pattern. */
2680 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2682 operands[1] = GEN_INT (i);
2683 /* Fall through to CONST_INT case. */
2685 if (GET_CODE (operands[1]) == CONST_INT)
2687 intval = INTVAL (operands[1]);
2689 if (VAL_14_BITS_P (intval))
2690 return "ldi %1,%0";
2691 else if ((intval & 0x7ff) == 0)
2692 return "ldil L'%1,%0";
2693 else if (pa_zdepi_cint_p (intval))
2694 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2695 else
2696 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2698 return "copy %1,%0";
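/* Examples of the constant cases above: 42 gives "ldi 42,%0"; 0x20000
   (low 11 bits zero) gives "ldil L'0x20000,%0"; 0x3c00 (a contiguous
   bit field) matches pa_zdepi_cint_p and uses zdepi/depwi,z; anything
   else falls back to the two-insn ldil/ldo pair. */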
2702 /* Compute position (in OP[1]) and width (in OP[2])
2703 useful for copying IMM to a register using the zdepi
2704 instructions. Store the immediate value to insert in OP[0]. */
2705 static void
2706 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2708 int lsb, len;
2710 /* Find the least significant set bit in IMM. */
2711 for (lsb = 0; lsb < 32; lsb++)
2713 if ((imm & 1) != 0)
2714 break;
2715 imm >>= 1;
2718 /* Choose variants based on *sign* of the 5-bit field. */
2719 if ((imm & 0x10) == 0)
2720 len = (lsb <= 28) ? 4 : 32 - lsb;
2721 else
2723 /* Find the width of the bitstring in IMM. */
2724 for (len = 5; len < 32 - lsb; len++)
2726 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2727 break;
2730 /* Sign extend IMM as a 5-bit value. */
2731 imm = (imm & 0xf) - 0x10;
2734 op[0] = imm;
2735 op[1] = 31 - lsb;
2736 op[2] = len;
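/* Worked example: for IMM 0x3c00, LSB is 10 and the shifted field is
   0xf, whose bit 4 is clear, so OP becomes {15, 21, 4}: depositing the
   4-bit value 15 so that it ends at bit 21 (PA bit numbering, MSB =
   bit 0) recreates 0xf << 10. */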
2739 /* Compute position (in OP[1]) and width (in OP[2])
2740 useful for copying IMM to a register using the depdi,z
2741 instructions. Store the immediate value to insert in OP[0]. */
2743 static void
2744 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2746 int lsb, len, maxlen;
2748 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2750 /* Find the least significant set bit in IMM. */
2751 for (lsb = 0; lsb < maxlen; lsb++)
2753 if ((imm & 1) != 0)
2754 break;
2755 imm >>= 1;
2758 /* Choose variants based on *sign* of the 5-bit field. */
2759 if ((imm & 0x10) == 0)
2760 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2761 else
2763 /* Find the width of the bitstring in IMM. */
2764 for (len = 5; len < maxlen - lsb; len++)
2766 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2767 break;
2770 /* Extend length if host is narrow and IMM is negative. */
2771 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2772 len += 32;
2774 /* Sign extend IMM as a 5-bit value. */
2775 imm = (imm & 0xf) - 0x10;
2778 op[0] = imm;
2779 op[1] = 63 - lsb;
2780 op[2] = len;
2783 /* Output assembler code to perform a doubleword move insn
2784 with operands OPERANDS. */
2786 const char *
2787 pa_output_move_double (rtx *operands)
2789 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2790 rtx latehalf[2];
2791 rtx addreg0 = 0, addreg1 = 0;
2792 int highonly = 0;
2794 /* First classify both operands. */
2796 if (REG_P (operands[0]))
2797 optype0 = REGOP;
2798 else if (offsettable_memref_p (operands[0]))
2799 optype0 = OFFSOP;
2800 else if (GET_CODE (operands[0]) == MEM)
2801 optype0 = MEMOP;
2802 else
2803 optype0 = RNDOP;
2805 if (REG_P (operands[1]))
2806 optype1 = REGOP;
2807 else if (CONSTANT_P (operands[1]))
2808 optype1 = CNSTOP;
2809 else if (offsettable_memref_p (operands[1]))
2810 optype1 = OFFSOP;
2811 else if (GET_CODE (operands[1]) == MEM)
2812 optype1 = MEMOP;
2813 else
2814 optype1 = RNDOP;
2816 /* Check for the cases that the operand constraints are not
2817 supposed to allow. */
2818 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2820 /* Handle copies between general and floating registers. */
2822 if (optype0 == REGOP && optype1 == REGOP
2823 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2825 if (FP_REG_P (operands[0]))
2827 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2828 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2829 return "{fldds|fldd} -16(%%sp),%0";
2831 else
2833 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2834 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2835 return "{ldws|ldw} -12(%%sp),%R0";
2839 /* Handle auto decrementing and incrementing loads and stores
2840 specifically, since the structure of the function doesn't work
2841 for them without major modification. Do it better when we teach
2842 this port about the general inc/dec addressing of PA.
2843 (This was written by tege. Chide him if it doesn't work.) */
2845 if (optype0 == MEMOP)
2847 /* We have to output the address syntax ourselves, since print_operand
2848 doesn't deal with the addresses we want to use. Fix this later. */
2850 rtx addr = XEXP (operands[0], 0);
2851 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2853 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2855 operands[0] = XEXP (addr, 0);
2856 gcc_assert (GET_CODE (operands[1]) == REG
2857 && GET_CODE (operands[0]) == REG);
2859 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2861 /* No overlap between high target register and address
2862 register. (We do this in a non-obvious way to
2863 save a register file writeback) */
2864 if (GET_CODE (addr) == POST_INC)
2865 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2866 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2868 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2870 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2872 operands[0] = XEXP (addr, 0);
2873 gcc_assert (GET_CODE (operands[1]) == REG
2874 && GET_CODE (operands[0]) == REG);
2876 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2877 /* No overlap between high target register and address
2878 register. (We do this in a non-obvious way to save a
2879 register file writeback) */
2880 if (GET_CODE (addr) == PRE_INC)
2881 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2882 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2885 if (optype1 == MEMOP)
2887 /* We have to output the address syntax ourselves, since print_operand
2888 doesn't deal with the addresses we want to use. Fix this later. */
2890 rtx addr = XEXP (operands[1], 0);
2891 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2893 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2895 operands[1] = XEXP (addr, 0);
2896 gcc_assert (GET_CODE (operands[0]) == REG
2897 && GET_CODE (operands[1]) == REG);
2899 if (!reg_overlap_mentioned_p (high_reg, addr))
2901 /* No overlap between high target register and address
2902 register. (We do this in a non-obvious way to
2903 save a register file writeback) */
2904 if (GET_CODE (addr) == POST_INC)
2905 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2906 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2908 else
2910 /* This is an undefined situation. We should load into the
2911 address register *and* update that register. Probably
2912 we don't need to handle this at all. */
2913 if (GET_CODE (addr) == POST_INC)
2914 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2915 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2918 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2920 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2922 operands[1] = XEXP (addr, 0);
2923 gcc_assert (GET_CODE (operands[0]) == REG
2924 && GET_CODE (operands[1]) == REG);
2926 if (!reg_overlap_mentioned_p (high_reg, addr))
2928 /* No overlap between high target register and address
2929 register. (We do this in a non-obvious way to
2930 save a register file writeback) */
2931 if (GET_CODE (addr) == PRE_INC)
2932 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2933 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2935 else
2937 /* This is an undefined situation. We should load into the
2938 address register *and* update that register. Probably
2939 we don't need to handle this at all. */
2940 if (GET_CODE (addr) == PRE_INC)
2941 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2942 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2945 else if (GET_CODE (addr) == PLUS
2946 && GET_CODE (XEXP (addr, 0)) == MULT)
2948 rtx xoperands[4];
2950 /* Load address into left half of destination register. */
2951 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2952 xoperands[1] = XEXP (addr, 1);
2953 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2954 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2955 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2956 xoperands);
2957 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2959 else if (GET_CODE (addr) == PLUS
2960 && REG_P (XEXP (addr, 0))
2961 && REG_P (XEXP (addr, 1)))
2963 rtx xoperands[3];
2965 /* Load address into left half of destination register. */
2966 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2967 xoperands[1] = XEXP (addr, 0);
2968 xoperands[2] = XEXP (addr, 1);
2969 output_asm_insn ("{addl|add,l} %1,%2,%0",
2970 xoperands);
2971 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2975 /* If an operand is an unoffsettable memory ref, find a register
2976 we can increment temporarily to make it refer to the second word. */
2978 if (optype0 == MEMOP)
2979 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2981 if (optype1 == MEMOP)
2982 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2984 /* Ok, we can do one word at a time.
2985 Normally we do the low-numbered word first.
2987 In either case, set up in LATEHALF the operands to use
2988 for the high-numbered word and in some cases alter the
2989 operands in OPERANDS to be suitable for the low-numbered word. */
2991 if (optype0 == REGOP)
2992 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2993 else if (optype0 == OFFSOP)
2994 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2995 else
2996 latehalf[0] = operands[0];
2998 if (optype1 == REGOP)
2999 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
3000 else if (optype1 == OFFSOP)
3001 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
3002 else if (optype1 == CNSTOP)
3004 if (GET_CODE (operands[1]) == HIGH)
3006 operands[1] = XEXP (operands[1], 0);
3007 highonly = 1;
3009 split_double (operands[1], &operands[1], &latehalf[1]);
3011 else
3012 latehalf[1] = operands[1];
3014 /* If the first move would clobber the source of the second one,
3015 do them in the other order.
3017 This can happen in two cases:
3019 mem -> register where the first half of the destination register
3020 is the same register used in the memory's address. Reload
3021 can create such insns.
3023 mem in this case will be either register indirect or register
3024 indirect plus a valid offset.
3026 register -> register move where REGNO(dst) == REGNO(src + 1)
3027 someone (Tim/Tege?) claimed this can happen for parameter loads.
3029 Handle mem -> register case first. */
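/* Example of the mem -> register overlap: a DImode load of the pair
   %r4/%r5 from 8(%r4) must not do "ldw 8(%r4),%r4" first, as that
   clobbers the base before "ldw 12(%r4),%r5"; loading the late half
   into %r5 first is safe. */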
3030 if (optype0 == REGOP
3031 && (optype1 == MEMOP || optype1 == OFFSOP)
3032 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
3034 /* Do the late half first. */
3035 if (addreg1)
3036 output_asm_insn ("ldo 4(%0),%0", &addreg1);
3037 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3039 /* Then clobber. */
3040 if (addreg1)
3041 output_asm_insn ("ldo -4(%0),%0", &addreg1);
3042 return pa_singlemove_string (operands);
3045 /* Now handle register -> register case. */
3046 if (optype0 == REGOP && optype1 == REGOP
3047 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
3049 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3050 return pa_singlemove_string (operands);
3053 /* Normal case: do the two words, low-numbered first. */
3055 output_asm_insn (pa_singlemove_string (operands), operands);
3057 /* Make any unoffsettable addresses point at high-numbered word. */
3058 if (addreg0)
3059 output_asm_insn ("ldo 4(%0),%0", &addreg0);
3060 if (addreg1)
3061 output_asm_insn ("ldo 4(%0),%0", &addreg1);
3063 /* Do high-numbered word. */
3064 if (highonly)
3065 output_asm_insn ("ldil L'%1,%0", latehalf);
3066 else
3067 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
3069 /* Undo the adds we just did. */
3070 if (addreg0)
3071 output_asm_insn ("ldo -4(%0),%0", &addreg0);
3072 if (addreg1)
3073 output_asm_insn ("ldo -4(%0),%0", &addreg1);
3075 return "";
3078 const char *
3079 pa_output_fp_move_double (rtx *operands)
3081 if (FP_REG_P (operands[0]))
3083 if (FP_REG_P (operands[1])
3084 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
3085 output_asm_insn ("fcpy,dbl %f1,%0", operands);
3086 else
3087 output_asm_insn ("fldd%F1 %1,%0", operands);
3089 else if (FP_REG_P (operands[1]))
3091 output_asm_insn ("fstd%F0 %1,%0", operands);
3093 else
3095 rtx xoperands[2];
3097 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
3099 /* This is a pain. You have to be prepared to deal with an
3100 arbitrary address here including pre/post increment/decrement.
3102 So avoid this in the MD. */
3103 gcc_assert (GET_CODE (operands[0]) == REG);
3105 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
3106 xoperands[0] = operands[0];
3107 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
3109 return "";
3112 /* Return a REG that occurs in ADDR with coefficient 1.
3113 ADDR can be effectively incremented by incrementing REG. */
3115 static rtx
3116 find_addr_reg (rtx addr)
3118 while (GET_CODE (addr) == PLUS)
3120 if (GET_CODE (XEXP (addr, 0)) == REG)
3121 addr = XEXP (addr, 0);
3122 else if (GET_CODE (XEXP (addr, 1)) == REG)
3123 addr = XEXP (addr, 1);
3124 else if (CONSTANT_P (XEXP (addr, 0)))
3125 addr = XEXP (addr, 1);
3126 else if (CONSTANT_P (XEXP (addr, 1)))
3127 addr = XEXP (addr, 0);
3128 else
3129 gcc_unreachable ();
3131 gcc_assert (GET_CODE (addr) == REG);
3132 return addr;
3135 /* Emit code to perform a block move.
3137 OPERANDS[0] is the destination pointer as a REG, clobbered.
3138 OPERANDS[1] is the source pointer as a REG, clobbered.
3139 OPERANDS[2] is a register for temporary storage.
3140 OPERANDS[3] is a register for temporary storage.
3141 OPERANDS[4] is the size as a CONST_INT
3142 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3143 OPERANDS[6] is another temporary register. */
3145 const char *
3146 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3148 int align = INTVAL (operands[5]);
3149 unsigned long n_bytes = INTVAL (operands[4]);
3151 /* We can't move more than a word at a time because the PA
3152 has no integer move insns longer than a word. (Could use fp mem ops?) */
3153 if (align > (TARGET_64BIT ? 8 : 4))
3154 align = (TARGET_64BIT ? 8 : 4);
3156 /* Note that we know each loop below will execute at least twice
3157 (else we would have open-coded the copy). */
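/* In each copying loop below, addib,>= adds the negative stride to
   the count register and branches back 12 bytes (three insns) while
   the result is still nonnegative; the store that follows it sits in
   the branch delay slot, so it executes on every iteration including
   the last. */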
3158 switch (align)
3160 case 8:
3161 /* Pre-adjust the loop counter. */
3162 operands[4] = GEN_INT (n_bytes - 16);
3163 output_asm_insn ("ldi %4,%2", operands);
3165 /* Copying loop. */
3166 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3167 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3168 output_asm_insn ("std,ma %3,8(%0)", operands);
3169 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3170 output_asm_insn ("std,ma %6,8(%0)", operands);
3172 /* Handle the residual. There could be up to 15 bytes of
3173 residual to copy! */
3174 if (n_bytes % 16 != 0)
3176 operands[4] = GEN_INT (n_bytes % 8);
3177 if (n_bytes % 16 >= 8)
3178 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3179 if (n_bytes % 8 != 0)
3180 output_asm_insn ("ldd 0(%1),%6", operands);
3181 if (n_bytes % 16 >= 8)
3182 output_asm_insn ("std,ma %3,8(%0)", operands);
3183 if (n_bytes % 8 != 0)
3184 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3186 return "";
3188 case 4:
3189 /* Pre-adjust the loop counter. */
3190 operands[4] = GEN_INT (n_bytes - 8);
3191 output_asm_insn ("ldi %4,%2", operands);
3193 /* Copying loop. */
3194 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3195 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3196 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3197 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3198 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3200 /* Handle the residual. There could be up to 7 bytes of
3201 residual to copy! */
3202 if (n_bytes % 8 != 0)
3204 operands[4] = GEN_INT (n_bytes % 4);
3205 if (n_bytes % 8 >= 4)
3206 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3207 if (n_bytes % 4 != 0)
3208 output_asm_insn ("ldw 0(%1),%6", operands);
3209 if (n_bytes % 8 >= 4)
3210 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3211 if (n_bytes % 4 != 0)
3212 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3214 return "";
3216 case 2:
3217 /* Pre-adjust the loop counter. */
3218 operands[4] = GEN_INT (n_bytes - 4);
3219 output_asm_insn ("ldi %4,%2", operands);
3221 /* Copying loop. */
3222 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3223 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3224 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3225 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3226 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3228 /* Handle the residual. */
3229 if (n_bytes % 4 != 0)
3231 if (n_bytes % 4 >= 2)
3232 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3233 if (n_bytes % 2 != 0)
3234 output_asm_insn ("ldb 0(%1),%6", operands);
3235 if (n_bytes % 4 >= 2)
3236 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3237 if (n_bytes % 2 != 0)
3238 output_asm_insn ("stb %6,0(%0)", operands);
3240 return "";
3242 case 1:
3243 /* Pre-adjust the loop counter. */
3244 operands[4] = GEN_INT (n_bytes - 2);
3245 output_asm_insn ("ldi %4,%2", operands);
3247 /* Copying loop. */
3248 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3249 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3250 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3251 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3252 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3254 /* Handle the residual. */
3255 if (n_bytes % 2 != 0)
3257 output_asm_insn ("ldb 0(%1),%3", operands);
3258 output_asm_insn ("stb %3,0(%0)", operands);
3260 return "";
3262 default:
3263 gcc_unreachable ();
3267 /* Count the number of insns necessary to handle this block move.
3269 Basic structure is the same as pa_output_block_move, except that we
3270 count insns rather than emit them. */
3272 static int
3273 compute_cpymem_length (rtx_insn *insn)
3275 rtx pat = PATTERN (insn);
3276 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3277 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3278 unsigned int n_insns = 0;
3280 /* We can't move more than a word at a time because the PA
3281 has no integer move insns longer than a word. (Could use fp mem ops?) */
3282 if (align > (TARGET_64BIT ? 8 : 4))
3283 align = (TARGET_64BIT ? 8 : 4);
3285 /* The basic copying loop. */
3286 n_insns = 6;
3288 /* Residuals. */
3289 if (n_bytes % (2 * align) != 0)
3291 if ((n_bytes % (2 * align)) >= align)
3292 n_insns += 2;
3294 if ((n_bytes % align) != 0)
3295 n_insns += 2;
3298 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3299 return n_insns * 4;
3302 /* Emit code to perform a block clear.
3304 OPERANDS[0] is the destination pointer as a REG, clobbered.
3305 OPERANDS[1] is a register for temporary storage.
3306 OPERANDS[2] is the size as a CONST_INT
3307 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3309 const char *
3310 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3312 int align = INTVAL (operands[3]);
3313 unsigned long n_bytes = INTVAL (operands[2]);
3315 /* We can't clear more than a word at a time because the PA
3316 has no integer move insns longer than a word. */
3317 if (align > (TARGET_64BIT ? 8 : 4))
3318 align = (TARGET_64BIT ? 8 : 4);
3320 /* Note that we know each loop below will execute at least twice
3321 (else we would have open-coded the copy). */
3322 switch (align)
3324 case 8:
3325 /* Pre-adjust the loop counter. */
3326 operands[2] = GEN_INT (n_bytes - 16);
3327 output_asm_insn ("ldi %2,%1", operands);
3329 /* Loop. */
3330 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3331 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3332 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3334 /* Handle the residual. There could be up to 15 bytes of
3335 residual to clear! */
3336 if (n_bytes % 16 != 0)
3338 operands[2] = GEN_INT (n_bytes % 8);
3339 if (n_bytes % 16 >= 8)
3340 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3341 if (n_bytes % 8 != 0)
3342 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3344 return "";
3346 case 4:
3347 /* Pre-adjust the loop counter. */
3348 operands[2] = GEN_INT (n_bytes - 8);
3349 output_asm_insn ("ldi %2,%1", operands);
3351 /* Loop. */
3352 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3353 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3354 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3356 /* Handle the residual. There could be up to 7 bytes of
3357 residual to clear! */
3358 if (n_bytes % 8 != 0)
3360 operands[2] = GEN_INT (n_bytes % 4);
3361 if (n_bytes % 8 >= 4)
3362 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3363 if (n_bytes % 4 != 0)
3364 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3366 return "";
3368 case 2:
3369 /* Pre-adjust the loop counter. */
3370 operands[2] = GEN_INT (n_bytes - 4);
3371 output_asm_insn ("ldi %2,%1", operands);
3373 /* Loop. */
3374 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3375 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3376 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3378 /* Handle the residual. */
3379 if (n_bytes % 4 != 0)
3381 if (n_bytes % 4 >= 2)
3382 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3383 if (n_bytes % 2 != 0)
3384 output_asm_insn ("stb %%r0,0(%0)", operands);
3386 return "";
3388 case 1:
3389 /* Pre-adjust the loop counter. */
3390 operands[2] = GEN_INT (n_bytes - 2);
3391 output_asm_insn ("ldi %2,%1", operands);
3393 /* Loop. */
3394 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3395 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3396 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3398 /* Handle the residual. */
3399 if (n_bytes % 2 != 0)
3400 output_asm_insn ("stb %%r0,0(%0)", operands);
3402 return "";
3404 default:
3405 gcc_unreachable ();
3409 /* Count the number of insns necessary to handle this block clear.
3411 Basic structure is the same as pa_output_block_clear, except that we
3412 count insns rather than emit them. */
3414 static int
3415 compute_clrmem_length (rtx_insn *insn)
3417 rtx pat = PATTERN (insn);
3418 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3419 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3420 unsigned int n_insns = 0;
3422 /* We can't clear more than a word at a time because the PA
3423 has no integer move insns longer than a word. */
3424 if (align > (TARGET_64BIT ? 8 : 4))
3425 align = (TARGET_64BIT ? 8 : 4);
3427 /* The basic loop. */
3428 n_insns = 4;
3430 /* Residuals. */
3431 if (n_bytes % (2 * align) != 0)
3433 if ((n_bytes % (2 * align)) >= align)
3434 n_insns++;
3436 if ((n_bytes % align) != 0)
3437 n_insns++;
3440 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3441 return n_insns * 4;
3445 const char *
3446 pa_output_and (rtx *operands)
3448 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3450 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3451 int ls0, ls1, ms0, p, len;
3453 for (ls0 = 0; ls0 < 32; ls0++)
3454 if ((mask & (1 << ls0)) == 0)
3455 break;
3457 for (ls1 = ls0; ls1 < 32; ls1++)
3458 if ((mask & (1 << ls1)) != 0)
3459 break;
3461 for (ms0 = ls1; ms0 < 32; ms0++)
3462 if ((mask & (1 << ms0)) == 0)
3463 break;
3465 gcc_assert (ms0 == 32);
3467 if (ls1 == 32)
3469 len = ls0;
3471 gcc_assert (len);
3473 operands[2] = GEN_INT (len);
3474 return "{extru|extrw,u} %1,31,%2,%0";
3476 else
3478 /* We could use this `depi' for the case above as well, but `depi'
3479 requires one more register file access than an `extru'. */
3481 p = 31 - ls0;
3482 len = ls1 - ls0;
3484 operands[2] = GEN_INT (p);
3485 operands[3] = GEN_INT (len);
3486 return "{depi|depwi} 0,%2,%3,%0";
3489 else
3490 return "and %1,%2,%0";
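/* Examples of the mask analysis above: a mask of 0xff keeps a
   low-order field, giving "extru %1,31,8,%0"; a mask of 0xffffff0f
   instead clears the interior bits 4-7, giving "depi 0,27,4,%0". */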
3493 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3494 storing the result in operands[0]. */
3495 const char *
3496 pa_output_64bit_and (rtx *operands)
3498 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3500 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3501 int ls0, ls1, ms0, p, len;
3503 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3504 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3505 break;
3507 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3508 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3509 break;
3511 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3512 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3513 break;
3515 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3517 if (ls1 == HOST_BITS_PER_WIDE_INT)
3519 len = ls0;
3521 gcc_assert (len);
3523 operands[2] = GEN_INT (len);
3524 return "extrd,u %1,63,%2,%0";
3526 else
3528 /* We could use this `depdi' for the case above as well, but `depdi'
3529 requires one more register file access than an `extrd'. */
3531 p = 63 - ls0;
3532 len = ls1 - ls0;
3534 operands[2] = GEN_INT (p);
3535 operands[3] = GEN_INT (len);
3536 return "depdi 0,%2,%3,%0";
3539 else
3540 return "and %1,%2,%0";
3543 const char *
3544 pa_output_ior (rtx *operands)
3546 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3547 int bs0, bs1, p, len;
3549 if (INTVAL (operands[2]) == 0)
3550 return "copy %1,%0";
3552 for (bs0 = 0; bs0 < 32; bs0++)
3553 if ((mask & (1 << bs0)) != 0)
3554 break;
3556 for (bs1 = bs0; bs1 < 32; bs1++)
3557 if ((mask & (1 << bs1)) == 0)
3558 break;
3560 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3562 p = 31 - bs0;
3563 len = bs1 - bs0;
3565 operands[2] = GEN_INT (p);
3566 operands[3] = GEN_INT (len);
3567 return "{depi|depwi} -1,%2,%3,%0";
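/* Example: a mask of 0x70 sets bits 4-6, so P = 27 and LEN = 3 and
   the result is "depi -1,27,3,%0". */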
3570 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3571 storing the result in operands[0]. */
3572 const char *
3573 pa_output_64bit_ior (rtx *operands)
3575 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3576 int bs0, bs1, p, len;
3578 if (INTVAL (operands[2]) == 0)
3579 return "copy %1,%0";
3581 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3582 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3583 break;
3585 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3586 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3587 break;
3589 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3590 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3592 p = 63 - bs0;
3593 len = bs1 - bs0;
3595 operands[2] = GEN_INT (p);
3596 operands[3] = GEN_INT (len);
3597 return "depdi -1,%2,%3,%0";
3600 /* Target hook for assembling integer objects. This code handles
3601 aligned SI and DI integers specially since function references
3602 must be preceded by P%. */
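/* For example, a word-sized, aligned reference to function foo is
   emitted as ".word P%foo" so the linker resolves it to a function
   descriptor (plabel) rather than a plain code address; see the
   TARGET_FAST_INDIRECT_CALLS exception below. */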
3604 static bool
3605 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3607 bool result;
3608 tree decl = NULL;
3610 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3611 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3612 calling output_addr_const. Otherwise, it may call assemble_external
3613 in the midst of outputting the assembler code for the SYMBOL_REF.
3614 We restore the SYMBOL_REF_DECL after the output is done. */
3615 if (GET_CODE (x) == SYMBOL_REF)
3617 decl = SYMBOL_REF_DECL (x);
3618 if (decl)
3620 assemble_external (decl);
3621 SET_SYMBOL_REF_DECL (x, NULL);
3625 if (size == UNITS_PER_WORD
3626 && aligned_p
3627 && function_label_operand (x, VOIDmode))
3629 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3631 /* We don't want an OPD when generating fast indirect calls. */
3632 if (!TARGET_FAST_INDIRECT_CALLS)
3633 fputs ("P%", asm_out_file);
3635 output_addr_const (asm_out_file, x);
3636 fputc ('\n', asm_out_file);
3637 result = true;
3639 else
3640 result = default_assemble_integer (x, size, aligned_p);
3642 if (decl)
3643 SET_SYMBOL_REF_DECL (x, decl);
3645 return result;
3648 /* Output an ascii string. */
3649 void
3650 pa_output_ascii (FILE *file, const char *p, int size)
3652 int i;
3653 int chars_output;
3654 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3656 /* The HP assembler can only take strings of 256 characters at one
3657 time. This is a limitation on input line length, *not* the
3658 length of the string. Sigh. Even worse, it seems that the
3659 restriction is in number of input characters (see \xnn &
3660 \whatever). So we have to do this very carefully. */
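/* For example, the bytes 'H', 'i', '\n', '"' are emitted as the
   string Hi\x0a\" -- printable characters pass through, quote and
   backslash get a backslash prefix, and everything else becomes a
   two-digit \x escape. */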
3662 fputs ("\t.STRING \"", file);
3664 chars_output = 0;
3665 for (i = 0; i < size; i += 4)
3667 int co = 0;
3668 int io = 0;
3669 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3671 unsigned int c = (unsigned char) p[i + io];
3673 if (c == '\"' || c == '\\')
3674 partial_output[co++] = '\\';
3675 if (c >= ' ' && c < 0177)
3676 partial_output[co++] = c;
3677 else
3679 unsigned int hexd;
3680 partial_output[co++] = '\\';
3681 partial_output[co++] = 'x';
3682 hexd = c / 16 - 0 + '0';
3683 if (hexd > '9')
3684 hexd -= '9' - 'a' + 1;
3685 partial_output[co++] = hexd;
3686 hexd = c % 16 - 0 + '0';
3687 if (hexd > '9')
3688 hexd -= '9' - 'a' + 1;
3689 partial_output[co++] = hexd;
3692 if (chars_output + co > 243)
3694 fputs ("\"\n\t.STRING \"", file);
3695 chars_output = 0;
3697 fwrite (partial_output, 1, (size_t) co, file);
3698 chars_output += co;
3699 co = 0;
3701 fputs ("\"\n", file);
3704 /* Try to rewrite floating point comparisons & branches to avoid
3705 useless add,tr insns.
3707 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3708 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3709 first attempt to remove useless add,tr insns. It is zero
3710 for the second pass as reorg sometimes leaves bogus REG_DEAD
3711 notes lying around.
3713 When CHECK_NOTES is zero we can only eliminate add,tr insns
3714 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3715 instructions. */
3716 static void
3717 remove_useless_addtr_insns (int check_notes)
3719 rtx_insn *insn;
3720 static int pass = 0;
3722 /* This is fairly cheap, so always run it when optimizing. */
3723 if (optimize > 0)
3725 int fcmp_count = 0;
3726 int fbranch_count = 0;
3728 /* Walk all the insns in this function looking for fcmp & fbranch
3729 instructions. Keep track of how many of each we find. */
3730 for (insn = get_insns (); insn; insn = next_insn (insn))
3732 rtx tmp;
3734 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3735 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3736 continue;
3738 tmp = PATTERN (insn);
3740 /* It must be a set. */
3741 if (GET_CODE (tmp) != SET)
3742 continue;
3744 /* If the destination is CCFP, then we've found an fcmp insn. */
3745 tmp = SET_DEST (tmp);
3746 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3748 fcmp_count++;
3749 continue;
3752 tmp = PATTERN (insn);
3753 /* If this is an fbranch instruction, bump the fbranch counter. */
3754 if (GET_CODE (tmp) == SET
3755 && SET_DEST (tmp) == pc_rtx
3756 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3757 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3758 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3759 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3761 fbranch_count++;
3762 continue;
3767 /* Find all floating point compare + branch insns. If possible,
3768 reverse the comparison & the branch to avoid add,tr insns. */
3769 for (insn = get_insns (); insn; insn = next_insn (insn))
3771 rtx tmp;
3772 rtx_insn *next;
3774 /* Ignore anything that isn't an INSN. */
3775 if (! NONJUMP_INSN_P (insn))
3776 continue;
3778 tmp = PATTERN (insn);
3780 /* It must be a set. */
3781 if (GET_CODE (tmp) != SET)
3782 continue;
3784 /* The destination must be CCFP, which is register zero. */
3785 tmp = SET_DEST (tmp);
3786 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3787 continue;
3789 /* INSN should be a set of CCFP.
3791 See if the result of this insn is used in a reversed FP
3792 conditional branch. If so, reverse our condition and
3793 the branch. Doing so avoids useless add,tr insns. */
3794 next = next_insn (insn);
3795 while (next)
3797 /* Jumps, calls and labels stop our search. */
3798 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3799 break;
3801 /* As does another fcmp insn. */
3802 if (NONJUMP_INSN_P (next)
3803 && GET_CODE (PATTERN (next)) == SET
3804 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3805 && REGNO (SET_DEST (PATTERN (next))) == 0)
3806 break;
3808 next = next_insn (next);
3811 /* Is NEXT a branch? */
3812 if (next && JUMP_P (next))
3814 rtx pattern = PATTERN (next);
3816 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3817 and CCFP dies, then reverse our conditional and the branch
3818 to avoid the add,tr. */
3819 if (GET_CODE (pattern) == SET
3820 && SET_DEST (pattern) == pc_rtx
3821 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3822 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3823 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3824 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3825 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3826 && (fcmp_count == fbranch_count
3827 || (check_notes
3828 && find_regno_note (next, REG_DEAD, 0))))
3830 /* Reverse the branch. */
3831 tmp = XEXP (SET_SRC (pattern), 1);
3832 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3833 XEXP (SET_SRC (pattern), 2) = tmp;
3834 INSN_CODE (next) = -1;
3836 /* Reverse our condition. */
3837 tmp = PATTERN (insn);
3838 PUT_CODE (XEXP (tmp, 1),
3839 (reverse_condition_maybe_unordered
3840 (GET_CODE (XEXP (tmp, 1)))));
3846 pass = !pass;
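/* An illustration of the rewrite performed above: given

     (set (reg:CCFP 0) (lt ...))                          ; fcmp
     (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
                             (pc) (label_ref ...)))       ; reversed branch

   the branch arms are swapped and the compare code is replaced via
   reverse_condition_maybe_unordered (LT -> UNGE), so the branch tests
   the condition directly and no add,tr sequence is needed.  */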
3850 /* You may have trouble believing this, but this is the 32 bit HP-PA
3851 stack layout. Wow.
3853 Offset Contents
3855 Variable arguments (optional; any number may be allocated)
3857 SP-(4*(N+9)) arg word N
3859 SP-56 arg word 5
3860 SP-52 arg word 4
3862 Fixed arguments (must be allocated; may remain unused)
3864 SP-48 arg word 3
3865 SP-44 arg word 2
3866 SP-40 arg word 1
3867 SP-36 arg word 0
3869 Frame Marker
3871 SP-32 External Data Pointer (DP)
3872 SP-28 External sr4
3873 SP-24 External/stub RP (RP')
3874 SP-20 Current RP
3875 SP-16 Static Link
3876 SP-12 Clean up
3877 SP-8 Calling Stub RP (RP'')
3878 SP-4 Previous SP
3880 Top of Frame
3882 SP-0 Stack Pointer (points to next available address)
3886 /* This function saves registers as follows. Registers marked with ' are
3887 this function's registers (as opposed to the previous function's).
3888 If a frame_pointer isn't needed, r4 is saved as a general register;
3889 the space for the frame pointer is still allocated, though, to keep
3890 things simple.
3893 Top of Frame
3895 SP (FP') Previous FP
3896 SP + 4 Alignment filler (sigh)
3897 SP + 8 Space for locals reserved here.
3901 SP + n All call-saved registers used.
3905 SP + o All call-saved fp registers used.
3909 SP + p (SP') points to next available address.
3913 /* Global variables set by output_function_prologue(). */
3914 /* Size of frame. Need to know this to emit return insns from
3915 leaf procedures. */
3916 static HOST_WIDE_INT actual_fsize, local_fsize;
3917 static int save_fregs;
3919 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3920 Handle case where DISP > 8k by using the add_high_const patterns.
3922 Note that in the DISP > 8k case, we will leave the high part of the address
3923 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3925 static void
3926 store_reg (int reg, HOST_WIDE_INT disp, int base)
3928 rtx dest, src, basereg;
3929 rtx_insn *insn;
3931 src = gen_rtx_REG (word_mode, reg);
3932 basereg = gen_rtx_REG (Pmode, base);
3933 if (VAL_14_BITS_P (disp))
3935 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3936 insn = emit_move_insn (dest, src);
3938 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3940 rtx delta = GEN_INT (disp);
3941 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3943 emit_move_insn (tmpreg, delta);
3944 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3945 if (DO_FRAME_NOTES)
3947 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3948 gen_rtx_SET (tmpreg,
3949 gen_rtx_PLUS (Pmode, basereg, delta)));
3950 RTX_FRAME_RELATED_P (insn) = 1;
3952 dest = gen_rtx_MEM (word_mode, tmpreg);
3953 insn = emit_move_insn (dest, src);
3955 else
3957 rtx delta = GEN_INT (disp);
3958 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3959 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3961 emit_move_insn (tmpreg, high);
3962 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3963 insn = emit_move_insn (dest, src);
3964 if (DO_FRAME_NOTES)
3965 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3966 gen_rtx_SET (gen_rtx_MEM (word_mode,
3967 gen_rtx_PLUS (word_mode,
3968 basereg,
3969 delta)),
3970 src));
3973 if (DO_FRAME_NOTES)
3974 RTX_FRAME_RELATED_P (insn) = 1;
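/* A sketch of what store_reg emits, assuming 32-bit code with %r30 as
   BASE; the mnemonics are illustrative, not taken from the insn
   patterns:

     VAL_14_BITS_P (disp):   stw  %rN,disp(%r30)
     otherwise:              addil L'disp,%r30     ; %r1 = %r30 + high part
                             stw  %rN,R'disp(%r1)

   In the 64-bit !VAL_32_BITS_P case, DISP is first materialized in %r1
   and added to BASE before storing through %r1.  */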
3977 /* Emit RTL to store REG at the memory location specified by BASE and then
3978 add MOD to BASE. MOD must be <= 8k. */
3980 static void
3981 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3983 rtx basereg, srcreg, delta;
3984 rtx_insn *insn;
3986 gcc_assert (VAL_14_BITS_P (mod));
3988 basereg = gen_rtx_REG (Pmode, base);
3989 srcreg = gen_rtx_REG (word_mode, reg);
3990 delta = GEN_INT (mod);
3992 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3993 if (DO_FRAME_NOTES)
3995 RTX_FRAME_RELATED_P (insn) = 1;
3997 /* RTX_FRAME_RELATED_P must be set on each frame related set
3998 in a parallel with more than one element. */
3999 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
4000 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
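/* Sketch: on 32-bit targets the post-store above corresponds to a
   store-word-and-modify instruction, e.g.

       stwm %rN,mod(%r30)    ; *%r30 = %rN, then %r30 += mod

   which stores the register and bumps the base in one insn, so the
   stack pointer never moves beyond unwritten stack.  */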
4004 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
4005 where DISP > 8k by using the add_high_const patterns. NOTE indicates
4006 whether to add a frame note or not.
4008 In the DISP > 8k case, we leave the high part of the address in %r1.
4009 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
4011 static void
4012 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
4014 rtx_insn *insn;
4016 if (VAL_14_BITS_P (disp))
4018 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4019 plus_constant (Pmode,
4020 gen_rtx_REG (Pmode, base), disp));
4022 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4024 rtx basereg = gen_rtx_REG (Pmode, base);
4025 rtx delta = GEN_INT (disp);
4026 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4028 emit_move_insn (tmpreg, delta);
4029 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4030 gen_rtx_PLUS (Pmode, tmpreg, basereg));
4031 if (DO_FRAME_NOTES)
4032 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4033 gen_rtx_SET (tmpreg,
4034 gen_rtx_PLUS (Pmode, basereg, delta)));
4036 else
4038 rtx basereg = gen_rtx_REG (Pmode, base);
4039 rtx delta = GEN_INT (disp);
4040 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4042 emit_move_insn (tmpreg,
4043 gen_rtx_PLUS (Pmode, basereg,
4044 gen_rtx_HIGH (Pmode, delta)));
4045 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
4046 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4049 if (DO_FRAME_NOTES && note)
4050 RTX_FRAME_RELATED_P (insn) = 1;
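/* Sketch of the three cases above, with illustrative 32-bit mnemonics:

     14-bit disp:       ldo disp(%rBASE),%rREG
     larger disp:       addil L'disp,%rBASE   ; %r1 = base + high part
                        ldo R'disp(%r1),%rREG
     64-bit huge disp:  load DISP into %r1, then add %rBASE.  */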
4053 HOST_WIDE_INT
4054 pa_compute_frame_size (poly_int64 size, int *fregs_live)
4056 int freg_saved = 0;
4057 int i, j;
4059 /* The code in pa_expand_prologue and pa_expand_epilogue must
4060 be consistent with the rounding and size calculation done here.
4061 Change them at the same time. */
4063 /* We do our own stack alignment. First, round the size of the
4064 stack locals up to a word boundary. */
4065 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4067 /* Space for previous frame pointer + filler. If any frame is
4068 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
4069 waste some space here for the sake of HP compatibility. The
4070 first slot is only used when the frame pointer is needed. */
4071 if (size || frame_pointer_needed)
4072 size += pa_starting_frame_offset ();
4074 /* If the current function calls __builtin_eh_return, then we need
4075 to allocate stack space for registers that will hold data for
4076 the exception handler. */
4077 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4079 unsigned int i;
4081 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
4082 continue;
4083 size += i * UNITS_PER_WORD;
4086 /* Account for space used by the callee general register saves. */
4087 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
4088 if (df_regs_ever_live_p (i))
4089 size += UNITS_PER_WORD;
4091 /* Account for space used by the callee floating point register saves. */
4092 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4093 if (df_regs_ever_live_p (i)
4094 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4096 freg_saved = 1;
4098 /* We always save both halves of the FP register, so always
4099 increment the frame size by 8 bytes. */
4100 size += 8;
4103 /* If any of the floating registers are saved, account for the
4104 alignment needed for the floating point register save block. */
4105 if (freg_saved)
4107 size = (size + 7) & ~7;
4108 if (fregs_live)
4109 *fregs_live = 1;
4112 /* The various ABIs include space for the outgoing parameters in the
4113 size of the current function's stack frame. We don't need to align
4114 for the outgoing arguments as their alignment is set by the final
4115 rounding for the frame as a whole. */
4116 size += crtl->outgoing_args_size;
4118 /* Allocate space for the fixed frame marker. This space must be
4119 allocated for any function that makes calls or allocates
4120 stack space. */
4121 if (!crtl->is_leaf || size)
4122 size += TARGET_64BIT ? 48 : 32;
4124 /* Finally, round to the preferred stack boundary. */
4125 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
4126 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
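/* A worked example, assuming 32-bit code, a 64-byte preferred stack
   boundary and pa_starting_frame_offset () == 8: a non-leaf function
   with 40 bytes of locals, no callee saves and no outgoing arguments
   computes 40 -> 48 (frame pointer slot + filler) -> 80 (fixed 32-byte
   frame marker) -> 128 after the final rounding.  */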
4129 /* Output function label, and associated .PROC and .CALLINFO statements. */
4131 void
4132 pa_output_function_label (FILE *file)
4134 /* The function's label and associated .PROC must never be
4135 separated and must be output *after* any profiling declarations
4136 to avoid changing spaces/subspaces within a procedure. */
4137 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
4138 ASM_OUTPUT_FUNCTION_LABEL (file, name, current_function_decl);
4139 fputs ("\t.PROC\n", file);
4141 /* pa_expand_prologue does the dirty work now. We just need
4142 to output the assembler directives which denote the start
4143 of a function. */
4144 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4145 if (crtl->is_leaf)
4146 fputs (",NO_CALLS", file);
4147 else
4148 fputs (",CALLS", file);
4149 if (rp_saved)
4150 fputs (",SAVE_RP", file);
4152 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4153 at the beginning of the frame and that it is used as the frame
4154 pointer for the frame. We do this because our current frame
4155 layout doesn't conform to that specified in the HP runtime
4156 documentation and we need a way to indicate to programs such as
4157 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4158 isn't used by HP compilers but is supported by the assembler.
4159 However, SAVE_SP is supposed to indicate that the previous stack
4160 pointer has been saved in the frame marker. */
4161 if (frame_pointer_needed)
4162 fputs (",SAVE_SP", file);
4164 /* Pass on information about the number of callee register saves
4165 performed in the prologue.
4167 The compiler is supposed to pass the highest register number
4168 saved, the assembler then has to adjust that number before
4169 entering it into the unwind descriptor (to account for any
4170 caller saved registers with lower register numbers than the
4171 first callee saved register). */
4172 if (gr_saved)
4173 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4175 if (fr_saved)
4176 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4178 fputs ("\n\t.ENTRY\n", file);
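/* An example of the directives emitted above for a non-leaf function
   with a 128-byte frame that uses a frame pointer and saves %r3 and
   %r4 (gr_saved == 2, hence ENTRY_GR=4); the label form depends on
   ASM_OUTPUT_FUNCTION_LABEL:

       foo:
           .PROC
           .CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=4
           .ENTRY  */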
4181 /* Output function prologue. */
4183 static void
4184 pa_output_function_prologue (FILE *file)
4186 pa_output_function_label (file);
4187 remove_useless_addtr_insns (0);
4190 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4192 static void
4193 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4195 remove_useless_addtr_insns (0);
4198 void
4199 pa_expand_prologue (void)
4201 int merge_sp_adjust_with_store = 0;
4202 HOST_WIDE_INT size = get_frame_size ();
4203 HOST_WIDE_INT offset;
4204 int i;
4205 rtx tmpreg;
4206 rtx_insn *insn;
4208 gr_saved = 0;
4209 fr_saved = 0;
4210 save_fregs = 0;
4212 /* Compute total size for frame pointer, filler, locals and rounding to
4213 the next word boundary. Similar code appears in pa_compute_frame_size
4214 and must be changed in tandem with this code. */
4215 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4216 if (local_fsize || frame_pointer_needed)
4217 local_fsize += pa_starting_frame_offset ();
4219 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4220 if (flag_stack_usage_info)
4221 current_function_static_stack_size = actual_fsize;
4223 /* Compute a few things we will use often. */
4224 tmpreg = gen_rtx_REG (word_mode, 1);
4226 /* Save RP first. The calling conventions manual states RP will
4227 always be stored into the caller's frame at sp - 20 or sp - 16
4228 depending on which ABI is in use. */
4229 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4231 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4232 rp_saved = true;
4234 else
4235 rp_saved = false;
4237 /* Allocate the local frame and set up the frame pointer if needed. */
4238 if (actual_fsize != 0)
4240 if (frame_pointer_needed)
4242 /* Copy the old frame pointer temporarily into %r1. Set up the
4243 new stack pointer, then store away the saved old frame pointer
4244 into the stack at sp and at the same time update the stack
4245 pointer by actual_fsize bytes. Two versions: the first
4246 handles small (<8k) frames, the second handles large (>=8k)
4247 frames. */
4248 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4249 if (DO_FRAME_NOTES)
4250 RTX_FRAME_RELATED_P (insn) = 1;
4252 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4253 if (DO_FRAME_NOTES)
4254 RTX_FRAME_RELATED_P (insn) = 1;
4256 if (VAL_14_BITS_P (actual_fsize))
4257 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4258 else
4260 /* It is incorrect to store the saved frame pointer at *sp,
4261 then increment sp (writes beyond the current stack boundary).
4263 So instead use stwm to store at *sp and post-increment the
4264 stack pointer as an atomic operation. Then increment sp to
4265 finish allocating the new frame. */
4266 HOST_WIDE_INT adjust1 = 8192 - 64;
4267 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4269 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4270 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4271 adjust2, 1);
4274 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4275 we need to store the previous stack pointer (frame pointer)
4276 into the frame marker on targets that use the HP unwind
4277 library. This allows the HP unwind library to be used to
4278 unwind GCC frames. However, we are not fully compatible
4279 with the HP library because our frame layout differs from
4280 that specified in the HP runtime specification.
4282 We don't want a frame note on this instruction as the frame
4283 marker moves during dynamic stack allocation.
4285 This instruction also serves as a blockage to prevent
4286 register spills from being scheduled before the stack
4287 pointer is raised. This is necessary as we store
4288 registers using the frame pointer as a base register,
4289 and the frame pointer is set before sp is raised. */
4290 if (TARGET_HPUX_UNWIND_LIBRARY)
4292 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4293 GEN_INT (TARGET_64BIT ? -8 : -4));
4295 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4296 hard_frame_pointer_rtx);
4298 else
4299 emit_insn (gen_blockage ());
4301 /* no frame pointer needed. */
4302 else
4304 /* In some cases we can perform the first callee register save
4305 and allocating the stack frame at the same time. If so, just
4306 make a note of it and defer allocating the frame until saving
4307 the callee registers. */
4308 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4309 merge_sp_adjust_with_store = 1;
4310 /* Cannot optimize. Adjust the stack frame by actual_fsize
4311 bytes. */
4312 else
4313 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4314 actual_fsize, 1);
4318 /* Normal register save.
4320 Do not save the frame pointer in the frame_pointer_needed case. It
4321 was done earlier. */
4322 if (frame_pointer_needed)
4324 offset = local_fsize;
4326 /* Saving the EH return data registers in the frame is the simplest
4327 way to get the frame unwind information emitted. We put them
4328 just before the general registers. */
4329 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4331 unsigned int i, regno;
4333 for (i = 0; ; ++i)
4335 regno = EH_RETURN_DATA_REGNO (i);
4336 if (regno == INVALID_REGNUM)
4337 break;
4339 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4340 offset += UNITS_PER_WORD;
4344 for (i = 18; i >= 4; i--)
4345 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4347 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4348 offset += UNITS_PER_WORD;
4349 gr_saved++;
4351 /* Account for %r3 which is saved in a special place. */
4352 gr_saved++;
4354 /* No frame pointer needed. */
4355 else
4357 offset = local_fsize - actual_fsize;
4359 /* Saving the EH return data registers in the frame is the simplest
4360 way to get the frame unwind information emitted. */
4361 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4363 unsigned int i, regno;
4365 for (i = 0; ; ++i)
4367 regno = EH_RETURN_DATA_REGNO (i);
4368 if (regno == INVALID_REGNUM)
4369 break;
4371 /* If merge_sp_adjust_with_store is nonzero, then we can
4372 optimize the first save. */
4373 if (merge_sp_adjust_with_store)
4375 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4376 merge_sp_adjust_with_store = 0;
4378 else
4379 store_reg (regno, offset, STACK_POINTER_REGNUM);
4380 offset += UNITS_PER_WORD;
4384 for (i = 18; i >= 3; i--)
4385 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4387 /* If merge_sp_adjust_with_store is nonzero, then we can
4388 optimize the first GR save. */
4389 if (merge_sp_adjust_with_store)
4391 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4392 merge_sp_adjust_with_store = 0;
4394 else
4395 store_reg (i, offset, STACK_POINTER_REGNUM);
4396 offset += UNITS_PER_WORD;
4397 gr_saved++;
4400 /* If we wanted to merge the SP adjustment with a GR save, but we never
4401 did any GR saves, then just emit the adjustment here. */
4402 if (merge_sp_adjust_with_store)
4403 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4404 actual_fsize, 1);
4407 /* The hppa calling conventions say that %r19, the pic offset
4408 register, is saved at sp - 32 (in this function's frame)
4409 when generating PIC code. FIXME: What is the correct thing
4410 to do for functions which make no calls and allocate no
4411 frame? Do we need to allocate a frame, or can we just omit
4412 the save? For now we'll just omit the save.
4414 We don't want a note on this insn as the frame marker can
4415 move if there is a dynamic stack allocation. */
4416 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4418 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4420 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4424 /* Align pointer properly (doubleword boundary). */
4425 offset = (offset + 7) & ~7;
4427 /* Floating point register store. */
4428 if (save_fregs)
4430 rtx base;
4432 /* First get the frame or stack pointer to the start of the FP register
4433 save area. */
4434 if (frame_pointer_needed)
4436 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4437 base = hard_frame_pointer_rtx;
4439 else
4441 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4442 base = stack_pointer_rtx;
4445 /* Now actually save the FP registers. */
4446 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4448 if (df_regs_ever_live_p (i)
4449 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4451 rtx addr, reg;
4452 rtx_insn *insn;
4453 addr = gen_rtx_MEM (DFmode,
4454 gen_rtx_POST_INC (word_mode, tmpreg));
4455 reg = gen_rtx_REG (DFmode, i);
4456 insn = emit_move_insn (addr, reg);
4457 if (DO_FRAME_NOTES)
4459 RTX_FRAME_RELATED_P (insn) = 1;
4460 if (TARGET_64BIT)
4462 rtx mem = gen_rtx_MEM (DFmode,
4463 plus_constant (Pmode, base,
4464 offset));
4465 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4466 gen_rtx_SET (mem, reg));
4468 else
4470 rtx meml = gen_rtx_MEM (SFmode,
4471 plus_constant (Pmode, base,
4472 offset));
4473 rtx memr = gen_rtx_MEM (SFmode,
4474 plus_constant (Pmode, base,
4475 offset + 4));
4476 rtx regl = gen_rtx_REG (SFmode, i);
4477 rtx regr = gen_rtx_REG (SFmode, i + 1);
4478 rtx setl = gen_rtx_SET (meml, regl);
4479 rtx setr = gen_rtx_SET (memr, regr);
4480 rtvec vec;
4482 RTX_FRAME_RELATED_P (setl) = 1;
4483 RTX_FRAME_RELATED_P (setr) = 1;
4484 vec = gen_rtvec (2, setl, setr);
4485 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4486 gen_rtx_SEQUENCE (VOIDmode, vec));
4489 offset += GET_MODE_SIZE (DFmode);
4490 fr_saved++;
4496 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4497 Handle case where DISP > 8k by using the add_high_const patterns. */
4499 static void
4500 load_reg (int reg, HOST_WIDE_INT disp, int base)
4502 rtx dest = gen_rtx_REG (word_mode, reg);
4503 rtx basereg = gen_rtx_REG (Pmode, base);
4504 rtx src;
4506 if (VAL_14_BITS_P (disp))
4507 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4508 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4510 rtx delta = GEN_INT (disp);
4511 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4513 emit_move_insn (tmpreg, delta);
4514 if (TARGET_DISABLE_INDEXING)
4516 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4517 src = gen_rtx_MEM (word_mode, tmpreg);
4519 else
4520 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4522 else
4524 rtx delta = GEN_INT (disp);
4525 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4526 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4528 emit_move_insn (tmpreg, high);
4529 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4532 emit_move_insn (dest, src);
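/* load_reg mirrors store_reg: illustratively, "ldw disp(%rBASE),%rN"
   for a 14-bit displacement, or "addil L'disp,%rBASE" followed by
   "ldw R'disp(%r1),%rN" otherwise.  */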
4535 /* Update the total code bytes output to the text section. */
4537 static void
4538 update_total_code_bytes (unsigned int nbytes)
4540 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4541 && !IN_NAMED_SECTION_P (cfun->decl))
4543 unsigned int old_total = total_code_bytes;
4545 total_code_bytes += nbytes;
4547 /* Be prepared to handle overflows. */
4548 if (old_total > total_code_bytes)
4549 total_code_bytes = UINT_MAX;
4553 /* This function generates the assembly code for function exit.
4554 Args are as for output_function_prologue ().
4556 The function epilogue should not depend on the current stack
4557 pointer! It should use the frame pointer only. This is mandatory
4558 because of alloca; we also take advantage of it to omit stack
4559 adjustments before returning. */
4561 static void
4562 pa_output_function_epilogue (FILE *file)
4564 rtx_insn *insn = get_last_insn ();
4565 bool extra_nop;
4567 /* pa_expand_epilogue does the dirty work now. We just need
4568 to output the assembler directives which denote the end
4569 of a function.
4571 To make debuggers happy, emit a nop if the epilogue was completely
4572 eliminated due to a volatile call as the last insn in the
4573 current function. That way the return address (in %r2) will
4574 always point to a valid instruction in the current function. */
4576 /* Get the last real insn. */
4577 if (NOTE_P (insn))
4578 insn = prev_real_insn (insn);
4580 /* If it is a sequence, then look inside. */
4581 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4582 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4584 /* If insn is a CALL_INSN, then it must be a call to a volatile
4585 function (otherwise there would be epilogue insns). */
4586 if (insn && CALL_P (insn))
4588 fputs ("\tnop\n", file);
4589 extra_nop = true;
4591 else
4592 extra_nop = false;
4594 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4596 if (TARGET_SOM && TARGET_GAS)
4598 /* We are done with this subspace except possibly for some additional
4599 debug information. Forget that we are in this subspace to ensure
4600 that the next function is output in its own subspace. */
4601 in_section = NULL;
4602 cfun->machine->in_nsubspa = 2;
4605 /* Thunks do their own insn accounting. */
4606 if (cfun->is_thunk)
4607 return;
4609 if (INSN_ADDRESSES_SET_P ())
4611 last_address = extra_nop ? 4 : 0;
4612 insn = get_last_nonnote_insn ();
4613 if (insn)
4615 last_address += INSN_ADDRESSES (INSN_UID (insn));
4616 if (INSN_P (insn))
4617 last_address += insn_default_length (insn);
4619 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4620 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4622 else
4623 last_address = UINT_MAX;
4625 /* Finally, update the total number of code bytes output so far. */
4626 update_total_code_bytes (last_address);
4629 void
4630 pa_expand_epilogue (void)
4632 rtx tmpreg;
4633 HOST_WIDE_INT offset;
4634 HOST_WIDE_INT ret_off = 0;
4635 int i;
4636 int merge_sp_adjust_with_load = 0;
4638 /* We will use this often. */
4639 tmpreg = gen_rtx_REG (word_mode, 1);
4641 /* Try to restore RP early to avoid load/use interlocks when
4642 RP gets used in the return (bv) instruction. This appears to still
4643 be necessary even when we schedule the prologue and epilogue. */
4644 if (rp_saved)
4646 ret_off = TARGET_64BIT ? -16 : -20;
4647 if (frame_pointer_needed)
4649 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4650 ret_off = 0;
4652 else
4654 /* No frame pointer, and stack is smaller than 8k. */
4655 if (VAL_14_BITS_P (ret_off - actual_fsize))
4657 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4658 ret_off = 0;
4663 /* General register restores. */
4664 if (frame_pointer_needed)
4666 offset = local_fsize;
4668 /* If the current function calls __builtin_eh_return, then we need
4669 to restore the saved EH data registers. */
4670 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4672 unsigned int i, regno;
4674 for (i = 0; ; ++i)
4676 regno = EH_RETURN_DATA_REGNO (i);
4677 if (regno == INVALID_REGNUM)
4678 break;
4680 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4681 offset += UNITS_PER_WORD;
4685 for (i = 18; i >= 4; i--)
4686 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4688 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4689 offset += UNITS_PER_WORD;
4692 else
4694 offset = local_fsize - actual_fsize;
4696 /* If the current function calls __builtin_eh_return, then we need
4697 to restore the saved EH data registers. */
4698 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4700 unsigned int i, regno;
4702 for (i = 0; ; ++i)
4704 regno = EH_RETURN_DATA_REGNO (i);
4705 if (regno == INVALID_REGNUM)
4706 break;
4708 /* Only for the first load.
4709 merge_sp_adjust_with_load holds the register load
4710 with which we will merge the sp adjustment. */
4711 if (merge_sp_adjust_with_load == 0
4712 && local_fsize == 0
4713 && VAL_14_BITS_P (-actual_fsize))
4714 merge_sp_adjust_with_load = regno;
4715 else
4716 load_reg (regno, offset, STACK_POINTER_REGNUM);
4717 offset += UNITS_PER_WORD;
4721 for (i = 18; i >= 3; i--)
4723 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4725 /* Only for the first load.
4726 merge_sp_adjust_with_load holds the register load
4727 with which we will merge the sp adjustment. */
4728 if (merge_sp_adjust_with_load == 0
4729 && local_fsize == 0
4730 && VAL_14_BITS_P (-actual_fsize))
4731 merge_sp_adjust_with_load = i;
4732 else
4733 load_reg (i, offset, STACK_POINTER_REGNUM);
4734 offset += UNITS_PER_WORD;
4739 /* Align pointer properly (doubleword boundary). */
4740 offset = (offset + 7) & ~7;
4742 /* FP register restores. */
4743 if (save_fregs)
4745 /* Adjust the register to index off of. */
4746 if (frame_pointer_needed)
4747 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4748 else
4749 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4751 /* Actually do the restores now. */
4752 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4753 if (df_regs_ever_live_p (i)
4754 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4756 rtx src = gen_rtx_MEM (DFmode,
4757 gen_rtx_POST_INC (word_mode, tmpreg));
4758 rtx dest = gen_rtx_REG (DFmode, i);
4759 emit_move_insn (dest, src);
4763 /* Emit a blockage insn here to keep these insns from being moved to
4764 an earlier spot in the epilogue, or into the main instruction stream.
4766 This is necessary as we must not cut the stack back before all the
4767 restores are finished. */
4768 emit_insn (gen_blockage ());
4770 /* Reset stack pointer (and possibly frame pointer). The stack
4771 pointer is initially set to fp + 64 to avoid a race condition. */
4772 if (frame_pointer_needed)
4774 rtx delta = GEN_INT (-64);
4776 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4777 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4778 stack_pointer_rtx, delta));
4780 /* If we were deferring a callee register restore, do it now. */
4781 else if (merge_sp_adjust_with_load)
4783 rtx delta = GEN_INT (-actual_fsize);
4784 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4786 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4788 else if (actual_fsize != 0)
4789 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4790 - actual_fsize, 0);
4792 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4793 frame greater than 8k), do so now. */
4794 if (ret_off != 0)
4795 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4797 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4799 rtx sa = EH_RETURN_STACKADJ_RTX;
4801 emit_insn (gen_blockage ());
4802 emit_insn (TARGET_64BIT
4803 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4804 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4808 bool
4809 pa_can_use_return_insn (void)
4811 if (!reload_completed)
4812 return false;
4814 if (frame_pointer_needed)
4815 return false;
4817 if (df_regs_ever_live_p (2))
4818 return false;
4820 if (crtl->profile)
4821 return false;
4823 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4826 rtx
4827 hppa_pic_save_rtx (void)
4829 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4832 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4833 #define NO_DEFERRED_PROFILE_COUNTERS 0
4834 #endif
4837 /* Vector of funcdef numbers. */
4838 static vec<int> funcdef_nos;
4840 /* Output deferred profile counters. */
4841 static void
4842 output_deferred_profile_counters (void)
4844 unsigned int i;
4845 int align, n;
4847 if (funcdef_nos.is_empty ())
4848 return;
4850 switch_to_section (data_section);
4851 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4852 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4854 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4856 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4857 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4860 funcdef_nos.release ();
4863 void
4864 hppa_profile_hook (int label_no)
4866 rtx_code_label *label_rtx = gen_label_rtx ();
4867 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4868 rtx arg_bytes, begin_label_rtx, mcount, sym;
4869 rtx_insn *call_insn;
4870 char begin_label_name[16];
4871 bool use_mcount_pcrel_call;
4873 /* Set up call destination. */
4874 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4875 pa_encode_label (sym);
4876 mcount = gen_rtx_MEM (Pmode, sym);
4878 /* If we can reach _mcount with a pc-relative call, we can optimize
4879 loading the address of the current function. This requires linker
4880 long branch stub support. */
4881 if (!TARGET_PORTABLE_RUNTIME
4882 && !TARGET_LONG_CALLS
4883 && (TARGET_SOM || flag_function_sections))
4884 use_mcount_pcrel_call = TRUE;
4885 else
4886 use_mcount_pcrel_call = FALSE;
4888 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4889 label_no);
4890 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4892 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4894 if (!use_mcount_pcrel_call)
4896 /* The address of the function is loaded into %r25 with an instruction-
4897 relative sequence that avoids the use of relocations. We use SImode
4898 for the address of the function in both 32 and 64-bit code to avoid
4899 having to provide DImode versions of the lcla2 pattern. */
4900 if (TARGET_PA_20)
4901 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4902 else
4903 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4906 if (!NO_DEFERRED_PROFILE_COUNTERS)
4908 rtx count_label_rtx, addr, r24;
4909 char count_label_name[16];
4911 funcdef_nos.safe_push (label_no);
4912 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4913 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4914 ggc_strdup (count_label_name));
4916 addr = force_reg (Pmode, count_label_rtx);
4917 r24 = gen_rtx_REG (Pmode, 24);
4918 emit_move_insn (r24, addr);
4920 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4921 if (use_mcount_pcrel_call)
4922 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4923 begin_label_rtx));
4924 else
4925 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4927 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4929 else
4931 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4932 if (use_mcount_pcrel_call)
4933 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4934 begin_label_rtx));
4935 else
4936 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4939 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4940 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4942 /* Indicate the _mcount call cannot throw, nor will it execute a
4943 non-local goto. */
4944 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4946 /* Allocate space for fixed arguments. */
4947 if (reg_parm_stack_space > crtl->outgoing_args_size)
4948 crtl->outgoing_args_size = reg_parm_stack_space;
4951 /* Fetch the return address for the frame COUNT steps up from
4952 the current frame, after the prologue. FRAMEADDR is the
4953 frame pointer of the COUNT frame.
4955 We want to ignore any export stub remnants here. To handle this,
4956 we examine the code at the return address, and if it is an export
4957 stub, we return a memory rtx for the stub return address stored
4958 at frame-24.
4960 The value returned is used in two different ways:
4962 1. To find a function's caller.
4964 2. To change the return address for a function.
4966 This function handles most instances of case 1; however, it will
4967 fail if there are two levels of stubs to execute on the return
4968 path. The only way I believe that can happen is if the return value
4969 needs a parameter relocation, which never happens for C code.
4971 This function handles most instances of case 2; however, it will
4972 fail if we did not originally have stub code on the return path
4973 but will need stub code on the new return path. This can happen if
4974 the caller & callee are both in the main program, but the new
4975 return location is in a shared library. */
4977 rtx
4978 pa_return_addr_rtx (int count, rtx frameaddr)
4980 rtx label;
4981 rtx rp;
4982 rtx saved_rp;
4983 rtx ins;
4985 /* The instruction stream at the return address of a PA1.X export stub is:
4987 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4988 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4989 0x00011820 | stub+16: mtsp r1,sr0
4990 0xe0400002 | stub+20: be,n 0(sr0,rp)
4992 0xe0400002 must be specified as -532676606 so that it won't be
4993 rejected as an invalid immediate operand on 64-bit hosts.
4995 The instruction stream at the return address of a PA2.0 export stub is:
4997 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4998 0xe840d002 | stub+12: bve,n (rp)
5001 HOST_WIDE_INT insns[4];
5002 int i, len;
5004 if (count != 0)
5005 return NULL_RTX;
5007 rp = get_hard_reg_initial_val (Pmode, 2);
5009 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
5010 return rp;
5012 /* If there is no export stub then just use the value saved from
5013 the return pointer register. */
5015 saved_rp = gen_reg_rtx (Pmode);
5016 emit_move_insn (saved_rp, rp);
5018 /* Get pointer to the instruction stream. We have to mask out the
5019 privilege level from the two low order bits of the return address
5020 pointer here so that ins will point to the start of the first
5021 instruction that would have been executed if we returned. */
5022 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
5023 label = gen_label_rtx ();
5025 if (TARGET_PA_20)
5027 insns[0] = 0x4bc23fd1;
5028 insns[1] = -398405630;
5029 len = 2;
5031 else
5033 insns[0] = 0x4bc23fd1;
5034 insns[1] = 0x004010a1;
5035 insns[2] = 0x00011820;
5036 insns[3] = -532676606;
5037 len = 4;
5040 /* Check the instruction stream at the normal return address for the
5041 export stub. If it is an export stub, then our return address is
5042 really in -24[frameaddr]. */
5044 for (i = 0; i < len; i++)
5046 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
5047 rtx op1 = GEN_INT (insns[i]);
5048 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
5051 /* Here we know that our return address points to an export
5052 stub. We don't want to return the address of the export stub,
5053 but rather the return address of the export stub. That return
5054 address is stored at -24[frameaddr]. */
5056 emit_move_insn (saved_rp,
5057 gen_rtx_MEM (Pmode,
5058 memory_address (Pmode,
5059 plus_constant (Pmode, frameaddr,
5060 -24))));
5062 emit_label (label);
5064 return saved_rp;
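/* A sketch of the sequence generated above, as C-like pseudocode:

     saved_rp = rp;
     ins = rp & MASK_RETURN_ADDR;       -- strip the privilege bits
     for each word W of the stub template
       if (ins[i] != W) goto label;     -- not an export stub
     saved_rp = *(frameaddr - 24);      -- the stub's saved return address
   label:
     return saved_rp;  */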
5067 void
5068 pa_emit_bcond_fp (rtx operands[])
5070 enum rtx_code code = GET_CODE (operands[0]);
5071 rtx operand0 = operands[1];
5072 rtx operand1 = operands[2];
5073 rtx label = operands[3];
5075 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
5076 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
5078 emit_jump_insn (gen_rtx_SET (pc_rtx,
5079 gen_rtx_IF_THEN_ELSE (VOIDmode,
5080 gen_rtx_fmt_ee (NE,
5081 VOIDmode,
5082 gen_rtx_REG (CCFPmode, 0),
5083 const0_rtx),
5084 gen_rtx_LABEL_REF (VOIDmode, label),
5085 pc_rtx)));
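/* Illustration: for operands (lt a b, label) the two insns emitted
   above are

     (set (reg:CCFP 0) (lt:CCFP a b))
     (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
                             (label_ref label) (pc)))

   i.e. an fcmp that sets CCFP and a branch on that flag.  */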
5089 /* Adjust the cost of a scheduling dependency. Return the new cost of
5090 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5092 static int
5093 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
5094 unsigned int)
5096 enum attr_type attr_type;
5098 /* Don't adjust costs for a pa8000 chip; also do not adjust any
5099 true dependencies, as they are described with bypasses now. */
5100 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
5101 return cost;
5103 if (! recog_memoized (insn))
5104 return 0;
5106 attr_type = get_attr_type (insn);
5108 switch (dep_type)
5110 case REG_DEP_ANTI:
5111 /* Anti dependency; DEP_INSN reads a register that INSN writes some
5112 cycles later. */
5114 if (attr_type == TYPE_FPLOAD)
5116 rtx pat = PATTERN (insn);
5117 rtx dep_pat = PATTERN (dep_insn);
5118 if (GET_CODE (pat) == PARALLEL)
5120 /* This happens for the fldXs,mb patterns. */
5121 pat = XVECEXP (pat, 0, 0);
5123 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5124 /* If this happens, we have to extend this to schedule
5125 optimally. Return 0 for now. */
5126 return 0;
5128 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5130 if (! recog_memoized (dep_insn))
5131 return 0;
5132 switch (get_attr_type (dep_insn))
5134 case TYPE_FPALU:
5135 case TYPE_FPMULSGL:
5136 case TYPE_FPMULDBL:
5137 case TYPE_FPDIVSGL:
5138 case TYPE_FPDIVDBL:
5139 case TYPE_FPSQRTSGL:
5140 case TYPE_FPSQRTDBL:
5141 /* A fpload can't be issued until one cycle before a
5142 preceding arithmetic operation has finished if
5143 the target of the fpload is any of the sources
5144 (or destination) of the arithmetic operation. */
5145 return insn_default_latency (dep_insn) - 1;
5147 default:
5148 return 0;
5152 else if (attr_type == TYPE_FPALU)
5154 rtx pat = PATTERN (insn);
5155 rtx dep_pat = PATTERN (dep_insn);
5156 if (GET_CODE (pat) == PARALLEL)
5158 /* This happens for the fldXs,mb patterns. */
5159 pat = XVECEXP (pat, 0, 0);
5161 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5162 /* If this happens, we have to extend this to schedule
5163 optimally. Return 0 for now. */
5164 return 0;
5166 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5168 if (! recog_memoized (dep_insn))
5169 return 0;
5170 switch (get_attr_type (dep_insn))
5172 case TYPE_FPDIVSGL:
5173 case TYPE_FPDIVDBL:
5174 case TYPE_FPSQRTSGL:
5175 case TYPE_FPSQRTDBL:
5176 /* An ALU flop can't be issued until two cycles before a
5177 preceding divide or sqrt operation has finished if
5178 the target of the ALU flop is any of the sources
5179 (or destination) of the divide or sqrt operation. */
5180 return insn_default_latency (dep_insn) - 2;
5182 default:
5183 return 0;
5188 /* For other anti dependencies, the cost is 0. */
5189 return 0;
5191 case REG_DEP_OUTPUT:
5192 /* Output dependency; DEP_INSN writes a register that INSN writes some
5193 cycles later. */
5194 if (attr_type == TYPE_FPLOAD)
5196 rtx pat = PATTERN (insn);
5197 rtx dep_pat = PATTERN (dep_insn);
5198 if (GET_CODE (pat) == PARALLEL)
5200 /* This happens for the fldXs,mb patterns. */
5201 pat = XVECEXP (pat, 0, 0);
5203 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5204 /* If this happens, we have to extend this to schedule
5205 optimally. Return 0 for now. */
5206 return 0;
5208 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5210 if (! recog_memoized (dep_insn))
5211 return 0;
5212 switch (get_attr_type (dep_insn))
5214 case TYPE_FPALU:
5215 case TYPE_FPMULSGL:
5216 case TYPE_FPMULDBL:
5217 case TYPE_FPDIVSGL:
5218 case TYPE_FPDIVDBL:
5219 case TYPE_FPSQRTSGL:
5220 case TYPE_FPSQRTDBL:
5221 /* A fpload can't be issued until one cycle before a
5222 preceding arithmetic operation has finished if
5223 the target of the fpload is the destination of the
5224 arithmetic operation.
5226 Exception: For PA7100LC, PA7200 and PA7300, the cost
5227 is 3 cycles, unless they bundle together. We also
5228 pay the penalty if the second insn is a fpload. */
5229 return insn_default_latency (dep_insn) - 1;
5231 default:
5232 return 0;
5236 else if (attr_type == TYPE_FPALU)
5238 rtx pat = PATTERN (insn);
5239 rtx dep_pat = PATTERN (dep_insn);
5240 if (GET_CODE (pat) == PARALLEL)
5242 /* This happens for the fldXs,mb patterns. */
5243 pat = XVECEXP (pat, 0, 0);
5245 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5246 /* If this happens, we have to extend this to schedule
5247 optimally. Return 0 for now. */
5248 return 0;
5250 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5252 if (! recog_memoized (dep_insn))
5253 return 0;
5254 switch (get_attr_type (dep_insn))
5256 case TYPE_FPDIVSGL:
5257 case TYPE_FPDIVDBL:
5258 case TYPE_FPSQRTSGL:
5259 case TYPE_FPSQRTDBL:
5260 /* An ALU flop can't be issued until two cycles before a
5261 preceding divide or sqrt operation has finished if
5262 the target of the ALU flop is also the target of
5263 the divide or sqrt operation. */
5264 return insn_default_latency (dep_insn) - 2;
5266 default:
5267 return 0;
5272 /* For other output dependencies, the cost is 0. */
5273 return 0;
5275 default:
5276 gcc_unreachable ();
5280 /* The 700 can only issue a single insn at a time.
5281 The 7XXX processors can issue two insns at a time.
5282 The 8000 can issue 4 insns at a time. */
5283 static int
5284 pa_issue_rate (void)
5286 switch (pa_cpu)
5288 case PROCESSOR_700: return 1;
5289 case PROCESSOR_7100: return 2;
5290 case PROCESSOR_7100LC: return 2;
5291 case PROCESSOR_7200: return 2;
5292 case PROCESSOR_7300: return 2;
5293 case PROCESSOR_8000: return 4;
5295 default:
5296 gcc_unreachable ();
5302 /* Return the length of INSN plus any adjustment it needs, given that
5303 its length has already been computed as LENGTH. Return LENGTH if no
5304 adjustment is necessary.
5306 Also compute the length of an inline block move here as it is too
5307 complicated to express as a length attribute in pa.md. */
5308 int
5309 pa_adjust_insn_length (rtx_insn *insn, int length)
5311 rtx pat = PATTERN (insn);
5313 /* If length is negative or undefined, provide initial length. */
5314 if ((unsigned int) length >= INT_MAX)
5316 if (GET_CODE (pat) == SEQUENCE)
5317 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5319 switch (get_attr_type (insn))
5321 case TYPE_MILLI:
5322 length = pa_attr_length_millicode_call (insn);
5323 break;
5324 case TYPE_CALL:
5325 length = pa_attr_length_call (insn, 0);
5326 break;
5327 case TYPE_SIBCALL:
5328 length = pa_attr_length_call (insn, 1);
5329 break;
5330 case TYPE_DYNCALL:
5331 length = pa_attr_length_indirect_call (insn);
5332 break;
5333 case TYPE_SH_FUNC_ADRS:
5334 length = pa_attr_length_millicode_call (insn) + 20;
5335 break;
5336 default:
5337 gcc_unreachable ();
5341 /* Block move pattern. */
5342 if (NONJUMP_INSN_P (insn)
5343 && GET_CODE (pat) == PARALLEL
5344 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5345 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5346 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5347 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5348 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5349 length += compute_cpymem_length (insn) - 4;
5350 /* Block clear pattern. */
5351 else if (NONJUMP_INSN_P (insn)
5352 && GET_CODE (pat) == PARALLEL
5353 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5354 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5355 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5356 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5357 length += compute_clrmem_length (insn) - 4;
5358 /* Conditional branch with an unfilled delay slot. */
5359 else if (JUMP_P (insn) && ! simplejump_p (insn))
5361 /* Adjust a short backwards conditional with an unfilled delay slot. */
5362 if (GET_CODE (pat) == SET
5363 && length == 4
5364 && JUMP_LABEL (insn) != NULL_RTX
5365 && ! forward_branch_p (insn))
5366 length += 4;
5367 else if (GET_CODE (pat) == PARALLEL
5368 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5369 && length == 4)
5370 length += 4;
5371 /* Adjust dbra insn with short backwards conditional branch with
5372 unfilled delay slot -- only for the case where the counter is in a
5373 general register. */
5374 else if (GET_CODE (pat) == PARALLEL
5375 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5376 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5377 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5378 && length == 4
5379 && ! forward_branch_p (insn))
5380 length += 4;
5382 return length;
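/* Example: a 4-byte short backwards conditional branch with an unfilled
   delay slot is reported as 8 bytes above, which presumably covers the
   nop that the '#' case of pa_print_operand emits into an empty delay
   slot.  */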
5385 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5387 static bool
5388 pa_print_operand_punct_valid_p (unsigned char code)
5390 if (code == '@'
5391 || code == '#'
5392 || code == '*'
5393 || code == '^')
5394 return true;
5396 return false;
5399 /* Print operand X (an rtx) in assembler syntax to file FILE.
5400 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5401 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5403 void
5404 pa_print_operand (FILE *file, rtx x, int code)
5406 switch (code)
5408 case '#':
5409 /* Output a 'nop' if there's nothing for the delay slot. */
5410 if (dbr_sequence_length () == 0)
5411 fputs ("\n\tnop", file);
5412 return;
5413 case '*':
5414 /* Output a nullification completer if there's nothing for the
5415 delay slot or nullification is requested. */
5416 if (dbr_sequence_length () == 0
5417 || (final_sequence
5418 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5419 fputs (",n", file);
5420 return;
5421 case 'R':
5422 /* Print out the second register name of a register pair.
5423 I.e., R (6) => 7. */
5424 fputs (reg_names[REGNO (x) + 1], file);
5425 return;
5426 case 'r':
5427 /* A register or zero. */
5428 if (x == const0_rtx
5429 || (x == CONST0_RTX (DFmode))
5430 || (x == CONST0_RTX (SFmode)))
5432 fputs ("%r0", file);
5433 return;
5435 else
5436 break;
5437 case 'f':
5438 /* A register or zero (floating point). */
5439 if (x == const0_rtx
5440 || (x == CONST0_RTX (DFmode))
5441 || (x == CONST0_RTX (SFmode)))
5443 fputs ("%fr0", file);
5444 return;
5446 else
5447 break;
5448 case 'A':
5450 rtx xoperands[2];
5452 xoperands[0] = XEXP (XEXP (x, 0), 0);
5453 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5454 pa_output_global_address (file, xoperands[1], 0);
5455 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5456 return;
5459 case 'C': /* Plain (C)ondition */
5460 case 'X':
5461 switch (GET_CODE (x))
5463 case EQ:
5464 fputs ("=", file); break;
5465 case NE:
5466 fputs ("<>", file); break;
5467 case GT:
5468 fputs (">", file); break;
5469 case GE:
5470 fputs (">=", file); break;
5471 case GEU:
5472 fputs (">>=", file); break;
5473 case GTU:
5474 fputs (">>", file); break;
5475 case LT:
5476 fputs ("<", file); break;
5477 case LE:
5478 fputs ("<=", file); break;
5479 case LEU:
5480 fputs ("<<=", file); break;
5481 case LTU:
5482 fputs ("<<", file); break;
5483 default:
5484 gcc_unreachable ();
5486 return;
5487 case 'N': /* Condition, (N)egated */
5488 switch (GET_CODE (x))
5490 case EQ:
5491 fputs ("<>", file); break;
5492 case NE:
5493 fputs ("=", file); break;
5494 case GT:
5495 fputs ("<=", file); break;
5496 case GE:
5497 fputs ("<", file); break;
5498 case GEU:
5499 fputs ("<<", file); break;
5500 case GTU:
5501 fputs ("<<=", file); break;
5502 case LT:
5503 fputs (">=", file); break;
5504 case LE:
5505 fputs (">", file); break;
5506 case LEU:
5507 fputs (">>", file); break;
5508 case LTU:
5509 fputs (">>=", file); break;
5510 default:
5511 gcc_unreachable ();
5513 return;
5514 /* For floating point comparisons. Note that the output
5515 predicates are the complement of the desired condition. The
5516 conditions for GT, GE, LT, LE and LTGT cause an invalid
5517 operation exception if the result is unordered and this
5518 exception is enabled in the floating-point status register. */
5519 case 'Y':
5520 switch (GET_CODE (x))
5522 case EQ:
5523 fputs ("!=", file); break;
5524 case NE:
5525 fputs ("=", file); break;
5526 case GT:
5527 fputs ("!>", file); break;
5528 case GE:
5529 fputs ("!>=", file); break;
5530 case LT:
5531 fputs ("!<", file); break;
5532 case LE:
5533 fputs ("!<=", file); break;
5534 case LTGT:
5535 fputs ("!<>", file); break;
5536 case UNLE:
5537 fputs ("!?<=", file); break;
5538 case UNLT:
5539 fputs ("!?<", file); break;
5540 case UNGE:
5541 fputs ("!?>=", file); break;
5542 case UNGT:
5543 fputs ("!?>", file); break;
5544 case UNEQ:
5545 fputs ("!?=", file); break;
5546 case UNORDERED:
5547 fputs ("!?", file); break;
5548 case ORDERED:
5549 fputs ("?", file); break;
5550 default:
5551 gcc_unreachable ();
5553 return;
5554 case 'S': /* Condition, operands are (S)wapped. */
5555 switch (GET_CODE (x))
5557 case EQ:
5558 fputs ("=", file); break;
5559 case NE:
5560 fputs ("<>", file); break;
5561 case GT:
5562 fputs ("<", file); break;
5563 case GE:
5564 fputs ("<=", file); break;
5565 case GEU:
5566 fputs ("<<=", file); break;
5567 case GTU:
5568 fputs ("<<", file); break;
5569 case LT:
5570 fputs (">", file); break;
5571 case LE:
5572 fputs (">=", file); break;
5573 case LEU:
5574 fputs (">>=", file); break;
5575 case LTU:
5576 fputs (">>", file); break;
5577 default:
5578 gcc_unreachable ();
5580 return;
5581 case 'B': /* Condition, (B)oth swapped and negate. */
5582 switch (GET_CODE (x))
5584 case EQ:
5585 fputs ("<>", file); break;
5586 case NE:
5587 fputs ("=", file); break;
5588 case GT:
5589 fputs (">=", file); break;
5590 case GE:
5591 fputs (">", file); break;
5592 case GEU:
5593 fputs (">>", file); break;
5594 case GTU:
5595 fputs (">>=", file); break;
5596 case LT:
5597 fputs ("<=", file); break;
5598 case LE:
5599 fputs ("<", file); break;
5600 case LEU:
5601 fputs ("<<", file); break;
5602 case LTU:
5603 fputs ("<<=", file); break;
5604 default:
5605 gcc_unreachable ();
5607 return;
5608 case 'k':
5609 gcc_assert (GET_CODE (x) == CONST_INT);
5610 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5611 return;
5612 case 'Q':
5613 gcc_assert (GET_CODE (x) == CONST_INT);
5614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5615 return;
5616 case 'L':
5617 gcc_assert (GET_CODE (x) == CONST_INT);
5618 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5619 return;
5620 case 'o':
5621 gcc_assert (GET_CODE (x) == CONST_INT
5622 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5624 return;
5625 case 'O':
5626 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5627 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5628 return;
5629 case 'p':
5630 gcc_assert (GET_CODE (x) == CONST_INT);
5631 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5632 return;
5633 case 'P':
5634 gcc_assert (GET_CODE (x) == CONST_INT);
5635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5636 return;
5637 case 'I':
5638 if (GET_CODE (x) == CONST_INT)
5639 fputs ("i", file);
5640 return;
5641 case 'M':
5642 case 'F':
5643 switch (GET_CODE (XEXP (x, 0)))
5645 case PRE_DEC:
5646 case PRE_INC:
5647 if (ASSEMBLER_DIALECT == 0)
5648 fputs ("s,mb", file);
5649 else
5650 fputs (",mb", file);
5651 break;
5652 case POST_DEC:
5653 case POST_INC:
5654 if (ASSEMBLER_DIALECT == 0)
5655 fputs ("s,ma", file);
5656 else
5657 fputs (",ma", file);
5658 break;
5659 case PLUS:
5660 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5661 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5663 if (ASSEMBLER_DIALECT == 0)
5664 fputs ("x", file);
5666 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5667 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5669 if (ASSEMBLER_DIALECT == 0)
5670 fputs ("x,s", file);
5671 else
5672 fputs (",s", file);
5674 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5675 fputs ("s", file);
5676 break;
5677 default:
5678 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5679 fputs ("s", file);
5680 break;
5682 return;
5683 case 'G':
5684 pa_output_global_address (file, x, 0);
5685 return;
5686 case 'H':
5687 pa_output_global_address (file, x, 1);
5688 return;
5689 case 0: /* Don't do anything special */
5690 break;
5691 case 'Z':
5693 unsigned op[3];
5694 compute_zdepwi_operands (INTVAL (x), op);
5695 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5696 return;
5698 case 'z':
5700 unsigned op[3];
5701 compute_zdepdi_operands (INTVAL (x), op);
5702 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5703 return;
5705 case 'c':
5706 /* We can get here from a .vtable_inherit due to our
5707 CONSTANT_ADDRESS_P rejecting perfectly good constant
5708 addresses. */
5709 break;
5710 default:
5711 gcc_unreachable ();
5713 if (GET_CODE (x) == REG)
5715 fputs (reg_names [REGNO (x)], file);
5716 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5718 fputs ("R", file);
5719 return;
5721 if (FP_REG_P (x)
5722 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5723 && (REGNO (x) & 1) == 0)
5724 fputs ("L", file);
5726 else if (GET_CODE (x) == MEM)
5728 int size = GET_MODE_SIZE (GET_MODE (x));
5729 rtx base = NULL_RTX;
5730 switch (GET_CODE (XEXP (x, 0)))
5732 case PRE_DEC:
5733 case POST_DEC:
5734 base = XEXP (XEXP (x, 0), 0);
5735 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5736 break;
5737 case PRE_INC:
5738 case POST_INC:
5739 base = XEXP (XEXP (x, 0), 0);
5740 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5741 break;
5742 case PLUS:
5743 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5744 fprintf (file, "%s(%s)",
5745 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5746 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5747 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5748 fprintf (file, "%s(%s)",
5749 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5750 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5751 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5752 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5754 /* Because the REG_POINTER flag can get lost during reload,
5755 pa_legitimate_address_p canonicalizes the order of the
5756 index and base registers in the combined move patterns. */
5757 rtx base = XEXP (XEXP (x, 0), 1);
5758 rtx index = XEXP (XEXP (x, 0), 0);
5760 fprintf (file, "%s(%s)",
5761 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5763 else
5764 output_address (GET_MODE (x), XEXP (x, 0));
5765 break;
5766 default:
5767 output_address (GET_MODE (x), XEXP (x, 0));
5768 break;
5771 else
5772 output_addr_const (file, x);
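/* Usage example: for X = (mem:SI (plus (reg %r25) (reg %r26))) the MEM
   case above prints "%r25(%r26)" -- the first register is treated as
   the index and the second as the base, per the canonicalization
   comment above.  */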
5775 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5777 void
5778 pa_output_global_address (FILE *file, rtx x, int round_constant)
5781 /* Imagine (high (const (plus ...))). */
5782 if (GET_CODE (x) == HIGH)
5783 x = XEXP (x, 0);
5785 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5786 output_addr_const (file, x);
5787 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5789 output_addr_const (file, x);
5790 fputs ("-$global$", file);
5792 else if (GET_CODE (x) == CONST)
5794 const char *sep = "";
5795 int offset = 0; /* assembler wants -$global$ at end */
5796 rtx base = NULL_RTX;
5798 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5800 case LABEL_REF:
5801 case SYMBOL_REF:
5802 base = XEXP (XEXP (x, 0), 0);
5803 output_addr_const (file, base);
5804 break;
5805 case CONST_INT:
5806 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5807 break;
5808 default:
5809 gcc_unreachable ();
5812 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5814 case LABEL_REF:
5815 case SYMBOL_REF:
5816 base = XEXP (XEXP (x, 0), 1);
5817 output_addr_const (file, base);
5818 break;
5819 case CONST_INT:
5820 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5821 break;
5822 default:
5823 gcc_unreachable ();
5826 /* How bogus. The compiler is apparently responsible for
5827 rounding the constant if it uses an LR field selector.
5829 The linker and/or assembler seem a better place since
5830 they have to do this kind of thing already.
5832 If we fail to do this, HP's optimizing linker may eliminate
5833 an addil, but not update the ldw/stw/ldo instruction that
5834 uses the result of the addil. */
5835 if (round_constant)
5836 offset = ((offset + 0x1000) & ~0x1fff);
5838 switch (GET_CODE (XEXP (x, 0)))
5840 case PLUS:
5841 if (offset < 0)
5843 offset = -offset;
5844 sep = "-";
5846 else
5847 sep = "+";
5848 break;
5850 case MINUS:
5851 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5852 sep = "-";
5853 break;
5855 default:
5856 gcc_unreachable ();
5859 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5860 fputs ("-$global$", file);
5861 if (offset)
5862 fprintf (file, "%s%d", sep, offset);
5864 else
5865 output_addr_const (file, x);
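/* A worked example of the rounding above (a sketch of the rationale,
   not authoritative): with round_constant set, an offset of 0x1234
   becomes (0x1234 + 0x1000) & ~0x1fff == 0x2000.  The offset is thus
   rounded to the nearest multiple of 0x2000, which is the rounding
   the LR field selector applies to the addil/ldo pair.  */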
5868 /* Output boilerplate text to appear at the beginning of the file.
5869 There are several possible versions. */
5870 #define aputs(x) fputs(x, asm_out_file)
5871 static inline void
5872 pa_file_start_level (void)
5874 if (TARGET_64BIT)
5875 aputs ("\t.LEVEL 2.0w\n");
5876 else if (TARGET_PA_20)
5877 aputs ("\t.LEVEL 2.0\n");
5878 else if (TARGET_PA_11)
5879 aputs ("\t.LEVEL 1.1\n");
5880 else
5881 aputs ("\t.LEVEL 1.0\n");
5884 static inline void
5885 pa_file_start_space (int sortspace)
5887 aputs ("\t.SPACE $PRIVATE$");
5888 if (sortspace)
5889 aputs (",SORT=16");
5890 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5891 if (flag_tm)
5892 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5893 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5894 "\n\t.SPACE $TEXT$");
5895 if (sortspace)
5896 aputs (",SORT=8");
5897 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5898 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5901 static inline void
5902 pa_file_start_file (int want_version)
5904 if (write_symbols != NO_DEBUG)
5906 output_file_directive (asm_out_file, main_input_filename);
5907 if (want_version)
5908 aputs ("\t.version\t\"01.01\"\n");
5912 static inline void
5913 pa_file_start_mcount (const char *aswhat)
5915 if (profile_flag)
5916 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5919 static void
5920 pa_elf_file_start (void)
5922 pa_file_start_level ();
5923 pa_file_start_mcount ("ENTRY");
5924 pa_file_start_file (0);
5927 static void
5928 pa_som_file_start (void)
5930 pa_file_start_level ();
5931 pa_file_start_space (0);
5932 aputs ("\t.IMPORT $global$,DATA\n"
5933 "\t.IMPORT $$dyncall,MILLICODE\n");
5934 pa_file_start_mcount ("CODE");
5935 pa_file_start_file (0);
5938 static void
5939 pa_linux_file_start (void)
5941 pa_file_start_file (1);
5942 pa_file_start_level ();
5943 pa_file_start_mcount ("CODE");
5946 static void
5947 pa_hpux64_gas_file_start (void)
5949 pa_file_start_level ();
5950 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5951 if (profile_flag)
5952 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5953 #endif
5954 pa_file_start_file (1);
5957 static void
5958 pa_hpux64_hpas_file_start (void)
5960 pa_file_start_level ();
5961 pa_file_start_space (1);
5962 pa_file_start_mcount ("CODE");
5963 pa_file_start_file (0);
5965 #undef aputs
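/* For reference, an illustrative (not verbatim) file prologue from
   the routines above for a 32-bit PA 1.1 SOM target, with profiling
   and debug info disabled:

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */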
5967 /* Search the deferred plabel list for SYMBOL and return its internal
5968 label. If an entry for SYMBOL is not found, a new entry is created. */
5970 rtx
5971 pa_get_deferred_plabel (rtx symbol)
5973 const char *fname = XSTR (symbol, 0);
5974 size_t i;
5976 /* See if we have already put this function on the list of deferred
5977 plabels. This list is generally small, so a linear search is not
5978 too ugly. If it proves too slow, replace it with something faster. */
5979 for (i = 0; i < n_deferred_plabels; i++)
5980 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5981 break;
5983 /* If the deferred plabel list is empty, or this entry was not found
5984 on the list, create a new entry on the list. */
5985 if (deferred_plabels == NULL || i == n_deferred_plabels)
5987 tree id;
5989 if (deferred_plabels == 0)
5990 deferred_plabels = ggc_alloc<deferred_plabel> ();
5991 else
5992 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5993 deferred_plabels,
5994 n_deferred_plabels + 1);
5996 i = n_deferred_plabels++;
5997 deferred_plabels[i].internal_label = gen_label_rtx ();
5998 deferred_plabels[i].symbol = symbol;
6000 /* Gross. We have just implicitly taken the address of this
6001 function. Mark it in the same manner as assemble_name. */
6002 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
6003 if (id)
6004 mark_referenced (id);
6007 return deferred_plabels[i].internal_label;
6010 static void
6011 output_deferred_plabels (void)
6013 size_t i;
6015 /* If we have some deferred plabels, then we need to switch into the
6016 data or readonly data section, and align it to a 4 byte boundary
6017 before outputting the deferred plabels. */
6018 if (n_deferred_plabels)
6020 switch_to_section (flag_pic ? data_section : readonly_data_section);
6021 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
6024 /* Now output the deferred plabels. */
6025 for (i = 0; i < n_deferred_plabels; i++)
6027 targetm.asm_out.internal_label (asm_out_file, "L",
6028 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
6029 assemble_integer (deferred_plabels[i].symbol,
6030 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
6034 /* Initialize optabs to point to emulation routines. */
6036 static void
6037 pa_init_libfuncs (void)
6039 if (HPUX_LONG_DOUBLE_LIBRARY)
6041 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
6042 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
6043 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
6044 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
6045 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
6046 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
6047 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
6048 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
6049 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
6051 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
6052 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
6053 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
6054 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
6055 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
6056 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
6057 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
6059 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
6060 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
6061 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
6062 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
6064 set_conv_libfunc (sfix_optab, SImode, TFmode,
6065 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
6066 : "_U_Qfcnvfxt_quad_to_sgl");
6067 set_conv_libfunc (sfix_optab, DImode, TFmode,
6068 "_U_Qfcnvfxt_quad_to_dbl");
6069 set_conv_libfunc (ufix_optab, SImode, TFmode,
6070 "_U_Qfcnvfxt_quad_to_usgl");
6071 set_conv_libfunc (ufix_optab, DImode, TFmode,
6072 "_U_Qfcnvfxt_quad_to_udbl");
6074 set_conv_libfunc (sfloat_optab, TFmode, SImode,
6075 "_U_Qfcnvxf_sgl_to_quad");
6076 set_conv_libfunc (sfloat_optab, TFmode, DImode,
6077 "_U_Qfcnvxf_dbl_to_quad");
6078 set_conv_libfunc (ufloat_optab, TFmode, SImode,
6079 "_U_Qfcnvxf_usgl_to_quad");
6080 set_conv_libfunc (ufloat_optab, TFmode, DImode,
6081 "_U_Qfcnvxf_udbl_to_quad");
6084 if (TARGET_SYNC_LIBCALLS)
6085 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
6088 /* HP's millicode routines mean something special to the assembler.
6089 Keep track of which ones we have used. */
6091 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
6092 static void import_milli (enum millicodes);
6093 static char imported[(int) end1000];
6094 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
6095 static const char import_string[] = ".IMPORT $$....,MILLICODE";
6096 #define MILLI_START 10
6098 static void
6099 import_milli (enum millicodes code)
6101 char str[sizeof (import_string)];
6103 if (!imported[(int) code])
6105 imported[(int) code] = 1;
6106 strcpy (str, import_string);
6107 memcpy (str + MILLI_START, milli_names[(int) code], 4);
6108 output_asm_insn (str, 0);
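/* Worked example: import_milli (mulI) copies "mulI" over the four
   dots at offset MILLI_START (10) in a local copy of import_string,
   so the text handed to output_asm_insn is

	.IMPORT $$mulI,MILLICODE

   and the imported[] array ensures each directive is emitted at most
   once per translation unit.  */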
6112 /* The register constraints have put the operands and return value in
6113 the proper registers. */
6115 const char *
6116 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
6118 import_milli (mulI);
6119 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
6122 /* Emit the rtl for doing a division by a constant. */
6124 /* Do magic division millicodes exist for this value? */
6125 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
6127 /* We'll use an array to keep track of the magic millicodes and
6128 whether or not we've used them already. [n][0] is signed, [n][1] is
6129 unsigned. */
6131 static int div_milli[16][2];
6133 int
6134 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
6136 if (GET_CODE (operands[2]) == CONST_INT
6137 && INTVAL (operands[2]) > 0
6138 && INTVAL (operands[2]) < 16
6139 && pa_magic_milli[INTVAL (operands[2])])
6141 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6143 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6144 emit
6145 (gen_rtx_PARALLEL
6146 (VOIDmode,
6147 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6148 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6149 SImode,
6150 gen_rtx_REG (SImode, 26),
6151 operands[2])),
6152 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6153 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6154 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6155 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6156 gen_rtx_CLOBBER (VOIDmode, ret))));
6157 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6158 return 1;
6160 return 0;
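/* Reading pa_magic_milli[] above, the divisors with dedicated
   millicode entry points are 3, 5, 6, 7, 9, 10, 12, 14 and 15; a
   division by 7, for example, becomes a call to $$divI_7 (or
   $$divU_7 when unsigned) rather than the generic $$divI routine.  */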
6163 const char *
6164 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6166 int divisor;
6168 /* If the divisor is a constant, try to use one of the special
6169 opcodes. */
6170 if (GET_CODE (operands[0]) == CONST_INT)
6172 static char buf[100];
6173 divisor = INTVAL (operands[0]);
6174 if (!div_milli[divisor][unsignedp])
6176 div_milli[divisor][unsignedp] = 1;
6177 if (unsignedp)
6178 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6179 else
6180 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6182 if (unsignedp)
6184 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6185 INTVAL (operands[0]));
6186 return pa_output_millicode_call (insn,
6187 gen_rtx_SYMBOL_REF (SImode, buf));
6189 else
6191 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6192 INTVAL (operands[0]));
6193 return pa_output_millicode_call (insn,
6194 gen_rtx_SYMBOL_REF (SImode, buf));
6197 /* Divisor isn't a special constant. */
6198 else
6200 if (unsignedp)
6202 import_milli (divU);
6203 return pa_output_millicode_call (insn,
6204 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6206 else
6208 import_milli (divI);
6209 return pa_output_millicode_call (insn,
6210 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6215 /* Output a $$rem millicode to do mod. */
6217 const char *
6218 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6220 if (unsignedp)
6222 import_milli (remU);
6223 return pa_output_millicode_call (insn,
6224 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6226 else
6228 import_milli (remI);
6229 return pa_output_millicode_call (insn,
6230 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
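/* As the routines above assume, the millicode convention passes the
   dividend in %r26 and, for the generic entry points, the divisor in
   %r25.  The quotient or remainder comes back in %r29, and the
   millicode return pointer is %r31 (%r2 in the 64-bit runtime),
   which is why pa_emit_hpdiv_const clobbers %r25, %r26 and the
   return register.  */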
6234 void
6235 pa_output_arg_descriptor (rtx_insn *call_insn)
6237 const char *arg_regs[4];
6238 machine_mode arg_mode;
6239 rtx link;
6240 int i, output_flag = 0;
6241 int regno;
6243 /* We neither need nor want argument location descriptors for the
6244 64bit runtime environment or the ELF32 environment. */
6245 if (TARGET_64BIT || TARGET_ELF32)
6246 return;
6248 for (i = 0; i < 4; i++)
6249 arg_regs[i] = 0;
6251 /* Specify explicitly that no argument relocations should take place
6252 if using the portable runtime calling conventions. */
6253 if (TARGET_PORTABLE_RUNTIME)
6255 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6256 asm_out_file);
6257 return;
6260 gcc_assert (CALL_P (call_insn));
6261 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6262 link; link = XEXP (link, 1))
6264 rtx use = XEXP (link, 0);
6266 if (! (GET_CODE (use) == USE
6267 && GET_CODE (XEXP (use, 0)) == REG
6268 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6269 continue;
6271 arg_mode = GET_MODE (XEXP (use, 0));
6272 regno = REGNO (XEXP (use, 0));
6273 if (regno >= 23 && regno <= 26)
6275 arg_regs[26 - regno] = "GR";
6276 if (arg_mode == DImode)
6277 arg_regs[25 - regno] = "GR";
6279 else if (regno >= 32 && regno <= 39)
6281 if (arg_mode == SFmode)
6282 arg_regs[(regno - 32) / 2] = "FR";
6283 else
6285 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6286 arg_regs[(regno - 34) / 2] = "FR";
6287 arg_regs[(regno - 34) / 2 + 1] = "FU";
6288 #else
6289 arg_regs[(regno - 34) / 2] = "FU";
6290 arg_regs[(regno - 34) / 2 + 1] = "FR";
6291 #endif
6295 fputs ("\t.CALL ", asm_out_file);
6296 for (i = 0; i < 4; i++)
6298 if (arg_regs[i])
6300 if (output_flag++)
6301 fputc (',', asm_out_file);
6302 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6305 fputc ('\n', asm_out_file);
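/* Illustrative output: for a call whose first two argument words are
   passed in %r26 and %r25 (two ints, say), the loop above emits

	.CALL ARGW0=GR,ARGW1=GR

   so the SOM linker knows which argument relocations are permitted
   at this call site.  */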
6308 /* Inform reload about cases where moving X with a mode MODE to or from
6309 a register in RCLASS requires an extra scratch or immediate register.
6310 Return the class needed for the immediate register. */
6312 static reg_class_t
6313 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6314 machine_mode mode, secondary_reload_info *sri)
6316 int regno;
6317 enum reg_class rclass = (enum reg_class) rclass_i;
6319 /* Handle the easy stuff first. */
6320 if (rclass == R1_REGS)
6321 return NO_REGS;
6323 if (REG_P (x))
6325 regno = REGNO (x);
6326 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6327 return NO_REGS;
6329 else
6330 regno = -1;
6332 /* If we have something like (mem (mem (...))), we can safely assume the
6333 inner MEM will end up in a general register after reloading, so there's
6334 no need for a secondary reload. */
6335 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6336 return NO_REGS;
6338 /* Trying to load a constant into a FP register during PIC code
6339 generation requires %r1 as a scratch register. For float modes,
6340 the only legitimate constant is CONST0_RTX. However, there are
6341 a few patterns that accept constant double operands. */
6342 if (flag_pic
6343 && FP_REG_CLASS_P (rclass)
6344 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6346 switch (mode)
6348 case E_SImode:
6349 sri->icode = CODE_FOR_reload_insi_r1;
6350 break;
6352 case E_DImode:
6353 sri->icode = CODE_FOR_reload_indi_r1;
6354 break;
6356 case E_SFmode:
6357 sri->icode = CODE_FOR_reload_insf_r1;
6358 break;
6360 case E_DFmode:
6361 sri->icode = CODE_FOR_reload_indf_r1;
6362 break;
6364 default:
6365 gcc_unreachable ();
6367 return NO_REGS;
6370 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6371 register when we're generating PIC code or when the operand isn't
6372 readonly. */
6373 if (pa_symbolic_expression_p (x))
6375 if (GET_CODE (x) == HIGH)
6376 x = XEXP (x, 0);
6378 if (flag_pic || !read_only_operand (x, VOIDmode))
6380 switch (mode)
6382 case E_SImode:
6383 sri->icode = CODE_FOR_reload_insi_r1;
6384 break;
6386 case E_DImode:
6387 sri->icode = CODE_FOR_reload_indi_r1;
6388 break;
6390 default:
6391 gcc_unreachable ();
6393 return NO_REGS;
6397 /* Profiling showed the PA port spends about 1.3% of its compilation
6398 time in true_regnum from calls inside pa_secondary_reload_class. */
6399 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6400 regno = true_regnum (x);
6402 /* Handle reloads for floating point loads and stores. */
6403 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6404 && FP_REG_CLASS_P (rclass))
6406 if (MEM_P (x))
6408 x = XEXP (x, 0);
6410 /* We don't need a secondary reload for indexed memory addresses.
6412 When INT14_OK_STRICT is true, it might appear that we could
6413 directly allow register indirect memory addresses. However,
6414 this doesn't work because we don't support SUBREGs in
6415 floating-point register copies and reload doesn't tell us
6416 when it's going to use a SUBREG. */
6417 if (IS_INDEX_ADDR_P (x))
6418 return NO_REGS;
6421 /* Request a secondary reload with a general scratch register
6422 for everything else. ??? Could symbolic operands be handled
6423 directly when generating non-pic PA 2.0 code? */
6424 sri->icode = (in_p
6425 ? direct_optab_handler (reload_in_optab, mode)
6426 : direct_optab_handler (reload_out_optab, mode));
6427 return NO_REGS;
6430 /* A SAR<->FP register copy requires an intermediate general register
6431 and secondary memory. We need a secondary reload with a general
6432 scratch register for spills. */
6433 if (rclass == SHIFT_REGS)
6435 /* Handle spill. */
6436 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6438 sri->icode = (in_p
6439 ? direct_optab_handler (reload_in_optab, mode)
6440 : direct_optab_handler (reload_out_optab, mode));
6441 return NO_REGS;
6444 /* Handle FP copy. */
6445 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6446 return GENERAL_REGS;
6449 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6450 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6451 && FP_REG_CLASS_P (rclass))
6452 return GENERAL_REGS;
6454 return NO_REGS;
6457 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6459 static bool
6460 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6461 reg_class_t class1 ATTRIBUTE_UNUSED,
6462 reg_class_t class2 ATTRIBUTE_UNUSED)
6464 #ifdef PA_SECONDARY_MEMORY_NEEDED
6465 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6466 #else
6467 return false;
6468 #endif
6471 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6472 is only marked as live on entry by df-scan when it is a fixed
6473 register. It isn't a fixed register in the 64-bit runtime,
6474 so we need to mark it here. */
6476 static void
6477 pa_extra_live_on_entry (bitmap regs)
6479 if (TARGET_64BIT)
6480 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6483 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6484 to prevent it from being deleted. */
6486 rtx
6487 pa_eh_return_handler_rtx (void)
6489 rtx tmp;
6491 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6492 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6493 tmp = gen_rtx_MEM (word_mode, tmp);
6494 tmp->volatil = 1;
6495 return tmp;
6498 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6499 by invisible reference. As a GCC extension, we also pass anything
6500 with a zero or variable size by reference.
6502 The 64-bit runtime does not describe passing any types by invisible
6503 reference. The internals of GCC can't currently handle passing
6504 empty structures, and zero or variable length arrays when they are
6505 not passed entirely on the stack or by reference. Thus, as a GCC
6506 extension, we pass these types by reference. The HP compiler doesn't
6507 support these types, so hopefully there shouldn't be any compatibility
6508 issues. This may have to be revisited when HP releases a C99 compiler
6509 or updates the ABI. */
6511 static bool
6512 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6514 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6515 if (TARGET_64BIT)
6516 return size <= 0;
6517 else
6518 return size <= 0 || size > 8;
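/* Worked examples of the rule above: in the 32-bit runtime, a
   16-byte struct (size > 8) and a zero-sized struct are both passed
   by reference, while the 64-bit runtime passes only the zero and
   variable size cases by reference.  A variable-sized type reports a
   negative size here, so the size <= 0 test covers it as well.  */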
6521 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6523 static pad_direction
6524 pa_function_arg_padding (machine_mode mode, const_tree type)
6526 if (mode == BLKmode
6527 || (TARGET_64BIT
6528 && type
6529 && (AGGREGATE_TYPE_P (type)
6530 || TREE_CODE (type) == COMPLEX_TYPE
6531 || VECTOR_TYPE_P (type))))
6533 /* Return PAD_NONE if justification is not required. */
6534 if (type
6535 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6536 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6537 return PAD_NONE;
6539 /* The directions set here are ignored when a BLKmode argument larger
6540 than a word is placed in a register. Different code is used for
6541 the stack and registers. This makes it difficult to have a
6542 consistent data representation for both the stack and registers.
6543 For both runtimes, the justification and padding for arguments on
6544 the stack and in registers should be identical. */
6545 if (TARGET_64BIT)
6546 /* The 64-bit runtime specifies left justification for aggregates. */
6547 return PAD_UPWARD;
6548 else
6549 /* The 32-bit runtime architecture specifies right justification.
6550 When the argument is passed on the stack, the argument is padded
6551 with garbage on the left. The HP compiler pads with zeros. */
6552 return PAD_DOWNWARD;
6555 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6556 return PAD_DOWNWARD;
6557 else
6558 return PAD_NONE;
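/* An example of the rules above: a 3-byte BLKmode aggregate in the
   32-bit runtime gets PAD_DOWNWARD, i.e. it is right justified in
   its word with the pad bytes on the left, while the 64-bit runtime
   left justifies it (PAD_UPWARD).  A 4-byte aggregate in the 32-bit
   runtime has a size that is a multiple of PARM_BOUNDARY and so
   needs no justification (PAD_NONE).  */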
6562 /* Do what is necessary for `va_start'. We look at the current function
6563 to determine if stdargs or varargs is used and fill in an initial
6564 va_list. A pointer to this constructor is returned. */
6566 static rtx
6567 hppa_builtin_saveregs (void)
6569 rtx offset, dest;
6570 tree fntype = TREE_TYPE (current_function_decl);
6571 int argadj = ((!stdarg_p (fntype))
6572 ? UNITS_PER_WORD : 0);
6574 if (argadj)
6575 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6576 else
6577 offset = crtl->args.arg_offset_rtx;
6579 if (TARGET_64BIT)
6581 int i, off;
6583 /* Adjust for varargs/stdarg differences. */
6584 if (argadj)
6585 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6586 else
6587 offset = crtl->args.arg_offset_rtx;
6589 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6590 from the incoming arg pointer and growing to larger addresses. */
6591 for (i = 26, off = -64; i >= 19; i--, off += 8)
6592 emit_move_insn (gen_rtx_MEM (word_mode,
6593 plus_constant (Pmode,
6594 arg_pointer_rtx, off)),
6595 gen_rtx_REG (word_mode, i));
6597 /* The incoming args pointer points just beyond the flushback area;
6598 normally this is not a serious concern. However, when we are doing
6599 varargs/stdargs we want to make the arg pointer point to the start
6600 of the incoming argument area. */
6601 emit_move_insn (virtual_incoming_args_rtx,
6602 plus_constant (Pmode, arg_pointer_rtx, -64));
6604 /* Now return a pointer to the first anonymous argument. */
6605 return copy_to_reg (expand_binop (Pmode, add_optab,
6606 virtual_incoming_args_rtx,
6607 offset, 0, 0, OPTAB_LIB_WIDEN));
6610 /* Store general registers on the stack. */
6611 dest = gen_rtx_MEM (BLKmode,
6612 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6613 -16));
6614 set_mem_alias_set (dest, get_varargs_alias_set ());
6615 set_mem_align (dest, BITS_PER_WORD);
6616 move_block_from_reg (23, dest, 4);
6618 /* move_block_from_reg will emit code to store the argument registers
6619 individually as scalar stores.
6621 However, other insns may later load from the same addresses for
6622 a structure load (passing a struct to a varargs routine).
6624 The alias code assumes that such aliasing can never happen, so we
6625 have to keep memory referencing insns from moving up beyond the
6626 last argument register store. So we emit a blockage insn here. */
6627 emit_insn (gen_blockage ());
6629 return copy_to_reg (expand_binop (Pmode, add_optab,
6630 crtl->args.internal_arg_pointer,
6631 offset, 0, 0, OPTAB_LIB_WIDEN));
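/* Layout produced by the 64-bit save loop above: with I running from
   26 down to 19 and OFF from -64 upward in steps of 8, the anonymous
   argument registers land at

	%r26 -> -64(ap)    %r25 -> -56(ap)    %r24 -> -48(ap)
	%r23 -> -40(ap)    %r22 -> -32(ap)    %r21 -> -24(ap)
	%r20 -> -16(ap)    %r19 ->  -8(ap)

   so the saved words appear in argument order at increasing
   addresses below the incoming argument pointer.  */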
6634 static void
6635 hppa_va_start (tree valist, rtx nextarg)
6637 nextarg = expand_builtin_saveregs ();
6638 std_expand_builtin_va_start (valist, nextarg);
6641 static tree
6642 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6643 gimple_seq *post_p)
6645 if (TARGET_64BIT)
6647 /* Args grow upward. We can use the generic routines. */
6648 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6650 else /* !TARGET_64BIT */
6652 tree ptr = build_pointer_type (type);
6653 tree valist_type;
6654 tree t, u;
6655 unsigned int size, ofs;
6656 bool indirect;
6658 indirect = pass_va_arg_by_reference (type);
6659 if (indirect)
6661 type = ptr;
6662 ptr = build_pointer_type (type);
6664 size = int_size_in_bytes (type);
6665 valist_type = TREE_TYPE (valist);
6667 /* Args grow down. Not handled by generic routines. */
6669 u = fold_convert (sizetype, size_in_bytes (type));
6670 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6671 t = fold_build_pointer_plus (valist, u);
6673 /* Align to 4 or 8 byte boundary depending on argument size. */
6675 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6676 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6677 t = fold_convert (valist_type, t);
6679 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6681 ofs = (8 - size) % 4;
6682 if (ofs != 0)
6683 t = fold_build_pointer_plus_hwi (t, ofs);
6685 t = fold_convert (ptr, t);
6686 t = build_va_arg_indirect_ref (t);
6688 if (indirect)
6689 t = build_va_arg_indirect_ref (t);
6691 return t;
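/* A worked example of the 32-bit va_arg arithmetic above for a
   1-byte argument: valist is first decremented by the type size (1),
   masked with -4 down to a 4-byte boundary, and then advanced by
   ofs = (8 - 1) % 4 = 3, so the resulting pointer addresses the
   right-justified byte within its padded slot.  An 8-byte argument
   masks with -8 instead and gets ofs = 0.  */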
6695 /* True if MODE is valid for the target. By "valid", we mean able to
6696 be manipulated in non-trivial ways. In particular, this means all
6697 the arithmetic is supported. */
6699 static bool
6700 pa_scalar_mode_supported_p (scalar_mode mode)
6702 int precision = GET_MODE_PRECISION (mode);
6704 if (TARGET_64BIT && mode == TImode)
6705 return true;
6707 switch (GET_MODE_CLASS (mode))
6709 case MODE_PARTIAL_INT:
6710 case MODE_INT:
6711 if (precision == CHAR_TYPE_SIZE)
6712 return true;
6713 if (precision == SHORT_TYPE_SIZE)
6714 return true;
6715 if (precision == INT_TYPE_SIZE)
6716 return true;
6717 if (precision == LONG_TYPE_SIZE)
6718 return true;
6719 if (precision == LONG_LONG_TYPE_SIZE)
6720 return true;
6721 return false;
6723 case MODE_FLOAT:
6724 if (precision == FLOAT_TYPE_SIZE)
6725 return true;
6726 if (precision == DOUBLE_TYPE_SIZE)
6727 return true;
6728 if (precision == LONG_DOUBLE_TYPE_SIZE)
6729 return true;
6730 return false;
6732 case MODE_DECIMAL_FLOAT:
6733 return false;
6735 default:
6736 gcc_unreachable ();
6740 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6741 it branches into the delay slot. Otherwise, return FALSE. */
6743 static bool
6744 branch_to_delay_slot_p (rtx_insn *insn)
6746 rtx_insn *jump_insn;
6748 if (dbr_sequence_length ())
6749 return FALSE;
6751 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6752 while (insn)
6754 insn = next_active_insn (insn);
6755 if (jump_insn == insn)
6756 return TRUE;
6758 /* We can't rely on the length of asms. So, we return FALSE when
6759 the branch is followed by an asm. */
6760 if (!insn
6761 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6762 || asm_noperands (PATTERN (insn)) >= 0
6763 || get_attr_length (insn) > 0)
6764 break;
6767 return FALSE;
6770 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6772 This occurs when INSN has an unfilled delay slot and is followed
6773 by an asm. Disaster can occur if the asm is empty and the jump
6774 branches into the delay slot. So, we add a nop in the delay slot
6775 when this occurs. */
6777 static bool
6778 branch_needs_nop_p (rtx_insn *insn)
6780 rtx_insn *jump_insn;
6782 if (dbr_sequence_length ())
6783 return FALSE;
6785 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6786 while (insn)
6788 insn = next_active_insn (insn);
6789 if (!insn || jump_insn == insn)
6790 return TRUE;
6792 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6793 || asm_noperands (PATTERN (insn)) >= 0)
6794 && get_attr_length (insn) > 0)
6795 break;
6798 return FALSE;
6801 /* Return TRUE if INSN, a forward jump insn, can use nullification
6802 to skip the following instruction. This avoids an extra cycle due
6803 to a mis-predicted branch when we fall through. */
6805 static bool
6806 use_skip_p (rtx_insn *insn)
6808 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6810 while (insn)
6812 insn = next_active_insn (insn);
6814 /* We can't rely on the length of asms, so we can't skip asms. */
6815 if (!insn
6816 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6817 || asm_noperands (PATTERN (insn)) >= 0)
6818 break;
6819 if (get_attr_length (insn) == 4
6820 && jump_insn == next_active_insn (insn))
6821 return TRUE;
6822 if (get_attr_length (insn) > 0)
6823 break;
6826 return FALSE;
6829 /* This routine handles all the normal conditional branch sequences we
6830 might need to generate. It handles compare immediate vs compare
6831 register, nullification of delay slots, varying length branches,
6832 negated branches, and all combinations of the above. It returns the
6833 output appropriate to emit the branch corresponding to all given
6834 parameters. */
6836 const char *
6837 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6839 static char buf[100];
6840 bool useskip;
6841 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6842 int length = get_attr_length (insn);
6843 int xdelay;
6845 /* A conditional branch to the following instruction (e.g. the delay slot)
6846 is asking for a disaster. This can happen when not optimizing and
6847 when jump optimization fails.
6849 While it is usually safe to emit nothing, this can fail if the
6850 preceding instruction is a nullified branch with an empty delay
6851 slot and the same branch target as this branch. We could check
6852 for this but jump optimization should eliminate nop jumps. It
6853 is always safe to emit a nop. */
6854 if (branch_to_delay_slot_p (insn))
6855 return "nop";
6857 /* The doubleword form of the cmpib instruction doesn't have the LEU
6858 and GTU conditions while the cmpb instruction does. Since we accept
6859 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6860 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6861 operands[2] = gen_rtx_REG (DImode, 0);
6862 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6863 operands[1] = gen_rtx_REG (DImode, 0);
6865 /* If this is a long branch with its delay slot unfilled, set `nullify'
6866 as it can nullify the delay slot and save a nop. */
6867 if (length == 8 && dbr_sequence_length () == 0)
6868 nullify = 1;
6870 /* If this is a short forward conditional branch which did not get
6871 its delay slot filled, the delay slot can still be nullified. */
6872 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6873 nullify = forward_branch_p (insn);
6875 /* A forward branch over a single nullified insn can be done with a
6876 comclr instruction. This avoids a single cycle penalty due to a
6877 mis-predicted branch if we fall through (branch not taken). */
6878 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6880 switch (length)
6882 /* All short conditional branches except backwards with an unfilled
6883 delay slot. */
6884 case 4:
6885 if (useskip)
6886 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6887 else
6888 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6889 if (GET_MODE (operands[1]) == DImode)
6890 strcat (buf, "*");
6891 if (negated)
6892 strcat (buf, "%B3");
6893 else
6894 strcat (buf, "%S3");
6895 if (useskip)
6896 strcat (buf, " %2,%r1,%%r0");
6897 else if (nullify)
6899 if (branch_needs_nop_p (insn))
6900 strcat (buf, ",n %2,%r1,%0%#");
6901 else
6902 strcat (buf, ",n %2,%r1,%0");
6904 else
6905 strcat (buf, " %2,%r1,%0");
6906 break;
6908 /* All long conditionals. Note a short backward branch with an
6909 unfilled delay slot is treated just like a long backward branch
6910 with an unfilled delay slot. */
6911 case 8:
6912 /* Handle weird backwards branch with a filled delay slot
6913 which is nullified. */
6914 if (dbr_sequence_length () != 0
6915 && ! forward_branch_p (insn)
6916 && nullify)
6918 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6919 if (GET_MODE (operands[1]) == DImode)
6920 strcat (buf, "*");
6921 if (negated)
6922 strcat (buf, "%S3");
6923 else
6924 strcat (buf, "%B3");
6925 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6927 /* Handle short backwards branch with an unfilled delay slot.
6928 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6929 taken and untaken branches. */
6930 else if (dbr_sequence_length () == 0
6931 && ! forward_branch_p (insn)
6932 && INSN_ADDRESSES_SET_P ()
6933 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6934 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6936 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6937 if (GET_MODE (operands[1]) == DImode)
6938 strcat (buf, "*");
6939 if (negated)
6940 strcat (buf, "%B3 %2,%r1,%0%#");
6941 else
6942 strcat (buf, "%S3 %2,%r1,%0%#");
6944 else
6946 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6947 if (GET_MODE (operands[1]) == DImode)
6948 strcat (buf, "*");
6949 if (negated)
6950 strcat (buf, "%S3");
6951 else
6952 strcat (buf, "%B3");
6953 if (nullify)
6954 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6955 else
6956 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6958 break;
6960 default:
6961 /* The reversed conditional branch must branch over one additional
6962 instruction if the delay slot is filled and needs to be extracted
6963 by pa_output_lbranch. If the delay slot is empty or this is a
6964 nullified forward branch, the instruction after the reversed
6965 condition branch must be nullified. */
6966 if (dbr_sequence_length () == 0
6967 || (nullify && forward_branch_p (insn)))
6969 nullify = 1;
6970 xdelay = 0;
6971 operands[4] = GEN_INT (length);
6973 else
6975 xdelay = 1;
6976 operands[4] = GEN_INT (length + 4);
6979 /* Create a reversed conditional branch which branches around
6980 the following insns. */
6981 if (GET_MODE (operands[1]) != DImode)
6983 if (nullify)
6985 if (negated)
6986 strcpy (buf,
6987 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6988 else
6989 strcpy (buf,
6990 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6992 else
6994 if (negated)
6995 strcpy (buf,
6996 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6997 else
6998 strcpy (buf,
6999 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
7002 else
7004 if (nullify)
7006 if (negated)
7007 strcpy (buf,
7008 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
7009 else
7010 strcpy (buf,
7011 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
7013 else
7015 if (negated)
7016 strcpy (buf,
7017 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
7018 else
7019 strcpy (buf,
7020 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
7024 output_asm_insn (buf, operands);
7025 return pa_output_lbranch (operands[0], insn, xdelay);
7027 return buf;
7030 /* Output a PIC pc-relative instruction sequence to load the address of
7031 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
7032 or a code label. OPERANDS[1] specifies the register to use to load
7033 the program counter. OPERANDS[3] may be used for label generation.
7034 The sequence is always three instructions in length. The program
7035 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
7036 Register %r1 is clobbered. */
7038 static void
7039 pa_output_pic_pcrel_sequence (rtx *operands)
7041 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
7042 if (TARGET_PA_20)
7044 /* We can use mfia to determine the current program counter. */
7045 if (TARGET_SOM || !TARGET_GAS)
7047 operands[3] = gen_label_rtx ();
7048 targetm.asm_out.internal_label (asm_out_file, "L",
7049 CODE_LABEL_NUMBER (operands[3]));
7050 output_asm_insn ("mfia %1", operands);
7051 output_asm_insn ("addil L'%0-%l3,%1", operands);
7052 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
7054 else
7056 output_asm_insn ("mfia %1", operands);
7057 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
7058 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
7061 else
7063 /* We need to use a branch to determine the current program counter. */
7064 output_asm_insn ("{bl|b,l} .+8,%1", operands);
7065 if (TARGET_SOM || !TARGET_GAS)
7067 operands[3] = gen_label_rtx ();
7068 output_asm_insn ("addil L'%0-%l3,%1", operands);
7069 targetm.asm_out.internal_label (asm_out_file, "L",
7070 CODE_LABEL_NUMBER (operands[3]));
7071 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
7073 else
7075 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
7076 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
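/* An illustrative instance of the sequence above for TARGET_PA_20
   with GAS, where SYM and the register choices are hypothetical
   (%r19 receives the program counter, %r20 the result):

	mfia %r19
	addil L'SYM-$PIC_pcrel$0+12,%r19
	ldo R'SYM-$PIC_pcrel$0+16(%r1),%r20

   As noted above, %r1 is clobbered by the addil.  */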
7081 /* This routine handles output of long unconditional branches that
7082 exceed the maximum range of a simple branch instruction. Since
7083 we don't have a register available for the branch, we save register
7084 %r1 in the frame marker, load the branch destination DEST into %r1,
7085 execute the branch, and restore %r1 in the delay slot of the branch.
7087 Since long branches may have an insn in the delay slot and the
7088 delay slot is used to restore %r1, we in general need to extract
7089 this insn and execute it before the branch. However, to facilitate
7090 use of this function by conditional branches, we also provide an
7091 option to not extract the delay insn so that it will be emitted
7092 after the long branch. So, if there is an insn in the delay slot,
7093 it is extracted if XDELAY is nonzero.
7095 The lengths of the various long-branch sequences are 20, 16 and 24
7096 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
7098 const char *
7099 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
7101 rtx xoperands[4];
7103 xoperands[0] = dest;
7105 /* First, free up the delay slot. */
7106 if (xdelay && dbr_sequence_length () != 0)
7108 /* We can't handle a jump in the delay slot. */
7109 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
7111 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7112 optimize, 0, NULL);
7114 /* Now delete the delay insn. */
7115 SET_INSN_DELETED (NEXT_INSN (insn));
7118 /* Output an insn to save %r1. The runtime documentation doesn't
7119 specify whether the "Clean Up" slot in the caller's frame can
7120 be clobbered by the callee. It isn't copied by HP's builtin
7121 alloca, so this suggests that it can be clobbered if necessary.
7122 The "Static Link" location is copied by HP builtin alloca, so
7123 we avoid using it. Using the cleanup slot might be a problem
7124 if we have to interoperate with languages that pass cleanup
7125 information. However, it should be possible to handle these
7126 situations with GCC's asm feature.
7128 The "Current RP" slot is reserved for the called procedure, so
7129 we try to use it when we don't have a frame of our own. It's
7130 rather unlikely that we won't have a frame when we need to emit
7131 a very long branch.
7133 Really the way to go long term is a register scavenger; go to
7134 the target of the jump and find a register which we can use
7135 as a scratch to hold the value in %r1. Then, we wouldn't have
7136 to free up the delay slot or clobber a slot that may be needed
7137 for other purposes. */
7138 if (TARGET_64BIT)
7140 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7141 /* Use the return pointer slot in the frame marker. */
7142 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7143 else
7144 /* Use the slot at -40 in the frame marker since HP builtin
7145 alloca doesn't copy it. */
7146 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7148 else
7150 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7151 /* Use the return pointer slot in the frame marker. */
7152 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7153 else
7154 /* Use the "Clean Up" slot in the frame marker. In GCC,
7155 the only other use of this location is for copying a
7156 floating point double argument from a floating-point
7157 register to two general registers. The copy is done
7158 as an "atomic" operation when outputting a call, so it
7159 won't interfere with our using the location here. */
7160 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7163 if (TARGET_PORTABLE_RUNTIME)
7165 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7166 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7167 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7169 else if (flag_pic)
7171 xoperands[1] = gen_rtx_REG (Pmode, 1);
7172 xoperands[2] = xoperands[1];
7173 pa_output_pic_pcrel_sequence (xoperands);
7174 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7176 else
7177 /* Now output a very long branch to the original target. */
7178 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7180 /* Now restore the value of %r1 in the delay slot. */
7181 if (TARGET_64BIT)
7183 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7184 return "ldd -16(%%r30),%%r1";
7185 else
7186 return "ldd -40(%%r30),%%r1";
7188 else
7190 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7191 return "ldw -20(%%r30),%%r1";
7192 else
7193 return "ldw -12(%%r30),%%r1";
7197 /* This routine handles all the branch-on-bit conditional branch sequences we
7198 might need to generate. It handles nullification of delay slots,
7199 varying length branches, negated branches and all combinations of the
7200 above. It returns the appropriate output template to emit the branch. */
7202 const char *
7203 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7205 static char buf[100];
7206 bool useskip;
7207 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7208 int length = get_attr_length (insn);
7209 int xdelay;
7211 /* A conditional branch to the following instruction (e.g. the delay slot) is
7212 asking for a disaster. I do not think this can happen as this pattern
7213 is only used when optimizing; jump optimization should eliminate the
7214 jump. But be prepared just in case. */
7216 if (branch_to_delay_slot_p (insn))
7217 return "nop";
7219 /* If this is a long branch with its delay slot unfilled, set `nullify'
7220 as it can nullify the delay slot and save a nop. */
7221 if (length == 8 && dbr_sequence_length () == 0)
7222 nullify = 1;
7224 /* If this is a short forward conditional branch which did not get
7225 its delay slot filled, the delay slot can still be nullified. */
7226 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7227 nullify = forward_branch_p (insn);
7229 /* A forward branch over a single nullified insn can be done with an
7230 extrs instruction. This avoids a single cycle penalty due to a
7231 mis-predicted branch if we fall through (branch not taken). */
7232 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7234 switch (length)
7237 /* All short conditional branches except backwards with an unfilled
7238 delay slot. */
7239 case 4:
7240 if (useskip)
7241 strcpy (buf, "{extrs,|extrw,s,}");
7242 else
7243 strcpy (buf, "bb,");
7244 if (useskip && GET_MODE (operands[0]) == DImode)
7245 strcpy (buf, "extrd,s,*");
7246 else if (GET_MODE (operands[0]) == DImode)
7247 strcpy (buf, "bb,*");
7248 if ((which == 0 && negated)
7249 || (which == 1 && ! negated))
7250 strcat (buf, ">=");
7251 else
7252 strcat (buf, "<");
7253 if (useskip)
7254 strcat (buf, " %0,%1,1,%%r0");
7255 else if (nullify && negated)
7257 if (branch_needs_nop_p (insn))
7258 strcat (buf, ",n %0,%1,%3%#");
7259 else
7260 strcat (buf, ",n %0,%1,%3");
7262 else if (nullify && ! negated)
7264 if (branch_needs_nop_p (insn))
7265 strcat (buf, ",n %0,%1,%2%#");
7266 else
7267 strcat (buf, ",n %0,%1,%2");
7269 else if (! nullify && negated)
7270 strcat (buf, " %0,%1,%3");
7271 else if (! nullify && ! negated)
7272 strcat (buf, " %0,%1,%2");
7273 break;
7275 /* All long conditionals. Note a short backward branch with an
7276 unfilled delay slot is treated just like a long backward branch
7277 with an unfilled delay slot. */
7278 case 8:
7279 /* Handle weird backwards branch with a filled delay slot
7280 which is nullified. */
7281 if (dbr_sequence_length () != 0
7282 && ! forward_branch_p (insn)
7283 && nullify)
7285 strcpy (buf, "bb,");
7286 if (GET_MODE (operands[0]) == DImode)
7287 strcat (buf, "*");
7288 if ((which == 0 && negated)
7289 || (which == 1 && ! negated))
7290 strcat (buf, "<");
7291 else
7292 strcat (buf, ">=");
7293 if (negated)
7294 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7295 else
7296 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7298 /* Handle short backwards branch with an unfilled delay slot.
7299 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7300 taken and untaken branches. */
7301 else if (dbr_sequence_length () == 0
7302 && ! forward_branch_p (insn)
7303 && INSN_ADDRESSES_SET_P ()
7304 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7305 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7307 strcpy (buf, "bb,");
7308 if (GET_MODE (operands[0]) == DImode)
7309 strcat (buf, "*");
7310 if ((which == 0 && negated)
7311 || (which == 1 && ! negated))
7312 strcat (buf, ">=");
7313 else
7314 strcat (buf, "<");
7315 if (negated)
7316 strcat (buf, " %0,%1,%3%#");
7317 else
7318 strcat (buf, " %0,%1,%2%#");
7320 else
7322 if (GET_MODE (operands[0]) == DImode)
7323 strcpy (buf, "extrd,s,*");
7324 else
7325 strcpy (buf, "{extrs,|extrw,s,}");
7326 if ((which == 0 && negated)
7327 || (which == 1 && ! negated))
7328 strcat (buf, "<");
7329 else
7330 strcat (buf, ">=");
7331 if (nullify && negated)
7332 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7333 else if (nullify && ! negated)
7334 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7335 else if (negated)
7336 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7337 else
7338 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7340 break;
7342 default:
7343 /* The reversed conditional branch must branch over one additional
7344 instruction if the delay slot is filled and needs to be extracted
7345 by pa_output_lbranch. If the delay slot is empty or this is a
7346 nullified forward branch, the instruction after the reversed
7347 condition branch must be nullified. */
7348 if (dbr_sequence_length () == 0
7349 || (nullify && forward_branch_p (insn)))
7351 nullify = 1;
7352 xdelay = 0;
7353 operands[4] = GEN_INT (length);
7355 else
7357 xdelay = 1;
7358 operands[4] = GEN_INT (length + 4);
7361 if (GET_MODE (operands[0]) == DImode)
7362 strcpy (buf, "bb,*");
7363 else
7364 strcpy (buf, "bb,");
7365 if ((which == 0 && negated)
7366 || (which == 1 && !negated))
7367 strcat (buf, "<");
7368 else
7369 strcat (buf, ">=");
7370 if (nullify)
7371 strcat (buf, ",n %0,%1,.+%4");
7372 else
7373 strcat (buf, " %0,%1,.+%4");
7374 output_asm_insn (buf, operands);
7375 return pa_output_lbranch (negated ? operands[3] : operands[2],
7376 insn, xdelay);
7378 return buf;
7381 /* This routine handles all the branch-on-variable-bit conditional branch
7382 sequences we might need to generate. It handles nullification of delay
7383 slots, varying length branches, negated branches and all combinations
7384 of the above. It returns the appropriate output template to emit the
7385 branch. */
7387 const char *
7388 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7389 int which)
7391 static char buf[100];
7392 bool useskip;
7393 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7394 int length = get_attr_length (insn);
7395 int xdelay;
7397 /* A conditional branch to the following instruction (e.g. the delay slot) is
7398 asking for a disaster. I do not think this can happen as this pattern
7399 is only used when optimizing; jump optimization should eliminate the
7400 jump. But be prepared just in case. */
7402 if (branch_to_delay_slot_p (insn))
7403 return "nop";
7405 /* If this is a long branch with its delay slot unfilled, set `nullify'
7406 as it can nullify the delay slot and save a nop. */
7407 if (length == 8 && dbr_sequence_length () == 0)
7408 nullify = 1;
7410 /* If this is a short forward conditional branch which did not get
7411 its delay slot filled, the delay slot can still be nullified. */
7412 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7413 nullify = forward_branch_p (insn);
7415 /* A forward branch over a single nullified insn can be done with an
7416 extrs instruction. This avoids a single cycle penalty due to a
7417 mis-predicted branch if we fall through (branch not taken). */
7418 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7420 switch (length)
7423 /* All short conditional branches except backwards with an unfilled
7424 delay slot. */
7425 case 4:
7426 if (useskip)
7427 strcpy (buf, "{vextrs,|extrw,s,}");
7428 else
7429 strcpy (buf, "{bvb,|bb,}");
7430 if (useskip && GET_MODE (operands[0]) == DImode)
7431 strcpy (buf, "extrd,s,*");
7432 else if (GET_MODE (operands[0]) == DImode)
7433 strcpy (buf, "bb,*");
7434 if ((which == 0 && negated)
7435 || (which == 1 && ! negated))
7436 strcat (buf, ">=");
7437 else
7438 strcat (buf, "<");
7439 if (useskip)
7440 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7441 else if (nullify && negated)
7443 if (branch_needs_nop_p (insn))
7444 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7445 else
7446 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7448 else if (nullify && ! negated)
7450 if (branch_needs_nop_p (insn))
7451 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7452 else
7453 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7455 else if (! nullify && negated)
7456 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7457 else if (! nullify && ! negated)
7458 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7459 break;
7461 /* All long conditionals. Note a short backward branch with an
7462 unfilled delay slot is treated just like a long backward branch
7463 with an unfilled delay slot. */
7464 case 8:
7465 /* Handle weird backwards branch with a filled delay slot
7466 which is nullified. */
7467 if (dbr_sequence_length () != 0
7468 && ! forward_branch_p (insn)
7469 && nullify)
7471 strcpy (buf, "{bvb,|bb,}");
7472 if (GET_MODE (operands[0]) == DImode)
7473 strcat (buf, "*");
7474 if ((which == 0 && negated)
7475 || (which == 1 && ! negated))
7476 strcat (buf, "<");
7477 else
7478 strcat (buf, ">=");
7479 if (negated)
7480 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7481 else
7482 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7484 /* Handle short backwards branch with an unfilled delay slot.
7485 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7486 taken and untaken branches. */
7487 else if (dbr_sequence_length () == 0
7488 && ! forward_branch_p (insn)
7489 && INSN_ADDRESSES_SET_P ()
7490 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7491 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7493 strcpy (buf, "{bvb,|bb,}");
7494 if (GET_MODE (operands[0]) == DImode)
7495 strcat (buf, "*");
7496 if ((which == 0 && negated)
7497 || (which == 1 && ! negated))
7498 strcat (buf, ">=");
7499 else
7500 strcat (buf, "<");
7501 if (negated)
7502 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7503 else
7504 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7506 else
7508 strcpy (buf, "{vextrs,|extrw,s,}");
7509 if (GET_MODE (operands[0]) == DImode)
7510 strcpy (buf, "extrd,s,*");
7511 if ((which == 0 && negated)
7512 || (which == 1 && ! negated))
7513 strcat (buf, "<");
7514 else
7515 strcat (buf, ">=");
7516 if (nullify && negated)
7517 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7518 else if (nullify && ! negated)
7519 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7520 else if (negated)
7521 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7522 else
7523 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7525 break;
7527 default:
7528 /* The reversed conditional branch must branch over one additional
7529 instruction if the delay slot is filled and needs to be extracted
7530 by pa_output_lbranch. If the delay slot is empty or this is a
7531 nullified forward branch, the instruction after the reversed
7532 condition branch must be nullified. */
7533 if (dbr_sequence_length () == 0
7534 || (nullify && forward_branch_p (insn)))
7536 nullify = 1;
7537 xdelay = 0;
7538 operands[4] = GEN_INT (length);
7540 else
7542 xdelay = 1;
7543 operands[4] = GEN_INT (length + 4);
7546 if (GET_MODE (operands[0]) == DImode)
7547 strcpy (buf, "bb,*");
7548 else
7549 strcpy (buf, "{bvb,|bb,}");
7550 if ((which == 0 && negated)
7551 || (which == 1 && !negated))
7552 strcat (buf, "<");
7553 else
7554 strcat (buf, ">=");
7555 if (nullify)
7556 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7557 else
7558 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7559 output_asm_insn (buf, operands);
7560 return pa_output_lbranch (negated ? operands[3] : operands[2],
7561 insn, xdelay);
7563 return buf;
7566 /* Return the output template for emitting a dbra type insn.
7568 Note it may perform some output operations on its own before
7569 returning the final output string. */
7570 const char *
7571 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7573 int length = get_attr_length (insn);
7575 /* A conditional branch to the following instruction (e.g. the delay slot) is
7576 asking for a disaster. Be prepared! */
7578 if (branch_to_delay_slot_p (insn))
7580 if (which_alternative == 0)
7581 return "ldo %1(%0),%0";
7582 else if (which_alternative == 1)
7584 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7585 output_asm_insn ("ldw -16(%%r30),%4", operands);
7586 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7587 return "{fldws|fldw} -16(%%r30),%0";
7589 else
7591 output_asm_insn ("ldw %0,%4", operands);
7592 return "ldo %1(%4),%4\n\tstw %4,%0";
7596 if (which_alternative == 0)
7598 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7599 int xdelay;
7601 /* If this is a long branch with its delay slot unfilled, set `nullify'
7602 as it can nullify the delay slot and save a nop. */
7603 if (length == 8 && dbr_sequence_length () == 0)
7604 nullify = 1;
7606 /* If this is a short forward conditional branch which did not get
7607 its delay slot filled, the delay slot can still be nullified. */
7608 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7609 nullify = forward_branch_p (insn);
7611 switch (length)
7613 case 4:
7614 if (nullify)
7616 if (branch_needs_nop_p (insn))
7617 return "addib,%C2,n %1,%0,%3%#";
7618 else
7619 return "addib,%C2,n %1,%0,%3";
7621 else
7622 return "addib,%C2 %1,%0,%3";
7624 case 8:
7625 Handle weird backwards branch with a filled delay slot
7626 which is nullified. */
7627 if (dbr_sequence_length () != 0
7628 && ! forward_branch_p (insn)
7629 && nullify)
7630 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7631 /* Handle short backwards branch with an unfilled delay slot.
7632 Using an addb;nop rather than addi;bl saves 1 cycle for both
7633 taken and untaken branches. */
7634 else if (dbr_sequence_length () == 0
7635 && ! forward_branch_p (insn)
7636 && INSN_ADDRESSES_SET_P ()
7637 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7638 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7639 return "addib,%C2 %1,%0,%3%#";
7641 /* Handle normal cases. */
7642 if (nullify)
7643 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7644 else
7645 return "addi,%N2 %1,%0,%0\n\tb %3";
7647 default:
7648 /* The reversed conditional branch must branch over one additional
7649 instruction if the delay slot is filled and needs to be extracted
7650 by pa_output_lbranch. If the delay slot is empty or this is a
7651 nullified forward branch, the instruction after the reversed
7652 condition branch must be nullified. */
7653 if (dbr_sequence_length () == 0
7654 || (nullify && forward_branch_p (insn)))
7656 nullify = 1;
7657 xdelay = 0;
7658 operands[4] = GEN_INT (length);
7660 else
7662 xdelay = 1;
7663 operands[4] = GEN_INT (length + 4);
7666 if (nullify)
7667 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7668 else
7669 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7671 return pa_output_lbranch (operands[3], insn, xdelay);
7675 /* Deal with gross reload from FP register case. */
7676 else if (which_alternative == 1)
7678 /* Move loop counter from FP register to MEM then into a GR,
7679 increment the GR, store the GR into MEM, and finally reload
7680 the FP register from MEM from within the branch's delay slot. */
7681 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7682 operands);
7683 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7684 if (length == 24)
7685 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7686 else if (length == 28)
7687 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7688 else
7690 operands[5] = GEN_INT (length - 16);
7691 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7692 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7693 return pa_output_lbranch (operands[3], insn, 0);
7696 /* Deal with gross reload from memory case. */
7697 else
7699 /* Reload loop counter from memory, the store back to memory
7700 happens in the branch's delay slot. */
7701 output_asm_insn ("ldw %0,%4", operands);
7702 if (length == 12)
7703 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7704 else if (length == 16)
7705 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7706 else
7708 operands[5] = GEN_INT (length - 4);
7709 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7710 return pa_output_lbranch (operands[3], insn, 0);
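/* An illustrative expansion of the register alternative above (the
   operand values are an assumption, not taken from real output): with
   the loop counter in %r4, a step of -1, a "<>" condition in
   operands[2] and a target label L$3, the length-4 case emits

       addib,<> -1,%r4,L$3

   one instruction that both decrements the counter and branches.  The
   longer cases split this into an addi plus a branch, or defer to
   pa_output_lbranch.  */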
7715 /* Return the output template for emitting a movb type insn.
7717 Note it may perform some output operations on its own before
7718 returning the final output string. */
7719 const char *
7720 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7721 int reverse_comparison)
7723 int length = get_attr_length (insn);
7725 /* A conditional branch to the following instruction (i.e., the delay slot) is
7726 asking for a disaster. Be prepared! */
7728 if (branch_to_delay_slot_p (insn))
7730 if (which_alternative == 0)
7731 return "copy %1,%0";
7732 else if (which_alternative == 1)
7734 output_asm_insn ("stw %1,-16(%%r30)", operands);
7735 return "{fldws|fldw} -16(%%r30),%0";
7737 else if (which_alternative == 2)
7738 return "stw %1,%0";
7739 else
7740 return "mtsar %r1";
7743 /* Support the second variant. */
7744 if (reverse_comparison)
7745 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7747 if (which_alternative == 0)
7749 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7750 int xdelay;
7752 /* If this is a long branch with its delay slot unfilled, set `nullify'
7753 as it can nullify the delay slot and save a nop. */
7754 if (length == 8 && dbr_sequence_length () == 0)
7755 nullify = 1;
7757 /* If this is a short forward conditional branch which did not get
7758 its delay slot filled, the delay slot can still be nullified. */
7759 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7760 nullify = forward_branch_p (insn);
7762 switch (length)
7764 case 4:
7765 if (nullify)
7767 if (branch_needs_nop_p (insn))
7768 return "movb,%C2,n %1,%0,%3%#";
7769 else
7770 return "movb,%C2,n %1,%0,%3";
7772 else
7773 return "movb,%C2 %1,%0,%3";
7775 case 8:
7776 /* Handle weird backwards branch with a filled delay slot
7777 which is nullified. */
7778 if (dbr_sequence_length () != 0
7779 && ! forward_branch_p (insn)
7780 && nullify)
7781 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7783 /* Handle short backwards branch with an unfilled delay slot.
7784 Using a movb;nop rather than or;bl saves 1 cycle for both
7785 taken and untaken branches. */
7786 else if (dbr_sequence_length () == 0
7787 && ! forward_branch_p (insn)
7788 && INSN_ADDRESSES_SET_P ()
7789 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7790 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7791 return "movb,%C2 %1,%0,%3%#";
7792 /* Handle normal cases. */
7793 if (nullify)
7794 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7795 else
7796 return "or,%N2 %1,%%r0,%0\n\tb %3";
7798 default:
7799 /* The reversed conditional branch must branch over one additional
7800 instruction if the delay slot is filled and needs to be extracted
7801 by pa_output_lbranch. If the delay slot is empty or this is a
7802 nullified forward branch, the instruction after the reversed
7803 conditional branch must be nullified. */
7804 if (dbr_sequence_length () == 0
7805 || (nullify && forward_branch_p (insn)))
7807 nullify = 1;
7808 xdelay = 0;
7809 operands[4] = GEN_INT (length);
7811 else
7813 xdelay = 1;
7814 operands[4] = GEN_INT (length + 4);
7817 if (nullify)
7818 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7819 else
7820 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7822 return pa_output_lbranch (operands[3], insn, xdelay);
7825 /* Deal with gross reload for FP destination register case. */
7826 else if (which_alternative == 1)
7828 /* Move source register to MEM, perform the branch test, then
7829 finally load the FP register from MEM from within the branch's
7830 delay slot. */
7831 output_asm_insn ("stw %1,-16(%%r30)", operands);
7832 if (length == 12)
7833 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7834 else if (length == 16)
7835 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7836 else
7838 operands[4] = GEN_INT (length - 4);
7839 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7840 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7841 return pa_output_lbranch (operands[3], insn, 0);
7844 /* Deal with gross reload from memory case. */
7845 else if (which_alternative == 2)
7847 /* Reload loop counter from memory, the store back to memory
7848 happens in the branch's delay slot. */
7849 if (length == 8)
7850 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7851 else if (length == 12)
7852 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7853 else
7855 operands[4] = GEN_INT (length);
7856 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7857 operands);
7858 return pa_output_lbranch (operands[3], insn, 0);
7861 /* Handle SAR as a destination. */
7862 else
7864 if (length == 8)
7865 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7866 else if (length == 12)
7867 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7868 else
7870 operands[4] = GEN_INT (length);
7871 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7872 operands);
7873 return pa_output_lbranch (operands[3], insn, 0);
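/* Note on the SAR alternative above: the %r modifier prints a register
   operand, falling back to %r0 when the operand is a constant zero, so
   "mtsar %r1" copies either a register or zero into the shift-amount
   register.  */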
7878 /* Copy any FP arguments in INSN into integer registers. */
7879 static void
7880 copy_fp_args (rtx_insn *insn)
7882 rtx link;
7883 rtx xoperands[2];
7885 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7887 int arg_mode, regno;
7888 rtx use = XEXP (link, 0);
7890 if (! (GET_CODE (use) == USE
7891 && GET_CODE (XEXP (use, 0)) == REG
7892 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7893 continue;
7895 arg_mode = GET_MODE (XEXP (use, 0));
7896 regno = REGNO (XEXP (use, 0));
7898 /* Is it a floating point register? */
7899 if (regno >= 32 && regno <= 39)
7901 /* Copy the FP register into an integer register via memory. */
7902 if (arg_mode == SFmode)
7904 xoperands[0] = XEXP (use, 0);
7905 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7906 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7907 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7909 else
7911 xoperands[0] = XEXP (use, 0);
7912 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7913 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7914 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7915 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
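/* For example (assuming an SFmode argument assigned to FP register 32):
   the sequence above stores it to the scratch slot at -16(%sr0,%r30)
   with fstw and reloads it into %r26 with ldw.  A DFmode argument takes
   an fstd plus two ldw's to fill both words of the GR pair.  */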
7921 /* Compute length of the FP argument copy sequence for INSN. */
7922 static int
7923 length_fp_args (rtx_insn *insn)
7925 int length = 0;
7926 rtx link;
7928 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7930 int arg_mode, regno;
7931 rtx use = XEXP (link, 0);
7933 if (! (GET_CODE (use) == USE
7934 && GET_CODE (XEXP (use, 0)) == REG
7935 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7936 continue;
7938 arg_mode = GET_MODE (XEXP (use, 0));
7939 regno = REGNO (XEXP (use, 0));
7941 /* Is it a floating point register? */
7942 if (regno >= 32 && regno <= 39)
7944 if (arg_mode == SFmode)
7945 length += 8;
7946 else
7947 length += 12;
7951 return length;
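/* These byte counts mirror copy_fp_args: 8 bytes for the two-insn
   SFmode copy (fstw + ldw) and 12 bytes for the three-insn DFmode copy
   (fstd + two ldw's).  */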
7954 /* Return the attribute length for the millicode call instruction INSN.
7955 The length must match the code generated by pa_output_millicode_call.
7956 We include the delay slot in the returned length as it is better to
7957 overestimate the length than to underestimate it. */
7959 int
7960 pa_attr_length_millicode_call (rtx_insn *insn)
7962 unsigned long distance = -1;
7963 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7965 if (INSN_ADDRESSES_SET_P ())
7967 distance = (total + insn_current_reference_address (insn));
7968 if (distance < total)
7969 distance = -1;
7972 if (TARGET_64BIT)
7974 if (!TARGET_LONG_CALLS && distance < 7600000)
7975 return 8;
7977 return 20;
7979 else if (TARGET_PORTABLE_RUNTIME)
7980 return 24;
7981 else
7983 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7984 return 8;
7986 if (!flag_pic)
7987 return 12;
7989 return 24;
7993 /* INSN is a function call.
7995 CALL_DEST is the routine we are calling. */
7997 const char *
7998 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
8000 int attr_length = get_attr_length (insn);
8001 int seq_length = dbr_sequence_length ();
8002 rtx xoperands[4];
8004 xoperands[0] = call_dest;
8006 /* Handle the common case where we are sure that the branch will
8007 reach the beginning of the $CODE$ subspace. The within reach
8008 form of the $$sh_func_adrs call has a length of 28. Because it
8009 has an attribute type of sh_func_adrs, it never has a nonzero
8010 sequence length (i.e., the delay slot is never filled). */
8011 if (!TARGET_LONG_CALLS
8012 && (attr_length == 8
8013 || (attr_length == 28
8014 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
8016 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
8017 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8019 else
8021 if (TARGET_64BIT)
8023 /* It might seem that one insn could be saved by accessing
8024 the millicode function using the linkage table. However,
8025 this doesn't work in shared libraries and other dynamically
8026 loaded objects. Using a pc-relative sequence also avoids
8027 problems related to the implicit use of the gp register. */
8028 xoperands[1] = gen_rtx_REG (Pmode, 1);
8029 xoperands[2] = xoperands[1];
8030 pa_output_pic_pcrel_sequence (xoperands);
8031 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8033 else if (TARGET_PORTABLE_RUNTIME)
8035 /* Pure portable runtime doesn't allow be/ble; we also don't
8036 have PIC support in the assembler/linker, so this sequence
8037 is needed. */
8039 /* Get the address of our target into %r1. */
8040 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8041 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
8043 /* Get our return address into %r31. */
8044 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
8045 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
8047 /* Jump to our target address in %r1. */
8048 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8050 else if (!flag_pic)
8052 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8053 if (TARGET_PA_20)
8054 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
8055 else
8056 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8058 else
8060 xoperands[1] = gen_rtx_REG (Pmode, 31);
8061 xoperands[2] = gen_rtx_REG (Pmode, 1);
8062 pa_output_pic_pcrel_sequence (xoperands);
8064 /* Adjust return address. */
8065 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
8067 /* Jump to our target address in %r1. */
8068 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8072 if (seq_length == 0)
8073 output_asm_insn ("nop", xoperands);
8075 return "";
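/* A hypothetical instance of the pre-PA2.0 non-PIC long form above,
   calling the $$mulI millicode routine:

       ldil L'$$mulI,%r1
       ble R'$$mulI(%sr4,%r1)

   ble leaves the return point in %r31; a trailing nop is emitted when
   the delay slot was not filled.  */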
8078 /* Return the attribute length of the call instruction INSN. The SIBCALL
8079 flag indicates whether INSN is a regular call or a sibling call. The
8080 length returned must be longer than the code actually generated by
8081 pa_output_call. Since branch shortening is done before delay branch
8082 sequencing, there is no way to determine whether or not the delay
8083 slot will be filled during branch shortening. Even when the delay
8084 slot is filled, we may have to add a nop if the delay slot contains
8085 a branch that can't reach its target. Thus, we always have to include
8086 the delay slot in the length estimate. This used to be done in
8087 pa_adjust_insn_length but we do it here now as some sequences always
8088 fill the delay slot and we can save four bytes in the estimate for
8089 these sequences. */
8091 int
8092 pa_attr_length_call (rtx_insn *insn, int sibcall)
8094 int local_call;
8095 rtx call, call_dest;
8096 tree call_decl;
8097 int length = 0;
8098 rtx pat = PATTERN (insn);
8099 unsigned long distance = -1;
8101 gcc_assert (CALL_P (insn));
8103 if (INSN_ADDRESSES_SET_P ())
8105 unsigned long total;
8107 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8108 distance = (total + insn_current_reference_address (insn));
8109 if (distance < total)
8110 distance = -1;
8113 gcc_assert (GET_CODE (pat) == PARALLEL);
8115 /* Get the call rtx. */
8116 call = XVECEXP (pat, 0, 0);
8117 if (GET_CODE (call) == SET)
8118 call = SET_SRC (call);
8120 gcc_assert (GET_CODE (call) == CALL);
8122 /* Determine if this is a local call. */
8123 call_dest = XEXP (XEXP (call, 0), 0);
8124 call_decl = SYMBOL_REF_DECL (call_dest);
8125 local_call = call_decl && targetm.binds_local_p (call_decl);
8127 /* pc-relative branch. */
8128 if (!TARGET_LONG_CALLS
8129 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
8130 || distance < MAX_PCREL17F_OFFSET))
8131 length += 8;
8133 /* 64-bit plabel sequence. */
8134 else if (TARGET_64BIT && !local_call)
8135 length += 24;
8137 /* non-pic long absolute branch sequence. */
8138 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8139 length += 12;
8141 /* long pc-relative branch sequence. */
8142 else if (TARGET_LONG_PIC_SDIFF_CALL
8143 || (TARGET_GAS && !TARGET_SOM && local_call))
8145 length += 20;
8147 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8148 length += 8;
8151 /* 32-bit plabel sequence. */
8152 else
8154 length += 32;
8156 if (TARGET_SOM)
8157 length += length_fp_args (insn);
8159 if (flag_pic)
8160 length += 4;
8162 if (!TARGET_PA_20)
8164 if (!sibcall)
8165 length += 8;
8167 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8168 length += 8;
8172 return length;
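/* For instance, the common pc-relative case is a single bl plus its
   delay slot, hence the 8-byte estimate; each longer sequence adds the
   extra instructions emitted by pa_output_call below.  */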
8175 /* INSN is a function call.
8177 CALL_DEST is the routine we are calling. */
8179 const char *
8180 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8182 int seq_length = dbr_sequence_length ();
8183 tree call_decl = SYMBOL_REF_DECL (call_dest);
8184 int local_call = call_decl && targetm.binds_local_p (call_decl);
8185 rtx xoperands[4];
8187 xoperands[0] = call_dest;
8189 /* Handle the common case where we're sure that the branch will reach
8190 the beginning of the "$CODE$" subspace. This is the beginning of
8191 the current function if we are in a named section. */
8192 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8194 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8195 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8197 else
8199 if (TARGET_64BIT && !local_call)
8201 /* ??? As far as I can tell, the HP linker doesn't support the
8202 long pc-relative sequence described in the 64-bit runtime
8203 architecture. So, we use a slightly longer indirect call. */
8204 xoperands[0] = pa_get_deferred_plabel (call_dest);
8205 xoperands[1] = gen_label_rtx ();
8207 /* Put the load of %r27 into the delay slot. We don't need to
8208 do anything when generating fast indirect calls. */
8209 if (seq_length != 0)
8211 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8212 optimize, 0, NULL);
8214 /* Now delete the delay insn. */
8215 SET_INSN_DELETED (NEXT_INSN (insn));
8218 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8219 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8220 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8221 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8222 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8223 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8224 seq_length = 1;
8226 else
8228 int indirect_call = 0;
8230 /* Emit a long call. There are several different sequences
8231 of increasing length and complexity. In most cases,
8232 they don't allow an instruction in the delay slot. */
8233 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8234 && !TARGET_LONG_PIC_SDIFF_CALL
8235 && !(TARGET_GAS && !TARGET_SOM && local_call)
8236 && !TARGET_64BIT)
8237 indirect_call = 1;
8239 if (seq_length != 0
8240 && !sibcall
8241 && (!TARGET_PA_20
8242 || indirect_call
8243 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8245 /* A non-jump insn in the delay slot. By definition we can
8246 emit this insn before the call (and in fact before argument
8247 relocating). */
8248 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8249 NULL);
8251 /* Now delete the delay insn. */
8252 SET_INSN_DELETED (NEXT_INSN (insn));
8253 seq_length = 0;
8256 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8258 /* This is the best sequence for making long calls in
8259 non-pic code. Unfortunately, GNU ld doesn't provide
8260 the stub needed for external calls, and GAS's support
8261 for this with the SOM linker is buggy. It is safe
8262 to use this for local calls. */
8263 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8264 if (sibcall)
8265 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8266 else
8268 if (TARGET_PA_20)
8269 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8270 xoperands);
8271 else
8272 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8274 output_asm_insn ("copy %%r31,%%r2", xoperands);
8275 seq_length = 1;
8278 else
8280 /* The HP assembler and linker can handle relocations for
8281 the difference of two symbols. The HP assembler
8282 recognizes the sequence as a pc-relative call and
8283 the linker provides stubs when needed. */
8285 /* GAS currently can't generate the relocations that
8286 are needed for the SOM linker under HP-UX using this
8287 sequence. The GNU linker doesn't generate the stubs
8288 that are needed for external calls on TARGET_ELF32
8289 with this sequence. For now, we have to use a longer
8290 plabel sequence when using GAS for non local calls. */
8291 if (TARGET_LONG_PIC_SDIFF_CALL
8292 || (TARGET_GAS && !TARGET_SOM && local_call))
8294 xoperands[1] = gen_rtx_REG (Pmode, 1);
8295 xoperands[2] = xoperands[1];
8296 pa_output_pic_pcrel_sequence (xoperands);
8298 else
8300 /* Emit a long plabel-based call sequence. This is
8301 essentially an inline implementation of $$dyncall.
8302 We don't actually try to call $$dyncall as this is
8303 as difficult as calling the function itself. */
8304 xoperands[0] = pa_get_deferred_plabel (call_dest);
8305 xoperands[1] = gen_label_rtx ();
8307 /* Since the call is indirect, FP arguments in registers
8308 need to be copied to the general registers. Then, the
8309 argument relocation stub will copy them back. */
8310 if (TARGET_SOM)
8311 copy_fp_args (insn);
8313 if (flag_pic)
8315 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8316 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8317 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8319 else
8321 output_asm_insn ("addil LR'%0-$global$,%%r27",
8322 xoperands);
8323 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8324 xoperands);
8327 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8328 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8329 /* Should this be an ordered load to ensure the target
8330 address is loaded before the global pointer? */
8331 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8332 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8334 if (!sibcall && !TARGET_PA_20)
8336 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8337 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8338 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8339 else
8340 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8344 if (TARGET_PA_20)
8346 if (sibcall)
8347 output_asm_insn ("bve (%%r1)", xoperands);
8348 else
8350 if (indirect_call)
8352 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8353 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8354 seq_length = 1;
8356 else
8357 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8360 else
8362 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8363 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8364 xoperands);
8366 if (sibcall)
8368 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8369 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8370 else
8371 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8373 else
8375 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8376 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8377 else
8378 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8380 if (indirect_call)
8381 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8382 else
8383 output_asm_insn ("copy %%r31,%%r2", xoperands);
8384 seq_length = 1;
8391 if (seq_length == 0)
8392 output_asm_insn ("nop", xoperands);
8394 return "";
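/* For reference, the 24-byte 64-bit plabel sequence above locates the
   plabel through the linkage table (addil LT'/ldd RT'), loads the
   function descriptor address from it, then fetches the target address
   at offset 16 and the callee's global pointer (%r27) at offset 24 of
   the descriptor before the bve,l.  */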
8397 /* Return the attribute length of the indirect call instruction INSN.
8398 The length must match the code generated by pa_output_indirect_call.
8399 The returned length includes the delay slot. Currently, the delay
8400 slot of an indirect call sequence is not exposed and it is used by
8401 the sequence itself. */
8403 int
8404 pa_attr_length_indirect_call (rtx_insn *insn)
8406 unsigned long distance = -1;
8407 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8409 if (INSN_ADDRESSES_SET_P ())
8411 distance = (total + insn_current_reference_address (insn));
8412 if (distance < total)
8413 distance = -1;
8416 if (TARGET_64BIT)
8417 return 12;
8419 if (TARGET_FAST_INDIRECT_CALLS)
8420 return 8;
8422 if (TARGET_PORTABLE_RUNTIME)
8423 return 16;
8425 if (!TARGET_LONG_CALLS
8426 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8427 || distance < MAX_PCREL17F_OFFSET))
8428 return 8;
8430 /* Out of reach, can use ble. */
8431 if (!flag_pic)
8432 return 12;
8434 /* Inline versions of $$dyncall. */
8435 if (!optimize_size)
8437 if (TARGET_NO_SPACE_REGS)
8438 return 28;
8440 if (TARGET_PA_20)
8441 return 32;
8444 /* Long PIC pc-relative call. */
8445 return 20;
8448 const char *
8449 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8451 rtx xoperands[4];
8452 int length;
8454 if (TARGET_64BIT)
8456 xoperands[0] = call_dest;
8457 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8458 "bve,l (%%r2),%%r2\n\t"
8459 "ldd 24(%0),%%r27", xoperands);
8460 return "";
8463 /* First the special case for kernels, level 0 systems, etc. */
8464 if (TARGET_FAST_INDIRECT_CALLS)
8466 pa_output_arg_descriptor (insn);
8467 if (TARGET_PA_20)
8468 return "bve,l,n (%%r22),%%r2\n\tnop";
8469 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8472 if (TARGET_PORTABLE_RUNTIME)
8474 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8475 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8476 pa_output_arg_descriptor (insn);
8477 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8480 /* Now the normal case -- we can reach $$dyncall directly or
8481 we're sure that we can get there via a long-branch stub.
8483 No need to check target flags as the length uniquely identifies
8484 the remaining cases. */
8485 length = pa_attr_length_indirect_call (insn);
8486 if (length == 8)
8488 pa_output_arg_descriptor (insn);
8490 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8491 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8492 variant of the B,L instruction can't be used on the SOM target. */
8493 if (TARGET_PA_20 && !TARGET_SOM)
8494 return "b,l,n $$dyncall,%%r2\n\tnop";
8495 else
8496 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8499 /* Long millicode call, but we are not generating PIC or portable runtime
8500 code. */
8501 if (length == 12)
8503 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8504 pa_output_arg_descriptor (insn);
8505 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8508 /* The long PIC pc-relative call sequence is five instructions. So,
8509 let's use an inline version of $$dyncall when the calling sequence
8510 has a roughly similar number of instructions and we are not optimizing
8511 for size. We need two instructions to load the return pointer plus
8512 the $$dyncall implementation. */
8513 if (!optimize_size)
8515 if (TARGET_NO_SPACE_REGS)
8517 pa_output_arg_descriptor (insn);
8518 output_asm_insn ("bl .+8,%%r2\n\t"
8519 "ldo 20(%%r2),%%r2\n\t"
8520 "extru,<> %%r22,30,1,%%r0\n\t"
8521 "bv,n %%r0(%%r22)\n\t"
8522 "ldw -2(%%r22),%%r21\n\t"
8523 "bv %%r0(%%r21)\n\t"
8524 "ldw 2(%%r22),%%r19", xoperands);
8525 return "";
8527 if (TARGET_PA_20)
8529 pa_output_arg_descriptor (insn);
8530 output_asm_insn ("bl .+8,%%r2\n\t"
8531 "ldo 24(%%r2),%%r2\n\t"
8532 "stw %%r2,-24(%%sp)\n\t"
8533 "extru,<> %r22,30,1,%%r0\n\t"
8534 "bve,n (%%r22)\n\t"
8535 "ldw -2(%%r22),%%r21\n\t"
8536 "bve (%%r21)\n\t"
8537 "ldw 2(%%r22),%%r19", xoperands);
8538 return "";
8542 /* We need a long PIC call to $$dyncall. */
8543 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8544 xoperands[1] = gen_rtx_REG (Pmode, 2);
8545 xoperands[2] = gen_rtx_REG (Pmode, 1);
8546 pa_output_pic_pcrel_sequence (xoperands);
8547 pa_output_arg_descriptor (insn);
8548 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
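/* A note on the -2/+2 displacements in the inline sequences above:
   bit 30 of %r22 marks a function descriptor (plabel) pointer rather
   than a plain code address, and setting that bit offsets the pointer
   value by 2.  Thus ldw -2(%r22) fetches the descriptor's code address
   and ldw 2(%r22) its linkage pointer for %r19.  */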
8551 /* In HPUX 8.0's shared library scheme, special relocations are needed
8552 for function labels if they might be passed to a function
8553 in a shared library (because shared libraries don't live in code
8554 space), and special magic is needed to construct their address. */
8556 void
8557 pa_encode_label (rtx sym)
8559 const char *str = XSTR (sym, 0);
8560 int len = strlen (str) + 1;
8561 char *newstr, *p;
8563 p = newstr = XALLOCAVEC (char, len + 1);
8564 *p++ = '@';
8565 strcpy (p, str);
8567 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
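/* For example, a function symbol "foo" becomes "@foo"; the '@' prefix
   tags it as living in code space so the special relocations described
   above can be applied when its address is taken.  */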
8570 static void
8571 pa_encode_section_info (tree decl, rtx rtl, int first)
8573 int old_referenced = 0;
8575 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8576 old_referenced
8577 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8579 default_encode_section_info (decl, rtl, first);
8581 if (first && TEXT_SPACE_P (decl))
8583 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8584 if (TREE_CODE (decl) == FUNCTION_DECL)
8585 pa_encode_label (XEXP (rtl, 0));
8587 else if (old_referenced)
8588 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8591 /* This is sort of inverse to pa_encode_section_info. */
8593 static const char *
8594 pa_strip_name_encoding (const char *str)
8596 str += (*str == '@');
8597 str += (*str == '*');
8598 return str;
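/* For example, "@foo" and "*bar" both come back as the bare names
   "foo" and "bar"; at most one '@' and then one '*' prefix are
   removed.  */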
8601 /* Returns 1 if OP is a function label involved in a simple addition
8602 with a constant. Used to keep certain patterns from matching
8603 during instruction combination. */
8604 int
8605 pa_is_function_label_plus_const (rtx op)
8607 /* Strip off any CONST. */
8608 if (GET_CODE (op) == CONST)
8609 op = XEXP (op, 0);
8611 return (GET_CODE (op) == PLUS
8612 && function_label_operand (XEXP (op, 0), VOIDmode)
8613 && GET_CODE (XEXP (op, 1)) == CONST_INT);
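/* A minimal sketch of an rtx this accepts, assuming "@foo" is an
   encoded function label:

       (const (plus (symbol_ref "@foo") (const_int 4)))

   The CONST wrapper is optional; a bare PLUS matches as well.  */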
8616 /* Output the assembler code for a thunk function. THUNK_DECL is the
8617 declaration for the thunk function itself, FUNCTION is the decl for
8618 the target function. DELTA is an immediate constant offset to be
8619 added to THIS. If VCALL_OFFSET is nonzero, the word at
8620 *(*this + vcall_offset) should be added to THIS. */
8622 static void
8623 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8624 HOST_WIDE_INT vcall_offset, tree function)
8626 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8627 static unsigned int current_thunk_number;
8628 int val_14 = VAL_14_BITS_P (delta);
8629 unsigned int old_last_address = last_address, nbytes = 0;
8630 char label[17];
8631 rtx xoperands[4];
8633 xoperands[0] = XEXP (DECL_RTL (function), 0);
8634 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8635 xoperands[2] = GEN_INT (delta);
8637 assemble_start_function (thunk_fndecl, fnname);
8638 final_start_function (emit_barrier (), file, 1);
8640 if (!vcall_offset)
8642 /* Output the thunk. We know that the function is in the same
8643 translation unit (i.e., the same space) as the thunk, and that
8644 thunks are output after their method. Thus, we don't need an
8645 external branch to reach the function. With SOM and GAS,
8646 functions and thunks are effectively in different sections.
8647 Thus, we can always use an IA-relative branch and the linker
8648 will add a long branch stub if necessary.
8650 However, we have to be careful when generating PIC code on the
8651 SOM port to ensure that the sequence does not transfer to an
8652 import stub for the target function as this could clobber the
8653 return value saved at SP-24. This would also apply to the
8654 32-bit linux port if the multi-space model is implemented. */
8655 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8656 && !(flag_pic && TREE_PUBLIC (function))
8657 && (TARGET_GAS || last_address < 262132))
8658 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8659 && ((targetm_common.have_named_sections
8660 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8661 /* The GNU 64-bit linker has rather poor stub management.
8662 So, we use a long branch from thunks that aren't in
8663 the same section as the target function. */
8664 && ((!TARGET_64BIT
8665 && (DECL_SECTION_NAME (thunk_fndecl)
8666 != DECL_SECTION_NAME (function)))
8667 || ((DECL_SECTION_NAME (thunk_fndecl)
8668 == DECL_SECTION_NAME (function))
8669 && last_address < 262132)))
8670 /* In this case, we need to be able to reach the start of
8671 the stub table even though the function is likely closer
8672 and can be jumped to directly. */
8673 || (targetm_common.have_named_sections
8674 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8675 && DECL_SECTION_NAME (function) == NULL
8676 && total_code_bytes < MAX_PCREL17F_OFFSET)
8677 /* Likewise. */
8678 || (!targetm_common.have_named_sections
8679 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8681 if (!val_14)
8682 output_asm_insn ("addil L'%2,%%r26", xoperands);
8684 output_asm_insn ("b %0", xoperands);
8686 if (val_14)
8688 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8689 nbytes += 8;
8691 else
8693 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8694 nbytes += 12;
8697 else if (TARGET_64BIT)
8699 rtx xop[4];
8701 /* We only have one call-clobbered scratch register, so we can't
8702 make use of the delay slot if delta doesn't fit in 14 bits. */
8703 if (!val_14)
8705 output_asm_insn ("addil L'%2,%%r26", xoperands);
8706 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8709 /* Load function address into %r1. */
8710 xop[0] = xoperands[0];
8711 xop[1] = gen_rtx_REG (Pmode, 1);
8712 xop[2] = xop[1];
8713 pa_output_pic_pcrel_sequence (xop);
8715 if (val_14)
8717 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8718 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8719 nbytes += 20;
8721 else
8723 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8724 nbytes += 24;
8727 else if (TARGET_PORTABLE_RUNTIME)
8729 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8730 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8732 if (!val_14)
8733 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8735 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8737 if (val_14)
8739 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8740 nbytes += 16;
8742 else
8744 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8745 nbytes += 20;
8748 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8750 /* The function is accessible from outside this module. The only
8751 way to avoid an import stub between the thunk and function is to
8752 call the function directly with an indirect sequence similar to
8753 that used by $$dyncall. This is possible because $$dyncall acts
8754 as the import stub in an indirect call. */
8755 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8756 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8757 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8758 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8759 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8760 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8761 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8762 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8763 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8765 if (!val_14)
8767 output_asm_insn ("addil L'%2,%%r26", xoperands);
8768 nbytes += 4;
8771 if (TARGET_PA_20)
8773 output_asm_insn ("bve (%%r22)", xoperands);
8774 nbytes += 36;
8776 else if (TARGET_NO_SPACE_REGS)
8778 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8779 nbytes += 36;
8781 else
8783 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8784 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8785 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8786 nbytes += 44;
8789 if (val_14)
8790 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8791 else
8792 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8794 else if (flag_pic)
8796 rtx xop[4];
8798 /* Load function address into %r22. */
8799 xop[0] = xoperands[0];
8800 xop[1] = gen_rtx_REG (Pmode, 1);
8801 xop[2] = gen_rtx_REG (Pmode, 22);
8802 pa_output_pic_pcrel_sequence (xop);
8804 if (!val_14)
8805 output_asm_insn ("addil L'%2,%%r26", xoperands);
8807 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8809 if (val_14)
8811 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8812 nbytes += 20;
8814 else
8816 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8817 nbytes += 24;
8820 else
8822 if (!val_14)
8823 output_asm_insn ("addil L'%2,%%r26", xoperands);
8825 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8826 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8828 if (val_14)
8830 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8831 nbytes += 12;
8833 else
8835 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8836 nbytes += 16;
8840 else
8842 rtx xop[4];
8844 /* Add DELTA to THIS. */
8845 if (val_14)
8847 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8848 nbytes += 4;
8850 else
8852 output_asm_insn ("addil L'%2,%%r26", xoperands);
8853 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8854 nbytes += 8;
8857 if (TARGET_64BIT)
8859 /* Load *(THIS + DELTA) to %r1. */
8860 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8862 val_14 = VAL_14_BITS_P (vcall_offset);
8863 xoperands[2] = GEN_INT (vcall_offset);
8865 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8866 if (val_14)
8868 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8869 nbytes += 8;
8871 else
8873 output_asm_insn ("addil L'%2,%%r1", xoperands);
8874 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8875 nbytes += 12;
8878 else
8880 /* Load *(THIS + DELTA) to %r1. */
8881 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8883 val_14 = VAL_14_BITS_P (vcall_offset);
8884 xoperands[2] = GEN_INT (vcall_offset);
8886 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8887 if (val_14)
8889 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8890 nbytes += 8;
8892 else
8894 output_asm_insn ("addil L'%2,%%r1", xoperands);
8895 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8896 nbytes += 12;
8900 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8901 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8902 && !(flag_pic && TREE_PUBLIC (function))
8903 && (TARGET_GAS || last_address < 262132))
8904 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8905 && ((targetm_common.have_named_sections
8906 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8907 /* The GNU 64-bit linker has rather poor stub management.
8908 So, we use a long branch from thunks that aren't in
8909 the same section as the target function. */
8910 && ((!TARGET_64BIT
8911 && (DECL_SECTION_NAME (thunk_fndecl)
8912 != DECL_SECTION_NAME (function)))
8913 || ((DECL_SECTION_NAME (thunk_fndecl)
8914 == DECL_SECTION_NAME (function))
8915 && last_address < 262132)))
8916 /* In this case, we need to be able to reach the start of
8917 the stub table even though the function is likely closer
8918 and can be jumped to directly. */
8919 || (targetm_common.have_named_sections
8920 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8921 && DECL_SECTION_NAME (function) == NULL
8922 && total_code_bytes < MAX_PCREL17F_OFFSET)
8923 /* Likewise. */
8924 || (!targetm_common.have_named_sections
8925 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8927 nbytes += 4;
8928 output_asm_insn ("b %0", xoperands);
8930 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8931 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8933 else if (TARGET_64BIT)
8935 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8936 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8938 /* Load function address into %r1. */
8939 nbytes += 16;
8940 xop[0] = xoperands[0];
8941 xop[1] = gen_rtx_REG (Pmode, 1);
8942 xop[2] = xop[1];
8943 pa_output_pic_pcrel_sequence (xop);
8945 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8947 else if (TARGET_PORTABLE_RUNTIME)
8949 /* Load function address into %r22. */
8950 nbytes += 12;
8951 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8952 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8954 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8956 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8957 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8959 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8961 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8962 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8964 /* The function is accessible from outside this module. The only
8965 way to avoid an import stub between the thunk and function is to
8966 call the function directly with an indirect sequence similar to
8967 that used by $$dyncall. This is possible because $$dyncall acts
8968 as the import stub in an indirect call. */
8969 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8970 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8971 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8972 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8973 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8974 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8975 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8976 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8977 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8979 if (TARGET_PA_20)
8981 output_asm_insn ("bve,n (%%r22)", xoperands);
8982 nbytes += 32;
8984 else if (TARGET_NO_SPACE_REGS)
8986 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8987 nbytes += 32;
8989 else
8991 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8992 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8993 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8994 nbytes += 40;
8997 else if (flag_pic)
8999 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9000 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
9002 /* Load function address into %r1. */
9003 nbytes += 16;
9004 xop[0] = xoperands[0];
9005 xop[1] = gen_rtx_REG (Pmode, 1);
9006 xop[2] = xop[1];
9007 pa_output_pic_pcrel_sequence (xop);
9009 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
9011 else
9013 /* Load function address into %r22. */
9014 nbytes += 8;
9015 output_asm_insn ("ldil L'%0,%%r22", xoperands);
9016 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
9018 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
9019 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
9023 final_end_function ();
9025 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
9027 switch_to_section (data_section);
9028 output_asm_insn (".align 4", xoperands);
9029 ASM_OUTPUT_LABEL (file, label);
9030 output_asm_insn (".word P'%0", xoperands);
9033 current_thunk_number++;
9034 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
9035 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
9036 last_address += nbytes;
9037 if (old_last_address > last_address)
9038 last_address = UINT_MAX;
9039 update_total_code_bytes (nbytes);
9040 assemble_end_function (thunk_fndecl, fnname);
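/* A hedged example of the simplest thunk body produced above (non-PIC,
   target function f not a public SOM symbol, delta fitting in 14 bits):

       ldil L'f,%r22
       be R'f(%sr4,%r22)
       ldo delta(%r26),%r26

   The this pointer in %r26 is adjusted in the delay slot of the
   branch.  */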
9043 /* Only direct calls to static functions are allowed to be sibling (tail)
9044 call optimized.
9046 This restriction is necessary because some linker-generated stubs will
9047 store return pointers into rp' in some cases which might clobber a
9048 live value already in rp'.
9050 In a sibcall the current function and the target function share stack
9051 space. Thus if the path to the current function and the path to the
9052 target function save a value in rp', they save the value into the
9053 same stack slot, which has undesirable consequences.
9055 Because of the deferred binding nature of shared libraries any function
9056 with external scope could be in a different load module and thus require
9057 rp' to be saved when calling that function. So sibcall optimizations
9058 can only be safe for static functions.
9060 Note that GCC never needs return value relocations, so we don't have to
9061 worry about static calls with return value relocations (which require
9062 saving rp').
9064 It is safe to perform a sibcall optimization when the target function
9065 will never return. */
9066 static bool
9067 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9069 /* Sibcalls are not ok because the arg pointer register is not a fixed
9070 register. This prevents the sibcall optimization from occurring. In
9071 addition, there are problems with stub placement using GNU ld. This
9072 is because a normal sibcall branch uses a 17-bit relocation while
9073 a regular call branch uses a 22-bit relocation. As a result, more
9074 care needs to be taken in the placement of long-branch stubs. */
9075 if (TARGET_64BIT)
9076 return false;
9078 if (TARGET_PORTABLE_RUNTIME)
9079 return false;
9081 /* Sibcalls are only ok within a translation unit. */
9082 return decl && targetm.binds_local_p (decl);
9085 /* ??? Addition is not commutative on the PA due to the weird implicit
9086 space register selection rules for memory addresses. Therefore, we
9087 don't consider a + b == b + a, as this might be inside a MEM. */
9088 static bool
9089 pa_commutative_p (const_rtx x, int outer_code)
9091 return (COMMUTATIVE_P (x)
9092 && (TARGET_NO_SPACE_REGS
9093 || (outer_code != UNKNOWN && outer_code != MEM)
9094 || GET_CODE (x) != PLUS));
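/* Example: (plus (reg X) (reg Y)) inside a MEM is not treated as
   commutative because the implicit space register is chosen based on
   the base operand; swapping base and index could change which space
   the access uses.  Outside a MEM, or with TARGET_NO_SPACE_REGS,
   normal commutativity applies.  */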
9097 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9098 use in fmpyadd instructions. */
9099 int
9100 pa_fmpyaddoperands (rtx *operands)
9102 machine_mode mode = GET_MODE (operands[0]);
9104 /* Must be a floating point mode. */
9105 if (mode != SFmode && mode != DFmode)
9106 return 0;
9108 /* All modes must be the same. */
9109 if (! (mode == GET_MODE (operands[1])
9110 && mode == GET_MODE (operands[2])
9111 && mode == GET_MODE (operands[3])
9112 && mode == GET_MODE (operands[4])
9113 && mode == GET_MODE (operands[5])))
9114 return 0;
9116 /* All operands must be registers. */
9117 if (! (GET_CODE (operands[1]) == REG
9118 && GET_CODE (operands[2]) == REG
9119 && GET_CODE (operands[3]) == REG
9120 && GET_CODE (operands[4]) == REG
9121 && GET_CODE (operands[5]) == REG))
9122 return 0;
9124 /* Only 2 real operands to the addition. One of the input operands must
9125 be the same as the output operand. */
9126 if (! rtx_equal_p (operands[3], operands[4])
9127 && ! rtx_equal_p (operands[3], operands[5]))
9128 return 0;
9130 /* Inout operand of add cannot conflict with any operands from multiply. */
9131 if (rtx_equal_p (operands[3], operands[0])
9132 || rtx_equal_p (operands[3], operands[1])
9133 || rtx_equal_p (operands[3], operands[2]))
9134 return 0;
9136 /* multiply cannot feed into addition operands. */
9137 if (rtx_equal_p (operands[4], operands[0])
9138 || rtx_equal_p (operands[5], operands[0]))
9139 return 0;
9141 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9142 if (mode == SFmode
9143 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9144 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9145 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9146 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9147 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9148 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9149 return 0;
9151 /* Passed. Operands are suitable for fmpyadd. */
9152 return 1;
9155 #if !defined(USE_COLLECT2)
9156 static void
9157 pa_asm_out_constructor (rtx symbol, int priority)
9159 if (!function_label_operand (symbol, VOIDmode))
9160 pa_encode_label (symbol);
9162 #ifdef CTORS_SECTION_ASM_OP
9163 default_ctor_section_asm_out_constructor (symbol, priority);
9164 #else
9165 # ifdef TARGET_ASM_NAMED_SECTION
9166 default_named_section_asm_out_constructor (symbol, priority);
9167 # else
9168 default_stabs_asm_out_constructor (symbol, priority);
9169 # endif
9170 #endif
9173 static void
9174 pa_asm_out_destructor (rtx symbol, int priority)
9176 if (!function_label_operand (symbol, VOIDmode))
9177 pa_encode_label (symbol);
9179 #ifdef DTORS_SECTION_ASM_OP
9180 default_dtor_section_asm_out_destructor (symbol, priority);
9181 #else
9182 # ifdef TARGET_ASM_NAMED_SECTION
9183 default_named_section_asm_out_destructor (symbol, priority);
9184 # else
9185 default_stabs_asm_out_destructor (symbol, priority);
9186 # endif
9187 #endif
9189 #endif
9191 /* This function places uninitialized global data in the bss section.
9192 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9193 function on the SOM port to prevent uninitialized global data from
9194 being placed in the data section. */
9196 void
9197 pa_asm_output_aligned_bss (FILE *stream,
9198 const char *name,
9199 unsigned HOST_WIDE_INT size,
9200 unsigned int align)
9202 switch_to_section (bss_section);
9204 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9205 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9206 #endif
9208 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9209 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9210 #endif
9212 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9213 ASM_OUTPUT_LABEL (stream, name);
9214 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9217 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9218 that doesn't allow the alignment of global common storage to be directly
9219 specified. The SOM linker aligns common storage based on the rounded
9220 value of the NUM_BYTES parameter in the .comm directive. It's not
9221 possible to use the .align directive as it doesn't affect the alignment
9222 of the label associated with a .comm directive. */
9224 void
9225 pa_asm_output_aligned_common (FILE *stream,
9226 const char *name,
9227 unsigned HOST_WIDE_INT size,
9228 unsigned int align)
9230 unsigned int max_common_align;
9232 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9233 if (align > max_common_align)
9235 /* Alignment exceeds maximum alignment for global common data. */
9236 align = max_common_align;
9239 switch_to_section (bss_section);
9241 assemble_name (stream, name);
9242 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9243 MAX (size, align / BITS_PER_UNIT));
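/* Hypothetical output for an object "buf" of 16 bytes requesting
   64-bit alignment:

       buf	.comm 16

   The SOM linker derives the alignment from the rounded .comm size,
   which is why the size is bumped to at least align / BITS_PER_UNIT.  */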
9246 /* We can't use .comm for local common storage as the SOM linker effectively
9247 treats the symbol as universal and uses the same storage for local symbols
9248 with the same name in different object files. The .block directive
9249 reserves an uninitialized block of storage. However, it's not common
9250 storage. Fortunately, GCC never requests common storage with the same
9251 name in any given translation unit. */
9253 void
9254 pa_asm_output_aligned_local (FILE *stream,
9255 const char *name,
9256 unsigned HOST_WIDE_INT size,
9257 unsigned int align)
9259 switch_to_section (bss_section);
9260 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9262 #ifdef LOCAL_ASM_OP
9263 fprintf (stream, "%s", LOCAL_ASM_OP);
9264 assemble_name (stream, name);
9265 fprintf (stream, "\n");
9266 #endif
9268 ASM_OUTPUT_LABEL (stream, name);
9269 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
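/* Roughly, an 8-byte local with 8-byte alignment produces

       .align 8
       buf
       .block 8

   plus the LOCAL_ASM_OP line when that macro is defined; the exact
   label syntax depends on ASM_OUTPUT_LABEL.  */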
9272 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9273 use in fmpysub instructions. */
9274 int
9275 pa_fmpysuboperands (rtx *operands)
9277 machine_mode mode = GET_MODE (operands[0]);
9279 /* Must be a floating point mode. */
9280 if (mode != SFmode && mode != DFmode)
9281 return 0;
9283 /* All modes must be the same. */
9284 if (! (mode == GET_MODE (operands[1])
9285 && mode == GET_MODE (operands[2])
9286 && mode == GET_MODE (operands[3])
9287 && mode == GET_MODE (operands[4])
9288 && mode == GET_MODE (operands[5])))
9289 return 0;
9291 /* All operands must be registers. */
9292 if (! (GET_CODE (operands[1]) == REG
9293 && GET_CODE (operands[2]) == REG
9294 && GET_CODE (operands[3]) == REG
9295 && GET_CODE (operands[4]) == REG
9296 && GET_CODE (operands[5]) == REG))
9297 return 0;
9299 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9300 operation, so operands[4] must be the same as operands[3]. */
9301 if (! rtx_equal_p (operands[3], operands[4]))
9302 return 0;
9304 /* multiply cannot feed into subtraction. */
9305 if (rtx_equal_p (operands[5], operands[0]))
9306 return 0;
9308 /* Inout operand of sub cannot conflict with any operands from multiply. */
9309 if (rtx_equal_p (operands[3], operands[0])
9310 || rtx_equal_p (operands[3], operands[1])
9311 || rtx_equal_p (operands[3], operands[2]))
9312 return 0;
9314 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9315 if (mode == SFmode
9316 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9317 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9318 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9319 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9320 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9321 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9322 return 0;
9324 /* Passed. Operands are suitable for fmpysub. */
9325 return 1;
9328 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9329 constants for a MULT embedded inside a memory address. */
9330 int
9331 pa_mem_shadd_constant_p (int val)
9333 if (val == 2 || val == 4 || val == 8)
9334 return 1;
9335 else
9336 return 0;
9339 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9340 constants for shadd instructions. */
9341 int
9342 pa_shadd_constant_p (int val)
9344 if (val == 1 || val == 2 || val == 3)
9345 return 1;
9346 else
9347 return 0;
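/* The two predicates above are two views of the same instruction: a
   MULT by 2, 4 or 8 inside an address corresponds to a shadd shift
   count of 1, 2 or 3, i.e. the mem_shadd constant is 1 << the shadd
   constant.  */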
9350 /* Return TRUE if INSN branches forward. */
9352 static bool
9353 forward_branch_p (rtx_insn *insn)
9355 rtx lab = JUMP_LABEL (insn);
9357 /* The INSN must have a jump label. */
9358 gcc_assert (lab != NULL_RTX);
9360 if (INSN_ADDRESSES_SET_P ())
9361 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9363 while (insn)
9365 if (insn == lab)
9366 return true;
9367 else
9368 insn = NEXT_INSN (insn);
9371 return false;
9374 /* Output an unconditional move and branch insn. */
9376 const char *
9377 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9379 int length = get_attr_length (insn);
9381 /* These are the cases in which we win. */
9382 if (length == 4)
9383 return "mov%I1b,tr %1,%0,%2";
9385 /* None of the following cases win, but they don't lose either. */
9386 if (length == 8)
9388 if (dbr_sequence_length () == 0)
9390 /* Nothing in the delay slot, fake it by putting the combined
9391 insn (the copy or add) in the delay slot of a bl. */
9392 if (GET_CODE (operands[1]) == CONST_INT)
9393 return "b %2\n\tldi %1,%0";
9394 else
9395 return "b %2\n\tcopy %1,%0";
9397 else
9399 /* Something in the delay slot, but we've got a long branch. */
9400 if (GET_CODE (operands[1]) == CONST_INT)
9401 return "ldi %1,%0\n\tb %2";
9402 else
9403 return "copy %1,%0\n\tb %2";
9407 if (GET_CODE (operands[1]) == CONST_INT)
9408 output_asm_insn ("ldi %1,%0", operands);
9409 else
9410 output_asm_insn ("copy %1,%0", operands);
9411 return pa_output_lbranch (operands[2], insn, 1);
9414 /* Output an unconditional add and branch insn. */
9416 const char *
9417 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9419 int length = get_attr_length (insn);
9421 /* To make life easy we want operand0 to be the shared input/output
9422 operand and operand1 to be the readonly operand. */
9423 if (operands[0] == operands[1])
9424 operands[1] = operands[2];
9426 /* These are the cases in which we win. */
9427 if (length == 4)
9428 return "add%I1b,tr %1,%0,%3";
9430 /* None of the following cases win, but they don't lose either. */
9431 if (length == 8)
9433 if (dbr_sequence_length () == 0)
9434 /* Nothing in the delay slot, fake it by putting the combined
9435 insn (the copy or add) in the delay slot of a bl. */
9436 return "b %3\n\tadd%I1 %1,%0,%0";
9437 else
9438 /* Something in the delay slot, but we've got a long branch. */
9439 return "add%I1 %1,%0,%0\n\tb %3";
9442 output_asm_insn ("add%I1 %1,%0,%0", operands);
9443 return pa_output_lbranch (operands[3], insn, 1);
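/* In the templates above, %I1 selects the immediate form when operand
   1 is a CONST_INT, so the winning 4-byte cases come out as something
   like

       movb,tr %r5,%r4,L$7	or	movib,tr 3,%r4,L$7
       addb,tr %r5,%r4,L$7	or	addib,tr 3,%r4,L$7

   where the ",tr" completer makes the branch unconditional (the
   example registers and label are assumptions).  */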
9446 /* We use this hook to perform a PA specific optimization which is difficult
9447 to do in earlier passes. */
9449 static void
9450 pa_reorg (void)
9452 remove_useless_addtr_insns (1);
9454 if (pa_cpu < PROCESSOR_8000)
9455 pa_combine_instructions ();
9458 /* The PA has a number of odd instructions which can perform multiple
9459 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9460 it may be profitable to combine two instructions into one instruction
9461 with two outputs. It's not profitable on PA2.0 machines because the
9462 two outputs would take two slots in the reorder buffers.
9464 This routine finds instructions which can be combined and combines
9465 them. We only support some of the potential combinations, and we
9466 only try common ways to find suitable instructions.
9468 * addb can add two registers or a register and a small integer
9469 and jump to a nearby (+-8k) location. Normally the jump to the
9470 nearby location is conditional on the result of the add, but by
9471 using the "true" condition we can make the jump unconditional.
9472 Thus addb can perform two independent operations in one insn.
9474 * movb is similar to addb in that it can perform a reg->reg
9475 or small immediate->reg copy and jump to a nearby (+-8k) location.
9477 * fmpyadd and fmpysub can perform a FP multiply and either an
9478 FP add or FP sub if the operands of the multiply and add/sub are
9479 independent (there are other minor restrictions). Note both
9480 the fmpy and fadd/fsub can in theory move to better spots according
9481 to data dependencies, but for now we require the fmpy stay at a
9482 fixed location.
9484 * Many of the memory operations can perform pre & post updates
9485 of index registers. GCC's pre/post increment/decrement addressing
9486 is far too simple to take advantage of all the possibilities. This
9487 pass may not be suitable since those insns may not be independent.
9489 * comclr can compare two ints or an int and a register, nullify
9490 the following instruction and zero some other register. This
9491 is more difficult to use as it's harder to find an insn which
9492 will generate a comclr than finding something like an unconditional
9493 branch. (conditional moves & long branches create comclr insns).
9495 * Most arithmetic operations can conditionally skip the next
9496 instruction. They can be viewed as "perform this operation
9497 and conditionally jump to this nearby location" (where nearby
9498 is an insn away). These are difficult to use due to the
9499 branch length restrictions. */
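/* For example (a sketch, not literal output): an fmpy,dbl and an
   independent fadd,dbl can retire as a single fmpyadd, and

       add %r2,%r3,%r3  +  an unconditional backwards branch

   can become "addb,tr %r2,%r3,label".  The pass below scans backwards,
   then forwards, from each anchor insn for such a combinable
   "floater".  */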
9501 static void
9502 pa_combine_instructions (void)
9504 rtx_insn *anchor;
9506 /* This can get expensive since the basic algorithm is on the
9507 order of O(n^2) (or worse). Only do it for -O2 or higher
9508 levels of optimization. */
9509 if (optimize < 2)
9510 return;
9512 /* Walk down the list of insns looking for "anchor" insns which
9513 may be combined with "floating" insns. As the name implies,
9514 "anchor" instructions don't move, while "floating" insns may
9515 move around. */
9516 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9517 rtx_insn *new_rtx = make_insn_raw (par);
9519 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9521 enum attr_pa_combine_type anchor_attr;
9522 enum attr_pa_combine_type floater_attr;
9524 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9525 Also ignore any special USE insns. */
9526 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9527 || GET_CODE (PATTERN (anchor)) == USE
9528 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9529 continue;
9531 anchor_attr = get_attr_pa_combine_type (anchor);
9532 /* See if anchor is an insn suitable for combination. */
9533 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9534 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9535 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9536 && ! forward_branch_p (anchor)))
9538 rtx_insn *floater;
9540 for (floater = PREV_INSN (anchor);
9541 floater;
9542 floater = PREV_INSN (floater))
9544 if (NOTE_P (floater)
9545 || (NONJUMP_INSN_P (floater)
9546 && (GET_CODE (PATTERN (floater)) == USE
9547 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9548 continue;
9550 /* Anything except a regular INSN will stop our search. */
9551 if (! NONJUMP_INSN_P (floater))
9553 floater = NULL;
9554 break;
9557 /* See if FLOATER is suitable for combination with the
9558 anchor. */
9559 floater_attr = get_attr_pa_combine_type (floater);
9560 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9561 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9562 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9563 && floater_attr == PA_COMBINE_TYPE_FMPY))
9565 /* If ANCHOR and FLOATER can be combined, then we're
9566 done with this pass. */
9567 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9568 SET_DEST (PATTERN (floater)),
9569 XEXP (SET_SRC (PATTERN (floater)), 0),
9570 XEXP (SET_SRC (PATTERN (floater)), 1)))
9571 break;
9574 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9575 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9577 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9579 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9580 SET_DEST (PATTERN (floater)),
9581 XEXP (SET_SRC (PATTERN (floater)), 0),
9582 XEXP (SET_SRC (PATTERN (floater)), 1)))
9583 break;
9585 else
9587 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9588 SET_DEST (PATTERN (floater)),
9589 SET_SRC (PATTERN (floater)),
9590 SET_SRC (PATTERN (floater))))
9591 break;
9596 /* If we didn't find anything on the backwards scan try forwards. */
9597 if (!floater
9598 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9599 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9601 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9603 if (NOTE_P (floater)
9604 || (NONJUMP_INSN_P (floater)
9605 && (GET_CODE (PATTERN (floater)) == USE
9606 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9608 continue;
9610 /* Anything except a regular INSN will stop our search. */
9611 if (! NONJUMP_INSN_P (floater))
9613 floater = NULL;
9614 break;
9617 /* See if FLOATER is suitable for combination with the
9618 anchor. */
9619 floater_attr = get_attr_pa_combine_type (floater);
9620 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9621 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9622 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9623 && floater_attr == PA_COMBINE_TYPE_FMPY))
9625 /* If ANCHOR and FLOATER can be combined, then we're
9626 done with this pass. */
9627 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9628 SET_DEST (PATTERN (floater)),
9629 XEXP (SET_SRC (PATTERN (floater)),
9630 0),
9631 XEXP (SET_SRC (PATTERN (floater)),
9632 1)))
9633 break;
9638 /* FLOATER will be non-NULL if we found a suitable floating
9639 insn for combination with ANCHOR. */
9640 if (floater
9641 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9642 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9644 /* Emit the new instruction and delete the old anchor. */
9645 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9646 copy_rtx (PATTERN (floater)));
9647 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9648 emit_insn_before (temp, anchor);
9650 SET_INSN_DELETED (anchor);
9652 /* Emit a special USE insn for FLOATER, then delete
9653 the floating insn. */
9654 temp = copy_rtx (PATTERN (floater));
9655 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9656 delete_insn (floater);
9658 continue;
9660 else if (floater
9661 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9663 /* Emit the new_jump instruction and delete the old anchor. */
9664 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9665 copy_rtx (PATTERN (floater)));
9666 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9667 temp = emit_jump_insn_before (temp, anchor);
9669 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9670 SET_INSN_DELETED (anchor);
9672 /* Emit a special USE insn for FLOATER, then delete
9673 the floating insn. */
9674 temp = copy_rtx (PATTERN (floater));
9675 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9676 delete_insn (floater);
9677 continue;
9683 static int
9684 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9685 int reversed, rtx dest,
9686 rtx src1, rtx src2)
9688 int insn_code_number;
9689 rtx_insn *start, *end;
9691 /* Create a PARALLEL with the patterns of ANCHOR and
9692 FLOATER, try to recognize it, then test constraints
9693 for the resulting pattern.
9695 If the pattern doesn't match or the constraints
9696 aren't met, keep searching for a suitable floater
9697 insn. */
9698 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9699 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9700 INSN_CODE (new_rtx) = -1;
9701 insn_code_number = recog_memoized (new_rtx);
9702 basic_block bb = BLOCK_FOR_INSN (anchor);
9703 if (insn_code_number < 0
9704 || (extract_insn (new_rtx),
9705 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9706 return 0;
9708 if (reversed)
9710 start = anchor;
9711 end = floater;
9713 else
9715 start = floater;
9716 end = anchor;
9719 /* There are up to three operands to consider: one
9720 output and two inputs.
9722 The output must not be used between FLOATER & ANCHOR
9723 exclusive. The inputs must not be set between
9724 FLOATER and ANCHOR exclusive. */
9726 if (reg_used_between_p (dest, start, end))
9727 return 0;
9729 if (reg_set_between_p (src1, start, end))
9730 return 0;
9732 if (reg_set_between_p (src2, start, end))
9733 return 0;
9735 /* If we get here, then everything is good. */
9736 return 1;
9739 /* Return nonzero if references for INSN are delayed.
9741 Millicode insns are actually function calls with some special
9742 constraints on arguments and register usage.
9744 Millicode calls always expect their arguments in the integer argument
9745 registers, and always return their result in %r29 (ret1). They
9746 are expected to clobber their arguments, %r1, %r29, and the return
9747 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9749 This function tells reorg that the references to arguments of
9750 millicode calls do not appear to happen until after the millicode call.
9751 This allows reorg to put insns which set the argument registers into the
9752 delay slot of the millicode call -- thus they act more like traditional
9753 CALL_INSNs.
9755 Note we cannot consider side effects of the insn to be delayed because
9756 the branch and link insn will clobber the return pointer. If we happened
9757 to use the return pointer in the delay slot of the call, then we lose.
9759 get_attr_type will try to recognize the given insn, so make sure to
9760 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9761 in particular. */
9762 int
9763 pa_insn_refs_are_delayed (rtx_insn *insn)
9765 return ((NONJUMP_INSN_P (insn)
9766 && GET_CODE (PATTERN (insn)) != SEQUENCE
9767 && GET_CODE (PATTERN (insn)) != USE
9768 && GET_CODE (PATTERN (insn)) != CLOBBER
9769 && get_attr_type (insn) == TYPE_MILLI));
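
/* For instance (editor's sketch, hypothetical assembly), this lets
   reorg fill the delay slot of a millicode call with an insn that
   sets an argument register:

       bl $$mulI,%r31    ; millicode call, result in %r29
       ldi 42,%r25       ; delay slot: argument reference is "delayed"  */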
9772 /* Promote the return value, but not the arguments. */
9774 static machine_mode
9775 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9776 machine_mode mode,
9777 int *punsignedp ATTRIBUTE_UNUSED,
9778 const_tree fntype ATTRIBUTE_UNUSED,
9779 int for_return)
9781 if (for_return == 0)
9782 return mode;
9783 return promote_mode (type, mode, punsignedp);
9786 /* On the HP-PA the value is found in register(s) 28(-29), unless
9787 the mode is SF or DF. Then the value is returned in fr4 (32).
9789 This must perform the same promotions as PROMOTE_MODE, else promoting
9790 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9792 Small structures must be returned in a PARALLEL on PA64 in order
9793 to match the HP Compiler ABI. */
9795 static rtx
9796 pa_function_value (const_tree valtype,
9797 const_tree func ATTRIBUTE_UNUSED,
9798 bool outgoing ATTRIBUTE_UNUSED)
9800 machine_mode valmode;
9802 if (AGGREGATE_TYPE_P (valtype)
9803 || TREE_CODE (valtype) == COMPLEX_TYPE
9804 || VECTOR_TYPE_P (valtype))
9806 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9808 /* Handle aggregates that fit exactly in a word or double word. */
9809 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9810 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9812 if (TARGET_64BIT)
9814 /* Aggregates with a size less than or equal to 128 bits are
9815 returned in GR 28(-29). They are left justified. The pad
9816 bits are undefined. Larger aggregates are returned in
9817 memory. */
9818 rtx loc[2];
9819 int i, offset = 0;
9820 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9822 for (i = 0; i < ub; i++)
9824 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9825 gen_rtx_REG (DImode, 28 + i),
9826 GEN_INT (offset));
9827 offset += 8;
9830 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9832 else if (valsize > UNITS_PER_WORD)
9834 /* Aggregates 5 to 8 bytes in size are returned in general
9835 registers r28-r29 in the same manner as other non
9836 floating-point objects. The data is right-justified and
9837 zero-extended to 64 bits. This is opposite to the normal
9838 justification used on big endian targets and requires
9839 special treatment. */
9840 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9841 gen_rtx_REG (DImode, 28), const0_rtx);
9842 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9846 if ((INTEGRAL_TYPE_P (valtype)
9847 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9848 || POINTER_TYPE_P (valtype))
9849 valmode = word_mode;
9850 else
9851 valmode = TYPE_MODE (valtype);
9853 if (SCALAR_FLOAT_TYPE_P (valtype)
9854 && !AGGREGATE_TYPE_P (valtype)
9855 && TYPE_MODE (valtype) != TFmode
9856 && !TARGET_SOFT_FLOAT)
9857 return gen_rtx_REG (valmode, 32);
9859 return gen_rtx_REG (valmode, 28);
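
/* Editor's sketch of the 32-bit return conventions implemented above
   (hypothetical declarations, hard float assumed):  */
#if 0
int       f (void);  /* returned in %r28 (promoted to word_mode)  */
long long g (void);  /* returned in %r28/%r29 (DImode)            */
double    h (void);  /* returned in register 32, i.e. %fr4        */
#endif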
9862 /* Implement the TARGET_LIBCALL_VALUE hook. */
9864 static rtx
9865 pa_libcall_value (machine_mode mode,
9866 const_rtx fun ATTRIBUTE_UNUSED)
9868 if (! TARGET_SOFT_FLOAT
9869 && (mode == SFmode || mode == DFmode))
9870 return gen_rtx_REG (mode, 32);
9871 else
9872 return gen_rtx_REG (mode, 28);
9875 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9877 static bool
9878 pa_function_value_regno_p (const unsigned int regno)
9880 if (regno == 28
9881 || (! TARGET_SOFT_FLOAT && regno == 32))
9882 return true;
9884 return false;
9887 /* Update the data in CUM to advance over argument ARG. */
9889 static void
9890 pa_function_arg_advance (cumulative_args_t cum_v,
9891 const function_arg_info &arg)
9893 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9894 int arg_size = pa_function_arg_size (arg.mode, arg.type);
9896 cum->nargs_prototype--;
9897 cum->words += (arg_size
9898 + ((cum->words & 01)
9899 && arg.type != NULL_TREE
9900 && arg_size > 1));
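
/* A worked example (editor's note): with cum->words == 1 and a
   prototyped "double" argument (arg_size == 2), the expression above
   adds 2 + 1 -- one word of alignment padding -- leaving
   cum->words == 4.  */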
9903 /* Return the location of a parameter that is passed in a register or NULL
9904 if the parameter has any component that is passed in memory.
9906 This is new code and will be pushed into the net sources after
9907 further testing.
9909 ??? We might want to restructure this so that it looks more like other
9910 ports. */
9911 static rtx
9912 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9914 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9915 tree type = arg.type;
9916 machine_mode mode = arg.mode;
9917 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9918 int alignment = 0;
9919 int arg_size;
9920 int fpr_reg_base;
9921 int gpr_reg_base;
9922 rtx retval;
9924 if (arg.end_marker_p ())
9925 return NULL_RTX;
9927 arg_size = pa_function_arg_size (mode, type);
9928 if (!arg_size)
9929 return NULL_RTX;
9931 /* If this arg would be passed partially or totally on the stack, then
9932 this routine should return zero. pa_arg_partial_bytes will
9933 handle arguments which are split between regs and stack slots if
9934 the ABI mandates split arguments. */
9935 if (!TARGET_64BIT)
9937 /* The 32-bit ABI does not split arguments. */
9938 if (cum->words + arg_size > max_arg_words)
9939 return NULL_RTX;
9941 else
9943 if (arg_size > 1)
9944 alignment = cum->words & 1;
9945 if (cum->words + alignment >= max_arg_words)
9946 return NULL_RTX;
9949 /* The 32-bit and 64-bit ABIs are rather different,
9950 particularly in their handling of FP registers. We might
9951 be able to cleverly share code between them, but I'm not
9952 going to bother in the hope that splitting them up results
9953 in code that is more easily understood. */
9955 if (TARGET_64BIT)
9957 /* Advance the base registers to their current locations.
9959 Remember, gprs grow towards smaller register numbers while
9960 fprs grow to higher register numbers. Also remember that
9961 although FP regs are 32-bit addressable, we pretend that
9962 the registers are 64 bits wide. */
9963 gpr_reg_base = 26 - cum->words;
9964 fpr_reg_base = 32 + cum->words;
9966 /* Arguments wider than one word and small aggregates need special
9967 treatment. */
9968 if (arg_size > 1
9969 || mode == BLKmode
9970 || (type && (AGGREGATE_TYPE_P (type)
9971 || TREE_CODE (type) == COMPLEX_TYPE
9972 || VECTOR_TYPE_P (type))))
9974 /* Double-extended precision (80-bit), quad-precision (128-bit)
9975 and aggregates including complex numbers are aligned on
9976 128-bit boundaries. The first eight 64-bit argument slots
9977 are associated one-to-one, with general registers r26
9978 through r19, and also with floating-point registers fr4
9979 through fr11. Arguments larger than one word are always
9980 passed in general registers.
9982 Using a PARALLEL with a word mode register results in left
9983 justified data on a big-endian target. */
9985 rtx loc[8];
9986 int i, offset = 0, ub = arg_size;
9988 /* Align the base register. */
9989 gpr_reg_base -= alignment;
9991 ub = MIN (ub, max_arg_words - cum->words - alignment);
9992 for (i = 0; i < ub; i++)
9994 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9995 gen_rtx_REG (DImode, gpr_reg_base),
9996 GEN_INT (offset));
9997 gpr_reg_base -= 1;
9998 offset += 8;
10001 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
10004 else
10006 /* If the argument is larger than a word, then we know precisely
10007 which registers we must use. */
10008 if (arg_size > 1)
10010 if (cum->words)
10012 gpr_reg_base = 23;
10013 fpr_reg_base = 38;
10015 else
10017 gpr_reg_base = 25;
10018 fpr_reg_base = 34;
10021 /* Structures 5 to 8 bytes in size are passed in the general
10022 registers in the same manner as other non floating-point
10023 objects. The data is right-justified and zero-extended
10024 to 64 bits. This is opposite to the normal justification
10025 used on big endian targets and requires special treatment.
10026 We now define BLOCK_REG_PADDING to pad these objects.
10027 Aggregates, complex and vector types are passed in the same
10028 manner as structures. */
10029 if (mode == BLKmode
10030 || (type && (AGGREGATE_TYPE_P (type)
10031 || TREE_CODE (type) == COMPLEX_TYPE
10032 || VECTOR_TYPE_P (type))))
10034 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
10035 gen_rtx_REG (DImode, gpr_reg_base),
10036 const0_rtx);
10037 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
10040 else
10042 /* We have a single word (32 bits). A simple computation
10043 will get us the register #s we need. */
10044 gpr_reg_base = 26 - cum->words;
10045 fpr_reg_base = 32 + 2 * cum->words;
10049 /* Determine if the argument needs to be passed in both general and
10050 floating point registers. */
10051 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
10052 /* If we are doing soft-float with portable runtime, then there
10053 is no need to worry about FP regs. */
10054 && !TARGET_SOFT_FLOAT
10055 /* The parameter must be some kind of scalar float, else we just
10056 pass it in integer registers. */
10057 && GET_MODE_CLASS (mode) == MODE_FLOAT
10058 /* The target function must not have a prototype. */
10059 && cum->nargs_prototype <= 0
10060 /* libcalls do not need to pass items in both FP and general
10061 registers. */
10062 && type != NULL_TREE
10063 /* All this hair applies to "outgoing" args only. This includes
10064 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
10065 && !cum->incoming)
10066 /* Also pass outgoing floating arguments in both registers in indirect
10067 calls with the 32-bit ABI and the HP assembler since there is no
10068 way to specify the argument locations in static functions. */
10069 || (!TARGET_64BIT
10070 && !TARGET_GAS
10071 && !cum->incoming
10072 && cum->indirect
10073 && GET_MODE_CLASS (mode) == MODE_FLOAT))
10075 retval
10076 = gen_rtx_PARALLEL
10077 (mode,
10078 gen_rtvec (2,
10079 gen_rtx_EXPR_LIST (VOIDmode,
10080 gen_rtx_REG (mode, fpr_reg_base),
10081 const0_rtx),
10082 gen_rtx_EXPR_LIST (VOIDmode,
10083 gen_rtx_REG (mode, gpr_reg_base),
10084 const0_rtx)));
10086 else
10088 /* See if we should pass this parameter in a general register. */
10089 if (TARGET_SOFT_FLOAT
10090 /* Indirect calls in the normal 32-bit ABI require all arguments
10091 to be passed in general registers. */
10092 || (!TARGET_PORTABLE_RUNTIME
10093 && !TARGET_64BIT
10094 && !TARGET_ELF32
10095 && cum->indirect)
10096 /* If the parameter is not a scalar floating-point parameter,
10097 then it belongs in GPRs. */
10098 || GET_MODE_CLASS (mode) != MODE_FLOAT
10099 /* Structure with single SFmode field belongs in GPR. */
10100 || (type && AGGREGATE_TYPE_P (type)))
10101 retval = gen_rtx_REG (mode, gpr_reg_base);
10102 else
10103 retval = gen_rtx_REG (mode, fpr_reg_base);
10105 return retval;
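
/* Editor's note: the general-register mapping computed above places
   argument slot N in GR 26 - N, so a hypothetical call f (a, b, c, d)
   with four word-sized arguments uses %r26, %r25, %r24 and %r23.  */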
10108 /* Arguments larger than one word are double word aligned. */
10110 static unsigned int
10111 pa_function_arg_boundary (machine_mode mode, const_tree type)
10113 bool singleword = (type
10114 ? (integer_zerop (TYPE_SIZE (type))
10115 || !TREE_CONSTANT (TYPE_SIZE (type))
10116 || int_size_in_bytes (type) <= UNITS_PER_WORD)
10117 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
10119 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
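
/* For example (editor's note), on the 32-bit port a "double" argument
   gets MAX_PARM_BOUNDARY (double-word) alignment, while an "int" or a
   variable-sized type gets PARM_BOUNDARY.  */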
10122 /* If this arg would be passed totally in registers or totally on the stack,
10123 then this routine should return zero. */
10125 static int
10126 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
10128 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10129 unsigned int max_arg_words = 8;
10130 unsigned int offset = 0;
10131 int arg_size;
10133 if (!TARGET_64BIT)
10134 return 0;
10136 arg_size = pa_function_arg_size (arg.mode, arg.type);
10137 if (arg_size > 1 && (cum->words & 1))
10138 offset = 1;
10140 if (cum->words + offset + arg_size <= max_arg_words)
10141 /* Arg fits fully into registers. */
10142 return 0;
10143 else if (cum->words + offset >= max_arg_words)
10144 /* Arg fully on the stack. */
10145 return 0;
10146 else
10147 /* Arg is split. */
10148 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
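
/* A worked example (editor's note): on the 64-bit target, with
   cum->words == 6 and a four-word argument, neither test above holds,
   so (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes go in registers and the
   remaining 16 bytes go on the stack.  */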
10152 /* A get_unnamed_section callback for switching to the text section.
10154 This function is only used with SOM. Because we don't support
10155 named subspaces, we can only create a new subspace or switch back
10156 to the default text subspace. */
10158 static void
10159 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10161 gcc_assert (TARGET_SOM);
10162 if (TARGET_GAS)
10164 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10166 /* We only want to emit a .nsubspa directive once at the
10167 start of the function. */
10168 cfun->machine->in_nsubspa = 1;
10170 /* Create a new subspace for the text. This provides
10171 better stub placement and one-only functions. */
10172 if (cfun->decl
10173 && DECL_ONE_ONLY (cfun->decl)
10174 && !DECL_WEAK (cfun->decl))
10176 output_section_asm_op ("\t.SPACE $TEXT$\n"
10177 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10178 "ACCESS=44,SORT=24,COMDAT");
10179 return;
10182 else
10184 /* There isn't a current function or the body of the current
10185 function has been completed. So, we are changing to the
10186 text section to output debugging information. Thus, we
10187 need to forget that we are in the text section so that
10188 varasm.cc will call us when text_section is selected again. */
10189 gcc_assert (!cfun || !cfun->machine
10190 || cfun->machine->in_nsubspa == 2);
10191 in_section = NULL;
10193 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10194 return;
10196 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10199 /* A get_unnamed_section callback for switching to comdat data
10200 sections. This function is only used with SOM. */
10202 static void
10203 som_output_comdat_data_section_asm_op (const char *data)
10205 in_section = NULL;
10206 output_section_asm_op (data);
10209 /* Implement TARGET_ASM_INIT_SECTIONS. */
10211 static void
10212 pa_som_asm_init_sections (void)
10214 text_section
10215 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10217 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10218 is not being generated. */
10219 som_readonly_data_section
10220 = get_unnamed_section (0, output_section_asm_op,
10221 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10223 /* When secondary definitions are not supported, SOM makes readonly
10224 data one-only by creating a new $LIT$ subspace in $TEXT$ with
10225 the comdat flag. */
10226 som_one_only_readonly_data_section
10227 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10228 "\t.SPACE $TEXT$\n"
10229 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10230 "ACCESS=0x2c,SORT=16,COMDAT");
10233 /* When secondary definitions are not supported, SOM makes data one-only
10234 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
10235 som_one_only_data_section
10236 = get_unnamed_section (SECTION_WRITE,
10237 som_output_comdat_data_section_asm_op,
10238 "\t.SPACE $PRIVATE$\n"
10239 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10240 "ACCESS=31,SORT=24,COMDAT");
10242 if (flag_tm)
10243 som_tm_clone_table_section
10244 = get_unnamed_section (0, output_section_asm_op,
10245 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10247 /* HPUX ld generates incorrect GOT entries for "T" fixups which
10248 reference data within the $TEXT$ space (for example constant
10249 strings in the $LIT$ subspace).
10251 Computing the difference of two symbols would be the other correct
10252 way to reference constant data during PIC code generation, but the
10253 assemblers (GAS and HP as) both have problems handling it.
10255 Thus, we can't put constant data needing relocation in the $TEXT$
10256 space during PIC generation.
10258 Previously, we placed all constant data into the $DATA$ subspace
10259 when generating PIC code. This reduces sharing, but it works
10260 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
10261 This puts constant data not needing relocation into the $TEXT$ space. */
10262 readonly_data_section = som_readonly_data_section;
10264 /* We must not have a reference to an external symbol defined in a
10265 shared library in a readonly section, else the SOM linker will
10266 complain.
10268 So, we force exception information into the data section. */
10269 exception_section = data_section;
10272 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
10274 static section *
10275 pa_som_tm_clone_table_section (void)
10277 return som_tm_clone_table_section;
10280 /* On hpux10, the linker will give an error if we have a reference
10281 in the read-only data section to a symbol defined in a shared
10282 library. Therefore, expressions that might require a reloc
10283 cannot be placed in the read-only data section. */
10285 static section *
10286 pa_select_section (tree exp, int reloc,
10287 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10289 if (TREE_CODE (exp) == VAR_DECL
10290 && TREE_READONLY (exp)
10291 && !TREE_THIS_VOLATILE (exp)
10292 && DECL_INITIAL (exp)
10293 && (DECL_INITIAL (exp) == error_mark_node
10294 || TREE_CONSTANT (DECL_INITIAL (exp)))
10295 && !(reloc & pa_reloc_rw_mask ()))
10297 if (TARGET_SOM
10298 && DECL_ONE_ONLY (exp)
10299 && !DECL_WEAK (exp))
10300 return som_one_only_readonly_data_section;
10301 else
10302 return readonly_data_section;
10304 else if (CONSTANT_CLASS_P (exp)
10305 && !(reloc & pa_reloc_rw_mask ()))
10306 return readonly_data_section;
10307 else if (TARGET_SOM
10308 && TREE_CODE (exp) == VAR_DECL
10309 && DECL_ONE_ONLY (exp)
10310 && !DECL_WEAK (exp))
10311 return som_one_only_data_section;
10312 else
10313 return data_section;
10316 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10317 and the function is in a COMDAT group, place the plabel reference in the
10318 .data.rel.ro.local section. The linker ignores references to symbols in
10319 discarded sections from this section. */
10321 static section *
10322 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10323 unsigned HOST_WIDE_INT align)
10325 if (function_label_operand (x, VOIDmode))
10327 tree decl = SYMBOL_REF_DECL (x);
10329 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10330 return get_named_section (NULL, ".data.rel.ro.local", 1);
10333 return default_elf_select_rtx_section (mode, x, align);
10336 /* Implement pa_reloc_rw_mask. */
10338 static int
10339 pa_reloc_rw_mask (void)
10341 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10342 return 3;
10344 /* HP linker does not support global relocs in readonly memory. */
10345 return TARGET_SOM ? 2 : 0;
10348 static void
10349 pa_globalize_label (FILE *stream, const char *name)
10351 /* We only handle DATA objects here, functions are globalized in
10352 ASM_DECLARE_FUNCTION_NAME. */
10353 if (! FUNCTION_NAME_P (name))
10355 fputs ("\t.EXPORT ", stream);
10356 assemble_name (stream, name);
10357 fputs (",DATA\n", stream);
10361 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10363 static rtx
10364 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10365 int incoming ATTRIBUTE_UNUSED)
10367 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10370 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10372 bool
10373 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10375 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10376 PA64 ABI says that objects larger than 128 bits are returned in memory.
10377 Note, int_size_in_bytes can return -1 if the size of the object is
10378 variable or larger than the maximum value that can be expressed as
10379 a HOST_WIDE_INT. It can also return zero for an empty type. The
10380 simplest way to handle variable and empty types is to pass them in
10381 memory. This avoids problems in defining the boundaries of argument
10382 slots, allocating registers, etc. */
10383 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10384 || int_size_in_bytes (type) <= 0);
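
/* For example (editor's note), a 12-byte structure is returned in
   memory on the 32-bit runtime but in registers on the 64-bit runtime;
   variable-sized and empty types are always returned in memory.  */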
10387 /* Structure to hold declaration and name of external symbols that are
10388 emitted by GCC. We generate a vector of these symbols and output them
10389 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10390 This avoids putting out names that are never really used. */
10392 typedef struct GTY(()) extern_symbol
10394 tree decl;
10395 const char *name;
10396 } extern_symbol;
10398 /* Define gc'd vector type for extern_symbol. */
10400 /* Vector of extern_symbol entries. */
10401 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10403 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10404 /* Mark DECL (name NAME) as an external reference (assembler output
10405 file FILE). This saves the names to output at the end of the file
10406 if actually referenced. */
10408 void
10409 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10411 gcc_assert (file == asm_out_file);
10412 extern_symbol p = {decl, name};
10413 vec_safe_push (extern_symbols, p);
10415 #endif
10417 /* Output text required at the end of an assembler file.
10418 This includes deferred plabels and .import directives for
10419 all external symbols that were actually referenced. */
10421 static void
10422 pa_file_end (void)
10424 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10425 unsigned int i;
10426 extern_symbol *p;
10428 if (!NO_DEFERRED_PROFILE_COUNTERS)
10429 output_deferred_profile_counters ();
10430 #endif
10432 output_deferred_plabels ();
10434 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10435 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10437 tree decl = p->decl;
10439 if (!TREE_ASM_WRITTEN (decl)
10440 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10441 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10444 vec_free (extern_symbols);
10445 #endif
10447 if (NEED_INDICATE_EXEC_STACK)
10448 file_end_indicate_exec_stack ();
10451 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10453 static bool
10454 pa_can_change_mode_class (machine_mode from, machine_mode to,
10455 reg_class_t rclass)
10457 if (from == to)
10458 return true;
10460 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10461 return true;
10463 /* Reject changes to/from modes with zero size. */
10464 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10465 return false;
10467 /* Reject changes to/from complex and vector modes. */
10468 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10469 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10470 return false;
10472 /* There is no way to load QImode or HImode values directly from memory
10473 to a FP register. SImode loads to the FP registers are not zero
10474 extended. On the 64-bit target, this conflicts with the definition
10475 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10476 except for DImode to SImode on the 64-bit target. It is handled by
10477 register renaming in pa_print_operand. */
10478 if (MAYBE_FP_REG_CLASS_P (rclass))
10479 return TARGET_64BIT && from == DImode && to == SImode;
10481 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10482 in specific sets of registers. Thus, we cannot allow changing
10483 to a larger mode when it's larger than a word. */
10484 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10485 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10486 return false;
10488 return true;
10491 /* Implement TARGET_MODES_TIEABLE_P.
10493 We should return FALSE for QImode and HImode because these modes
10494 are not ok in the floating-point registers. However, this prevents
10495 tying these modes to SImode and DImode in the general registers.
10496 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10497 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10498 in the floating-point registers. */
10500 static bool
10501 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10503 /* Don't tie modes in different classes. */
10504 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10505 return false;
10507 return true;
10511 /* Length in units of the trampoline instruction code. */
10513 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10516 /* Output assembler code for a block containing the constant parts
10517 of a trampoline, leaving space for the variable parts.
10519 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10520 and then branches to the specified routine.
10522 This code template is copied from the text segment to a stack
10523 location, patched by pa_trampoline_init to contain valid values,
10524 and then entered as a subroutine.
10526 It is best to keep this as small as possible to avoid having to
10527 flush multiple lines in the cache. */
10529 static void
10530 pa_asm_trampoline_template (FILE *f)
10532 if (!TARGET_64BIT)
10534 if (TARGET_PA_20)
10536 fputs ("\tmfia %r20\n", f);
10537 fputs ("\tldw 48(%r20),%r22\n", f);
10538 fputs ("\tcopy %r22,%r21\n", f);
10539 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10540 fputs ("\tdepwi 0,31,2,%r22\n", f);
10541 fputs ("\tldw 0(%r22),%r21\n", f);
10542 fputs ("\tldw 4(%r22),%r19\n", f);
10543 fputs ("\tbve (%r21)\n", f);
10544 fputs ("\tldw 52(%r20),%r29\n", f);
10545 fputs ("\t.word 0\n", f);
10546 fputs ("\t.word 0\n", f);
10547 fputs ("\t.word 0\n", f);
10549 else
10551 if (ASSEMBLER_DIALECT == 0)
10553 fputs ("\tbl .+8,%r20\n", f);
10554 fputs ("\tdepi 0,31,2,%r20\n", f);
10556 else
10558 fputs ("\tb,l .+8,%r20\n", f);
10559 fputs ("\tdepwi 0,31,2,%r20\n", f);
10561 fputs ("\tldw 40(%r20),%r22\n", f);
10562 fputs ("\tcopy %r22,%r21\n", f);
10563 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10564 if (ASSEMBLER_DIALECT == 0)
10565 fputs ("\tdepi 0,31,2,%r22\n", f);
10566 else
10567 fputs ("\tdepwi 0,31,2,%r22\n", f);
10568 fputs ("\tldw 0(%r22),%r21\n", f);
10569 fputs ("\tldw 4(%r22),%r19\n", f);
10570 fputs ("\tldsid (%r21),%r1\n", f);
10571 fputs ("\tmtsp %r1,%sr0\n", f);
10572 fputs ("\tbe 0(%sr0,%r21)\n", f);
10573 fputs ("\tldw 44(%r20),%r29\n", f);
10575 fputs ("\t.word 0\n", f);
10576 fputs ("\t.word 0\n", f);
10577 fputs ("\t.word 0\n", f);
10578 fputs ("\t.word 0\n", f);
10580 else
10582 fputs ("\t.dword 0\n", f);
10583 fputs ("\t.dword 0\n", f);
10584 fputs ("\t.dword 0\n", f);
10585 fputs ("\t.dword 0\n", f);
10586 fputs ("\tmfia %r31\n", f);
10587 fputs ("\tldd 24(%r31),%r27\n", f);
10588 fputs ("\tldd 32(%r31),%r31\n", f);
10589 fputs ("\tldd 16(%r27),%r1\n", f);
10590 fputs ("\tbve (%r1)\n", f);
10591 fputs ("\tldd 24(%r27),%r27\n", f);
10592 fputs ("\t.dword 0 ; fptr\n", f);
10593 fputs ("\t.dword 0 ; static link\n", f);
10597 /* Emit RTL insns to initialize the variable parts of a trampoline.
10598 FNADDR is an RTX for the address of the function's pure code.
10599 CXT is an RTX for the static chain value for the function.
10601 Move the function address to the trampoline template at offset 48.
10602 Move the static chain value to the trampoline template at offset 52.
10603 Move the trampoline address to the trampoline template at offset 56.
10604 Move r19 to the trampoline template at offset 60. The latter two
10605 words create a plabel for the indirect call to the trampoline.
10607 A similar sequence is used for the 64-bit port but the plabel is
10608 at the beginning of the trampoline.
10610 Finally, the cache entries for the trampoline code are flushed.
10611 This is necessary to ensure that the trampoline instruction sequence
10612 is written to memory prior to any attempts at prefetching the code
10613 sequence. */
10615 static void
10616 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10618 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10619 rtx start_addr = gen_reg_rtx (Pmode);
10620 rtx end_addr = gen_reg_rtx (Pmode);
10621 rtx line_length = gen_reg_rtx (Pmode);
10622 rtx r_tramp, tmp;
10624 emit_block_move (m_tramp, assemble_trampoline_template (),
10625 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10626 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10628 if (!TARGET_64BIT)
10630 tmp = adjust_address (m_tramp, Pmode, 48);
10631 emit_move_insn (tmp, fnaddr);
10632 tmp = adjust_address (m_tramp, Pmode, 52);
10633 emit_move_insn (tmp, chain_value);
10635 /* Create a fat pointer for the trampoline. */
10636 tmp = adjust_address (m_tramp, Pmode, 56);
10637 emit_move_insn (tmp, r_tramp);
10638 tmp = adjust_address (m_tramp, Pmode, 60);
10639 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10641 /* fdc and fic only use registers for the address to flush,
10642 they do not accept integer displacements. We align the
10643 start and end addresses to the beginning of their respective
10644 cache lines to minimize the number of lines flushed. */
10645 emit_insn (gen_andsi3 (start_addr, r_tramp,
10646 GEN_INT (-MIN_CACHELINE_SIZE)));
10647 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10648 TRAMPOLINE_CODE_SIZE-1));
10649 emit_insn (gen_andsi3 (end_addr, tmp,
10650 GEN_INT (-MIN_CACHELINE_SIZE)));
10651 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10652 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10653 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10654 gen_reg_rtx (Pmode),
10655 gen_reg_rtx (Pmode)));
10657 else
10659 tmp = adjust_address (m_tramp, Pmode, 56);
10660 emit_move_insn (tmp, fnaddr);
10661 tmp = adjust_address (m_tramp, Pmode, 64);
10662 emit_move_insn (tmp, chain_value);
10664 /* Create a fat pointer for the trampoline. */
10665 tmp = adjust_address (m_tramp, Pmode, 16);
10666 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10667 r_tramp, 32)));
10668 tmp = adjust_address (m_tramp, Pmode, 24);
10669 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10671 /* fdc and fic only use registers for the address to flush,
10672 they do not accept integer displacements. We align the
10673 start and end addresses to the beginning of their respective
10674 cache lines to minimize the number of lines flushed. */
10675 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10676 emit_insn (gen_anddi3 (start_addr, tmp,
10677 GEN_INT (-MIN_CACHELINE_SIZE)));
10678 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10679 TRAMPOLINE_CODE_SIZE - 1));
10680 emit_insn (gen_anddi3 (end_addr, tmp,
10681 GEN_INT (-MIN_CACHELINE_SIZE)));
10682 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10683 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10684 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10685 gen_reg_rtx (Pmode),
10686 gen_reg_rtx (Pmode)));
10689 #ifdef HAVE_ENABLE_EXECUTE_STACK
10690 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10691 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10692 #endif
10695 /* Perform any machine-specific adjustment in the address of the trampoline.
10696 ADDR contains the address that was passed to pa_trampoline_init.
10697 Adjust the trampoline address to point to the plabel at offset 56. */
10699 static rtx
10700 pa_trampoline_adjust_address (rtx addr)
10702 if (!TARGET_64BIT)
10703 addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10704 return addr;
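
/* Editor's note: 58 is the plabel offset 56 plus 2; the low-order bit
   (PA bit 30) marks the pointer as a plabel, which indirect-call code
   such as the template above tests with "bb,>=,n %r22,30" and strips
   with depwi before loading through the plabel.  */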
10707 static rtx
10708 pa_delegitimize_address (rtx orig_x)
10710 rtx x = delegitimize_mem_from_attrs (orig_x);
10712 if (GET_CODE (x) == LO_SUM
10713 && GET_CODE (XEXP (x, 1)) == UNSPEC
10714 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10715 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10716 return x;
10719 static rtx
10720 pa_internal_arg_pointer (void)
10722 /* The argument pointer and the hard frame pointer are the same in
10723 the 32-bit runtime, so we don't need a copy. */
10724 if (TARGET_64BIT)
10725 return copy_to_reg (virtual_incoming_args_rtx);
10726 else
10727 return virtual_incoming_args_rtx;
10730 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10731 Frame pointer elimination is automatically handled. */
10733 static bool
10734 pa_can_eliminate (const int from, const int to)
10736 /* The argument cannot be eliminated in the 64-bit runtime. */
10737 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10738 return false;
10740 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10741 ? ! frame_pointer_needed
10742 : true);
10745 /* Define the offset between two registers, FROM to be eliminated and its
10746 replacement TO, at the start of a routine. */
10747 HOST_WIDE_INT
10748 pa_initial_elimination_offset (int from, int to)
10750 HOST_WIDE_INT offset;
10752 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10753 && to == STACK_POINTER_REGNUM)
10754 offset = -pa_compute_frame_size (get_frame_size (), 0);
10755 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10756 offset = 0;
10757 else
10758 gcc_unreachable ();
10760 return offset;
10763 static void
10764 pa_conditional_register_usage (void)
10766 int i;
10768 if (!TARGET_64BIT && !TARGET_PA_11)
10770 for (i = 56; i <= FP_REG_LAST; i++)
10771 fixed_regs[i] = call_used_regs[i] = 1;
10772 for (i = 33; i < 56; i += 2)
10773 fixed_regs[i] = call_used_regs[i] = 1;
10775 if (TARGET_SOFT_FLOAT)
10777 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10778 fixed_regs[i] = call_used_regs[i] = 1;
10780 if (flag_pic)
10781 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10784 /* Target hook for c_mode_for_suffix. */
10786 static machine_mode
10787 pa_c_mode_for_suffix (char suffix)
10789 if (HPUX_LONG_DOUBLE_LIBRARY)
10791 if (suffix == 'q')
10792 return TFmode;
10795 return VOIDmode;
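
/* For example (editor's note), with an HP-UX long double library a
   constant written as 1.0q receives TFmode from this hook; when the
   hook returns VOIDmode the 'q' suffix is rejected by the front end.  */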
10798 /* Target hook for function_section. */
10800 static section *
10801 pa_function_section (tree decl, enum node_frequency freq,
10802 bool startup, bool exit)
10804 /* Put functions in text section if target doesn't have named sections. */
10805 if (!targetm_common.have_named_sections)
10806 return text_section;
10808 /* Force nested functions into the same section as the containing
10809 function. */
10810 if (decl
10811 && DECL_SECTION_NAME (decl) == NULL
10812 && DECL_CONTEXT (decl) != NULL_TREE
10813 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10814 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10815 return function_section (DECL_CONTEXT (decl));
10817 /* Otherwise, use the default function section. */
10818 return default_function_section (decl, freq, startup, exit);
10821 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10823 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10824 that need more than three instructions to load prior to reload. This
10825 limit is somewhat arbitrary. It takes three instructions to load a
10826 CONST_INT from memory but two are memory accesses. It may be better
10827 to increase the allowed range for CONST_INTS. We may also be able
10828 to handle CONST_DOUBLES. */
10830 static bool
10831 pa_legitimate_constant_p (machine_mode mode, rtx x)
10833 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10834 return false;
10836 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10837 return false;
10839 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10840 legitimate constants. The other variants can't be handled by
10841 the move patterns after reload starts. */
10842 if (tls_referenced_p (x))
10843 return false;
10845 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10846 return false;
10848 if (TARGET_64BIT
10849 && HOST_BITS_PER_WIDE_INT > 32
10850 && GET_CODE (x) == CONST_INT
10851 && !reload_in_progress
10852 && !reload_completed
10853 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10854 && !pa_cint_ok_for_move (UINTVAL (x)))
10855 return false;
10857 if (function_label_operand (x, mode))
10858 return false;
10860 return true;
10863 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10865 static unsigned int
10866 pa_section_type_flags (tree decl, const char *name, int reloc)
10868 unsigned int flags;
10870 flags = default_section_type_flags (decl, name, reloc);
10872 /* Function labels are placed in the constant pool. This can
10873 cause a section conflict if decls are put in ".data.rel.ro"
10874 or ".data.rel.ro.local" using the __attribute__ construct. */
10875 if (strcmp (name, ".data.rel.ro") == 0
10876 || strcmp (name, ".data.rel.ro.local") == 0)
10877 flags |= SECTION_WRITE | SECTION_RELRO;
10879 return flags;
10882 /* pa_legitimate_address_p recognizes an RTL expression that is a
10883 valid memory address for an instruction. The MODE argument is the
10884 machine mode for the MEM expression that wants to use this address.
10886 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10887 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10888 available with floating point loads and stores, and integer loads.
10889 We get better code by allowing indexed addresses in the initial
10890 RTL generation.
10892 The acceptance of indexed addresses as legitimate implies that we
10893 must provide patterns for doing indexed integer stores, or the move
10894 expanders must force the address of an indexed store to a register.
10895 We have adopted the latter approach.
10897 Another function of pa_legitimate_address_p is to ensure that
10898 the base register is a valid pointer for indexed instructions.
10899 On targets that have non-equivalent space registers, we have to
10900 know at the time of assembler output which register in a REG+REG
10901 pair is the base register. The REG_POINTER flag is sometimes lost
10902 in reload and the following passes, so it can't be relied on during
10903 code generation. Thus, we either have to canonicalize the order
10904 of the registers in REG+REG indexed addresses, or treat REG+REG
10905 addresses separately and provide patterns for both permutations.
10907 The latter approach requires several hundred additional lines of
10908 code in pa.md. The downside to canonicalizing is that a PLUS
10909 in the wrong order can't combine to form a scaled indexed
10910 memory operand. As we won't need to canonicalize the operands if
10911 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10913 We initially break out scaled indexed addresses in canonical order
10914 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10915 scaled indexed addresses during RTL generation. However, fold_rtx
10916 has its own opinion on how the operands of a PLUS should be ordered.
10917 If one of the operands is equivalent to a constant, it will make
10918 that operand the second operand. As the base register is likely to
10919 be equivalent to a SYMBOL_REF, we have made it the second operand.
10921 pa_legitimate_address_p accepts REG+REG as legitimate when the
10922 operands are in the order INDEX+BASE on targets with non-equivalent
10923 space registers, and in any order on targets with equivalent space
10924 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10926 We treat a SYMBOL_REF as legitimate if it is part of the current
10927 function's constant-pool, because such addresses can actually be
10928 output as REG+SMALLINT. */
10930 static bool
10931 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
10933 if ((REG_P (x)
10934 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10935 : REG_OK_FOR_BASE_P (x)))
10936 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10937 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10938 && REG_P (XEXP (x, 0))
10939 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10940 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10941 return true;
10943 if (GET_CODE (x) == PLUS)
10945 rtx base, index;
10947 /* For REG+REG, the base register should be in XEXP (x, 1),
10948 so check it first. */
10949 if (REG_P (XEXP (x, 1))
10950 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10951 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10952 base = XEXP (x, 1), index = XEXP (x, 0);
10953 else if (REG_P (XEXP (x, 0))
10954 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10955 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10956 base = XEXP (x, 0), index = XEXP (x, 1);
10957 else
10958 return false;
10960 if (GET_CODE (index) == CONST_INT)
10962 /* Short 5-bit displacements are always okay. */
10963 if (INT_5_BITS (index))
10964 return true;
10966 if (!base14_operand (index, mode))
10967 return false;
10969 /* Long 14-bit displacements are always okay for these cases. */
10970 if (INT14_OK_STRICT
10971 || reload_completed
10972 || mode == QImode
10973 || mode == HImode)
10974 return true;
10976 /* We have to limit displacements to those supported by
10977 both floating-point and integer accesses as reload can't
10978 fix invalid displacements. See PR114288. */
10979 return false;
10982 if (!TARGET_DISABLE_INDEXING
10983 /* Only accept the "canonical" INDEX+BASE operand order
10984 on targets with non-equivalent space registers. */
10985 && (TARGET_NO_SPACE_REGS
10986 ? REG_P (index)
10987 : (base == XEXP (x, 1) && REG_P (index)
10988 && (reload_completed
10989 || (reload_in_progress && HARD_REGISTER_P (base))
10990 || REG_POINTER (base))
10991 && (reload_completed
10992 || (reload_in_progress && HARD_REGISTER_P (index))
10993 || !REG_POINTER (index))))
10994 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10995 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10996 : REG_OK_FOR_INDEX_P (index))
10997 && borx_reg_operand (base, Pmode)
10998 && borx_reg_operand (index, Pmode))
10999 return true;
11001 if (!TARGET_DISABLE_INDEXING
11002 && GET_CODE (index) == MULT
11003 /* Only accept base operands with the REG_POINTER flag prior to
11004 reload on targets with non-equivalent space registers. */
11005 && (TARGET_NO_SPACE_REGS
11006 || (base == XEXP (x, 1)
11007 && (reload_completed
11008 || (reload_in_progress && HARD_REGISTER_P (base))
11009 || REG_POINTER (base))))
11010 && REG_P (XEXP (index, 0))
11011 && GET_MODE (XEXP (index, 0)) == Pmode
11012 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
11013 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
11014 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
11015 && GET_CODE (XEXP (index, 1)) == CONST_INT
11016 && INTVAL (XEXP (index, 1))
11017 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
11018 && borx_reg_operand (base, Pmode))
11019 return true;
11021 return false;
11024 if (GET_CODE (x) == LO_SUM)
11026 rtx y = XEXP (x, 0);
11028 if (GET_CODE (y) == SUBREG)
11029 y = SUBREG_REG (y);
11031 if (REG_P (y)
11032 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
11033 : REG_OK_FOR_BASE_P (y)))
11035 /* Needed for -fPIC */
11036 if (mode == Pmode
11037 && GET_CODE (XEXP (x, 1)) == UNSPEC)
11038 return true;
11040 if (!INT14_OK_STRICT
11041 && (strict || !(reload_in_progress || reload_completed))
11042 && mode != QImode
11043 && mode != HImode)
11044 return false;
11046 if (CONSTANT_P (XEXP (x, 1)))
11047 return true;
11049 return false;
11052 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
11053 return true;
11055 return false;
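
/* Editor's examples (hypothetical RTL): the routine above accepts

       (plus:SI (reg %r3) (const_int 12))         ; REG+D, 5-bit disp
       (plus:SI (mult:SI (reg %r4) (const_int 4))
                (reg/f %r5))                      ; scaled INDEX+BASE

   but, before reload and with INT14_OK_STRICT false, it now rejects a
   long displacement such as (plus:SI (reg %r3) (const_int 4096)) for
   everything except QImode and HImode accesses (see PR114288).  */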
11058 /* Look for machine dependent ways to make the invalid address AD a
11059 valid address.
11061 For the PA, transform:
11063 memory(X + <large int>)
11065 into:
11067 if (<large int> & mask) >= 16
11068 Y = (<large int> & ~mask) + mask + 1 Round up.
11069 else
11070 Y = (<large int> & ~mask) Round down.
11071 Z = X + Y
11072 memory (Z + (<large int> - Y));
11074 This makes reload inheritance and reload_cse work better since Z
11075 can be reused.
11077 There may be more opportunities to improve code with this hook. */
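
/* A worked example (editor's note): for an SFmode access when
   INT14_OK_STRICT is false, mask is 0x1f.  Given X + 100, we have
   (100 & 0x1f) == 4 < 16, so we round down to Y == 96, reload
   Z = X + 96, and rewrite the address as Z + 4, a valid 5-bit
   displacement.  */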
11079 rtx
11080 pa_legitimize_reload_address (rtx ad, machine_mode mode,
11081 int opnum, int type,
11082 int ind_levels ATTRIBUTE_UNUSED)
11084 long offset, newoffset, mask;
11085 rtx new_rtx, temp = NULL_RTX;
11087 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
11088 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
11090 if (optimize && GET_CODE (ad) == PLUS)
11091 temp = simplify_binary_operation (PLUS, Pmode,
11092 XEXP (ad, 0), XEXP (ad, 1));
11094 new_rtx = temp ? temp : ad;
11096 if (optimize
11097 && GET_CODE (new_rtx) == PLUS
11098 && GET_CODE (XEXP (new_rtx, 0)) == REG
11099 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
11101 offset = INTVAL (XEXP ((new_rtx), 1));
11103 /* Choose rounding direction. Round up if we are >= halfway. */
11104 if ((offset & mask) >= ((mask + 1) / 2))
11105 newoffset = (offset & ~mask) + mask + 1;
11106 else
11107 newoffset = offset & ~mask;
11109 /* Ensure that long displacements are aligned. */
11110 if (mask == 0x3fff
11111 && (GET_MODE_CLASS (mode) == MODE_FLOAT
11112 || (TARGET_64BIT && (mode) == DImode)))
11113 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
11115 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
11117 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
11118 GEN_INT (newoffset));
11119 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
11120 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
11121 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
11122 opnum, (enum reload_type) type);
11123 return ad;
11127 return NULL_RTX;
11130 /* Output address vector. */
11132 void
11133 pa_output_addr_vec (rtx lab, rtx body)
11135 int idx, vlen = XVECLEN (body, 0);
11137 if (!TARGET_SOM)
11138 fputs ("\t.align 4\n", asm_out_file);
11139 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11140 if (TARGET_GAS)
11141 fputs ("\t.begin_brtab\n", asm_out_file);
11142 for (idx = 0; idx < vlen; idx++)
11144 ASM_OUTPUT_ADDR_VEC_ELT
11145 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
11147 if (TARGET_GAS)
11148 fputs ("\t.end_brtab\n", asm_out_file);
11151 /* Output address difference vector. */
11153 void
11154 pa_output_addr_diff_vec (rtx lab, rtx body)
11156 rtx base = XEXP (XEXP (body, 0), 0);
11157 int idx, vlen = XVECLEN (body, 1);
11159 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11160 if (TARGET_GAS)
11161 fputs ("\t.begin_brtab\n", asm_out_file);
11162 for (idx = 0; idx < vlen; idx++)
11164 ASM_OUTPUT_ADDR_DIFF_ELT
11165 (asm_out_file,
11166 body,
11167 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11168 CODE_LABEL_NUMBER (base));
11170 if (TARGET_GAS)
11171 fputs ("\t.end_brtab\n", asm_out_file);
11174 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
11175 arguments passed by hidden reference in the 32-bit HP runtime. Users
11176 can override this behavior for better compatibility with OpenMP at the
11177 risk of library incompatibilities. Arguments are always passed by value
11178 in the 64-bit HP runtime. */
11180 static bool
11181 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11183 return !TARGET_CALLER_COPIES;
11186 /* Implement TARGET_HARD_REGNO_NREGS. */
11188 static unsigned int
11189 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11191 return PA_HARD_REGNO_NREGS (regno, mode);
11194 /* Implement TARGET_HARD_REGNO_MODE_OK. */
11196 static bool
11197 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11199 return PA_HARD_REGNO_MODE_OK (regno, mode);
11202 /* Implement TARGET_STARTING_FRAME_OFFSET.
11204 On the 32-bit ports, we reserve one slot for the previous frame
11205 pointer and one fill slot. The fill slot is for compatibility
11206 with HP compiled programs. On the 64-bit ports, we reserve one
11207 slot for the previous frame pointer. */
11209 static HOST_WIDE_INT
11210 pa_starting_frame_offset (void)
11212 return 8;
11215 /* Figure out the size in words of the function argument. */
11217 int
11218 pa_function_arg_size (machine_mode mode, const_tree type)
11220 HOST_WIDE_INT size;
11222 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11224 /* The 64-bit runtime does not restrict the size of stack frames,
11225 but the gcc calling conventions limit argument sizes to 1G. Our
11226 prologue/epilogue code limits frame sizes to just under 32 bits.
11227 1G is also the maximum frame size that can be handled by the HPUX
11228 unwind descriptor. Since very large TYPE_SIZE_UNIT values can
11229 occur for (parallel:BLK []), we need to ignore large arguments
11230 passed by value. */
11231 if (size >= (1 << (HOST_BITS_PER_INT - 2)))
11232 size = 0;
11233 return (int) CEIL (size, UNITS_PER_WORD);
11236 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
11238 static void
11239 pa_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
11241 const unsigned PA_FE_INEXACT = 1;
11242 const unsigned PA_FE_UNDERFLOW = 2;
11243 const unsigned PA_FE_OVERFLOW = 4;
11244 const unsigned PA_FE_DIVBYZERO = 8;
11245 const unsigned PA_FE_INVALID = 16;
11246 const unsigned HOST_WIDE_INT PA_FE_ALL_EXCEPT = (PA_FE_INVALID
11247 | PA_FE_DIVBYZERO
11248 | PA_FE_OVERFLOW
11249 | PA_FE_UNDERFLOW
11250 | PA_FE_INEXACT);
11251 const unsigned HOST_WIDE_INT PA_FE_EXCEPT_SHIFT = 27;
11252 tree fenv_var, get_fpsr, set_fpsr, mask, ld_fenv, masked_fenv;
11253 tree hold_all, new_fenv_var, reload_fenv, restore_fnenv;
11254 tree get_fpsr_call, set_fpsr_call, update_call, atomic_feraiseexcept;
11256 if (TARGET_SOFT_FLOAT)
11257 return;
11259 /* Generate the equivalent of :
11260 unsigned int fenv_var;
11261 fenv_var = __builtin_get_fpsr ();
11263 unsigned int masked_fenv;
11264 masked_fenv = fenv_var & mask;
11266 __builtin_set_fpsr (masked_fenv); */
11268 fenv_var = create_tmp_var_raw (unsigned_type_node);
11269 get_fpsr = pa_builtins[PA_BUILTIN_GET_FPSR];
11270 set_fpsr = pa_builtins[PA_BUILTIN_SET_FPSR];
11271 mask = build_int_cst (unsigned_type_node,
11272 ~((PA_FE_ALL_EXCEPT << PA_FE_EXCEPT_SHIFT)
11273 | PA_FE_ALL_EXCEPT));
11275 get_fpsr_call = build_call_expr (get_fpsr, 0);
11276 ld_fenv = build4 (TARGET_EXPR, unsigned_type_node,
11277 fenv_var, get_fpsr_call,
11278 NULL_TREE, NULL_TREE);
11279 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
11280 hold_all = build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv);
11281 set_fpsr_call = build_call_expr (set_fpsr, 1, masked_fenv);
11282 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fpsr_call);
11284 /* Store the value of masked_fenv to clear the exceptions:
11285 __builtin_set_fpsr (masked_fenv); */
11287 *clear = set_fpsr_call;
11289 /* Generate the equivalent of :
11290 unsigned int new_fenv_var;
11291 new_fenv_var = __builtin_get_fpsr ();
11293 __builtin_set_fpsr (fenv_var);
11295 __atomic_feraiseexcept (new_fenv_var); */
11297 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
11298 reload_fenv = build4 (TARGET_EXPR, unsigned_type_node, new_fenv_var,
11299 get_fpsr_call, NULL_TREE, NULL_TREE);
11300 restore_fnenv = build_call_expr (set_fpsr, 1, fenv_var);
11301 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
11302 update_call = build_call_expr (atomic_feraiseexcept, 1,
11303 fold_convert (integer_type_node,
11304 new_fenv_var));
11305 *update = build2 (COMPOUND_EXPR, void_type_node,
11306 build2 (COMPOUND_EXPR, void_type_node,
11307 reload_fenv, restore_fnenv), update_call);
11310 #include "gt-pa.h"