hppa: Fix typo in PA 2.0 trampoline template
gcc/config/pa/pa.cc
/* Subroutines for insn-output.cc for HPPA.
   Copyright (C) 1992-2023 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool,
				     code_helper = ERROR_MARK);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */
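/* For example, "-mfixed-range=fr4-fr31" (the kernel-mode case mentioned
   below) marks fr4 through fr31 as fixed and call-used; if that leaves
   no usable floating point registers, this routine also turns on
   MASK_SOFT_FLOAT.  Multiple ranges may be given, separated by
   commas.  */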
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Disable -fstack-protector to suppress warning.  */
  flag_stack_protect = 0;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
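/* For example, 4660 (0x1234) fits in 14 bits, so ldo can materialize
   it; 0x12345000 has its low 11 bits clear, so ldil can; and 0x1f0 is
   a contiguous run of ones that zdepi can deposit.  0x12345 is none
   of these and needs a two-instruction ldil/ldo sequence.  */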
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
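/* For example, pa_ldil_cint_p (0x12345000) is true: the low 11 bits
   are zero and bit 31 is clear.  pa_ldil_cint_p (0x80000000) is false:
   bits 31 and up are neither all zero nor all one, so the 32-bit value
   would change sign when widened to 64 bits.  */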
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
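/* For example, x = 0x1f0 (five one bits starting at bit 4) gives
   lsb_mask = 0x10 and t = ((0x1f0 >> 4) + 0x10) & ~0xf = 0x20, a
   power of two, so zdepi can generate it: deposit the sign-extended
   5-bit value -1 at bit position 4.  */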
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
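/* For example, ~(unsigned HOST_WIDE_INT) 0xf0, all ones except bits
   4-7, is accepted: ~mask = 0xf0, and 0xf0 + 0x10 = 0x100 is a power
   of two.  A mask with two separate holes, such as
   ~(unsigned HOST_WIDE_INT) 0xf0f, is rejected.  */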
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
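/* For example, 0x3c (a single contiguous run of ones) is accepted:
   0x3c + 0x4 = 0x40, a power of two.  0x5 is rejected since its set
   bits are not contiguous: 0x5 + 0x1 = 0x6 is not a power of two.  */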
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
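/* For example, (ashift (reg) (const_int 2)) and (mult (reg)
   (const_int 4)) both satisfy this predicate; they express the same
   scaling by four in, respectively, the canonical shift-add form and
   the scaled memory address form.  */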
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
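/* As a worked example with the MODE_FLOAT mask 0x1f: for
   memory (X + 100), 100 & 0x1f = 4 is below halfway, so Y rounds down
   to 96.  We emit Z = X + 96 and memory (Z + 4), whose displacement
   fits in the 5 bits an FP load/store allows, and a neighboring
   reference such as memory (X + 104) computes the same Z, so CSE
   keeps only one copy of the addition.  */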
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST are cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
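/* For example, (plus (ashift (reg) (const_int 2)) (reg)) and
   (plus (mult (reg) (const_int 4)) (reg)) both match (the sh2add
   case), while a shift count of 4 or a scale of 16 is rejected.  */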
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (outer_code == SET)
	*total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	}
      else if (mode == DImode)
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (25);
	  else
	    *total = COSTS_N_INSNS (80);
	}
      else
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (20);
	}
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
	*total = COSTS_N_INSNS (240);
      else
	*total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (mode == DImode)
	{
	  if (TARGET_64BIT)
	    {
	      *total = COSTS_N_INSNS (1);
	      /* Handle shladd,l instructions.  */
	      if (hppa_rtx_costs_shadd_p (x))
		return true;
	    }
	  else
	    *total = COSTS_N_INSNS (2);
	}
      else
	{
	  *total = COSTS_N_INSNS (1);
	  /* Handle shNadd instructions.  */
	  if (hppa_rtx_costs_shadd_p (x))
	    return true;
	}
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (13);
	  else
	    *total = COSTS_N_INSNS (18);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (14);
	  else
	    *total = COSTS_N_INSNS (19);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else if (speed)
	    *total = COSTS_N_INSNS (12);
	  else
	    *total = COSTS_N_INSNS (15);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  rtx operand0 = operands[0];
  rtx operand1 = operands[1];
  rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases, and reloads for other unsupported
     memory operands.  */
  if (scratch_reg
      && FP_REG_P (operand0)
      && (MEM_P (operand1)
	  || (GET_CODE (operand1) == SUBREG
	      && MEM_P (XEXP (operand1, 0)))))
    {
      rtx op1 = operand1;

      if (GET_CODE (op1) == SUBREG)
	op1 = XEXP (op1, 0);

      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
	{
	  if (!(TARGET_PA_20
		&& !TARGET_ELF32
		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
	    {
	      /* SCRATCH_REG will hold an address and maybe the actual data.
		 We want it in WORD_MODE regardless of what mode it was
		 originally given to us.  */
	      scratch_reg = force_mode (word_mode, scratch_reg);

	      /* D might not fit in 14 bits either; for such cases load D
		 into scratch reg.  */
	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
		{
		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
		  emit_move_insn (scratch_reg,
				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
						  Pmode,
						  XEXP (XEXP (op1, 0), 0),
						  scratch_reg));
		}
	      else
		emit_move_insn (scratch_reg, XEXP (op1, 0));
	      op1 = replace_equiv_address (op1, scratch_reg);
	    }
	}
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
	{
	  /* Load memory address into SCRATCH_REG.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);
	  emit_move_insn (scratch_reg, XEXP (op1, 0));
	  op1 = replace_equiv_address (op1, scratch_reg);
	}
      emit_insn (gen_rtx_SET (operand0, op1));
      return 1;
    }
  else if (scratch_reg
	   && FP_REG_P (operand1)
	   && (MEM_P (operand0)
	       || (GET_CODE (operand0) == SUBREG
		   && MEM_P (XEXP (operand0, 0)))))
    {
      rtx op0 = operand0;

      if (GET_CODE (op0) == SUBREG)
	op0 = XEXP (op0, 0);

      if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
	{
	  if (!(TARGET_PA_20
		&& !TARGET_ELF32
		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
	    {
	      /* SCRATCH_REG will hold an address and maybe the actual data.
		 We want it in WORD_MODE regardless of what mode it was
		 originally given to us.  */
	      scratch_reg = force_mode (word_mode, scratch_reg);

	      /* D might not fit in 14 bits either; for such cases load D
		 into scratch reg.  */
	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
		{
		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
		  emit_move_insn (scratch_reg,
				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
						  Pmode,
						  XEXP (XEXP (op0, 0), 0),
						  scratch_reg));
		}
	      else
		emit_move_insn (scratch_reg, XEXP (op0, 0));
	      op0 = replace_equiv_address (op0, scratch_reg);
1952 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1953 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1954 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1956 /* Load memory address into SCRATCH_REG. */
1957 scratch_reg = force_mode (word_mode, scratch_reg);
1958 emit_move_insn (scratch_reg, XEXP (op0, 0));
1959 op0 = replace_equiv_address (op0, scratch_reg);
1961 emit_insn (gen_rtx_SET (op0, operand1));
1962 return 1;
1964 /* Handle secondary reloads for loads of FP registers from constant
1965 expressions by forcing the constant into memory. For the most part,
1966 this is only necessary for SImode and DImode.
1968 Use scratch_reg to hold the address of the memory location. */
1969 else if (scratch_reg
1970 && CONSTANT_P (operand1)
1971 && FP_REG_P (operand0))
1973 rtx const_mem, xoperands[2];
1975 if (operand1 == CONST0_RTX (mode))
1977 emit_insn (gen_rtx_SET (operand0, operand1));
1978 return 1;
1981 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1982 it in WORD_MODE regardless of what mode it was originally given
1983 to us. */
1984 scratch_reg = force_mode (word_mode, scratch_reg);
1986 /* Force the constant into memory and put the address of the
1987 memory location into scratch_reg. */
1988 const_mem = force_const_mem (mode, operand1);
1989 xoperands[0] = scratch_reg;
1990 xoperands[1] = XEXP (const_mem, 0);
1991 pa_emit_move_sequence (xoperands, Pmode, 0);
1993 /* Now load the destination register. */
1994 emit_insn (gen_rtx_SET (operand0,
1995 replace_equiv_address (const_mem, scratch_reg)));
1996 return 1;
1998 /* Handle secondary reloads for SAR. These occur when trying to load
1999 the SAR from memory or a constant. */
2000 else if (scratch_reg
2001 && GET_CODE (operand0) == REG
2002 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2003 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2004 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2006 /* D might not fit in 14 bits either; for such cases load D into
2007 scratch reg. */
2008 if (GET_CODE (operand1) == MEM
2009 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2011 /* We are reloading the address into the scratch register, so we
2012 want to make sure the scratch register is a full register. */
2013 scratch_reg = force_mode (word_mode, scratch_reg);
2015 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2016 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2017 0)),
2018 Pmode,
2019 XEXP (XEXP (operand1, 0),
2020 0),
2021 scratch_reg));
2023 /* Now we are going to load the scratch register from memory,
2024 we want to load it in the same width as the original MEM,
2025 which must be the same as the width of the ultimate destination,
2026 OPERAND0. */
2027 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2029 emit_move_insn (scratch_reg,
2030 replace_equiv_address (operand1, scratch_reg));
2032 else
2034 /* We want to load the scratch register using the same mode as
2035 the ultimate destination. */
2036 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2038 emit_move_insn (scratch_reg, operand1);
2041 /* And emit the insn to set the ultimate destination. We know that
2042 the scratch register has the same mode as the destination at this
2043 point. */
2044 emit_move_insn (operand0, scratch_reg);
2045 return 1;
2048 /* Handle the most common case: storing into a register. */
2049 if (register_operand (operand0, mode))
2051 /* Legitimize TLS symbol references. This happens for references
2052 that aren't legitimate constants.  */
2053 if (PA_SYMBOL_REF_TLS_P (operand1))
2054 operand1 = legitimize_tls_address (operand1);
2056 if (register_operand (operand1, mode)
2057 || (GET_CODE (operand1) == CONST_INT
2058 && pa_cint_ok_for_move (UINTVAL (operand1)))
2059 || (operand1 == CONST0_RTX (mode))
2060 || (GET_CODE (operand1) == HIGH
2061 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2062 /* Only `general_operands' can come here, so MEM is ok. */
2063 || GET_CODE (operand1) == MEM)
2065 /* Various sets are created during RTL generation which don't
2066 have the REG_POINTER flag correctly set. After the CSE pass,
2067 instruction recognition can fail if we don't consistently
2068 set this flag when performing register copies. This should
2069 also improve the opportunities for creating insns that use
2070 unscaled indexing. */
2071 if (REG_P (operand0) && REG_P (operand1))
2073 if (REG_POINTER (operand1)
2074 && !REG_POINTER (operand0)
2075 && !HARD_REGISTER_P (operand0))
2076 copy_reg_pointer (operand0, operand1);
2079 /* When MEMs are broken out, the REG_POINTER flag doesn't
2080 get set. In some cases, we can set the REG_POINTER flag
2081 from the declaration for the MEM. */
2082 if (REG_P (operand0)
2083 && GET_CODE (operand1) == MEM
2084 && !REG_POINTER (operand0))
2086 tree decl = MEM_EXPR (operand1);
2088 /* Set the register pointer flag and register alignment
2089 if the declaration for this memory reference is a
2090 pointer type. */
2091 if (decl)
2093 tree type;
2095 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2096 tree operand 1. */
2097 if (TREE_CODE (decl) == COMPONENT_REF)
2098 decl = TREE_OPERAND (decl, 1);
2100 type = TREE_TYPE (decl);
2101 type = strip_array_types (type);
2103 if (POINTER_TYPE_P (type))
2104 mark_reg_pointer (operand0, BITS_PER_UNIT);
2108 emit_insn (gen_rtx_SET (operand0, operand1));
2109 return 1;
2112 else if (GET_CODE (operand0) == MEM)
2114 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2115 && !(reload_in_progress || reload_completed))
2117 rtx temp = gen_reg_rtx (DFmode);
2119 emit_insn (gen_rtx_SET (temp, operand1));
2120 emit_insn (gen_rtx_SET (operand0, temp));
2121 return 1;
2123 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2125 /* Run this case quickly. */
2126 emit_insn (gen_rtx_SET (operand0, operand1));
2127 return 1;
2129 if (! (reload_in_progress || reload_completed))
2131 operands[0] = validize_mem (operand0);
2132 operands[1] = operand1 = force_reg (mode, operand1);
2136 /* Simplify the source if we need to.
2137 Note we do have to handle function labels here, even though we do
2138 not consider them legitimate constants. Loop optimizations can
2139 call emit_move_xxx with one as a source.  */
2140 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2141 || (GET_CODE (operand1) == HIGH
2142 && symbolic_operand (XEXP (operand1, 0), mode))
2143 || function_label_operand (operand1, VOIDmode)
2144 || tls_referenced_p (operand1))
2146 int ishighonly = 0;
2148 if (GET_CODE (operand1) == HIGH)
2150 ishighonly = 1;
2151 operand1 = XEXP (operand1, 0);
2153 if (symbolic_operand (operand1, mode))
2155 /* Argh. The assembler and linker can't handle arithmetic
2156 involving plabels.
2158 So we force the plabel into memory, load operand0 from
2159 the memory location, then add in the constant part. */
2160 if ((GET_CODE (operand1) == CONST
2161 && GET_CODE (XEXP (operand1, 0)) == PLUS
2162 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2163 VOIDmode))
2164 || function_label_operand (operand1, VOIDmode))
2166 rtx temp, const_part;
2168 /* Figure out what (if any) scratch register to use. */
2169 if (reload_in_progress || reload_completed)
2171 scratch_reg = scratch_reg ? scratch_reg : operand0;
2172 /* SCRATCH_REG will hold an address and maybe the actual
2173 data. We want it in WORD_MODE regardless of what mode it
2174 was originally given to us. */
2175 scratch_reg = force_mode (word_mode, scratch_reg);
2177 else if (flag_pic)
2178 scratch_reg = gen_reg_rtx (Pmode);
2180 if (GET_CODE (operand1) == CONST)
2182 /* Save away the constant part of the expression. */
2183 const_part = XEXP (XEXP (operand1, 0), 1);
2184 gcc_assert (GET_CODE (const_part) == CONST_INT);
2186 /* Force the function label into memory. */
2187 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2189 else
2191 /* No constant part. */
2192 const_part = NULL_RTX;
2194 /* Force the function label into memory. */
2195 temp = force_const_mem (mode, operand1);
2199 /* Get the address of the memory location. PIC-ify it if
2200 necessary. */
2201 temp = XEXP (temp, 0);
2202 if (flag_pic)
2203 temp = legitimize_pic_address (temp, mode, scratch_reg);
2205 /* Put the address of the memory location into our destination
2206 register. */
2207 operands[1] = temp;
2208 pa_emit_move_sequence (operands, mode, scratch_reg);
2210 /* Now load from the memory location into our destination
2211 register. */
2212 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2213 pa_emit_move_sequence (operands, mode, scratch_reg);
2215 /* And add back in the constant part. */
2216 if (const_part != NULL_RTX)
2217 expand_inc (operand0, const_part);
2219 return 1;
2222 if (flag_pic)
2224 rtx_insn *insn;
2225 rtx temp;
2227 if (reload_in_progress || reload_completed)
2229 temp = scratch_reg ? scratch_reg : operand0;
2230 /* TEMP will hold an address and maybe the actual
2231 data. We want it in WORD_MODE regardless of what mode it
2232 was originally given to us. */
2233 temp = force_mode (word_mode, temp);
2235 else
2236 temp = gen_reg_rtx (Pmode);
2238 /* Force (const (plus (symbol) (const_int))) to memory
2239 if the const_int will not fit in 14 bits. Although
2240 this requires a relocation, the instruction sequence
2241 needed to load the value is shorter. */
2242 if (GET_CODE (operand1) == CONST
2243 && GET_CODE (XEXP (operand1, 0)) == PLUS
2244 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2245 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2247 rtx x, m = force_const_mem (mode, operand1);
2249 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2250 x = replace_equiv_address (m, x);
2251 insn = emit_move_insn (operand0, x);
2253 else
2255 operands[1] = legitimize_pic_address (operand1, mode, temp);
2256 if (REG_P (operand0) && REG_P (operands[1]))
2257 copy_reg_pointer (operand0, operands[1]);
2258 insn = emit_move_insn (operand0, operands[1]);
2261 /* Put a REG_EQUAL note on this insn. */
2262 set_unique_reg_note (insn, REG_EQUAL, operand1);
2264 /* On the HPPA, references to data space are supposed to use dp,
2265 register 27, but showing it in the RTL inhibits various cse
2266 and loop optimizations. */
2267 else
2269 rtx temp, set;
2271 if (reload_in_progress || reload_completed)
2273 temp = scratch_reg ? scratch_reg : operand0;
2274 /* TEMP will hold an address and maybe the actual
2275 data. We want it in WORD_MODE regardless of what mode it
2276 was originally given to us. */
2277 temp = force_mode (word_mode, temp);
2279 else
2280 temp = gen_reg_rtx (mode);
2282 /* Loading a SYMBOL_REF into a register makes that register
2283 safe to be used as the base in an indexed address.
2285 Don't mark hard registers though. That loses. */
2286 if (GET_CODE (operand0) == REG
2287 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2288 mark_reg_pointer (operand0, BITS_PER_UNIT);
2289 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2290 mark_reg_pointer (temp, BITS_PER_UNIT);
2292 if (ishighonly)
2293 set = gen_rtx_SET (operand0, temp);
2294 else
2295 set = gen_rtx_SET (operand0,
2296 gen_rtx_LO_SUM (mode, temp, operand1));
2298 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2299 emit_insn (set);
2302 return 1;
2304 else if (tls_referenced_p (operand1))
2306 rtx tmp = operand1;
2307 rtx addend = NULL;
2309 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2311 addend = XEXP (XEXP (tmp, 0), 1);
2312 tmp = XEXP (XEXP (tmp, 0), 0);
2315 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2316 tmp = legitimize_tls_address (tmp);
2317 if (addend)
2319 tmp = gen_rtx_PLUS (mode, tmp, addend);
2320 tmp = force_operand (tmp, operands[0]);
2322 operands[1] = tmp;
2324 else if (GET_CODE (operand1) != CONST_INT
2325 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2327 rtx temp;
2328 rtx_insn *insn;
2329 rtx op1 = operand1;
2330 HOST_WIDE_INT value = 0;
2331 HOST_WIDE_INT insv = 0;
2332 int insert = 0;
2334 if (GET_CODE (operand1) == CONST_INT)
2335 value = INTVAL (operand1);
2337 if (TARGET_64BIT
2338 && GET_CODE (operand1) == CONST_INT
2339 && HOST_BITS_PER_WIDE_INT > 32
2340 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2342 HOST_WIDE_INT nval;
2344 /* Extract the low order 32 bits of the value and sign extend.
2345 If the new value is the same as the original value, we can
2346 use the original value as-is.  If the new value is
2347 different, we use it and insert the most significant 32 bits
2348 of the original value into the final result. */
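/* The AND/XOR/subtract idiom below sign-extends bit 31 of VALUE into
   the upper bits without relying on implementation-defined right
   shifts of negative values.  */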
2349 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2350 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2351 if (value != nval)
2353 #if HOST_BITS_PER_WIDE_INT > 32
2354 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2355 #endif
2356 insert = 1;
2357 value = nval;
2358 operand1 = GEN_INT (nval);
2362 if (reload_in_progress || reload_completed)
2363 temp = scratch_reg ? scratch_reg : operand0;
2364 else
2365 temp = gen_reg_rtx (mode);
2367 /* We don't directly split DImode constants on 32-bit targets
2368 because PLUS uses an 11-bit immediate and the insn sequence
2369 generated is not as efficient as the one using HIGH/LO_SUM. */
2370 if (GET_CODE (operand1) == CONST_INT
2371 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2372 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2373 && !insert)
2375 /* Directly break constant into high and low parts. This
2376 provides better optimization opportunities because various
2377 passes recognize constants split with PLUS but not LO_SUM.
2378 We use a 14-bit signed low part except when the addition
2379 of 0x4000 to the high part might change the sign of the
2380 high part. */
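/* Illustrative example: for VALUE 0x12346789 the initial split gives
   low = 0x2789 and high = 0x12344000; since low >= 0x2000, high is
   bumped to 0x12348000 and low becomes the 14-bit signed value
   -0x1877.  */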
2381 HOST_WIDE_INT low = value & 0x3fff;
2382 HOST_WIDE_INT high = value & ~ 0x3fff;
2384 if (low >= 0x2000)
2386 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2387 high += 0x2000;
2388 else
2389 high += 0x4000;
2392 low = value - high;
2394 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2395 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2397 else
2399 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2400 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2403 insn = emit_move_insn (operands[0], operands[1]);
2405 /* Now insert the most significant 32 bits of the value
2406 into the register. When we don't have a second register
2407 available, it could take up to nine instructions to load
2408 a 64-bit integer constant. Prior to reload, we force
2409 constants that would take more than three instructions
2410 to load to the constant pool. During and after reload,
2411 we have to handle all possible values. */
2412 if (insert)
2414 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2415 register and the value to be inserted is outside the
2416 range that can be loaded with three depdi instructions. */
2417 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2419 operand1 = GEN_INT (insv);
2421 emit_insn (gen_rtx_SET (temp,
2422 gen_rtx_HIGH (mode, operand1)));
2423 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2424 if (mode == DImode)
2425 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2426 const0_rtx, temp));
2427 else
2428 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2429 const0_rtx, temp));
2431 else
2433 int len = 5, pos = 27;
2435 /* Insert the bits using the depdi instruction. */
2436 while (pos >= 0)
2438 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2439 HOST_WIDE_INT sign = v5 < 0;
2441 /* Left extend the insertion. */
2442 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2443 while (pos > 0 && (insv & 1) == sign)
2445 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2446 len += 1;
2447 pos -= 1;
2450 if (mode == DImode)
2451 insn = emit_insn (gen_insvdi (operand0,
2452 GEN_INT (len),
2453 GEN_INT (pos),
2454 GEN_INT (v5)));
2455 else
2456 insn = emit_insn (gen_insvsi (operand0,
2457 GEN_INT (len),
2458 GEN_INT (pos),
2459 GEN_INT (v5)));
2461 len = pos > 0 && pos < 5 ? pos : 5;
2462 pos -= len;
2467 set_unique_reg_note (insn, REG_EQUAL, op1);
2469 return 1;
2472 /* Now have insn-emit do whatever it normally does. */
2473 return 0;
2476 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2477 it will need a link/runtime reloc). */
2479 int
2480 pa_reloc_needed (tree exp)
2482 int reloc = 0;
2484 switch (TREE_CODE (exp))
2486 case ADDR_EXPR:
2487 return 1;
2489 case POINTER_PLUS_EXPR:
2490 case PLUS_EXPR:
2491 case MINUS_EXPR:
2492 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2493 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2494 break;
2496 CASE_CONVERT:
2497 case NON_LVALUE_EXPR:
2498 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2499 break;
2501 case CONSTRUCTOR:
2503 tree value;
2504 unsigned HOST_WIDE_INT ix;
2506 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2507 if (value)
2508 reloc |= pa_reloc_needed (value);
2510 break;
2512 case ERROR_MARK:
2513 break;
2515 default:
2516 break;
2518 return reloc;
2522 /* Return the best assembler insn template
2523 for moving operands[1] into operands[0] as a fullword. */
2524 const char *
2525 pa_singlemove_string (rtx *operands)
2527 HOST_WIDE_INT intval;
2529 if (GET_CODE (operands[0]) == MEM)
2530 return "stw %r1,%0";
2531 if (GET_CODE (operands[1]) == MEM)
2532 return "ldw %1,%0";
2533 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2535 long i;
2537 gcc_assert (GET_MODE (operands[1]) == SFmode);
2539 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2540 bit pattern. */
2541 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2543 operands[1] = GEN_INT (i);
2544 /* Fall through to CONST_INT case. */
2546 if (GET_CODE (operands[1]) == CONST_INT)
2548 intval = INTVAL (operands[1]);
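/* Pick the cheapest sequence: ldi for 14-bit signed values, a lone
   ldil when the low 11 bits are zero (ldil supplies the upper 21
   bits), a single deposit when pa_zdepi_cint_p allows it, and a
   two-insn ldil/ldo pair otherwise.  */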
2550 if (VAL_14_BITS_P (intval))
2551 return "ldi %1,%0";
2552 else if ((intval & 0x7ff) == 0)
2553 return "ldil L'%1,%0";
2554 else if (pa_zdepi_cint_p (intval))
2555 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2556 else
2557 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2559 return "copy %1,%0";
2563 /* Compute position (in OP[1]) and width (in OP[2])
2564 useful for copying IMM to a register using the zdepi
2565 instructions. Store the immediate value to insert in OP[0]. */
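/* Illustrative example: IMM = 0x00f00000 has its least significant set
   bit at position 20, so OP[0] = 0xf, OP[1] = 31 - 20 = 11 and
   OP[2] = 4; depositing the 4-bit value 15 so that it ends at PA bit
   position 11 recreates the constant.  */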
2566 static void
2567 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2569 int lsb, len;
2571 /* Find the least significant set bit in IMM. */
2572 for (lsb = 0; lsb < 32; lsb++)
2574 if ((imm & 1) != 0)
2575 break;
2576 imm >>= 1;
2579 /* Choose variants based on *sign* of the 5-bit field. */
2580 if ((imm & 0x10) == 0)
2581 len = (lsb <= 28) ? 4 : 32 - lsb;
2582 else
2584 /* Find the width of the bitstring in IMM. */
2585 for (len = 5; len < 32 - lsb; len++)
2587 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2588 break;
2591 /* Sign extend IMM as a 5-bit value. */
2592 imm = (imm & 0xf) - 0x10;
2595 op[0] = imm;
2596 op[1] = 31 - lsb;
2597 op[2] = len;
2600 /* Compute position (in OP[1]) and width (in OP[2])
2601 useful for copying IMM to a register using the depdi,z
2602 instructions. Store the immediate value to insert in OP[0]. */
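/* Illustrative example: on a 64-bit host, IMM = 0xffffffffffff0000 has
   its least significant set bit at position 16 and a 48-bit field of
   ones, so OP[0] = -1, OP[1] = 63 - 16 = 47 and OP[2] = 48; a
   `depdi,z -1,47,48' recreates the constant.  */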
2604 static void
2605 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2607 int lsb, len, maxlen;
2609 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2611 /* Find the least significant set bit in IMM. */
2612 for (lsb = 0; lsb < maxlen; lsb++)
2614 if ((imm & 1) != 0)
2615 break;
2616 imm >>= 1;
2619 /* Choose variants based on *sign* of the 5-bit field. */
2620 if ((imm & 0x10) == 0)
2621 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2622 else
2624 /* Find the width of the bitstring in IMM. */
2625 for (len = 5; len < maxlen - lsb; len++)
2627 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2628 break;
2631 /* Extend length if host is narrow and IMM is negative. */
2632 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2633 len += 32;
2635 /* Sign extend IMM as a 5-bit value. */
2636 imm = (imm & 0xf) - 0x10;
2639 op[0] = imm;
2640 op[1] = 63 - lsb;
2641 op[2] = len;
2644 /* Output assembler code to perform a doubleword move insn
2645 with operands OPERANDS. */
2647 const char *
2648 pa_output_move_double (rtx *operands)
2650 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2651 rtx latehalf[2];
2652 rtx addreg0 = 0, addreg1 = 0;
2653 int highonly = 0;
2655 /* First classify both operands. */
2657 if (REG_P (operands[0]))
2658 optype0 = REGOP;
2659 else if (offsettable_memref_p (operands[0]))
2660 optype0 = OFFSOP;
2661 else if (GET_CODE (operands[0]) == MEM)
2662 optype0 = MEMOP;
2663 else
2664 optype0 = RNDOP;
2666 if (REG_P (operands[1]))
2667 optype1 = REGOP;
2668 else if (CONSTANT_P (operands[1]))
2669 optype1 = CNSTOP;
2670 else if (offsettable_memref_p (operands[1]))
2671 optype1 = OFFSOP;
2672 else if (GET_CODE (operands[1]) == MEM)
2673 optype1 = MEMOP;
2674 else
2675 optype1 = RNDOP;
2677 /* Check for the cases that the operand constraints are not
2678 supposed to allow to happen. */
2679 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2681 /* Handle copies between general and floating registers. */
2683 if (optype0 == REGOP && optype1 == REGOP
2684 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2686 if (FP_REG_P (operands[0]))
2688 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2689 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2690 return "{fldds|fldd} -16(%%sp),%0";
2692 else
2694 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2695 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2696 return "{ldws|ldw} -12(%%sp),%R0";
2700 /* Handle auto decrementing and incrementing loads and stores
2701 specifically, since the structure of the function doesn't work
2702 for them without major modification.  Do it better when we teach
2703 this port about the general inc/dec addressing of the PA.
2704 (This was written by tege. Chide him if it doesn't work.) */
2706 if (optype0 == MEMOP)
2708 /* We have to output the address syntax ourselves, since print_operand
2709 doesn't deal with the addresses we want to use. Fix this later. */
2711 rtx addr = XEXP (operands[0], 0);
2712 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2714 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2716 operands[0] = XEXP (addr, 0);
2717 gcc_assert (GET_CODE (operands[1]) == REG
2718 && GET_CODE (operands[0]) == REG);
2720 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2722 /* No overlap between high target register and address
2723 register. (We do this in a non-obvious way to
2724 save a register file writeback) */
2725 if (GET_CODE (addr) == POST_INC)
2726 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2727 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2729 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2731 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2733 operands[0] = XEXP (addr, 0);
2734 gcc_assert (GET_CODE (operands[1]) == REG
2735 && GET_CODE (operands[0]) == REG);
2737 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2738 /* No overlap between high target register and address
2739 register. (We do this in a non-obvious way to save a
2740 register file writeback) */
2741 if (GET_CODE (addr) == PRE_INC)
2742 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2743 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2746 if (optype1 == MEMOP)
2748 /* We have to output the address syntax ourselves, since print_operand
2749 doesn't deal with the addresses we want to use. Fix this later. */
2751 rtx addr = XEXP (operands[1], 0);
2752 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2754 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2756 operands[1] = XEXP (addr, 0);
2757 gcc_assert (GET_CODE (operands[0]) == REG
2758 && GET_CODE (operands[1]) == REG);
2760 if (!reg_overlap_mentioned_p (high_reg, addr))
2762 /* No overlap between high target register and address
2763 register. (We do this in a non-obvious way to
2764 save a register file writeback) */
2765 if (GET_CODE (addr) == POST_INC)
2766 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2767 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2769 else
2771 /* This is an undefined situation. We should load into the
2772 address register *and* update that register. Probably
2773 we don't need to handle this at all. */
2774 if (GET_CODE (addr) == POST_INC)
2775 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2776 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2779 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2781 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2783 operands[1] = XEXP (addr, 0);
2784 gcc_assert (GET_CODE (operands[0]) == REG
2785 && GET_CODE (operands[1]) == REG);
2787 if (!reg_overlap_mentioned_p (high_reg, addr))
2789 /* No overlap between high target register and address
2790 register. (We do this in a non-obvious way to
2791 save a register file writeback) */
2792 if (GET_CODE (addr) == PRE_INC)
2793 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2794 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2796 else
2798 /* This is an undefined situation. We should load into the
2799 address register *and* update that register. Probably
2800 we don't need to handle this at all. */
2801 if (GET_CODE (addr) == PRE_INC)
2802 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2803 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2806 else if (GET_CODE (addr) == PLUS
2807 && GET_CODE (XEXP (addr, 0)) == MULT)
2809 rtx xoperands[4];
2811 /* Load address into left half of destination register. */
2812 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2813 xoperands[1] = XEXP (addr, 1);
2814 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2815 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2816 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2817 xoperands);
2818 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2820 else if (GET_CODE (addr) == PLUS
2821 && REG_P (XEXP (addr, 0))
2822 && REG_P (XEXP (addr, 1)))
2824 rtx xoperands[3];
2826 /* Load address into left half of destination register. */
2827 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2828 xoperands[1] = XEXP (addr, 0);
2829 xoperands[2] = XEXP (addr, 1);
2830 output_asm_insn ("{addl|add,l} %1,%2,%0",
2831 xoperands);
2832 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2836 /* If an operand is an unoffsettable memory ref, find a register
2837 we can increment temporarily to make it refer to the second word. */
2839 if (optype0 == MEMOP)
2840 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2842 if (optype1 == MEMOP)
2843 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2845 /* Ok, we can do one word at a time.
2846 Normally we do the low-numbered word first.
2848 In either case, set up in LATEHALF the operands to use
2849 for the high-numbered word and in some cases alter the
2850 operands in OPERANDS to be suitable for the low-numbered word. */
2852 if (optype0 == REGOP)
2853 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2854 else if (optype0 == OFFSOP)
2855 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2856 else
2857 latehalf[0] = operands[0];
2859 if (optype1 == REGOP)
2860 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2861 else if (optype1 == OFFSOP)
2862 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2863 else if (optype1 == CNSTOP)
2865 if (GET_CODE (operands[1]) == HIGH)
2867 operands[1] = XEXP (operands[1], 0);
2868 highonly = 1;
2870 split_double (operands[1], &operands[1], &latehalf[1]);
2872 else
2873 latehalf[1] = operands[1];
2875 /* If the first move would clobber the source of the second one,
2876 do them in the other order.
2878 This can happen in two cases:
2880 mem -> register where the first half of the destination register
2881 is the same register used in the memory's address. Reload
2882 can create such insns.
2884 mem in this case will be either register indirect or register
2885 indirect plus a valid offset.
2887 register -> register move where REGNO(dst) == REGNO(src + 1)
2888 someone (Tim/Tege?) claimed this can happen for parameter loads.
2890 Handle mem -> register case first. */
2891 if (optype0 == REGOP
2892 && (optype1 == MEMOP || optype1 == OFFSOP)
2893 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2895 /* Do the late half first. */
2896 if (addreg1)
2897 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2898 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2900 /* Then clobber. */
2901 if (addreg1)
2902 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2903 return pa_singlemove_string (operands);
2906 /* Now handle register -> register case. */
2907 if (optype0 == REGOP && optype1 == REGOP
2908 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2910 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2911 return pa_singlemove_string (operands);
2914 /* Normal case: do the two words, low-numbered first. */
2916 output_asm_insn (pa_singlemove_string (operands), operands);
2918 /* Make any unoffsettable addresses point at high-numbered word. */
2919 if (addreg0)
2920 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2921 if (addreg1)
2922 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2924 /* Do high-numbered word. */
2925 if (highonly)
2926 output_asm_insn ("ldil L'%1,%0", latehalf);
2927 else
2928 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2930 /* Undo the adds we just did. */
2931 if (addreg0)
2932 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2933 if (addreg1)
2934 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2936 return "";
2939 const char *
2940 pa_output_fp_move_double (rtx *operands)
2942 if (FP_REG_P (operands[0]))
2944 if (FP_REG_P (operands[1])
2945 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2946 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2947 else
2948 output_asm_insn ("fldd%F1 %1,%0", operands);
2950 else if (FP_REG_P (operands[1]))
2952 output_asm_insn ("fstd%F0 %1,%0", operands);
2954 else
2956 rtx xoperands[2];
2958 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2960 /* This is a pain. You have to be prepared to deal with an
2961 arbitrary address here, including pre/post increment/decrement,
2963 so we avoid this in the MD.  */
2964 gcc_assert (GET_CODE (operands[0]) == REG);
2966 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2967 xoperands[0] = operands[0];
2968 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2970 return "";
2973 /* Return a REG that occurs in ADDR with coefficient 1.
2974 ADDR can be effectively incremented by incrementing REG. */
2976 static rtx
2977 find_addr_reg (rtx addr)
2979 while (GET_CODE (addr) == PLUS)
2981 if (GET_CODE (XEXP (addr, 0)) == REG)
2982 addr = XEXP (addr, 0);
2983 else if (GET_CODE (XEXP (addr, 1)) == REG)
2984 addr = XEXP (addr, 1);
2985 else if (CONSTANT_P (XEXP (addr, 0)))
2986 addr = XEXP (addr, 1);
2987 else if (CONSTANT_P (XEXP (addr, 1)))
2988 addr = XEXP (addr, 0);
2989 else
2990 gcc_unreachable ();
2992 gcc_assert (GET_CODE (addr) == REG);
2993 return addr;
2996 /* Emit code to perform a block move.
2998 OPERANDS[0] is the destination pointer as a REG, clobbered.
2999 OPERANDS[1] is the source pointer as a REG, clobbered.
3000 OPERANDS[2] is a register for temporary storage.
3001 OPERANDS[3] is a register for temporary storage.
3002 OPERANDS[4] is the size as a CONST_INT
3003 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3004 OPERANDS[6] is another temporary register. */
3006 const char *
3007 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3009 int align = INTVAL (operands[5]);
3010 unsigned long n_bytes = INTVAL (operands[4]);
3012 /* We can't move more than a word at a time because the PA
3013 has no integer move insns wider than a word.  (Could use fp mem ops?)  */
3014 if (align > (TARGET_64BIT ? 8 : 4))
3015 align = (TARGET_64BIT ? 8 : 4);
3017 /* Note that we know each loop below will execute at least twice
3018 (else we would have open-coded the copy). */
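/* In each loop below, the addib,>= both decrements the byte counter
   and branches back to the first load (.-12); the final store issues
   in the branch's delay slot.  */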
3019 switch (align)
3021 case 8:
3022 /* Pre-adjust the loop counter. */
3023 operands[4] = GEN_INT (n_bytes - 16);
3024 output_asm_insn ("ldi %4,%2", operands);
3026 /* Copying loop. */
3027 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3028 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3029 output_asm_insn ("std,ma %3,8(%0)", operands);
3030 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3031 output_asm_insn ("std,ma %6,8(%0)", operands);
3033 /* Handle the residual.  There could be up to 15 bytes of
3034 residual to copy! */
3035 if (n_bytes % 16 != 0)
3037 operands[4] = GEN_INT (n_bytes % 8);
3038 if (n_bytes % 16 >= 8)
3039 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3040 if (n_bytes % 8 != 0)
3041 output_asm_insn ("ldd 0(%1),%6", operands);
3042 if (n_bytes % 16 >= 8)
3043 output_asm_insn ("std,ma %3,8(%0)", operands);
3044 if (n_bytes % 8 != 0)
3045 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3047 return "";
3049 case 4:
3050 /* Pre-adjust the loop counter. */
3051 operands[4] = GEN_INT (n_bytes - 8);
3052 output_asm_insn ("ldi %4,%2", operands);
3054 /* Copying loop. */
3055 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3056 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3057 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3058 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3059 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3061 /* Handle the residual. There could be up to 7 bytes of
3062 residual to copy! */
3063 if (n_bytes % 8 != 0)
3065 operands[4] = GEN_INT (n_bytes % 4);
3066 if (n_bytes % 8 >= 4)
3067 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3068 if (n_bytes % 4 != 0)
3069 output_asm_insn ("ldw 0(%1),%6", operands);
3070 if (n_bytes % 8 >= 4)
3071 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3072 if (n_bytes % 4 != 0)
3073 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3075 return "";
3077 case 2:
3078 /* Pre-adjust the loop counter. */
3079 operands[4] = GEN_INT (n_bytes - 4);
3080 output_asm_insn ("ldi %4,%2", operands);
3082 /* Copying loop. */
3083 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3084 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3085 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3086 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3087 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3089 /* Handle the residual. */
3090 if (n_bytes % 4 != 0)
3092 if (n_bytes % 4 >= 2)
3093 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3094 if (n_bytes % 2 != 0)
3095 output_asm_insn ("ldb 0(%1),%6", operands);
3096 if (n_bytes % 4 >= 2)
3097 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3098 if (n_bytes % 2 != 0)
3099 output_asm_insn ("stb %6,0(%0)", operands);
3101 return "";
3103 case 1:
3104 /* Pre-adjust the loop counter. */
3105 operands[4] = GEN_INT (n_bytes - 2);
3106 output_asm_insn ("ldi %4,%2", operands);
3108 /* Copying loop. */
3109 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3110 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3111 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3112 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3113 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3115 /* Handle the residual. */
3116 if (n_bytes % 2 != 0)
3118 output_asm_insn ("ldb 0(%1),%3", operands);
3119 output_asm_insn ("stb %3,0(%0)", operands);
3121 return "";
3123 default:
3124 gcc_unreachable ();
3128 /* Count the number of insns necessary to handle this block move.
3130 Basic structure is the same as pa_output_block_move, except that we
3131 count insns rather than emit them. */
3133 static int
3134 compute_cpymem_length (rtx_insn *insn)
3136 rtx pat = PATTERN (insn);
3137 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3138 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3139 unsigned int n_insns = 0;
3141 /* We can't move more than a word at a time because the PA
3142 has no integer move insns wider than a word.  (Could use fp mem ops?)  */
3143 if (align > (TARGET_64BIT ? 8 : 4))
3144 align = (TARGET_64BIT ? 8 : 4);
3146 /* The basic copying loop. */
3147 n_insns = 6;
3149 /* Residuals. */
3150 if (n_bytes % (2 * align) != 0)
3152 if ((n_bytes % (2 * align)) >= align)
3153 n_insns += 2;
3155 if ((n_bytes % align) != 0)
3156 n_insns += 2;
3159 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3160 return n_insns * 4;
3163 /* Emit code to perform a block clear.
3165 OPERANDS[0] is the destination pointer as a REG, clobbered.
3166 OPERANDS[1] is a register for temporary storage.
3167 OPERANDS[2] is the size as a CONST_INT
3168 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3170 const char *
3171 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3173 int align = INTVAL (operands[3]);
3174 unsigned long n_bytes = INTVAL (operands[2]);
3176 /* We can't clear more than a word at a time because the PA
3177 has no integer move insns wider than a word.  */
3178 if (align > (TARGET_64BIT ? 8 : 4))
3179 align = (TARGET_64BIT ? 8 : 4);
3181 /* Note that we know each loop below will execute at least twice
3182 (else we would have open-coded the clear).  */
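/* In each loop below, the addib,>= branches back over a single store
   (.-4) while the second store issues in its delay slot.  */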
3183 switch (align)
3185 case 8:
3186 /* Pre-adjust the loop counter. */
3187 operands[2] = GEN_INT (n_bytes - 16);
3188 output_asm_insn ("ldi %2,%1", operands);
3190 /* Loop. */
3191 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3192 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3193 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3195 /* Handle the residual.  There could be up to 15 bytes of
3196 residual to clear!  */
3197 if (n_bytes % 16 != 0)
3199 operands[2] = GEN_INT (n_bytes % 8);
3200 if (n_bytes % 16 >= 8)
3201 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3202 if (n_bytes % 8 != 0)
3203 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3205 return "";
3207 case 4:
3208 /* Pre-adjust the loop counter. */
3209 operands[2] = GEN_INT (n_bytes - 8);
3210 output_asm_insn ("ldi %2,%1", operands);
3212 /* Loop. */
3213 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3214 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3215 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3217 /* Handle the residual. There could be up to 7 bytes of
3218 residual to clear!  */
3219 if (n_bytes % 8 != 0)
3221 operands[2] = GEN_INT (n_bytes % 4);
3222 if (n_bytes % 8 >= 4)
3223 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3224 if (n_bytes % 4 != 0)
3225 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3227 return "";
3229 case 2:
3230 /* Pre-adjust the loop counter. */
3231 operands[2] = GEN_INT (n_bytes - 4);
3232 output_asm_insn ("ldi %2,%1", operands);
3234 /* Loop. */
3235 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3236 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3237 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3239 /* Handle the residual. */
3240 if (n_bytes % 4 != 0)
3242 if (n_bytes % 4 >= 2)
3243 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3244 if (n_bytes % 2 != 0)
3245 output_asm_insn ("stb %%r0,0(%0)", operands);
3247 return "";
3249 case 1:
3250 /* Pre-adjust the loop counter. */
3251 operands[2] = GEN_INT (n_bytes - 2);
3252 output_asm_insn ("ldi %2,%1", operands);
3254 /* Loop. */
3255 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3256 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3257 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3259 /* Handle the residual. */
3260 if (n_bytes % 2 != 0)
3261 output_asm_insn ("stb %%r0,0(%0)", operands);
3263 return "";
3265 default:
3266 gcc_unreachable ();
3270 /* Count the number of insns necessary to handle this block clear.
3272 Basic structure is the same as pa_output_block_clear, except that we
3273 count insns rather than emit them. */
3275 static int
3276 compute_clrmem_length (rtx_insn *insn)
3278 rtx pat = PATTERN (insn);
3279 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3280 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3281 unsigned int n_insns = 0;
3283 /* We can't clear more than a word at a time because the PA
3284 has no integer move insns wider than a word.  */
3285 if (align > (TARGET_64BIT ? 8 : 4))
3286 align = (TARGET_64BIT ? 8 : 4);
3288 /* The basic loop. */
3289 n_insns = 4;
3291 /* Residuals. */
3292 if (n_bytes % (2 * align) != 0)
3294 if ((n_bytes % (2 * align)) >= align)
3295 n_insns++;
3297 if ((n_bytes % align) != 0)
3298 n_insns++;
3301 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3302 return n_insns * 4;
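/* Return a string to perform a bitwise-and of operands[1] with
   operands[2] storing the result in operands[0].  A nonzero constant
   mask reaching here is either a block of low-order ones (a single
   extract) or contains exactly one contiguous field of zeros (a
   single deposit).  Illustrative example: the mask 0xfffffc0f clears
   the 6-bit field at bits 4..9 via `depwi 0,27,6,%0'.  */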
3306 const char *
3307 pa_output_and (rtx *operands)
3309 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3311 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3312 int ls0, ls1, ms0, p, len;
3314 for (ls0 = 0; ls0 < 32; ls0++)
3315 if ((mask & (1 << ls0)) == 0)
3316 break;
3318 for (ls1 = ls0; ls1 < 32; ls1++)
3319 if ((mask & (1 << ls1)) != 0)
3320 break;
3322 for (ms0 = ls1; ms0 < 32; ms0++)
3323 if ((mask & (1 << ms0)) == 0)
3324 break;
3326 gcc_assert (ms0 == 32);
3328 if (ls1 == 32)
3330 len = ls0;
3332 gcc_assert (len);
3334 operands[2] = GEN_INT (len);
3335 return "{extru|extrw,u} %1,31,%2,%0";
3337 else
3339 /* We could use this `depi' for the case above as well, but `depi'
3340 requires one more register file access than an `extru'. */
3342 p = 31 - ls0;
3343 len = ls1 - ls0;
3345 operands[2] = GEN_INT (p);
3346 operands[3] = GEN_INT (len);
3347 return "{depi|depwi} 0,%2,%3,%0";
3350 else
3351 return "and %1,%2,%0";
3354 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3355 storing the result in operands[0]. */
3356 const char *
3357 pa_output_64bit_and (rtx *operands)
3359 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3361 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3362 int ls0, ls1, ms0, p, len;
3364 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3365 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3366 break;
3368 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3369 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3370 break;
3372 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3373 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3374 break;
3376 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3378 if (ls1 == HOST_BITS_PER_WIDE_INT)
3380 len = ls0;
3382 gcc_assert (len);
3384 operands[2] = GEN_INT (len);
3385 return "extrd,u %1,63,%2,%0";
3387 else
3389 /* We could use this `depdi' for the case above as well, but `depdi'
3390 requires one more register file access than an `extrd,u'.  */
3392 p = 63 - ls0;
3393 len = ls1 - ls0;
3395 operands[2] = GEN_INT (p);
3396 operands[3] = GEN_INT (len);
3397 return "depdi 0,%2,%3,%0";
3400 else
3401 return "and %1,%2,%0";
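/* Return a string to perform a bitwise-ior of operands[1] with
   operands[2] storing the result in operands[0].  A nonzero constant
   mask must be a single contiguous field of ones, which is set with
   one deposit.  Illustrative example: the mask 0x00ff0000 sets bits
   16..23 via `depwi -1,15,8,%0'.  */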
3404 const char *
3405 pa_output_ior (rtx *operands)
3407 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3408 int bs0, bs1, p, len;
3410 if (INTVAL (operands[2]) == 0)
3411 return "copy %1,%0";
3413 for (bs0 = 0; bs0 < 32; bs0++)
3414 if ((mask & (1 << bs0)) != 0)
3415 break;
3417 for (bs1 = bs0; bs1 < 32; bs1++)
3418 if ((mask & (1 << bs1)) == 0)
3419 break;
3421 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3423 p = 31 - bs0;
3424 len = bs1 - bs0;
3426 operands[2] = GEN_INT (p);
3427 operands[3] = GEN_INT (len);
3428 return "{depi|depwi} -1,%2,%3,%0";
3431 /* Return a string to perform a bitwise-ior of operands[1] with operands[2]
3432 storing the result in operands[0]. */
3433 const char *
3434 pa_output_64bit_ior (rtx *operands)
3436 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3437 int bs0, bs1, p, len;
3439 if (INTVAL (operands[2]) == 0)
3440 return "copy %1,%0";
3442 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3443 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3444 break;
3446 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3447 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3448 break;
3450 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3451 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3453 p = 63 - bs0;
3454 len = bs1 - bs0;
3456 operands[2] = GEN_INT (p);
3457 operands[3] = GEN_INT (len);
3458 return "depdi -1,%2,%3,%0";
3461 /* Target hook for assembling integer objects. This code handles
3462 aligned SI and DI integers specially since function references
3463 must be preceded by P%. */
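/* Presumably, the P% prefix asks the linker for a plabel (a function
   descriptor) rather than the raw code address; see the fast indirect
   call handling below.  */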
3465 static bool
3466 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3468 bool result;
3469 tree decl = NULL;
3471 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3472 assemble_external and set the SYMBOL_REF_DECL to NULL before
3473 calling output_addr_const. Otherwise, it may call assemble_external
3474 in the midst of outputting the assembler code for the SYMBOL_REF.
3475 We restore the SYMBOL_REF_DECL after the output is done. */
3476 if (GET_CODE (x) == SYMBOL_REF)
3478 decl = SYMBOL_REF_DECL (x);
3479 if (decl)
3481 assemble_external (decl);
3482 SET_SYMBOL_REF_DECL (x, NULL);
3486 if (size == UNITS_PER_WORD
3487 && aligned_p
3488 && function_label_operand (x, VOIDmode))
3490 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3492 /* We don't want an OPD when generating fast indirect calls. */
3493 if (!TARGET_FAST_INDIRECT_CALLS)
3494 fputs ("P%", asm_out_file);
3496 output_addr_const (asm_out_file, x);
3497 fputc ('\n', asm_out_file);
3498 result = true;
3500 else
3501 result = default_assemble_integer (x, size, aligned_p);
3503 if (decl)
3504 SET_SYMBOL_REF_DECL (x, decl);
3506 return result;
3509 /* Output an ascii string. */
3510 void
3511 pa_output_ascii (FILE *file, const char *p, int size)
3513 int i;
3514 int chars_output;
3515 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3517 /* The HP assembler can only take strings of 256 characters at one
3518 time. This is a limitation on input line length, *not* the
3519 length of the string. Sigh. Even worse, it seems that the
3520 restriction is in number of input characters (see \xnn &
3521 \whatever). So we have to do this very carefully. */
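/* Each input byte expands to at most four output characters (a \xNN
   escape), so flushing whenever a chunk would push the count past 243
   keeps every emitted line safely inside the limit once the directive
   and quotes are counted.  */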
3523 fputs ("\t.STRING \"", file);
3525 chars_output = 0;
3526 for (i = 0; i < size; i += 4)
3528 int co = 0;
3529 int io = 0;
3530 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3532 unsigned int c = (unsigned char) p[i + io];
3534 if (c == '\"' || c == '\\')
3535 partial_output[co++] = '\\';
3536 if (c >= ' ' && c < 0177)
3537 partial_output[co++] = c;
3538 else
3540 unsigned int hexd;
3541 partial_output[co++] = '\\';
3542 partial_output[co++] = 'x';
3543 hexd = c / 16 - 0 + '0';
3544 if (hexd > '9')
3545 hexd -= '9' - 'a' + 1;
3546 partial_output[co++] = hexd;
3547 hexd = c % 16 - 0 + '0';
3548 if (hexd > '9')
3549 hexd -= '9' - 'a' + 1;
3550 partial_output[co++] = hexd;
3553 if (chars_output + co > 243)
3555 fputs ("\"\n\t.STRING \"", file);
3556 chars_output = 0;
3558 fwrite (partial_output, 1, (size_t) co, file);
3559 chars_output += co;
3560 co = 0;
3562 fputs ("\"\n", file);
3565 /* Try to rewrite floating point comparisons & branches to avoid
3566 useless add,tr insns.
3568 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3569 to see if FPCC is dead.  It is nonzero for the
3570 first attempt to remove useless add,tr insns, and zero
3571 for the second pass as reorg sometimes leaves bogus REG_DEAD
3572 notes lying around.
3574 When CHECK_NOTES is zero we can only eliminate add,tr insns
3575 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3576 instructions. */
3577 static void
3578 remove_useless_addtr_insns (int check_notes)
3580 rtx_insn *insn;
3581 static int pass = 0;
3583 /* This is fairly cheap, so always run it when optimizing. */
3584 if (optimize > 0)
3586 int fcmp_count = 0;
3587 int fbranch_count = 0;
3589 /* Walk all the insns in this function looking for fcmp & fbranch
3590 instructions. Keep track of how many of each we find. */
3591 for (insn = get_insns (); insn; insn = next_insn (insn))
3593 rtx tmp;
3595 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3596 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3597 continue;
3599 tmp = PATTERN (insn);
3601 /* It must be a set. */
3602 if (GET_CODE (tmp) != SET)
3603 continue;
3605 /* If the destination is CCFP, then we've found an fcmp insn. */
3606 tmp = SET_DEST (tmp);
3607 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3609 fcmp_count++;
3610 continue;
3613 tmp = PATTERN (insn);
3614 /* If this is an fbranch instruction, bump the fbranch counter. */
3615 if (GET_CODE (tmp) == SET
3616 && SET_DEST (tmp) == pc_rtx
3617 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3618 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3619 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3620 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3622 fbranch_count++;
3623 continue;
3628 /* Find all floating point compare + branch insns. If possible,
3629 reverse the comparison & the branch to avoid add,tr insns. */
3630 for (insn = get_insns (); insn; insn = next_insn (insn))
3632 rtx tmp;
3633 rtx_insn *next;
3635 /* Ignore anything that isn't an INSN. */
3636 if (! NONJUMP_INSN_P (insn))
3637 continue;
3639 tmp = PATTERN (insn);
3641 /* It must be a set. */
3642 if (GET_CODE (tmp) != SET)
3643 continue;
3645 /* The destination must be CCFP, which is register zero. */
3646 tmp = SET_DEST (tmp);
3647 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3648 continue;
3650 /* INSN should be a set of CCFP.
3652 See if the result of this insn is used in a reversed FP
3653 conditional branch. If so, reverse our condition and
3654 the branch. Doing so avoids useless add,tr insns. */
3655 next = next_insn (insn);
3656 while (next)
3658 /* Jumps, calls and labels stop our search. */
3659 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3660 break;
3662 /* As does another fcmp insn. */
3663 if (NONJUMP_INSN_P (next)
3664 && GET_CODE (PATTERN (next)) == SET
3665 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3666 && REGNO (SET_DEST (PATTERN (next))) == 0)
3667 break;
3669 next = next_insn (next);
3672 /* Is NEXT a branch insn?  */
3673 if (next && JUMP_P (next))
3675 rtx pattern = PATTERN (next);
3677 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3678 and CCFP dies, then reverse our conditional and the branch
3679 to avoid the add,tr. */
3680 if (GET_CODE (pattern) == SET
3681 && SET_DEST (pattern) == pc_rtx
3682 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3683 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3684 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3685 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3686 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3687 && (fcmp_count == fbranch_count
3688 || (check_notes
3689 && find_regno_note (next, REG_DEAD, 0))))
3691 /* Reverse the branch. */
3692 tmp = XEXP (SET_SRC (pattern), 1);
3693 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3694 XEXP (SET_SRC (pattern), 2) = tmp;
3695 INSN_CODE (next) = -1;
3697 /* Reverse our condition. */
3698 tmp = PATTERN (insn);
3699 PUT_CODE (XEXP (tmp, 1),
3700 (reverse_condition_maybe_unordered
3701 (GET_CODE (XEXP (tmp, 1)))));
3707 pass = !pass;
3711 /* You may have trouble believing this, but this is the 32 bit HP-PA
3712 stack layout. Wow.
3714 Offset Contents
3716 Variable arguments (optional; any number may be allocated)
3718 SP-(4*(N+9)) arg word N
3720 SP-56 arg word 5
3721 SP-52 arg word 4
3723 Fixed arguments (must be allocated; may remain unused)
3725 SP-48 arg word 3
3726 SP-44 arg word 2
3727 SP-40 arg word 1
3728 SP-36 arg word 0
3730 Frame Marker
3732 SP-32 External Data Pointer (DP)
3733 SP-28 External sr4
3734 SP-24 External/stub RP (RP')
3735 SP-20 Current RP
3736 SP-16 Static Link
3737 SP-12 Clean up
3738 SP-8 Calling Stub RP (RP'')
3739 SP-4 Previous SP
3741 Top of Frame
3743 SP-0 Stack Pointer (points to next available address)
3747 /* This function saves registers as follows. Registers marked with ' are
3748 this function's registers (as opposed to the previous function's).
3749 If a frame_pointer isn't needed, r4 is saved as a general register;
3750 the space for the frame pointer is still allocated, though, to keep
3751 things simple.
3754 Top of Frame
3756 SP (FP') Previous FP
3757 SP + 4 Alignment filler (sigh)
3758 SP + 8 Space for locals reserved here.
3762 SP + n All call-saved registers used.
3766 SP + o All call-saved fp registers used.
3770 SP + p (SP') points to next available address.
3774 /* Global variables set by pa_expand_prologue ().  */
3775 /* Size of frame. Need to know this to emit return insns from
3776 leaf procedures. */
3777 static HOST_WIDE_INT actual_fsize, local_fsize;
3778 static int save_fregs;
3780 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3781 Handle case where DISP > 8k by using the add_high_const patterns.
3783 Note in DISP > 8k case, we will leave the high part of the address
3784 in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
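/* Once the address is formed in a temporary register, the store no
   longer matches BASE+DISP textually, so a REG_FRAME_RELATED_EXPR
   note records the intended effect for the CFI/unwind machinery.  */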
3786 static void
3787 store_reg (int reg, HOST_WIDE_INT disp, int base)
3789 rtx dest, src, basereg;
3790 rtx_insn *insn;
3792 src = gen_rtx_REG (word_mode, reg);
3793 basereg = gen_rtx_REG (Pmode, base);
3794 if (VAL_14_BITS_P (disp))
3796 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3797 insn = emit_move_insn (dest, src);
3799 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3801 rtx delta = GEN_INT (disp);
3802 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3804 emit_move_insn (tmpreg, delta);
3805 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3806 if (DO_FRAME_NOTES)
3808 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3809 gen_rtx_SET (tmpreg,
3810 gen_rtx_PLUS (Pmode, basereg, delta)));
3811 RTX_FRAME_RELATED_P (insn) = 1;
3813 dest = gen_rtx_MEM (word_mode, tmpreg);
3814 insn = emit_move_insn (dest, src);
3816 else
3818 rtx delta = GEN_INT (disp);
3819 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3820 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3822 emit_move_insn (tmpreg, high);
3823 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3824 insn = emit_move_insn (dest, src);
3825 if (DO_FRAME_NOTES)
3826 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3827 gen_rtx_SET (gen_rtx_MEM (word_mode,
3828 gen_rtx_PLUS (word_mode,
3829 basereg,
3830 delta)),
3831 src));
3834 if (DO_FRAME_NOTES)
3835 RTX_FRAME_RELATED_P (insn) = 1;
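/* A sketch of the output (illustrative, not actual compiler output):
   on a 32-bit target the VAL_14_BITS_P case of store_reg amounts to a
   single

       stw %reg,disp(%base)

   and the HIGH/LO_SUM case to the usual two-instruction PA idiom

       addil L'disp,%base        ; %r1 = %base + left portion of disp
       stw %reg,R'disp(%r1)      ; right portion folded into the store

   which is why the high part of the address remains in %r1, as the
   comment above notes. */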
3838 /* Emit RTL to store REG at the memory location specified by BASE and then
3839 add MOD to BASE. MOD must be <= 8k. */
3841 static void
3842 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3844 rtx basereg, srcreg, delta;
3845 rtx_insn *insn;
3847 gcc_assert (VAL_14_BITS_P (mod));
3849 basereg = gen_rtx_REG (Pmode, base);
3850 srcreg = gen_rtx_REG (word_mode, reg);
3851 delta = GEN_INT (mod);
3853 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3854 if (DO_FRAME_NOTES)
3856 RTX_FRAME_RELATED_P (insn) = 1;
3858 /* RTX_FRAME_RELATED_P must be set on each frame related set
3859 in a parallel with more than one element. */
3860 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3861 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
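/* Sketch of the effect: store_reg_modify (STACK_POINTER_REGNUM, 1, 64)
   behaves like "stwm %r1,64(%sp)" (PA 1.x dialect), storing %r1 at *%sp
   and post-incrementing %sp by 64 in a single instruction. */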
3865 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3866 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3867 whether to add a frame note or not.
3869 In the DISP > 8k case, we leave the high part of the address in %r1.
3870 There is code in pa_expand_{prologue,epilogue} that knows about this. */
3872 static void
3873 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3875 rtx_insn *insn;
3877 if (VAL_14_BITS_P (disp))
3879 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3880 plus_constant (Pmode,
3881 gen_rtx_REG (Pmode, base), disp));
3883 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3885 rtx basereg = gen_rtx_REG (Pmode, base);
3886 rtx delta = GEN_INT (disp);
3887 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3889 emit_move_insn (tmpreg, delta);
3890 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3891 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3892 if (DO_FRAME_NOTES)
3893 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3894 gen_rtx_SET (tmpreg,
3895 gen_rtx_PLUS (Pmode, basereg, delta)));
3897 else
3899 rtx basereg = gen_rtx_REG (Pmode, base);
3900 rtx delta = GEN_INT (disp);
3901 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3903 emit_move_insn (tmpreg,
3904 gen_rtx_PLUS (Pmode, basereg,
3905 gen_rtx_HIGH (Pmode, delta)));
3906 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3907 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3910 if (DO_FRAME_NOTES && note)
3911 RTX_FRAME_RELATED_P (insn) = 1;
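/* For instance (illustration only), the VAL_14_BITS_P case of
   set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 64, 1)
   amounts to a single "ldo 64(%sp),%sp". */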
3914 HOST_WIDE_INT
3915 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3917 int freg_saved = 0;
3918 int i, j;
3920 /* The code in pa_expand_prologue and pa_expand_epilogue must
3921 be consistent with the rounding and size calculation done here.
3922 Change them at the same time. */
3924 /* We do our own stack alignment. First, round the size of the
3925 stack locals up to a word boundary. */
3926 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3928 /* Space for previous frame pointer + filler. If any frame is
3929 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3930 waste some space here for the sake of HP compatibility. The
3931 first slot is only used when the frame pointer is needed. */
3932 if (size || frame_pointer_needed)
3933 size += pa_starting_frame_offset ();
3935 /* If the current function calls __builtin_eh_return, then we need
3936 to allocate stack space for registers that will hold data for
3937 the exception handler. */
3938 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3940 unsigned int i;
3942 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3943 continue;
3944 size += i * UNITS_PER_WORD;
3947 /* Account for space used by the callee general register saves. */
3948 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3949 if (df_regs_ever_live_p (i))
3950 size += UNITS_PER_WORD;
3952 /* Account for space used by the callee floating point register saves. */
3953 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3954 if (df_regs_ever_live_p (i)
3955 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3957 freg_saved = 1;
3959 /* We always save both halves of the FP register, so always
3960 increment the frame size by 8 bytes. */
3961 size += 8;
3964 /* If any of the floating registers are saved, account for the
3965 alignment needed for the floating point register save block. */
3966 if (freg_saved)
3968 size = (size + 7) & ~7;
3969 if (fregs_live)
3970 *fregs_live = 1;
3973 /* The various ABIs include space for the outgoing parameters in the
3974 size of the current function's stack frame. We don't need to align
3975 for the outgoing arguments as their alignment is set by the final
3976 rounding for the frame as a whole. */
3977 size += crtl->outgoing_args_size;
3979 /* Allocate space for the fixed frame marker. This space must be
3980 allocated for any function that makes calls or allocates
3981 stack space. */
3982 if (!crtl->is_leaf || size)
3983 size += TARGET_64BIT ? 48 : 32;
3985 /* Finally, round to the preferred stack boundary. */
3986 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3987 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
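/* A worked example (32-bit target, ignoring callee saves and outgoing
   arguments): 101 bytes of locals round up to 104; the frame pointer
   slot and filler add pa_starting_frame_offset () = 8 (locals start at
   SP + 8 in the layout above), giving 112; a non-leaf function adds the
   32-byte frame marker, giving 144; the final rounding, assuming the
   usual 32-bit preferred stack boundary of 64 bytes, yields a frame of
   192 bytes. */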
3990 /* Output function label, and associated .PROC and .CALLINFO statements. */
3992 void
3993 pa_output_function_label (FILE *file)
3995 /* The function's label and associated .PROC must never be
3996 separated and must be output *after* any profiling declarations
3997 to avoid changing spaces/subspaces within a procedure. */
3998 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3999 fputs ("\t.PROC\n", file);
4001 /* pa_expand_prologue does the dirty work now. We just need
4002 to output the assembler directives which denote the start
4003 of a function. */
4004 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4005 if (crtl->is_leaf)
4006 fputs (",NO_CALLS", file);
4007 else
4008 fputs (",CALLS", file);
4009 if (rp_saved)
4010 fputs (",SAVE_RP", file);
4012 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4013 at the beginning of the frame and that it is used as the frame
4014 pointer for the frame. We do this because our current frame
4015 layout doesn't conform to that specified in the HP runtime
4016 documentation and we need a way to indicate to programs such as
4017 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4018 isn't used by HP compilers but is supported by the assembler.
4019 However, SAVE_SP is supposed to indicate that the previous stack
4020 pointer has been saved in the frame marker. */
4021 if (frame_pointer_needed)
4022 fputs (",SAVE_SP", file);
4024 /* Pass on information about the number of callee register saves
4025 performed in the prologue.
4027 The compiler is supposed to pass the highest register number
4028 saved, the assembler then has to adjust that number before
4029 entering it into the unwind descriptor (to account for any
4030 caller saved registers with lower register numbers than the
4031 first callee saved register). */
4032 if (gr_saved)
4033 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4035 if (fr_saved)
4036 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4038 fputs ("\n\t.ENTRY\n", file);
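/* For a hypothetical non-leaf function with a 128-byte frame that saves
   RP and three callee general registers, the directives emitted here
   would look roughly like:

       .PROC
       .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=5
       .ENTRY

   with ENTRY_GR = gr_saved + 2 = 5 per the adjustment described above. */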
4041 /* Output function prologue. */
4043 static void
4044 pa_output_function_prologue (FILE *file)
4046 pa_output_function_label (file);
4047 remove_useless_addtr_insns (0);
4050 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4052 static void
4053 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4055 remove_useless_addtr_insns (0);
4058 void
4059 pa_expand_prologue (void)
4061 int merge_sp_adjust_with_store = 0;
4062 HOST_WIDE_INT size = get_frame_size ();
4063 HOST_WIDE_INT offset;
4064 int i;
4065 rtx tmpreg;
4066 rtx_insn *insn;
4068 gr_saved = 0;
4069 fr_saved = 0;
4070 save_fregs = 0;
4072 /* Compute total size for frame pointer, filler, locals and rounding to
4073 the next word boundary. Similar code appears in pa_compute_frame_size
4074 and must be changed in tandem with this code. */
4075 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4076 if (local_fsize || frame_pointer_needed)
4077 local_fsize += pa_starting_frame_offset ();
4079 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4080 if (flag_stack_usage_info)
4081 current_function_static_stack_size = actual_fsize;
4083 /* Compute a few things we will use often. */
4084 tmpreg = gen_rtx_REG (word_mode, 1);
4086 /* Save RP first. The calling conventions manual states RP will
4087 always be stored into the caller's frame at sp - 20 or sp - 16
4088 depending on which ABI is in use. */
4089 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4091 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4092 rp_saved = true;
4094 else
4095 rp_saved = false;
4097 /* Allocate the local frame and set up the frame pointer if needed. */
4098 if (actual_fsize != 0)
4100 if (frame_pointer_needed)
4102 /* Copy the old frame pointer temporarily into %r1. Set up the
4103 new stack pointer, then store away the saved old frame pointer
4104 into the stack at sp and at the same time update the stack
4105 pointer by actual_fsize bytes. Two versions, first
4106 handles small (<8k) frames. The second handles large (>=8k)
4107 frames. */
4108 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4109 if (DO_FRAME_NOTES)
4110 RTX_FRAME_RELATED_P (insn) = 1;
4112 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4113 if (DO_FRAME_NOTES)
4114 RTX_FRAME_RELATED_P (insn) = 1;
4116 if (VAL_14_BITS_P (actual_fsize))
4117 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4118 else
4120 /* It is incorrect to store the saved frame pointer at *sp,
4121 then increment sp (writes beyond the current stack boundary).
4123 So instead use stwm to store at *sp and post-increment the
4124 stack pointer as an atomic operation. Then increment sp to
4125 finish allocating the new frame. */
4126 HOST_WIDE_INT adjust1 = 8192 - 64;
4127 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4129 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4130 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4131 adjust2, 1);
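/* Numerically: adjust1 = 8192 - 64 = 8128 is the largest 64-byte
   aligned value that still fits a 14-bit signed displacement, so for,
   say, actual_fsize = 20000 the stwm advances %sp by 8128 and
   set_reg_plus_d adds the remaining adjust2 = 11872 (via addil/ldo,
   since that no longer fits 14 bits). */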
4134 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4135 we need to store the previous stack pointer (frame pointer)
4136 into the frame marker on targets that use the HP unwind
4137 library. This allows the HP unwind library to be used to
4138 unwind GCC frames. However, we are not fully compatible
4139 with the HP library because our frame layout differs from
4140 that specified in the HP runtime specification.
4142 We don't want a frame note on this instruction as the frame
4143 marker moves during dynamic stack allocation.
4145 This instruction also serves as a blockage to prevent
4146 register spills from being scheduled before the stack
4147 pointer is raised. This is necessary as we store
4148 registers using the frame pointer as a base register,
4149 and the frame pointer is set before sp is raised. */
4150 if (TARGET_HPUX_UNWIND_LIBRARY)
4152 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4153 GEN_INT (TARGET_64BIT ? -8 : -4));
4155 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4156 hard_frame_pointer_rtx);
4158 else
4159 emit_insn (gen_blockage ());
4161 /* No frame pointer needed. */
4162 else
4164 /* In some cases we can perform the first callee register save
4165 and allocating the stack frame at the same time. If so, just
4166 make a note of it and defer allocating the frame until saving
4167 the callee registers. */
4168 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4169 merge_sp_adjust_with_store = 1;
4170 /* Cannot optimize. Adjust the stack frame by actual_fsize
4171 bytes. */
4172 else
4173 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4174 actual_fsize, 1);
4178 /* Normal register save.
4180 Do not save the frame pointer in the frame_pointer_needed case. It
4181 was done earlier. */
4182 if (frame_pointer_needed)
4184 offset = local_fsize;
4186 /* Saving the EH return data registers in the frame is the simplest
4187 way to get the frame unwind information emitted. We put them
4188 just before the general registers. */
4189 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4191 unsigned int i, regno;
4193 for (i = 0; ; ++i)
4195 regno = EH_RETURN_DATA_REGNO (i);
4196 if (regno == INVALID_REGNUM)
4197 break;
4199 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4200 offset += UNITS_PER_WORD;
4204 for (i = 18; i >= 4; i--)
4205 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4207 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4208 offset += UNITS_PER_WORD;
4209 gr_saved++;
4211 /* Account for %r3 which is saved in a special place. */
4212 gr_saved++;
4214 /* No frame pointer needed. */
4215 else
4217 offset = local_fsize - actual_fsize;
4219 /* Saving the EH return data registers in the frame is the simplest
4220 way to get the frame unwind information emitted. */
4221 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4223 unsigned int i, regno;
4225 for (i = 0; ; ++i)
4227 regno = EH_RETURN_DATA_REGNO (i);
4228 if (regno == INVALID_REGNUM)
4229 break;
4231 /* If merge_sp_adjust_with_store is nonzero, then we can
4232 optimize the first save. */
4233 if (merge_sp_adjust_with_store)
4235 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4236 merge_sp_adjust_with_store = 0;
4238 else
4239 store_reg (regno, offset, STACK_POINTER_REGNUM);
4240 offset += UNITS_PER_WORD;
4244 for (i = 18; i >= 3; i--)
4245 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4247 /* If merge_sp_adjust_with_store is nonzero, then we can
4248 optimize the first GR save. */
4249 if (merge_sp_adjust_with_store)
4251 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4252 merge_sp_adjust_with_store = 0;
4254 else
4255 store_reg (i, offset, STACK_POINTER_REGNUM);
4256 offset += UNITS_PER_WORD;
4257 gr_saved++;
4260 /* If we wanted to merge the SP adjustment with a GR save, but we never
4261 did any GR saves, then just emit the adjustment here. */
4262 if (merge_sp_adjust_with_store)
4263 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4264 actual_fsize, 1);
4267 /* The hppa calling conventions say that %r19, the pic offset
4268 register, is saved at sp - 32 (in this function's frame)
4269 when generating PIC code. FIXME: What is the correct thing
4270 to do for functions which make no calls and allocate no
4271 frame? Do we need to allocate a frame, or can we just omit
4272 the save? For now we'll just omit the save.
4274 We don't want a note on this insn as the frame marker can
4275 move if there is a dynamic stack allocation. */
4276 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4278 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4280 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4284 /* Align pointer properly (doubleword boundary). */
4285 offset = (offset + 7) & ~7;
4287 /* Floating point register store. */
4288 if (save_fregs)
4290 rtx base;
4292 /* First get the frame or stack pointer to the start of the FP register
4293 save area. */
4294 if (frame_pointer_needed)
4296 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4297 base = hard_frame_pointer_rtx;
4299 else
4301 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4302 base = stack_pointer_rtx;
4305 /* Now actually save the FP registers. */
4306 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4308 if (df_regs_ever_live_p (i)
4309 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4311 rtx addr, reg;
4312 rtx_insn *insn;
4313 addr = gen_rtx_MEM (DFmode,
4314 gen_rtx_POST_INC (word_mode, tmpreg));
4315 reg = gen_rtx_REG (DFmode, i);
4316 insn = emit_move_insn (addr, reg);
4317 if (DO_FRAME_NOTES)
4319 RTX_FRAME_RELATED_P (insn) = 1;
4320 if (TARGET_64BIT)
4322 rtx mem = gen_rtx_MEM (DFmode,
4323 plus_constant (Pmode, base,
4324 offset));
4325 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4326 gen_rtx_SET (mem, reg));
4328 else
4330 rtx meml = gen_rtx_MEM (SFmode,
4331 plus_constant (Pmode, base,
4332 offset));
4333 rtx memr = gen_rtx_MEM (SFmode,
4334 plus_constant (Pmode, base,
4335 offset + 4));
4336 rtx regl = gen_rtx_REG (SFmode, i);
4337 rtx regr = gen_rtx_REG (SFmode, i + 1);
4338 rtx setl = gen_rtx_SET (meml, regl);
4339 rtx setr = gen_rtx_SET (memr, regr);
4340 rtvec vec;
4342 RTX_FRAME_RELATED_P (setl) = 1;
4343 RTX_FRAME_RELATED_P (setr) = 1;
4344 vec = gen_rtvec (2, setl, setr);
4345 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4346 gen_rtx_SEQUENCE (VOIDmode, vec));
4349 offset += GET_MODE_SIZE (DFmode);
4350 fr_saved++;
4356 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4357 Handle case where DISP > 8k by using the add_high_const patterns. */
4359 static void
4360 load_reg (int reg, HOST_WIDE_INT disp, int base)
4362 rtx dest = gen_rtx_REG (word_mode, reg);
4363 rtx basereg = gen_rtx_REG (Pmode, base);
4364 rtx src;
4366 if (VAL_14_BITS_P (disp))
4367 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4368 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4370 rtx delta = GEN_INT (disp);
4371 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4373 emit_move_insn (tmpreg, delta);
4374 if (TARGET_DISABLE_INDEXING)
4376 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4377 src = gen_rtx_MEM (word_mode, tmpreg);
4379 else
4380 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4382 else
4384 rtx delta = GEN_INT (disp);
4385 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4386 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4388 emit_move_insn (tmpreg, high);
4389 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4392 emit_move_insn (dest, src);
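/* Note the asymmetry with store_reg: for large 64-bit displacements a
   load can normally use the indexed form (ldwx/ldw with a register
   index), so an explicit add through %r1 is only emitted when
   TARGET_DISABLE_INDEXING is in effect. */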
4395 /* Update the total code bytes output to the text section. */
4397 static void
4398 update_total_code_bytes (unsigned int nbytes)
4400 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4401 && !IN_NAMED_SECTION_P (cfun->decl))
4403 unsigned int old_total = total_code_bytes;
4405 total_code_bytes += nbytes;
4407 /* Be prepared to handle overflows. */
4408 if (old_total > total_code_bytes)
4409 total_code_bytes = UINT_MAX;
4413 /* This function generates the assembly code for function exit.
4414 Args are as for output_function_prologue ().
4416 The function epilogue should not depend on the current stack
4417 pointer! It should use the frame pointer only. This is mandatory
4418 because of alloca; we also take advantage of it to omit stack
4419 adjustments before returning. */
4421 static void
4422 pa_output_function_epilogue (FILE *file)
4424 rtx_insn *insn = get_last_insn ();
4425 bool extra_nop;
4427 /* pa_expand_epilogue does the dirty work now. We just need
4428 to output the assembler directives which denote the end
4429 of a function.
4431 To make debuggers happy, emit a nop if the epilogue was completely
4432 eliminated due to a volatile call as the last insn in the
4433 current function. That way the return address (in %r2) will
4434 always point to a valid instruction in the current function. */
4436 /* Get the last real insn. */
4437 if (NOTE_P (insn))
4438 insn = prev_real_insn (insn);
4440 /* If it is a sequence, then look inside. */
4441 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4442 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4444 /* If insn is a CALL_INSN, then it must be a call to a volatile
4445 function (otherwise there would be epilogue insns). */
4446 if (insn && CALL_P (insn))
4448 fputs ("\tnop\n", file);
4449 extra_nop = true;
4451 else
4452 extra_nop = false;
4454 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4456 if (TARGET_SOM && TARGET_GAS)
4458 /* We are done with this subspace except possibly for some additional
4459 debug information. Forget that we are in this subspace to ensure
4460 that the next function is output in its own subspace. */
4461 in_section = NULL;
4462 cfun->machine->in_nsubspa = 2;
4465 /* Thunks do their own insn accounting. */
4466 if (cfun->is_thunk)
4467 return;
4469 if (INSN_ADDRESSES_SET_P ())
4471 last_address = extra_nop ? 4 : 0;
4472 insn = get_last_nonnote_insn ();
4473 if (insn)
4475 last_address += INSN_ADDRESSES (INSN_UID (insn));
4476 if (INSN_P (insn))
4477 last_address += insn_default_length (insn);
4479 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4480 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4482 else
4483 last_address = UINT_MAX;
4485 /* Finally, update the total number of code bytes output so far. */
4486 update_total_code_bytes (last_address);
4489 void
4490 pa_expand_epilogue (void)
4492 rtx tmpreg;
4493 HOST_WIDE_INT offset;
4494 HOST_WIDE_INT ret_off = 0;
4495 int i;
4496 int merge_sp_adjust_with_load = 0;
4498 /* We will use this often. */
4499 tmpreg = gen_rtx_REG (word_mode, 1);
4501 /* Try to restore RP early to avoid load/use interlocks when
4502 RP gets used in the return (bv) instruction. This appears to still
4503 be necessary even when we schedule the prologue and epilogue. */
4504 if (rp_saved)
4506 ret_off = TARGET_64BIT ? -16 : -20;
4507 if (frame_pointer_needed)
4509 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4510 ret_off = 0;
4512 else
4514 /* No frame pointer, and stack is smaller than 8k. */
4515 if (VAL_14_BITS_P (ret_off - actual_fsize))
4517 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4518 ret_off = 0;
4523 /* General register restores. */
4524 if (frame_pointer_needed)
4526 offset = local_fsize;
4528 /* If the current function calls __builtin_eh_return, then we need
4529 to restore the saved EH data registers. */
4530 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4532 unsigned int i, regno;
4534 for (i = 0; ; ++i)
4536 regno = EH_RETURN_DATA_REGNO (i);
4537 if (regno == INVALID_REGNUM)
4538 break;
4540 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4541 offset += UNITS_PER_WORD;
4545 for (i = 18; i >= 4; i--)
4546 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4548 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4549 offset += UNITS_PER_WORD;
4552 else
4554 offset = local_fsize - actual_fsize;
4556 /* If the current function calls __builtin_eh_return, then we need
4557 to restore the saved EH data registers. */
4558 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4560 unsigned int i, regno;
4562 for (i = 0; ; ++i)
4564 regno = EH_RETURN_DATA_REGNO (i);
4565 if (regno == INVALID_REGNUM)
4566 break;
4568 /* Only for the first load.
4569 merge_sp_adjust_with_load holds the register load
4570 with which we will merge the sp adjustment. */
4571 if (merge_sp_adjust_with_load == 0
4572 && local_fsize == 0
4573 && VAL_14_BITS_P (-actual_fsize))
4574 merge_sp_adjust_with_load = regno;
4575 else
4576 load_reg (regno, offset, STACK_POINTER_REGNUM);
4577 offset += UNITS_PER_WORD;
4581 for (i = 18; i >= 3; i--)
4583 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4585 /* Only for the first load.
4586 merge_sp_adjust_with_load holds the register load
4587 with which we will merge the sp adjustment. */
4588 if (merge_sp_adjust_with_load == 0
4589 && local_fsize == 0
4590 && VAL_14_BITS_P (-actual_fsize))
4591 merge_sp_adjust_with_load = i;
4592 else
4593 load_reg (i, offset, STACK_POINTER_REGNUM);
4594 offset += UNITS_PER_WORD;
4599 /* Align pointer properly (doubleword boundary). */
4600 offset = (offset + 7) & ~7;
4602 /* FP register restores. */
4603 if (save_fregs)
4605 /* Adjust the register to index off of. */
4606 if (frame_pointer_needed)
4607 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4608 else
4609 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4611 /* Actually do the restores now. */
4612 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4613 if (df_regs_ever_live_p (i)
4614 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4616 rtx src = gen_rtx_MEM (DFmode,
4617 gen_rtx_POST_INC (word_mode, tmpreg));
4618 rtx dest = gen_rtx_REG (DFmode, i);
4619 emit_move_insn (dest, src);
4623 /* Emit a blockage insn here to keep these insns from being moved to
4624 an earlier spot in the epilogue, or into the main instruction stream.
4626 This is necessary as we must not cut the stack back before all the
4627 restores are finished. */
4628 emit_insn (gen_blockage ());
4630 /* Reset stack pointer (and possibly frame pointer). The stack
4631 pointer is initially set to fp + 64 to avoid a race condition. */
4632 if (frame_pointer_needed)
4634 rtx delta = GEN_INT (-64);
4636 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4637 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4638 stack_pointer_rtx, delta));
4640 /* If we were deferring a callee register restore, do it now. */
4641 else if (merge_sp_adjust_with_load)
4643 rtx delta = GEN_INT (-actual_fsize);
4644 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4646 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4648 else if (actual_fsize != 0)
4649 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4650 - actual_fsize, 0);
4652 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4653 frame greater than 8k), do so now. */
4654 if (ret_off != 0)
4655 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4657 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4659 rtx sa = EH_RETURN_STACKADJ_RTX;
4661 emit_insn (gen_blockage ());
4662 emit_insn (TARGET_64BIT
4663 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4664 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4668 bool
4669 pa_can_use_return_insn (void)
4671 if (!reload_completed)
4672 return false;
4674 if (frame_pointer_needed)
4675 return false;
4677 if (df_regs_ever_live_p (2))
4678 return false;
4680 if (crtl->profile)
4681 return false;
4683 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4687 hppa_pic_save_rtx (void)
4689 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4692 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4693 #define NO_DEFERRED_PROFILE_COUNTERS 0
4694 #endif
4697 /* Vector of funcdef numbers. */
4698 static vec<int> funcdef_nos;
4700 /* Output deferred profile counters. */
4701 static void
4702 output_deferred_profile_counters (void)
4704 unsigned int i;
4705 int align, n;
4707 if (funcdef_nos.is_empty ())
4708 return;
4710 switch_to_section (data_section);
4711 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4712 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4714 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4716 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4717 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4720 funcdef_nos.release ();
4723 void
4724 hppa_profile_hook (int label_no)
4726 rtx_code_label *label_rtx = gen_label_rtx ();
4727 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4728 rtx arg_bytes, begin_label_rtx, mcount, sym;
4729 rtx_insn *call_insn;
4730 char begin_label_name[16];
4731 bool use_mcount_pcrel_call;
4733 /* Set up call destination. */
4734 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4735 pa_encode_label (sym);
4736 mcount = gen_rtx_MEM (Pmode, sym);
4738 /* If we can reach _mcount with a pc-relative call, we can optimize
4739 loading the address of the current function. This requires linker
4740 long branch stub support. */
4741 if (!TARGET_PORTABLE_RUNTIME
4742 && !TARGET_LONG_CALLS
4743 && (TARGET_SOM || flag_function_sections))
4744 use_mcount_pcrel_call = TRUE;
4745 else
4746 use_mcount_pcrel_call = FALSE;
4748 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4749 label_no);
4750 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4752 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4754 if (!use_mcount_pcrel_call)
4756 /* The address of the function is loaded into %r25 with an instruction-
4757 relative sequence that avoids the use of relocations. We use SImode
4758 for the address of the function in both 32 and 64-bit code to avoid
4759 having to provide DImode versions of the lcla2 pattern. */
4760 if (TARGET_PA_20)
4761 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4762 else
4763 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4766 if (!NO_DEFERRED_PROFILE_COUNTERS)
4768 rtx count_label_rtx, addr, r24;
4769 char count_label_name[16];
4771 funcdef_nos.safe_push (label_no);
4772 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4773 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4774 ggc_strdup (count_label_name));
4776 addr = force_reg (Pmode, count_label_rtx);
4777 r24 = gen_rtx_REG (Pmode, 24);
4778 emit_move_insn (r24, addr);
4780 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4781 if (use_mcount_pcrel_call)
4782 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4783 begin_label_rtx));
4784 else
4785 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4787 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4789 else
4791 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4792 if (use_mcount_pcrel_call)
4793 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4794 begin_label_rtx));
4795 else
4796 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4799 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4800 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4802 /* Indicate the _mcount call cannot throw, nor will it execute a
4803 non-local goto. */
4804 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4806 /* Allocate space for fixed arguments. */
4807 if (reg_parm_stack_space > crtl->outgoing_args_size)
4808 crtl->outgoing_args_size = reg_parm_stack_space;
4811 /* Fetch the return address for the frame COUNT steps up from
4812 the current frame, after the prologue. FRAMEADDR is the
4813 frame pointer of the COUNT frame.
4815 We want to ignore any export stub remnants here. To handle this,
4816 we examine the code at the return address, and if it is an export
4817 stub, we return a memory rtx for the stub return address stored
4818 at frame-24.
4820 The value returned is used in two different ways:
4822 1. To find a function's caller.
4824 2. To change the return address for a function.
4826 This function handles most instances of case 1; however, it will
4827 fail if there are two levels of stubs to execute on the return
4828 path. The only way I believe that can happen is if the return value
4829 needs a parameter relocation, which never happens for C code.
4831 This function handles most instances of case 2; however, it will
4832 fail if we did not originally have stub code on the return path
4833 but will need stub code on the new return path. This can happen if
4834 the caller & callee are both in the main program, but the new
4835 return location is in a shared library. */
4838 pa_return_addr_rtx (int count, rtx frameaddr)
4840 rtx label;
4841 rtx rp;
4842 rtx saved_rp;
4843 rtx ins;
4845 /* The instruction stream at the return address of a PA1.X export stub is:
4847 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4848 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4849 0x00011820 | stub+16: mtsp r1,sr0
4850 0xe0400002 | stub+20: be,n 0(sr0,rp)
4852 0xe0400002 must be specified as -532676606 so that it won't be
4853 rejected as an invalid immediate operand on 64-bit hosts.
4855 The instruction stream at the return address of a PA2.0 export stub is:
4857 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4858 0xe840d002 | stub+12: bve,n (rp)
4861 HOST_WIDE_INT insns[4];
4862 int i, len;
4864 if (count != 0)
4865 return NULL_RTX;
4867 rp = get_hard_reg_initial_val (Pmode, 2);
4869 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4870 return rp;
4872 /* If there is no export stub then just use the value saved from
4873 the return pointer register. */
4875 saved_rp = gen_reg_rtx (Pmode);
4876 emit_move_insn (saved_rp, rp);
4878 /* Get pointer to the instruction stream. We have to mask out the
4879 privilege level from the two low order bits of the return address
4880 pointer here so that ins will point to the start of the first
4881 instruction that would have been executed if we returned. */
4882 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4883 label = gen_label_rtx ();
4885 if (TARGET_PA_20)
4887 insns[0] = 0x4bc23fd1;
4888 insns[1] = -398405630;
4889 len = 2;
4891 else
4893 insns[0] = 0x4bc23fd1;
4894 insns[1] = 0x004010a1;
4895 insns[2] = 0x00011820;
4896 insns[3] = -532676606;
4897 len = 4;
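/* For reference, the negative entries are simply the stub words above
   rewritten as signed 32-bit values: 0xe840d002 - 0x100000000 =
   -398405630 and 0xe0400002 - 0x100000000 = -532676606. */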
4900 /* Check the instruction stream at the normal return address for the
4901 export stub. If it is an export stub, then our return address is
4902 really in -24[frameaddr]. */
4904 for (i = 0; i < len; i++)
4906 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4907 rtx op1 = GEN_INT (insns[i]);
4908 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4911 /* Here we know that our return address points to an export
4912 stub. We don't want to return the address of the export stub,
4913 but rather the return address of the export stub. That return
4914 address is stored at -24[frameaddr]. */
4916 emit_move_insn (saved_rp,
4917 gen_rtx_MEM (Pmode,
4918 memory_address (Pmode,
4919 plus_constant (Pmode, frameaddr,
4920 -24))));
4922 emit_label (label);
4924 return saved_rp;
4927 void
4928 pa_emit_bcond_fp (rtx operands[])
4930 enum rtx_code code = GET_CODE (operands[0]);
4931 rtx operand0 = operands[1];
4932 rtx operand1 = operands[2];
4933 rtx label = operands[3];
4935 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4936 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4938 emit_jump_insn (gen_rtx_SET (pc_rtx,
4939 gen_rtx_IF_THEN_ELSE (VOIDmode,
4940 gen_rtx_fmt_ee (NE,
4941 VOIDmode,
4942 gen_rtx_REG (CCFPmode, 0),
4943 const0_rtx),
4944 gen_rtx_LABEL_REF (VOIDmode, label),
4945 pc_rtx)));
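/* Schematically, for operands (code, x, y, label) the two emits above
   produce

       (set (reg:CCFP 0) (code:CCFP x y))
       (set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
                               (label_ref label)
                               (pc)))

   i.e. a floating-point compare into the fp status "register" followed
   by a branch on its result. */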
4949 /* Adjust the cost of a scheduling dependency. Return the new cost of
4950 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4952 static int
4953 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4954 unsigned int)
4956 enum attr_type attr_type;
4958 /* Don't adjust costs for a pa8000 chip; likewise, do not adjust any
4959 true dependencies, as they are described with bypasses now. */
4960 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4961 return cost;
4963 if (! recog_memoized (insn))
4964 return 0;
4966 attr_type = get_attr_type (insn);
4968 switch (dep_type)
4970 case REG_DEP_ANTI:
4971 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4972 cycles later. */
4974 if (attr_type == TYPE_FPLOAD)
4976 rtx pat = PATTERN (insn);
4977 rtx dep_pat = PATTERN (dep_insn);
4978 if (GET_CODE (pat) == PARALLEL)
4980 /* This happens for the fldXs,mb patterns. */
4981 pat = XVECEXP (pat, 0, 0);
4983 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4984 /* If this happens, we have to extend this to schedule
4985 optimally. Return 0 for now. */
4986 return 0;
4988 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4990 if (! recog_memoized (dep_insn))
4991 return 0;
4992 switch (get_attr_type (dep_insn))
4994 case TYPE_FPALU:
4995 case TYPE_FPMULSGL:
4996 case TYPE_FPMULDBL:
4997 case TYPE_FPDIVSGL:
4998 case TYPE_FPDIVDBL:
4999 case TYPE_FPSQRTSGL:
5000 case TYPE_FPSQRTDBL:
5001 /* A fpload can't be issued until one cycle before a
5002 preceding arithmetic operation has finished if
5003 the target of the fpload is any of the sources
5004 (or destination) of the arithmetic operation. */
5005 return insn_default_latency (dep_insn) - 1;
5007 default:
5008 return 0;
5012 else if (attr_type == TYPE_FPALU)
5014 rtx pat = PATTERN (insn);
5015 rtx dep_pat = PATTERN (dep_insn);
5016 if (GET_CODE (pat) == PARALLEL)
5018 /* This happens for the fldXs,mb patterns. */
5019 pat = XVECEXP (pat, 0, 0);
5021 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5022 /* If this happens, we have to extend this to schedule
5023 optimally. Return 0 for now. */
5024 return 0;
5026 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5028 if (! recog_memoized (dep_insn))
5029 return 0;
5030 switch (get_attr_type (dep_insn))
5032 case TYPE_FPDIVSGL:
5033 case TYPE_FPDIVDBL:
5034 case TYPE_FPSQRTSGL:
5035 case TYPE_FPSQRTDBL:
5036 /* An ALU flop can't be issued until two cycles before a
5037 preceding divide or sqrt operation has finished if
5038 the target of the ALU flop is any of the sources
5039 (or destination) of the divide or sqrt operation. */
5040 return insn_default_latency (dep_insn) - 2;
5042 default:
5043 return 0;
5048 /* For other anti dependencies, the cost is 0. */
5049 return 0;
5051 case REG_DEP_OUTPUT:
5052 /* Output dependency; DEP_INSN writes a register that INSN writes some
5053 cycles later. */
5054 if (attr_type == TYPE_FPLOAD)
5056 rtx pat = PATTERN (insn);
5057 rtx dep_pat = PATTERN (dep_insn);
5058 if (GET_CODE (pat) == PARALLEL)
5060 /* This happens for the fldXs,mb patterns. */
5061 pat = XVECEXP (pat, 0, 0);
5063 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5064 /* If this happens, we have to extend this to schedule
5065 optimally. Return 0 for now. */
5066 return 0;
5068 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5070 if (! recog_memoized (dep_insn))
5071 return 0;
5072 switch (get_attr_type (dep_insn))
5074 case TYPE_FPALU:
5075 case TYPE_FPMULSGL:
5076 case TYPE_FPMULDBL:
5077 case TYPE_FPDIVSGL:
5078 case TYPE_FPDIVDBL:
5079 case TYPE_FPSQRTSGL:
5080 case TYPE_FPSQRTDBL:
5081 /* A fpload can't be issued until one cycle before a
5082 preceding arithmetic operation has finished if
5083 the target of the fpload is the destination of the
5084 arithmetic operation.
5086 Exception: For PA7100LC, PA7200 and PA7300, the cost
5087 is 3 cycles, unless they bundle together. We also
5088 pay the penalty if the second insn is a fpload. */
5089 return insn_default_latency (dep_insn) - 1;
5091 default:
5092 return 0;
5096 else if (attr_type == TYPE_FPALU)
5098 rtx pat = PATTERN (insn);
5099 rtx dep_pat = PATTERN (dep_insn);
5100 if (GET_CODE (pat) == PARALLEL)
5102 /* This happens for the fldXs,mb patterns. */
5103 pat = XVECEXP (pat, 0, 0);
5105 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5106 /* If this happens, we have to extend this to schedule
5107 optimally. Return 0 for now. */
5108 return 0;
5110 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5112 if (! recog_memoized (dep_insn))
5113 return 0;
5114 switch (get_attr_type (dep_insn))
5116 case TYPE_FPDIVSGL:
5117 case TYPE_FPDIVDBL:
5118 case TYPE_FPSQRTSGL:
5119 case TYPE_FPSQRTDBL:
5120 /* An ALU flop can't be issued until two cycles before a
5121 preceding divide or sqrt operation has finished if
5122 the target of the ALU flop is also the target of
5123 the divide or sqrt operation. */
5124 return insn_default_latency (dep_insn) - 2;
5126 default:
5127 return 0;
5132 /* For other output dependencies, the cost is 0. */
5133 return 0;
5135 default:
5136 gcc_unreachable ();
5140 /* The 700 can only issue a single insn at a time.
5141 The 7XXX processors can issue two insns at a time.
5142 The 8000 can issue 4 insns at a time. */
5143 static int
5144 pa_issue_rate (void)
5146 switch (pa_cpu)
5148 case PROCESSOR_700: return 1;
5149 case PROCESSOR_7100: return 2;
5150 case PROCESSOR_7100LC: return 2;
5151 case PROCESSOR_7200: return 2;
5152 case PROCESSOR_7300: return 2;
5153 case PROCESSOR_8000: return 4;
5155 default:
5156 gcc_unreachable ();
5162 /* Return any length plus adjustment needed by INSN which already has
5163 its length computed as LENGTH. Return LENGTH if no adjustment is
5164 necessary.
5166 Also compute the length of an inline block move here as it is too
5167 complicated to express as a length attribute in pa.md. */
5169 pa_adjust_insn_length (rtx_insn *insn, int length)
5171 rtx pat = PATTERN (insn);
5173 /* If length is negative or undefined, provide initial length. */
5174 if ((unsigned int) length >= INT_MAX)
5176 if (GET_CODE (pat) == SEQUENCE)
5177 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5179 switch (get_attr_type (insn))
5181 case TYPE_MILLI:
5182 length = pa_attr_length_millicode_call (insn);
5183 break;
5184 case TYPE_CALL:
5185 length = pa_attr_length_call (insn, 0);
5186 break;
5187 case TYPE_SIBCALL:
5188 length = pa_attr_length_call (insn, 1);
5189 break;
5190 case TYPE_DYNCALL:
5191 length = pa_attr_length_indirect_call (insn);
5192 break;
5193 case TYPE_SH_FUNC_ADRS:
5194 length = pa_attr_length_millicode_call (insn) + 20;
5195 break;
5196 default:
5197 gcc_unreachable ();
5201 /* Block move pattern. */
5202 if (NONJUMP_INSN_P (insn)
5203 && GET_CODE (pat) == PARALLEL
5204 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5205 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5206 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5207 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5208 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5209 length += compute_cpymem_length (insn) - 4;
5210 /* Block clear pattern. */
5211 else if (NONJUMP_INSN_P (insn)
5212 && GET_CODE (pat) == PARALLEL
5213 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5214 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5215 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5216 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5217 length += compute_clrmem_length (insn) - 4;
5218 /* Conditional branch with an unfilled delay slot. */
5219 else if (JUMP_P (insn) && ! simplejump_p (insn))
5221 /* Adjust a short backwards conditional with an unfilled delay slot. */
5222 if (GET_CODE (pat) == SET
5223 && length == 4
5224 && JUMP_LABEL (insn) != NULL_RTX
5225 && ! forward_branch_p (insn))
5226 length += 4;
5227 else if (GET_CODE (pat) == PARALLEL
5228 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5229 && length == 4)
5230 length += 4;
5231 /* Adjust dbra insn with short backwards conditional branch with
5232 unfilled delay slot -- only for the case where the counter is in a
5233 general register. */
5234 else if (GET_CODE (pat) == PARALLEL
5235 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5236 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5237 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5238 && length == 4
5239 && ! forward_branch_p (insn))
5240 length += 4;
5242 return length;
5245 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5247 static bool
5248 pa_print_operand_punct_valid_p (unsigned char code)
5250 if (code == '@'
5251 || code == '#'
5252 || code == '*'
5253 || code == '^')
5254 return true;
5256 return false;
5259 /* Print operand X (an rtx) in assembler syntax to file FILE.
5260 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5261 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5263 void
5264 pa_print_operand (FILE *file, rtx x, int code)
5266 switch (code)
5268 case '#':
5269 /* Output a 'nop' if there's nothing for the delay slot. */
5270 if (dbr_sequence_length () == 0)
5271 fputs ("\n\tnop", file);
5272 return;
5273 case '*':
5274 /* Output a nullification completer if there's nothing for the
5275 delay slot or nullification is requested. */
5276 if (dbr_sequence_length () == 0
5277 || (final_sequence
5278 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5279 fputs (",n", file);
5280 return;
5281 case 'R':
5282 /* Print out the second register name of a register pair.
5283 I.e., R (6) => 7. */
5284 fputs (reg_names[REGNO (x) + 1], file);
5285 return;
5286 case 'r':
5287 /* A register or zero. */
5288 if (x == const0_rtx
5289 || (x == CONST0_RTX (DFmode))
5290 || (x == CONST0_RTX (SFmode)))
5292 fputs ("%r0", file);
5293 return;
5295 else
5296 break;
5297 case 'f':
5298 /* A register or zero (floating point). */
5299 if (x == const0_rtx
5300 || (x == CONST0_RTX (DFmode))
5301 || (x == CONST0_RTX (SFmode)))
5303 fputs ("%fr0", file);
5304 return;
5306 else
5307 break;
5308 case 'A':
5310 rtx xoperands[2];
5312 xoperands[0] = XEXP (XEXP (x, 0), 0);
5313 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5314 pa_output_global_address (file, xoperands[1], 0);
5315 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5316 return;
5319 case 'C': /* Plain (C)ondition */
5320 case 'X':
5321 switch (GET_CODE (x))
5323 case EQ:
5324 fputs ("=", file); break;
5325 case NE:
5326 fputs ("<>", file); break;
5327 case GT:
5328 fputs (">", file); break;
5329 case GE:
5330 fputs (">=", file); break;
5331 case GEU:
5332 fputs (">>=", file); break;
5333 case GTU:
5334 fputs (">>", file); break;
5335 case LT:
5336 fputs ("<", file); break;
5337 case LE:
5338 fputs ("<=", file); break;
5339 case LEU:
5340 fputs ("<<=", file); break;
5341 case LTU:
5342 fputs ("<<", file); break;
5343 default:
5344 gcc_unreachable ();
5346 return;
5347 case 'N': /* Condition, (N)egated */
5348 switch (GET_CODE (x))
5350 case EQ:
5351 fputs ("<>", file); break;
5352 case NE:
5353 fputs ("=", file); break;
5354 case GT:
5355 fputs ("<=", file); break;
5356 case GE:
5357 fputs ("<", file); break;
5358 case GEU:
5359 fputs ("<<", file); break;
5360 case GTU:
5361 fputs ("<<=", file); break;
5362 case LT:
5363 fputs (">=", file); break;
5364 case LE:
5365 fputs (">", file); break;
5366 case LEU:
5367 fputs (">>", file); break;
5368 case LTU:
5369 fputs (">>=", file); break;
5370 default:
5371 gcc_unreachable ();
5373 return;
5374 /* For floating point comparisons. Note that the output
5375 predicates are the complement of the desired mode. The
5376 conditions for GT, GE, LT, LE and LTGT cause an invalid
5377 operation exception if the result is unordered and this
5378 exception is enabled in the floating-point status register. */
5379 case 'Y':
5380 switch (GET_CODE (x))
5382 case EQ:
5383 fputs ("!=", file); break;
5384 case NE:
5385 fputs ("=", file); break;
5386 case GT:
5387 fputs ("!>", file); break;
5388 case GE:
5389 fputs ("!>=", file); break;
5390 case LT:
5391 fputs ("!<", file); break;
5392 case LE:
5393 fputs ("!<=", file); break;
5394 case LTGT:
5395 fputs ("!<>", file); break;
5396 case UNLE:
5397 fputs ("!?<=", file); break;
5398 case UNLT:
5399 fputs ("!?<", file); break;
5400 case UNGE:
5401 fputs ("!?>=", file); break;
5402 case UNGT:
5403 fputs ("!?>", file); break;
5404 case UNEQ:
5405 fputs ("!?=", file); break;
5406 case UNORDERED:
5407 fputs ("!?", file); break;
5408 case ORDERED:
5409 fputs ("?", file); break;
5410 default:
5411 gcc_unreachable ();
5413 return;
5414 case 'S': /* Condition, operands are (S)wapped. */
5415 switch (GET_CODE (x))
5417 case EQ:
5418 fputs ("=", file); break;
5419 case NE:
5420 fputs ("<>", file); break;
5421 case GT:
5422 fputs ("<", file); break;
5423 case GE:
5424 fputs ("<=", file); break;
5425 case GEU:
5426 fputs ("<<=", file); break;
5427 case GTU:
5428 fputs ("<<", file); break;
5429 case LT:
5430 fputs (">", file); break;
5431 case LE:
5432 fputs (">=", file); break;
5433 case LEU:
5434 fputs (">>=", file); break;
5435 case LTU:
5436 fputs (">>", file); break;
5437 default:
5438 gcc_unreachable ();
5440 return;
5441 case 'B': /* Condition, (B)oth swapped and negate. */
5442 switch (GET_CODE (x))
5444 case EQ:
5445 fputs ("<>", file); break;
5446 case NE:
5447 fputs ("=", file); break;
5448 case GT:
5449 fputs (">=", file); break;
5450 case GE:
5451 fputs (">", file); break;
5452 case GEU:
5453 fputs (">>", file); break;
5454 case GTU:
5455 fputs (">>=", file); break;
5456 case LT:
5457 fputs ("<=", file); break;
5458 case LE:
5459 fputs ("<", file); break;
5460 case LEU:
5461 fputs ("<<", file); break;
5462 case LTU:
5463 fputs ("<<=", file); break;
5464 default:
5465 gcc_unreachable ();
5467 return;
5468 case 'k':
5469 gcc_assert (GET_CODE (x) == CONST_INT);
5470 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5471 return;
5472 case 'Q':
5473 gcc_assert (GET_CODE (x) == CONST_INT);
5474 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5475 return;
5476 case 'L':
5477 gcc_assert (GET_CODE (x) == CONST_INT);
5478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5479 return;
5480 case 'o':
5481 gcc_assert (GET_CODE (x) == CONST_INT
5482 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5483 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5484 return;
5485 case 'O':
5486 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5487 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5488 return;
5489 case 'p':
5490 gcc_assert (GET_CODE (x) == CONST_INT);
5491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5492 return;
5493 case 'P':
5494 gcc_assert (GET_CODE (x) == CONST_INT);
5495 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5496 return;
5497 case 'I':
5498 if (GET_CODE (x) == CONST_INT)
5499 fputs ("i", file);
5500 return;
5501 case 'M':
5502 case 'F':
5503 switch (GET_CODE (XEXP (x, 0)))
5505 case PRE_DEC:
5506 case PRE_INC:
5507 if (ASSEMBLER_DIALECT == 0)
5508 fputs ("s,mb", file);
5509 else
5510 fputs (",mb", file);
5511 break;
5512 case POST_DEC:
5513 case POST_INC:
5514 if (ASSEMBLER_DIALECT == 0)
5515 fputs ("s,ma", file);
5516 else
5517 fputs (",ma", file);
5518 break;
5519 case PLUS:
5520 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5521 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5523 if (ASSEMBLER_DIALECT == 0)
5524 fputs ("x", file);
5526 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5527 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5529 if (ASSEMBLER_DIALECT == 0)
5530 fputs ("x,s", file);
5531 else
5532 fputs (",s", file);
5534 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5535 fputs ("s", file);
5536 break;
5537 default:
5538 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5539 fputs ("s", file);
5540 break;
5542 return;
5543 case 'G':
5544 pa_output_global_address (file, x, 0);
5545 return;
5546 case 'H':
5547 pa_output_global_address (file, x, 1);
5548 return;
5549 case 0: /* Don't do anything special */
5550 break;
5551 case 'Z':
5553 unsigned op[3];
5554 compute_zdepwi_operands (INTVAL (x), op);
5555 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5556 return;
5558 case 'z':
5560 unsigned op[3];
5561 compute_zdepdi_operands (INTVAL (x), op);
5562 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5563 return;
5565 case 'c':
5566 /* We can get here from a .vtable_inherit due to our
5567 CONSTANT_ADDRESS_P rejecting perfectly good constant
5568 addresses. */
5569 break;
5570 default:
5571 gcc_unreachable ();
5573 if (GET_CODE (x) == REG)
5575 fputs (reg_names [REGNO (x)], file);
5576 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5578 fputs ("R", file);
5579 return;
5581 if (FP_REG_P (x)
5582 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5583 && (REGNO (x) & 1) == 0)
5584 fputs ("L", file);
5586 else if (GET_CODE (x) == MEM)
5588 int size = GET_MODE_SIZE (GET_MODE (x));
5589 rtx base = NULL_RTX;
5590 switch (GET_CODE (XEXP (x, 0)))
5592 case PRE_DEC:
5593 case POST_DEC:
5594 base = XEXP (XEXP (x, 0), 0);
5595 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5596 break;
5597 case PRE_INC:
5598 case POST_INC:
5599 base = XEXP (XEXP (x, 0), 0);
5600 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5601 break;
5602 case PLUS:
5603 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5604 fprintf (file, "%s(%s)",
5605 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5606 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5607 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5608 fprintf (file, "%s(%s)",
5609 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5610 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5611 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5612 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5614 /* Because the REG_POINTER flag can get lost during reload,
5615 pa_legitimate_address_p canonicalizes the order of the
5616 index and base registers in the combined move patterns. */
5617 rtx base = XEXP (XEXP (x, 0), 1);
5618 rtx index = XEXP (XEXP (x, 0), 0);
5620 fprintf (file, "%s(%s)",
5621 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5623 else
5624 output_address (GET_MODE (x), XEXP (x, 0));
5625 break;
5626 default:
5627 output_address (GET_MODE (x), XEXP (x, 0));
5628 break;
5631 else
5632 output_addr_const (file, x);
5635 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5637 void
5638 pa_output_global_address (FILE *file, rtx x, int round_constant)
5641 /* Imagine (high (const (plus ...))). */
5642 if (GET_CODE (x) == HIGH)
5643 x = XEXP (x, 0);
5645 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5646 output_addr_const (file, x);
5647 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5649 output_addr_const (file, x);
5650 fputs ("-$global$", file);
5652 else if (GET_CODE (x) == CONST)
5654 const char *sep = "";
5655 int offset = 0; /* assembler wants -$global$ at end */
5656 rtx base = NULL_RTX;
5658 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5660 case LABEL_REF:
5661 case SYMBOL_REF:
5662 base = XEXP (XEXP (x, 0), 0);
5663 output_addr_const (file, base);
5664 break;
5665 case CONST_INT:
5666 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5667 break;
5668 default:
5669 gcc_unreachable ();
5672 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5674 case LABEL_REF:
5675 case SYMBOL_REF:
5676 base = XEXP (XEXP (x, 0), 1);
5677 output_addr_const (file, base);
5678 break;
5679 case CONST_INT:
5680 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5681 break;
5682 default:
5683 gcc_unreachable ();
5686 /* How bogus. The compiler is apparently responsible for
5687 rounding the constant if it uses an LR field selector.
5689 The linker and/or assembler seem a better place since
5690 they have to do this kind of thing already.
5692 If we fail to do this, HP's optimizing linker may eliminate
5693 an addil, but not update the ldw/stw/ldo instruction that
5694 uses the result of the addil. */
5695 if (round_constant)
5696 offset = ((offset + 0x1000) & ~0x1fff);
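/* I.e., round to the nearest multiple of 0x2000: for example, an
   offset of 0x1234 becomes (0x1234 + 0x1000) & ~0x1fff = 0x2000, and
   0x2fff likewise becomes 0x2000. */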
5698 switch (GET_CODE (XEXP (x, 0)))
5700 case PLUS:
5701 if (offset < 0)
5703 offset = -offset;
5704 sep = "-";
5706 else
5707 sep = "+";
5708 break;
5710 case MINUS:
5711 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5712 sep = "-";
5713 break;
5715 default:
5716 gcc_unreachable ();
5719 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5720 fputs ("-$global$", file);
5721 if (offset)
5722 fprintf (file, "%s%d", sep, offset);
5724 else
5725 output_addr_const (file, x);
5728 /* Output boilerplate text to appear at the beginning of the file.
5729 There are several possible versions. */
5730 #define aputs(x) fputs(x, asm_out_file)
5731 static inline void
5732 pa_file_start_level (void)
5734 if (TARGET_64BIT)
5735 aputs ("\t.LEVEL 2.0w\n");
5736 else if (TARGET_PA_20)
5737 aputs ("\t.LEVEL 2.0\n");
5738 else if (TARGET_PA_11)
5739 aputs ("\t.LEVEL 1.1\n");
5740 else
5741 aputs ("\t.LEVEL 1.0\n");
5744 static inline void
5745 pa_file_start_space (int sortspace)
5747 aputs ("\t.SPACE $PRIVATE$");
5748 if (sortspace)
5749 aputs (",SORT=16");
5750 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5751 if (flag_tm)
5752 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5753 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5754 "\n\t.SPACE $TEXT$");
5755 if (sortspace)
5756 aputs (",SORT=8");
5757 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5758 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5761 static inline void
5762 pa_file_start_file (int want_version)
5764 if (write_symbols != NO_DEBUG)
5766 output_file_directive (asm_out_file, main_input_filename);
5767 if (want_version)
5768 aputs ("\t.version\t\"01.01\"\n");
5772 static inline void
5773 pa_file_start_mcount (const char *aswhat)
5775 if (profile_flag)
5776 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5779 static void
5780 pa_elf_file_start (void)
5782 pa_file_start_level ();
5783 pa_file_start_mcount ("ENTRY");
5784 pa_file_start_file (0);
5787 static void
5788 pa_som_file_start (void)
5790 pa_file_start_level ();
5791 pa_file_start_space (0);
5792 aputs ("\t.IMPORT $global$,DATA\n"
5793 "\t.IMPORT $$dyncall,MILLICODE\n");
5794 pa_file_start_mcount ("CODE");
5795 pa_file_start_file (0);
5798 static void
5799 pa_linux_file_start (void)
5801 pa_file_start_file (1);
5802 pa_file_start_level ();
5803 pa_file_start_mcount ("CODE");
5806 static void
5807 pa_hpux64_gas_file_start (void)
5809 pa_file_start_level ();
5810 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5811 if (profile_flag)
5812 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5813 #endif
5814 pa_file_start_file (1);
5817 static void
5818 pa_hpux64_hpas_file_start (void)
5820 pa_file_start_level ();
5821 pa_file_start_space (1);
5822 pa_file_start_mcount ("CODE");
5823 pa_file_start_file (0);
5825 #undef aputs
5827 /* Search the deferred plabel list for SYMBOL and return its internal
5828 label. If an entry for SYMBOL is not found, a new entry is created. */
5830 rtx
5831 pa_get_deferred_plabel (rtx symbol)
5833 const char *fname = XSTR (symbol, 0);
5834 size_t i;
5836 /* See if we have already put this function on the list of deferred
5837 plabels. This list is generally small, so a linear search is not
5838 too ugly. If it proves too slow, replace it with something faster. */
5839 for (i = 0; i < n_deferred_plabels; i++)
5840 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5841 break;
5843 /* If the deferred plabel list is empty, or this entry was not found
5844 on the list, create a new entry on the list. */
5845 if (deferred_plabels == NULL || i == n_deferred_plabels)
5847 tree id;
5849 if (deferred_plabels == 0)
5850 deferred_plabels = ggc_alloc<deferred_plabel> ();
5851 else
5852 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5853 deferred_plabels,
5854 n_deferred_plabels + 1);
5856 i = n_deferred_plabels++;
5857 deferred_plabels[i].internal_label = gen_label_rtx ();
5858 deferred_plabels[i].symbol = symbol;
5860 /* Gross. We have just implicitly taken the address of this
5861 function. Mark it in the same manner as assemble_name. */
5862 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5863 if (id)
5864 mark_referenced (id);
5867 return deferred_plabels[i].internal_label;
5870 static void
5871 output_deferred_plabels (void)
5873 size_t i;
5875 /* If we have some deferred plabels, then we need to switch into the
5876 data or readonly data section, and align it to a pointer-sized
5877 boundary (4 or 8 bytes) before outputting the deferred plabels. */
5878 if (n_deferred_plabels)
5880 switch_to_section (flag_pic ? data_section : readonly_data_section);
5881 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5884 /* Now output the deferred plabels. */
5885 for (i = 0; i < n_deferred_plabels; i++)
5887 targetm.asm_out.internal_label (asm_out_file, "L",
5888 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5889 assemble_integer (deferred_plabels[i].symbol,
5890 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
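/* A sketch of the resulting output on a 32-bit target, for a deferred
   plabel that references a hypothetical function "bar":

	L$0042:
		.word	bar

   The 64-bit case emits an 8-byte integer instead; the exact label
   and directive spellings depend on the configured assembler.  */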
5894 /* Initialize optabs to point to emulation routines. */
5896 static void
5897 pa_init_libfuncs (void)
5899 if (HPUX_LONG_DOUBLE_LIBRARY)
5901 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5902 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5903 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5904 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5905 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5906 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5907 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5908 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5909 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5911 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5912 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5913 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5914 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5915 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5916 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5917 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5919 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5920 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5921 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5922 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5924 set_conv_libfunc (sfix_optab, SImode, TFmode,
5925 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5926 : "_U_Qfcnvfxt_quad_to_sgl");
5927 set_conv_libfunc (sfix_optab, DImode, TFmode,
5928 "_U_Qfcnvfxt_quad_to_dbl");
5929 set_conv_libfunc (ufix_optab, SImode, TFmode,
5930 "_U_Qfcnvfxt_quad_to_usgl");
5931 set_conv_libfunc (ufix_optab, DImode, TFmode,
5932 "_U_Qfcnvfxt_quad_to_udbl");
5934 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5935 "_U_Qfcnvxf_sgl_to_quad");
5936 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5937 "_U_Qfcnvxf_dbl_to_quad");
5938 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5939 "_U_Qfcnvxf_usgl_to_quad");
5940 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5941 "_U_Qfcnvxf_udbl_to_quad");
5944 if (TARGET_SYNC_LIBCALLS)
5945 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
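/* For example, with HPUX_LONG_DOUBLE_LIBRARY a TFmode addition such
   as

     long double f (long double x, long double y) { return x + y; }

   compiles to a call to _U_Qfadd rather than inline code; the
   comparisons and conversions are mapped through the same table.  */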
5948 /* HP's millicode routines mean something special to the assembler.
5949 Keep track of which ones we have used. */
5951 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5952 static void import_milli (enum millicodes);
5953 static char imported[(int) end1000];
5954 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5955 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5956 #define MILLI_START 10
5958 static void
5959 import_milli (enum millicodes code)
5961 char str[sizeof (import_string)];
5963 if (!imported[(int) code])
5965 imported[(int) code] = 1;
5966 strcpy (str, import_string);
5967 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5968 output_asm_insn (str, 0);
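/* For instance, import_milli (mulI) overwrites the "...." placeholder
   at offset MILLI_START with the routine name and emits

	.IMPORT $$mulI,MILLICODE

   at most once per translation unit, guarded by the imported[]
   array.  */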
5972 /* The register constraints have put the operands and return value in
5973 the proper registers. */
5975 const char *
5976 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5978 import_milli (mulI);
5979 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5982 /* Emit the rtl for doing a division by a constant. */
5984 /* Do magic division millicodes exist for this value? */
5985 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5987 /* We'll use an array to keep track of the magic millicodes and
5988 whether or not we've used them already. [n][0] is signed, [n][1] is
5989 unsigned. */
5991 static int div_milli[16][2];
5993 int
5994 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5996 if (GET_CODE (operands[2]) == CONST_INT
5997 && INTVAL (operands[2]) > 0
5998 && INTVAL (operands[2]) < 16
5999 && pa_magic_milli[INTVAL (operands[2])])
6001 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6003 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6004 emit
6005 (gen_rtx_PARALLEL
6006 (VOIDmode,
6007 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6008 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6009 SImode,
6010 gen_rtx_REG (SImode, 26),
6011 operands[2])),
6012 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6013 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6014 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6015 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6016 gen_rtx_CLOBBER (VOIDmode, ret))));
6017 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6018 return 1;
6020 return 0;
6023 const char *
6024 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6026 int divisor;
6028 /* If the divisor is a constant, try to use one of the special
6029 opcodes. */
6030 if (GET_CODE (operands[0]) == CONST_INT)
6032 static char buf[100];
6033 divisor = INTVAL (operands[0]);
6034 if (!div_milli[divisor][unsignedp])
6036 div_milli[divisor][unsignedp] = 1;
6037 if (unsignedp)
6038 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6039 else
6040 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6042 if (unsignedp)
6044 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6045 INTVAL (operands[0]));
6046 return pa_output_millicode_call (insn,
6047 gen_rtx_SYMBOL_REF (SImode, buf));
6049 else
6051 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6052 INTVAL (operands[0]));
6053 return pa_output_millicode_call (insn,
6054 gen_rtx_SYMBOL_REF (SImode, buf));
6057 /* Divisor isn't a special constant. */
6058 else
6060 if (unsignedp)
6062 import_milli (divU);
6063 return pa_output_millicode_call (insn,
6064 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6066 else
6068 import_milli (divI);
6069 return pa_output_millicode_call (insn,
6070 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
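/* As an illustration, a SImode signed division by 7 (a magic divisor
   per pa_magic_milli) imports and calls a specialized routine, along
   the lines of

	.IMPORT $$divI_7,MILLICODE
	bl $$divI_7,%r31

   while division by a non-magic constant or a variable calls the
   generic $$divI or $$divU.  The branch shown is the short-call form;
   pa_output_millicode_call selects the actual sequence.  */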
6075 /* Output a $$rem millicode to do mod. */
6077 const char *
6078 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6080 if (unsignedp)
6082 import_milli (remU);
6083 return pa_output_millicode_call (insn,
6084 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6086 else
6088 import_milli (remI);
6089 return pa_output_millicode_call (insn,
6090 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6094 void
6095 pa_output_arg_descriptor (rtx_insn *call_insn)
6097 const char *arg_regs[4];
6098 machine_mode arg_mode;
6099 rtx link;
6100 int i, output_flag = 0;
6101 int regno;
6103 /* We neither need nor want argument location descriptors for the
6104 64-bit runtime environment or the ELF32 environment. */
6105 if (TARGET_64BIT || TARGET_ELF32)
6106 return;
6108 for (i = 0; i < 4; i++)
6109 arg_regs[i] = 0;
6111 /* Specify explicitly that no argument relocations should take place
6112 if using the portable runtime calling conventions. */
6113 if (TARGET_PORTABLE_RUNTIME)
6115 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6116 asm_out_file);
6117 return;
6120 gcc_assert (CALL_P (call_insn));
6121 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6122 link; link = XEXP (link, 1))
6124 rtx use = XEXP (link, 0);
6126 if (! (GET_CODE (use) == USE
6127 && GET_CODE (XEXP (use, 0)) == REG
6128 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6129 continue;
6131 arg_mode = GET_MODE (XEXP (use, 0));
6132 regno = REGNO (XEXP (use, 0));
6133 if (regno >= 23 && regno <= 26)
6135 arg_regs[26 - regno] = "GR";
6136 if (arg_mode == DImode)
6137 arg_regs[25 - regno] = "GR";
6139 else if (regno >= 32 && regno <= 39)
6141 if (arg_mode == SFmode)
6142 arg_regs[(regno - 32) / 2] = "FR";
6143 else
6145 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6146 arg_regs[(regno - 34) / 2] = "FR";
6147 arg_regs[(regno - 34) / 2 + 1] = "FU";
6148 #else
6149 arg_regs[(regno - 34) / 2] = "FU";
6150 arg_regs[(regno - 34) / 2 + 1] = "FR";
6151 #endif
6155 fputs ("\t.CALL ", asm_out_file);
6156 for (i = 0; i < 4; i++)
6158 if (arg_regs[i])
6160 if (output_flag++)
6161 fputc (',', asm_out_file);
6162 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6165 fputc ('\n', asm_out_file);
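/* For a simple call such as f (1, 2), with both arguments in %r26 and
   %r25, the loop above produces

	.CALL ARGW0=GR,ARGW1=GR

   while a DFmode argument yields an FR/FU pair (reversed when
   HP_FP_ARG_DESCRIPTOR_REVERSED is defined).  The call to f is a
   hypothetical example.  */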
6168 /* Inform reload about cases where moving X with a mode MODE to or from
6169 a register in RCLASS requires an extra scratch or immediate register.
6170 Return the class needed for the immediate register. */
6172 static reg_class_t
6173 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6174 machine_mode mode, secondary_reload_info *sri)
6176 int regno;
6177 enum reg_class rclass = (enum reg_class) rclass_i;
6179 /* Handle the easy stuff first. */
6180 if (rclass == R1_REGS)
6181 return NO_REGS;
6183 if (REG_P (x))
6185 regno = REGNO (x);
6186 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6187 return NO_REGS;
6189 else
6190 regno = -1;
6192 /* If we have something like (mem (mem (...))), we can safely assume the
6193 inner MEM will end up in a general register after reloading, so there's
6194 no need for a secondary reload. */
6195 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6196 return NO_REGS;
6198 /* Trying to load a constant into a FP register during PIC code
6199 generation requires %r1 as a scratch register. For float modes,
6200 the only legitimate constant is CONST0_RTX. However, there are
6201 a few patterns that accept constant double operands. */
6202 if (flag_pic
6203 && FP_REG_CLASS_P (rclass)
6204 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6206 switch (mode)
6208 case E_SImode:
6209 sri->icode = CODE_FOR_reload_insi_r1;
6210 break;
6212 case E_DImode:
6213 sri->icode = CODE_FOR_reload_indi_r1;
6214 break;
6216 case E_SFmode:
6217 sri->icode = CODE_FOR_reload_insf_r1;
6218 break;
6220 case E_DFmode:
6221 sri->icode = CODE_FOR_reload_indf_r1;
6222 break;
6224 default:
6225 gcc_unreachable ();
6227 return NO_REGS;
6230 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6231 register when we're generating PIC code or when the operand isn't
6232 readonly. */
6233 if (pa_symbolic_expression_p (x))
6235 if (GET_CODE (x) == HIGH)
6236 x = XEXP (x, 0);
6238 if (flag_pic || !read_only_operand (x, VOIDmode))
6240 switch (mode)
6242 case E_SImode:
6243 sri->icode = CODE_FOR_reload_insi_r1;
6244 break;
6246 case E_DImode:
6247 sri->icode = CODE_FOR_reload_indi_r1;
6248 break;
6250 default:
6251 gcc_unreachable ();
6253 return NO_REGS;
6257 /* Profiling showed the PA port spends about 1.3% of its compilation
6258 time in true_regnum from calls inside pa_secondary_reload_class. */
6259 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6260 regno = true_regnum (x);
6262 /* Handle reloads for floating point loads and stores. */
6263 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6264 && FP_REG_CLASS_P (rclass))
6266 if (MEM_P (x))
6268 x = XEXP (x, 0);
6270 /* We don't need a secondary reload for indexed memory addresses.
6272 When INT14_OK_STRICT is true, it might appear that we could
6273 directly allow register indirect memory addresses. However,
6274 this doesn't work because we don't support SUBREGs in
6275 floating-point register copies and reload doesn't tell us
6276 when it's going to use a SUBREG. */
6277 if (IS_INDEX_ADDR_P (x))
6278 return NO_REGS;
6281 /* Request a secondary reload with a general scratch register
6282 for everything else. ??? Could symbolic operands be handled
6283 directly when generating non-pic PA 2.0 code? */
6284 sri->icode = (in_p
6285 ? direct_optab_handler (reload_in_optab, mode)
6286 : direct_optab_handler (reload_out_optab, mode));
6287 return NO_REGS;
6290 /* A SAR<->FP register copy requires an intermediate general register
6291 and secondary memory. We need a secondary reload with a general
6292 scratch register for spills. */
6293 if (rclass == SHIFT_REGS)
6295 /* Handle spill. */
6296 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6298 sri->icode = (in_p
6299 ? direct_optab_handler (reload_in_optab, mode)
6300 : direct_optab_handler (reload_out_optab, mode));
6301 return NO_REGS;
6304 /* Handle FP copy. */
6305 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6306 return GENERAL_REGS;
6309 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6310 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6311 && FP_REG_CLASS_P (rclass))
6312 return GENERAL_REGS;
6314 return NO_REGS;
6317 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6319 static bool
6320 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6321 reg_class_t class1 ATTRIBUTE_UNUSED,
6322 reg_class_t class2 ATTRIBUTE_UNUSED)
6324 #ifdef PA_SECONDARY_MEMORY_NEEDED
6325 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6326 #else
6327 return false;
6328 #endif
6331 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6332 is only marked as live on entry by df-scan when it is a fixed
6333 register. It isn't a fixed register in the 64-bit runtime,
6334 so we need to mark it here. */
6336 static void
6337 pa_extra_live_on_entry (bitmap regs)
6339 if (TARGET_64BIT)
6340 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6343 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6344 to prevent it from being deleted. */
6346 rtx
6347 pa_eh_return_handler_rtx (void)
6349 rtx tmp;
6351 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6352 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6353 tmp = gen_rtx_MEM (word_mode, tmp);
6354 tmp->volatil = 1;
6355 return tmp;
6358 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6359 by invisible reference. As a GCC extension, we also pass anything
6360 with a zero or variable size by reference.
6362 The 64-bit runtime does not describe passing any types by invisible
6363 reference. The internals of GCC can't currently handle passing
6364 empty structures, and zero or variable length arrays when they are
6365 not passed entirely on the stack or by reference. Thus, as a GCC
6366 extension, we pass these types by reference. The HP compiler doesn't
6367 support these types, so hopefully there shouldn't be any compatibility
6368 issues. This may have to be revisited when HP releases a C99 compiler
6369 or updates the ABI. */
6371 static bool
6372 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6374 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6375 if (TARGET_64BIT)
6376 return size <= 0;
6377 else
6378 return size <= 0 || size > 8;
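/* For instance, under the 32-bit runtime a 12-byte aggregate such as

     struct s { char c[12]; };
     void f (struct s x);

   is passed by invisible reference (size > 8), whereas an 8-byte
   aggregate is still passed by value.  Under the 64-bit runtime only
   zero- and variable-sized objects take this path.  struct s is a
   hypothetical type used only for illustration.  */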
6381 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6383 static pad_direction
6384 pa_function_arg_padding (machine_mode mode, const_tree type)
6386 if (mode == BLKmode
6387 || (TARGET_64BIT
6388 && type
6389 && (AGGREGATE_TYPE_P (type)
6390 || TREE_CODE (type) == COMPLEX_TYPE
6391 || VECTOR_TYPE_P (type))))
6393 /* Return PAD_NONE if justification is not required. */
6394 if (type
6395 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6396 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6397 return PAD_NONE;
6399 /* The directions set here are ignored when a BLKmode argument larger
6400 than a word is placed in a register. Different code is used for
6401 the stack and registers. This makes it difficult to have a
6402 consistent data representation for both the stack and registers.
6403 For both runtimes, the justification and padding for arguments on
6404 the stack and in registers should be identical. */
6405 if (TARGET_64BIT)
6406 /* The 64-bit runtime specifies left justification for aggregates. */
6407 return PAD_UPWARD;
6408 else
6409 /* The 32-bit runtime architecture specifies right justification.
6410 When the argument is passed on the stack, the argument is padded
6411 with garbage on the left. The HP compiler pads with zeros. */
6412 return PAD_DOWNWARD;
6415 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6416 return PAD_DOWNWARD;
6417 else
6418 return PAD_NONE;
6422 /* Do what is necessary for `va_start'. We look at the current function
6423 to determine if stdargs or varargs is used and fill in an initial
6424 va_list. A pointer to this constructor is returned. */
6426 static rtx
6427 hppa_builtin_saveregs (void)
6429 rtx offset, dest;
6430 tree fntype = TREE_TYPE (current_function_decl);
6431 int argadj = ((!stdarg_p (fntype))
6432 ? UNITS_PER_WORD : 0);
6434 if (argadj)
6435 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6436 else
6437 offset = crtl->args.arg_offset_rtx;
6439 if (TARGET_64BIT)
6441 int i, off;
6443 /* Adjust for varargs/stdarg differences. */
6444 if (argadj)
6445 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6446 else
6447 offset = crtl->args.arg_offset_rtx;
6449 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6450 from the incoming arg pointer and growing to larger addresses. */
6451 for (i = 26, off = -64; i >= 19; i--, off += 8)
6452 emit_move_insn (gen_rtx_MEM (word_mode,
6453 plus_constant (Pmode,
6454 arg_pointer_rtx, off)),
6455 gen_rtx_REG (word_mode, i));
6457 /* The incoming args pointer points just beyond the flushback area;
6458 normally this is not a serious concern. However, when we are doing
6459 varargs/stdargs we want to make the arg pointer point to the start
6460 of the incoming argument area. */
6461 emit_move_insn (virtual_incoming_args_rtx,
6462 plus_constant (Pmode, arg_pointer_rtx, -64));
6464 /* Now return a pointer to the first anonymous argument. */
6465 return copy_to_reg (expand_binop (Pmode, add_optab,
6466 virtual_incoming_args_rtx,
6467 offset, 0, 0, OPTAB_LIB_WIDEN));
6470 /* Store general registers on the stack. */
6471 dest = gen_rtx_MEM (BLKmode,
6472 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6473 -16));
6474 set_mem_alias_set (dest, get_varargs_alias_set ());
6475 set_mem_align (dest, BITS_PER_WORD);
6476 move_block_from_reg (23, dest, 4);
6478 /* move_block_from_reg will emit code to store the argument registers
6479 individually as scalar stores.
6481 However, other insns may later load from the same addresses for
6482 a structure load (passing a struct to a varargs routine).
6484 The alias code assumes that such aliasing can never happen, so we
6485 have to keep memory referencing insns from moving up beyond the
6486 last argument register store. So we emit a blockage insn here. */
6487 emit_insn (gen_blockage ());
6489 return copy_to_reg (expand_binop (Pmode, add_optab,
6490 crtl->args.internal_arg_pointer,
6491 offset, 0, 0, OPTAB_LIB_WIDEN));
6494 static void
6495 hppa_va_start (tree valist, rtx nextarg)
6497 nextarg = expand_builtin_saveregs ();
6498 std_expand_builtin_va_start (valist, nextarg);
6501 static tree
6502 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6503 gimple_seq *post_p)
6505 if (TARGET_64BIT)
6507 /* Args grow upward. We can use the generic routines. */
6508 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6510 else /* !TARGET_64BIT */
6512 tree ptr = build_pointer_type (type);
6513 tree valist_type;
6514 tree t, u;
6515 unsigned int size, ofs;
6516 bool indirect;
6518 indirect = pass_va_arg_by_reference (type);
6519 if (indirect)
6521 type = ptr;
6522 ptr = build_pointer_type (type);
6524 size = int_size_in_bytes (type);
6525 valist_type = TREE_TYPE (valist);
6527 /* Args grow down. Not handled by generic routines. */
6529 u = fold_convert (sizetype, size_in_bytes (type));
6530 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6531 t = fold_build_pointer_plus (valist, u);
6533 /* Align to 4 or 8 byte boundary depending on argument size. */
6535 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6536 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6537 t = fold_convert (valist_type, t);
6539 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6541 ofs = (8 - size) % 4;
6542 if (ofs != 0)
6543 t = fold_build_pointer_plus_hwi (t, ofs);
6545 t = fold_convert (ptr, t);
6546 t = build_va_arg_indirect_ref (t);
6548 if (indirect)
6549 t = build_va_arg_indirect_ref (t);
6551 return t;
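/* A worked example for the 32-bit case: fetching a 2-byte short first
   decrements the pointer by the argument size (t = valist - 2), masks
   it down to a 4-byte boundary (& -4), and then adds
   ofs = (8 - 2) % 4 = 2 so the short is read from the high,
   right-justified end of its argument word.  An 8-byte double uses
   the -8 mask and ofs = 0.  */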
6555 /* True if MODE is valid for the target. By "valid", we mean able to
6556 be manipulated in non-trivial ways. In particular, this means all
6557 the arithmetic is supported. */
6559 static bool
6560 pa_scalar_mode_supported_p (scalar_mode mode)
6562 int precision = GET_MODE_PRECISION (mode);
6564 if (TARGET_64BIT && mode == TImode)
6565 return true;
6567 switch (GET_MODE_CLASS (mode))
6569 case MODE_PARTIAL_INT:
6570 case MODE_INT:
6571 if (precision == CHAR_TYPE_SIZE)
6572 return true;
6573 if (precision == SHORT_TYPE_SIZE)
6574 return true;
6575 if (precision == INT_TYPE_SIZE)
6576 return true;
6577 if (precision == LONG_TYPE_SIZE)
6578 return true;
6579 if (precision == LONG_LONG_TYPE_SIZE)
6580 return true;
6581 return false;
6583 case MODE_FLOAT:
6584 if (precision == FLOAT_TYPE_SIZE)
6585 return true;
6586 if (precision == DOUBLE_TYPE_SIZE)
6587 return true;
6588 if (precision == LONG_DOUBLE_TYPE_SIZE)
6589 return true;
6590 return false;
6592 case MODE_DECIMAL_FLOAT:
6593 return false;
6595 default:
6596 gcc_unreachable ();
6600 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6601 it branches into the delay slot. Otherwise, return FALSE. */
6603 static bool
6604 branch_to_delay_slot_p (rtx_insn *insn)
6606 rtx_insn *jump_insn;
6608 if (dbr_sequence_length ())
6609 return FALSE;
6611 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6612 while (insn)
6614 insn = next_active_insn (insn);
6615 if (jump_insn == insn)
6616 return TRUE;
6618 /* We can't rely on the length of asms. So, we return FALSE when
6619 the branch is followed by an asm. */
6620 if (!insn
6621 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6622 || asm_noperands (PATTERN (insn)) >= 0
6623 || get_attr_length (insn) > 0)
6624 break;
6627 return FALSE;
6630 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6632 This occurs when INSN has an unfilled delay slot and is followed
6633 by an asm. Disaster can occur if the asm is empty and the jump
6634 branches into the delay slot. So, we add a nop in the delay slot
6635 when this occurs. */
6637 static bool
6638 branch_needs_nop_p (rtx_insn *insn)
6640 rtx_insn *jump_insn;
6642 if (dbr_sequence_length ())
6643 return FALSE;
6645 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6646 while (insn)
6648 insn = next_active_insn (insn);
6649 if (!insn || jump_insn == insn)
6650 return TRUE;
6652 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6653 || asm_noperands (PATTERN (insn)) >= 0)
6654 && get_attr_length (insn) > 0)
6655 break;
6658 return FALSE;
6661 /* Return TRUE if INSN, a forward jump insn, can use nullification
6662 to skip the following instruction. This avoids an extra cycle due
6663 to a mis-predicted branch when we fall through. */
6665 static bool
6666 use_skip_p (rtx_insn *insn)
6668 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6670 while (insn)
6672 insn = next_active_insn (insn);
6674 /* We can't rely on the length of asms, so we can't skip asms. */
6675 if (!insn
6676 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6677 || asm_noperands (PATTERN (insn)) >= 0)
6678 break;
6679 if (get_attr_length (insn) == 4
6680 && jump_insn == next_active_insn (insn))
6681 return TRUE;
6682 if (get_attr_length (insn) > 0)
6683 break;
6686 return FALSE;
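/* The skip idiom this enables looks schematically like

	comclr,<> %r26,%r25,%r0	; nullify next insn if %r26 != %r25
	ldi 1,%r28		; executed only when not nullified

   i.e. a conditional branch over one instruction becomes a compare
   and clear that conditionally nullifies its successor, avoiding a
   branch entirely.  The registers and condition above are
   illustrative.  */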
6689 /* This routine handles all the normal conditional branch sequences we
6690 might need to generate. It handles compare immediate vs compare
6691 register, nullification of delay slots, varying length branches,
6692 negated branches, and all combinations of the above. It returns the
6693 output appropriate to emit the branch corresponding to all given
6694 parameters. */
6696 const char *
6697 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6699 static char buf[100];
6700 bool useskip;
6701 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6702 int length = get_attr_length (insn);
6703 int xdelay;
6705 /* A conditional branch to the following instruction (e.g. the delay slot)
6706 is asking for a disaster. This can happen when not optimizing and
6707 when jump optimization fails.
6709 While it is usually safe to emit nothing, this can fail if the
6710 preceding instruction is a nullified branch with an empty delay
6711 slot and the same branch target as this branch. We could check
6712 for this but jump optimization should eliminate nop jumps. It
6713 is always safe to emit a nop. */
6714 if (branch_to_delay_slot_p (insn))
6715 return "nop";
6717 /* The doubleword form of the cmpib instruction doesn't have the LEU
6718 and GTU conditions while the cmpb instruction does. Since we accept
6719 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6720 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6721 operands[2] = gen_rtx_REG (DImode, 0);
6722 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6723 operands[1] = gen_rtx_REG (DImode, 0);
6725 /* If this is a long branch with its delay slot unfilled, set `nullify'
6726 as it can nullify the delay slot and save a nop. */
6727 if (length == 8 && dbr_sequence_length () == 0)
6728 nullify = 1;
6730 /* If this is a short forward conditional branch which did not get
6731 its delay slot filled, the delay slot can still be nullified. */
6732 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6733 nullify = forward_branch_p (insn);
6735 /* A forward branch over a single nullified insn can be done with a
6736 comclr instruction. This avoids a single cycle penalty due to a
6737 mis-predicted branch if we fall through (branch not taken). */
6738 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6740 switch (length)
6742 /* All short conditional branches except backwards with an unfilled
6743 delay slot. */
6744 case 4:
6745 if (useskip)
6746 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6747 else
6748 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6749 if (GET_MODE (operands[1]) == DImode)
6750 strcat (buf, "*");
6751 if (negated)
6752 strcat (buf, "%B3");
6753 else
6754 strcat (buf, "%S3");
6755 if (useskip)
6756 strcat (buf, " %2,%r1,%%r0");
6757 else if (nullify)
6759 if (branch_needs_nop_p (insn))
6760 strcat (buf, ",n %2,%r1,%0%#");
6761 else
6762 strcat (buf, ",n %2,%r1,%0");
6764 else
6765 strcat (buf, " %2,%r1,%0");
6766 break;
6768 /* All long conditionals. Note a short backward branch with an
6769 unfilled delay slot is treated just like a long backward branch
6770 with an unfilled delay slot. */
6771 case 8:
6772 /* Handle weird backwards branch with a filled delay slot
6773 which is nullified. */
6774 if (dbr_sequence_length () != 0
6775 && ! forward_branch_p (insn)
6776 && nullify)
6778 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6779 if (GET_MODE (operands[1]) == DImode)
6780 strcat (buf, "*");
6781 if (negated)
6782 strcat (buf, "%S3");
6783 else
6784 strcat (buf, "%B3");
6785 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6787 /* Handle short backwards branch with an unfilled delay slot.
6788 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6789 taken and untaken branches. */
6790 else if (dbr_sequence_length () == 0
6791 && ! forward_branch_p (insn)
6792 && INSN_ADDRESSES_SET_P ()
6793 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6794 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6796 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6797 if (GET_MODE (operands[1]) == DImode)
6798 strcat (buf, "*");
6799 if (negated)
6800 strcat (buf, "%B3 %2,%r1,%0%#");
6801 else
6802 strcat (buf, "%S3 %2,%r1,%0%#");
6804 else
6806 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6807 if (GET_MODE (operands[1]) == DImode)
6808 strcat (buf, "*");
6809 if (negated)
6810 strcat (buf, "%S3");
6811 else
6812 strcat (buf, "%B3");
6813 if (nullify)
6814 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6815 else
6816 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6818 break;
6820 default:
6821 /* The reversed conditional branch must branch over one additional
6822 instruction if the delay slot is filled and needs to be extracted
6823 by pa_output_lbranch. If the delay slot is empty or this is a
6824 nullified forward branch, the instruction after the reversed
6825 condition branch must be nullified. */
6826 if (dbr_sequence_length () == 0
6827 || (nullify && forward_branch_p (insn)))
6829 nullify = 1;
6830 xdelay = 0;
6831 operands[4] = GEN_INT (length);
6833 else
6835 xdelay = 1;
6836 operands[4] = GEN_INT (length + 4);
6839 /* Create a reversed conditional branch which branches around
6840 the following insns. */
6841 if (GET_MODE (operands[1]) != DImode)
6843 if (nullify)
6845 if (negated)
6846 strcpy (buf,
6847 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6848 else
6849 strcpy (buf,
6850 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6852 else
6854 if (negated)
6855 strcpy (buf,
6856 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6857 else
6858 strcpy (buf,
6859 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6862 else
6864 if (nullify)
6866 if (negated)
6867 strcpy (buf,
6868 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6869 else
6870 strcpy (buf,
6871 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6873 else
6875 if (negated)
6876 strcpy (buf,
6877 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6878 else
6879 strcpy (buf,
6880 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6884 output_asm_insn (buf, operands);
6885 return pa_output_lbranch (operands[0], insn, xdelay);
6887 return buf;
6890 /* Output a PIC pc-relative instruction sequence to load the address of
6891 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6892 or a code label. OPERANDS[1] specifies the register to use to load
6893 the program counter. OPERANDS[3] may be used for label generation.
6894 The sequence is always three instructions in length. The program
6895 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6896 Register %r1 is clobbered. */
6898 static void
6899 pa_output_pic_pcrel_sequence (rtx *operands)
6901 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6902 if (TARGET_PA_20)
6904 /* We can use mfia to determine the current program counter. */
6905 if (TARGET_SOM || !TARGET_GAS)
6907 operands[3] = gen_label_rtx ();
6908 targetm.asm_out.internal_label (asm_out_file, "L",
6909 CODE_LABEL_NUMBER (operands[3]));
6910 output_asm_insn ("mfia %1", operands);
6911 output_asm_insn ("addil L'%0-%l3,%1", operands);
6912 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6914 else
6916 output_asm_insn ("mfia %1", operands);
6917 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6918 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6921 else
6923 /* We need to use a branch to determine the current program counter. */
6924 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6925 if (TARGET_SOM || !TARGET_GAS)
6927 operands[3] = gen_label_rtx ();
6928 output_asm_insn ("addil L'%0-%l3,%1", operands);
6929 targetm.asm_out.internal_label (asm_out_file, "L",
6930 CODE_LABEL_NUMBER (operands[3]));
6931 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6933 else
6935 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6936 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
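/* Concretely, the PA 2.0 GAS variant above emits

	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r2

   where "sym" stands in for OPERANDS[0] and %r2 for the destination;
   the +12/+16 offsets compensate for each instruction's distance from
   the point where mfia sampled the program counter.  */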
6941 /* This routine handles output of long unconditional branches that
6942 exceed the maximum range of a simple branch instruction. Since
6943 we don't have a register available for the branch, we save register
6944 %r1 in the frame marker, load the branch destination DEST into %r1,
6945 execute the branch, and restore %r1 in the delay slot of the branch.
6947 Since long branches may have an insn in the delay slot and the
6948 delay slot is used to restore %r1, we in general need to extract
6949 this insn and execute it before the branch. However, to facilitate
6950 use of this function by conditional branches, we also provide an
6951 option to not extract the delay insn so that it will be emitted
6952 after the long branch. So, if there is an insn in the delay slot,
6953 it is extracted if XDELAY is nonzero.
6955 The lengths of the various long-branch sequences are 20, 16 and 24
6956 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6958 const char *
6959 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6961 rtx xoperands[4];
6963 xoperands[0] = dest;
6965 /* First, free up the delay slot. */
6966 if (xdelay && dbr_sequence_length () != 0)
6968 /* We can't handle a jump in the delay slot. */
6969 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6971 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6972 optimize, 0, NULL);
6974 /* Now delete the delay insn. */
6975 SET_INSN_DELETED (NEXT_INSN (insn));
6978 /* Output an insn to save %r1. The runtime documentation doesn't
6979 specify whether the "Clean Up" slot in the caller's frame can
6980 be clobbered by the callee. It isn't copied by HP's builtin
6981 alloca, so this suggests that it can be clobbered if necessary.
6982 The "Static Link" location is copied by HP builtin alloca, so
6983 we avoid using it. Using the cleanup slot might be a problem
6984 if we have to interoperate with languages that pass cleanup
6985 information. However, it should be possible to handle these
6986 situations with GCC's asm feature.
6988 The "Current RP" slot is reserved for the called procedure, so
6989 we try to use it when we don't have a frame of our own. It's
6990 rather unlikely that we won't have a frame when we need to emit
6991 a very long branch.
6993 Really the way to go long term is a register scavenger; go to
6994 the target of the jump and find a register which we can use
6995 as a scratch to hold the value in %r1. Then, we wouldn't have
6996 to free up the delay slot or clobber a slot that may be needed
6997 for other purposes. */
6998 if (TARGET_64BIT)
7000 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7001 /* Use the return pointer slot in the frame marker. */
7002 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7003 else
7004 /* Use the slot at -40 in the frame marker since HP builtin
7005 alloca doesn't copy it. */
7006 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7008 else
7010 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7011 /* Use the return pointer slot in the frame marker. */
7012 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7013 else
7014 /* Use the "Clean Up" slot in the frame marker. In GCC,
7015 the only other use of this location is for copying a
7016 floating point double argument from a floating-point
7017 register to two general registers. The copy is done
7018 as an "atomic" operation when outputting a call, so it
7019 won't interfere with our using the location here. */
7020 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7023 if (TARGET_PORTABLE_RUNTIME)
7025 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7026 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7027 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7029 else if (flag_pic)
7031 xoperands[1] = gen_rtx_REG (Pmode, 1);
7032 xoperands[2] = xoperands[1];
7033 pa_output_pic_pcrel_sequence (xoperands);
7034 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7036 else
7037 /* Now output a very long branch to the original target. */
7038 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7040 /* Now restore the value of %r1 in the delay slot. */
7041 if (TARGET_64BIT)
7043 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7044 return "ldd -16(%%r30),%%r1";
7045 else
7046 return "ldd -40(%%r30),%%r1";
7048 else
7050 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7051 return "ldw -20(%%r30),%%r1";
7052 else
7053 return "ldw -12(%%r30),%%r1";
7057 /* This routine handles all the branch-on-bit conditional branch sequences we
7058 might need to generate. It handles nullification of delay slots,
7059 varying length branches, negated branches and all combinations of the
7060 above. It returns the appropriate output template to emit the branch. */
7062 const char *
7063 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7065 static char buf[100];
7066 bool useskip;
7067 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7068 int length = get_attr_length (insn);
7069 int xdelay;
7071 /* A conditional branch to the following instruction (e.g. the delay slot) is
7072 asking for a disaster. I do not think this can happen as this pattern
7073 is only used when optimizing; jump optimization should eliminate the
7074 jump. But be prepared just in case. */
7076 if (branch_to_delay_slot_p (insn))
7077 return "nop";
7079 /* If this is a long branch with its delay slot unfilled, set `nullify'
7080 as it can nullify the delay slot and save a nop. */
7081 if (length == 8 && dbr_sequence_length () == 0)
7082 nullify = 1;
7084 /* If this is a short forward conditional branch which did not get
7085 its delay slot filled, the delay slot can still be nullified. */
7086 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7087 nullify = forward_branch_p (insn);
7089 /* A forward branch over a single nullified insn can be done with an
7090 extrs instruction. This avoids a single cycle penalty due to a
7091 mis-predicted branch if we fall through (branch not taken). */
7092 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7094 switch (length)
7097 /* All short conditional branches except backwards with an unfilled
7098 delay slot. */
7099 case 4:
7100 if (useskip)
7101 strcpy (buf, "{extrs,|extrw,s,}");
7102 else
7103 strcpy (buf, "bb,");
7104 if (useskip && GET_MODE (operands[0]) == DImode)
7105 strcpy (buf, "extrd,s,*");
7106 else if (GET_MODE (operands[0]) == DImode)
7107 strcpy (buf, "bb,*");
7108 if ((which == 0 && negated)
7109 || (which == 1 && ! negated))
7110 strcat (buf, ">=");
7111 else
7112 strcat (buf, "<");
7113 if (useskip)
7114 strcat (buf, " %0,%1,1,%%r0");
7115 else if (nullify && negated)
7117 if (branch_needs_nop_p (insn))
7118 strcat (buf, ",n %0,%1,%3%#");
7119 else
7120 strcat (buf, ",n %0,%1,%3");
7122 else if (nullify && ! negated)
7124 if (branch_needs_nop_p (insn))
7125 strcat (buf, ",n %0,%1,%2%#");
7126 else
7127 strcat (buf, ",n %0,%1,%2");
7129 else if (! nullify && negated)
7130 strcat (buf, " %0,%1,%3");
7131 else if (! nullify && ! negated)
7132 strcat (buf, " %0,%1,%2");
7133 break;
7135 /* All long conditionals. Note a short backward branch with an
7136 unfilled delay slot is treated just like a long backward branch
7137 with an unfilled delay slot. */
7138 case 8:
7139 /* Handle weird backwards branch with a filled delay slot
7140 which is nullified. */
7141 if (dbr_sequence_length () != 0
7142 && ! forward_branch_p (insn)
7143 && nullify)
7145 strcpy (buf, "bb,");
7146 if (GET_MODE (operands[0]) == DImode)
7147 strcat (buf, "*");
7148 if ((which == 0 && negated)
7149 || (which == 1 && ! negated))
7150 strcat (buf, "<");
7151 else
7152 strcat (buf, ">=");
7153 if (negated)
7154 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7155 else
7156 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7158 /* Handle short backwards branch with an unfilled delay slot.
7159 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7160 taken and untaken branches. */
7161 else if (dbr_sequence_length () == 0
7162 && ! forward_branch_p (insn)
7163 && INSN_ADDRESSES_SET_P ()
7164 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7165 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7167 strcpy (buf, "bb,");
7168 if (GET_MODE (operands[0]) == DImode)
7169 strcat (buf, "*");
7170 if ((which == 0 && negated)
7171 || (which == 1 && ! negated))
7172 strcat (buf, ">=");
7173 else
7174 strcat (buf, "<");
7175 if (negated)
7176 strcat (buf, " %0,%1,%3%#");
7177 else
7178 strcat (buf, " %0,%1,%2%#");
7180 else
7182 if (GET_MODE (operands[0]) == DImode)
7183 strcpy (buf, "extrd,s,*");
7184 else
7185 strcpy (buf, "{extrs,|extrw,s,}");
7186 if ((which == 0 && negated)
7187 || (which == 1 && ! negated))
7188 strcat (buf, "<");
7189 else
7190 strcat (buf, ">=");
7191 if (nullify && negated)
7192 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7193 else if (nullify && ! negated)
7194 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7195 else if (negated)
7196 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7197 else
7198 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7200 break;
7202 default:
7203 /* The reversed conditional branch must branch over one additional
7204 instruction if the delay slot is filled and needs to be extracted
7205 by pa_output_lbranch. If the delay slot is empty or this is a
7206 nullified forward branch, the instruction after the reversed
7207 condition branch must be nullified. */
7208 if (dbr_sequence_length () == 0
7209 || (nullify && forward_branch_p (insn)))
7211 nullify = 1;
7212 xdelay = 0;
7213 operands[4] = GEN_INT (length);
7215 else
7217 xdelay = 1;
7218 operands[4] = GEN_INT (length + 4);
7221 if (GET_MODE (operands[0]) == DImode)
7222 strcpy (buf, "bb,*");
7223 else
7224 strcpy (buf, "bb,");
7225 if ((which == 0 && negated)
7226 || (which == 1 && !negated))
7227 strcat (buf, "<");
7228 else
7229 strcat (buf, ">=");
7230 if (nullify)
7231 strcat (buf, ",n %0,%1,.+%4");
7232 else
7233 strcat (buf, " %0,%1,.+%4");
7234 output_asm_insn (buf, operands);
7235 return pa_output_lbranch (negated ? operands[3] : operands[2],
7236 insn, xdelay);
7238 return buf;
7241 /* This routine handles all the branch-on-variable-bit conditional branch
7242 sequences we might need to generate. It handles nullification of delay
7243 slots, varying length branches, negated branches and all combinations
7244 of the above. It returns the appropriate output template to emit the
7245 branch. */
7247 const char *
7248 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7249 int which)
7251 static char buf[100];
7252 bool useskip;
7253 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7254 int length = get_attr_length (insn);
7255 int xdelay;
7257 /* A conditional branch to the following instruction (e.g. the delay slot) is
7258 asking for a disaster. I do not think this can happen as this pattern
7259 is only used when optimizing; jump optimization should eliminate the
7260 jump. But be prepared just in case. */
7262 if (branch_to_delay_slot_p (insn))
7263 return "nop";
7265 /* If this is a long branch with its delay slot unfilled, set `nullify'
7266 as it can nullify the delay slot and save a nop. */
7267 if (length == 8 && dbr_sequence_length () == 0)
7268 nullify = 1;
7270 /* If this is a short forward conditional branch which did not get
7271 its delay slot filled, the delay slot can still be nullified. */
7272 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7273 nullify = forward_branch_p (insn);
7275 /* A forward branch over a single nullified insn can be done with an
7276 extrs instruction. This avoids a single cycle penalty due to a
7277 mis-predicted branch if we fall through (branch not taken). */
7278 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7280 switch (length)
7283 /* All short conditional branches except backwards with an unfilled
7284 delay slot. */
7285 case 4:
7286 if (useskip)
7287 strcpy (buf, "{vextrs,|extrw,s,}");
7288 else
7289 strcpy (buf, "{bvb,|bb,}");
7290 if (useskip && GET_MODE (operands[0]) == DImode)
7291 strcpy (buf, "extrd,s,*");
7292 else if (GET_MODE (operands[0]) == DImode)
7293 strcpy (buf, "bb,*");
7294 if ((which == 0 && negated)
7295 || (which == 1 && ! negated))
7296 strcat (buf, ">=");
7297 else
7298 strcat (buf, "<");
7299 if (useskip)
7300 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7301 else if (nullify && negated)
7303 if (branch_needs_nop_p (insn))
7304 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7305 else
7306 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7308 else if (nullify && ! negated)
7310 if (branch_needs_nop_p (insn))
7311 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7312 else
7313 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7315 else if (! nullify && negated)
7316 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7317 else if (! nullify && ! negated)
7318 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7319 break;
7321 /* All long conditionals. Note a short backward branch with an
7322 unfilled delay slot is treated just like a long backward branch
7323 with an unfilled delay slot. */
7324 case 8:
7325 /* Handle weird backwards branch with a filled delay slot
7326 which is nullified. */
7327 if (dbr_sequence_length () != 0
7328 && ! forward_branch_p (insn)
7329 && nullify)
7331 strcpy (buf, "{bvb,|bb,}");
7332 if (GET_MODE (operands[0]) == DImode)
7333 strcat (buf, "*");
7334 if ((which == 0 && negated)
7335 || (which == 1 && ! negated))
7336 strcat (buf, "<");
7337 else
7338 strcat (buf, ">=");
7339 if (negated)
7340 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7341 else
7342 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7344 /* Handle short backwards branch with an unfilled delay slot.
7345 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7346 taken and untaken branches. */
7347 else if (dbr_sequence_length () == 0
7348 && ! forward_branch_p (insn)
7349 && INSN_ADDRESSES_SET_P ()
7350 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7351 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7353 strcpy (buf, "{bvb,|bb,}");
7354 if (GET_MODE (operands[0]) == DImode)
7355 strcat (buf, "*");
7356 if ((which == 0 && negated)
7357 || (which == 1 && ! negated))
7358 strcat (buf, ">=");
7359 else
7360 strcat (buf, "<");
7361 if (negated)
7362 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7363 else
7364 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7366 else
7368 strcpy (buf, "{vextrs,|extrw,s,}");
7369 if (GET_MODE (operands[0]) == DImode)
7370 strcpy (buf, "extrd,s,*");
7371 if ((which == 0 && negated)
7372 || (which == 1 && ! negated))
7373 strcat (buf, "<");
7374 else
7375 strcat (buf, ">=");
7376 if (nullify && negated)
7377 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7378 else if (nullify && ! negated)
7379 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7380 else if (negated)
7381 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7382 else
7383 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7385 break;
7387 default:
7388 /* The reversed conditional branch must branch over one additional
7389 instruction if the delay slot is filled and needs to be extracted
7390 by pa_output_lbranch. If the delay slot is empty or this is a
7391 nullified forward branch, the instruction after the reversed
7392 condition branch must be nullified. */
7393 if (dbr_sequence_length () == 0
7394 || (nullify && forward_branch_p (insn)))
7396 nullify = 1;
7397 xdelay = 0;
7398 operands[4] = GEN_INT (length);
7400 else
7402 xdelay = 1;
7403 operands[4] = GEN_INT (length + 4);
7406 if (GET_MODE (operands[0]) == DImode)
7407 strcpy (buf, "bb,*");
7408 else
7409 strcpy (buf, "{bvb,|bb,}");
7410 if ((which == 0 && negated)
7411 || (which == 1 && !negated))
7412 strcat (buf, "<");
7413 else
7414 strcat (buf, ">=");
7415 if (nullify)
7416 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7417 else
7418 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7419 output_asm_insn (buf, operands);
7420 return pa_output_lbranch (negated ? operands[3] : operands[2],
7421 insn, xdelay);
7423 return buf;
7426 /* Return the output template for emitting a dbra type insn.
7428 Note it may perform some output operations on its own before
7429 returning the final output string. */
7430 const char *
7431 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7433 int length = get_attr_length (insn);
7435 /* A conditional branch to the following instruction (e.g. the delay slot) is
7436 asking for a disaster. Be prepared! */
7438 if (branch_to_delay_slot_p (insn))
7440 if (which_alternative == 0)
7441 return "ldo %1(%0),%0";
7442 else if (which_alternative == 1)
7444 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7445 output_asm_insn ("ldw -16(%%r30),%4", operands);
7446 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7447 return "{fldws|fldw} -16(%%r30),%0";
7449 else
7451 output_asm_insn ("ldw %0,%4", operands);
7452 return "ldo %1(%4),%4\n\tstw %4,%0";
7456 if (which_alternative == 0)
7458 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7459 int xdelay;
7461 /* If this is a long branch with its delay slot unfilled, set `nullify'
7462 as it can nullify the delay slot and save a nop. */
7463 if (length == 8 && dbr_sequence_length () == 0)
7464 nullify = 1;
7466 /* If this is a short forward conditional branch which did not get
7467 its delay slot filled, the delay slot can still be nullified. */
7468 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7469 nullify = forward_branch_p (insn);
7471 switch (length)
7473 case 4:
7474 if (nullify)
7476 if (branch_needs_nop_p (insn))
7477 return "addib,%C2,n %1,%0,%3%#";
7478 else
7479 return "addib,%C2,n %1,%0,%3";
7481 else
7482 return "addib,%C2 %1,%0,%3";
7484 case 8:
7485 /* Handle weird backwards branch with a filled delay slot
7486 which is nullified. */
7487 if (dbr_sequence_length () != 0
7488 && ! forward_branch_p (insn)
7489 && nullify)
7490 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7491 /* Handle short backwards branch with an unfilled delay slot.
7492 Using an addb;nop rather than addi;bl saves 1 cycle for both
7493 taken and untaken branches. */
7494 else if (dbr_sequence_length () == 0
7495 && ! forward_branch_p (insn)
7496 && INSN_ADDRESSES_SET_P ()
7497 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7498 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7499 return "addib,%C2 %1,%0,%3%#";
7501 /* Handle normal cases. */
7502 if (nullify)
7503 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7504 else
7505 return "addi,%N2 %1,%0,%0\n\tb %3";
7507 default:
7508 /* The reversed conditional branch must branch over one additional
7509 instruction if the delay slot is filled and needs to be extracted
7510 by pa_output_lbranch. If the delay slot is empty or this is a
7511 nullified forward branch, the instruction after the reversed
7512 condition branch must be nullified. */
7513 if (dbr_sequence_length () == 0
7514 || (nullify && forward_branch_p (insn)))
7516 nullify = 1;
7517 xdelay = 0;
7518 operands[4] = GEN_INT (length);
7520 else
7522 xdelay = 1;
7523 operands[4] = GEN_INT (length + 4);
7526 if (nullify)
7527 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7528 else
7529 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7531 return pa_output_lbranch (operands[3], insn, xdelay);
7535 /* Deal with gross reload from FP register case. */
7536 else if (which_alternative == 1)
7538 /* Move loop counter from FP register to MEM then into a GR,
7539 increment the GR, store the GR into MEM, and finally reload
7540 the FP register from MEM from within the branch's delay slot. */
7541 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7542 operands);
7543 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7544 if (length == 24)
7545 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7546 else if (length == 28)
7547 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7548 else
7550 operands[5] = GEN_INT (length - 16);
7551 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7552 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7553 return pa_output_lbranch (operands[3], insn, 0);
7556 /* Deal with gross reload from memory case. */
7557 else
7559 /* Reload loop counter from memory, the store back to memory
7560 happens in the branch's delay slot. */
7561 output_asm_insn ("ldw %0,%4", operands);
7562 if (length == 12)
7563 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7564 else if (length == 16)
7565 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7566 else
7568 operands[5] = GEN_INT (length - 4);
7569 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7570 return pa_output_lbranch (operands[3], insn, 0);
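/* In the common register alternative this yields the classic
   decrement-and-branch idiom, e.g.

	addib,<> -1,%r3,L$loop

   which adds -1 to %r3 and branches to L$loop while the result is
   nonzero.  The register and label are illustrative.  */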
7575 /* Return the output template for emitting a movb type insn.
7577 Note it may perform some output operations on its own before
7578 returning the final output string. */
7579 const char *
7580 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7581 int reverse_comparison)
7583 int length = get_attr_length (insn);
7585 /* A conditional branch to the following instruction (e.g. the delay slot) is
7586 asking for a disaster. Be prepared! */
7588 if (branch_to_delay_slot_p (insn))
7590 if (which_alternative == 0)
7591 return "copy %1,%0";
7592 else if (which_alternative == 1)
7594 output_asm_insn ("stw %1,-16(%%r30)", operands);
7595 return "{fldws|fldw} -16(%%r30),%0";
7597 else if (which_alternative == 2)
7598 return "stw %1,%0";
7599 else
7600 return "mtsar %r1";
7603 /* Support the second variant. */
7604 if (reverse_comparison)
7605 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7607 if (which_alternative == 0)
7609 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7610 int xdelay;
7612 /* If this is a long branch with its delay slot unfilled, set `nullify'
7613 as it can nullify the delay slot and save a nop. */
7614 if (length == 8 && dbr_sequence_length () == 0)
7615 nullify = 1;
7617 /* If this is a short forward conditional branch which did not get
7618 its delay slot filled, the delay slot can still be nullified. */
7619 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7620 nullify = forward_branch_p (insn);
7622 switch (length)
7624 case 4:
7625 if (nullify)
7627 if (branch_needs_nop_p (insn))
7628 return "movb,%C2,n %1,%0,%3%#";
7629 else
7630 return "movb,%C2,n %1,%0,%3";
7632 else
7633 return "movb,%C2 %1,%0,%3";
7635 case 8:
7636 /* Handle weird backwards branch with a filled delay slot
7637 which is nullified. */
7638 if (dbr_sequence_length () != 0
7639 && ! forward_branch_p (insn)
7640 && nullify)
7641 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7643 /* Handle short backwards branch with an unfilled delay slot.
7644 Using a movb;nop rather than or;bl saves 1 cycle for both
7645 taken and untaken branches. */
7646 else if (dbr_sequence_length () == 0
7647 && ! forward_branch_p (insn)
7648 && INSN_ADDRESSES_SET_P ()
7649 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7650 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7651 return "movb,%C2 %1,%0,%3%#";
7652 /* Handle normal cases. */
7653 if (nullify)
7654 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7655 else
7656 return "or,%N2 %1,%%r0,%0\n\tb %3";
7658 default:
7659 /* The reversed conditional branch must branch over one additional
7660 instruction if the delay slot is filled and needs to be extracted
7661 by pa_output_lbranch. If the delay slot is empty or this is a
7662 nullified forward branch, the instruction after the reversed
	 conditional branch must be nullified.  */
7664 if (dbr_sequence_length () == 0
7665 || (nullify && forward_branch_p (insn)))
7667 nullify = 1;
7668 xdelay = 0;
7669 operands[4] = GEN_INT (length);
7671 else
7673 xdelay = 1;
7674 operands[4] = GEN_INT (length + 4);
7677 if (nullify)
7678 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7679 else
7680 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7682 return pa_output_lbranch (operands[3], insn, xdelay);
7685 /* Deal with gross reload for FP destination register case. */
7686 else if (which_alternative == 1)
7688 /* Move source register to MEM, perform the branch test, then
7689 finally load the FP register from MEM from within the branch's
7690 delay slot. */
7691 output_asm_insn ("stw %1,-16(%%r30)", operands);
7692 if (length == 12)
7693 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7694 else if (length == 16)
7695 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7696 else
7698 operands[4] = GEN_INT (length - 4);
7699 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7700 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7701 return pa_output_lbranch (operands[3], insn, 0);
7704 /* Deal with gross reload from memory case. */
7705 else if (which_alternative == 2)
7707 /* Reload loop counter from memory, the store back to memory
7708 happens in the branch's delay slot. */
7709 if (length == 8)
7710 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7711 else if (length == 12)
7712 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7713 else
7715 operands[4] = GEN_INT (length);
7716 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7717 operands);
7718 return pa_output_lbranch (operands[3], insn, 0);
7721 /* Handle SAR as a destination. */
7722 else
7724 if (length == 8)
7725 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7726 else if (length == 12)
7727 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7728 else
7730 operands[4] = GEN_INT (length);
7731 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7732 operands);
7733 return pa_output_lbranch (operands[3], insn, 0);
7738 /* Copy any FP arguments in INSN into integer registers. */
7739 static void
7740 copy_fp_args (rtx_insn *insn)
7742 rtx link;
7743 rtx xoperands[2];
7745 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7747 int arg_mode, regno;
7748 rtx use = XEXP (link, 0);
7750 if (! (GET_CODE (use) == USE
7751 && GET_CODE (XEXP (use, 0)) == REG
7752 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7753 continue;
7755 arg_mode = GET_MODE (XEXP (use, 0));
7756 regno = REGNO (XEXP (use, 0));
7758 /* Is it a floating point register? */
7759 if (regno >= 32 && regno <= 39)
7761 /* Copy the FP register into an integer register via memory. */
7762 if (arg_mode == SFmode)
7764 xoperands[0] = XEXP (use, 0);
7765 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7766 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7767 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7769 else
7771 xoperands[0] = XEXP (use, 0);
7772 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7773 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7774 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7775 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7781 /* Compute length of the FP argument copy sequence for INSN. */
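/* Each single-precision argument needs the two-insn store/load pair
   emitted by copy_fp_args above (8 bytes); each double-precision argument
   needs three insns (12 bytes).  */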
7782 static int
7783 length_fp_args (rtx_insn *insn)
7785 int length = 0;
7786 rtx link;
7788 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7790 int arg_mode, regno;
7791 rtx use = XEXP (link, 0);
7793 if (! (GET_CODE (use) == USE
7794 && GET_CODE (XEXP (use, 0)) == REG
7795 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7796 continue;
7798 arg_mode = GET_MODE (XEXP (use, 0));
7799 regno = REGNO (XEXP (use, 0));
7801 /* Is it a floating point register? */
7802 if (regno >= 32 && regno <= 39)
7804 if (arg_mode == SFmode)
7805 length += 8;
7806 else
7807 length += 12;
7811 return length;
7814 /* Return the attribute length for the millicode call instruction INSN.
7815 The length must match the code generated by pa_output_millicode_call.
7816 We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */
int
pa_attr_length_millicode_call (rtx_insn *insn)
7822 unsigned long distance = -1;
7823 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
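  /* Estimate the worst-case distance to the millicode routine: the bytes
     already output for preceding functions plus the reference address of
     INSN within the current function.  Millicode is taken to live at the
     start of the $CODE$ subspace, so this sum bounds the branch distance;
     if the unsigned sum wraps (distance < total), assume out of range.  */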
7825 if (INSN_ADDRESSES_SET_P ())
7827 distance = (total + insn_current_reference_address (insn));
7828 if (distance < total)
7829 distance = -1;
7832 if (TARGET_64BIT)
7834 if (!TARGET_LONG_CALLS && distance < 7600000)
7835 return 8;
7837 return 20;
7839 else if (TARGET_PORTABLE_RUNTIME)
7840 return 24;
7841 else
7843 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7844 return 8;
7846 if (!flag_pic)
7847 return 12;
7849 return 24;
7853 /* INSN is a function call.
7855 CALL_DEST is the routine we are calling. */
7857 const char *
7858 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7860 int attr_length = get_attr_length (insn);
7861 int seq_length = dbr_sequence_length ();
7862 rtx xoperands[4];
7864 xoperands[0] = call_dest;
7866 /* Handle the common case where we are sure that the branch will
7867 reach the beginning of the $CODE$ subspace. The within reach
7868 form of the $$sh_func_adrs call has a length of 28. Because it
7869 has an attribute type of sh_func_adrs, it never has a nonzero
7870 sequence length (i.e., the delay slot is never filled). */
7871 if (!TARGET_LONG_CALLS
7872 && (attr_length == 8
7873 || (attr_length == 28
7874 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7876 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7877 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7879 else
7881 if (TARGET_64BIT)
7883 /* It might seem that one insn could be saved by accessing
7884 the millicode function using the linkage table. However,
7885 this doesn't work in shared libraries and other dynamically
7886 loaded objects. Using a pc-relative sequence also avoids
7887 problems related to the implicit use of the gp register. */
7888 xoperands[1] = gen_rtx_REG (Pmode, 1);
7889 xoperands[2] = xoperands[1];
7890 pa_output_pic_pcrel_sequence (xoperands);
7891 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7893 else if (TARGET_PORTABLE_RUNTIME)
7895 /* Pure portable runtime doesn't allow be/ble; we also don't
7896 have PIC support in the assembler/linker, so this sequence
7897 is needed. */
7899 /* Get the address of our target into %r1. */
7900 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7901 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7903 /* Get our return address into %r31. */
7904 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7905 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7907 /* Jump to our target address in %r1. */
7908 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7910 else if (!flag_pic)
7912 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7913 if (TARGET_PA_20)
7914 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7915 else
7916 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7918 else
7920 xoperands[1] = gen_rtx_REG (Pmode, 31);
7921 xoperands[2] = gen_rtx_REG (Pmode, 1);
7922 pa_output_pic_pcrel_sequence (xoperands);
7924 /* Adjust return address. */
7925 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7927 /* Jump to our target address in %r1. */
7928 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7932 if (seq_length == 0)
7933 output_asm_insn ("nop", xoperands);
7935 return "";
7938 /* Return the attribute length of the call instruction INSN. The SIBCALL
7939 flag indicates whether INSN is a regular call or a sibling call. The
7940 length returned must be longer than the code actually generated by
7941 pa_output_call. Since branch shortening is done before delay branch
7942 sequencing, there is no way to determine whether or not the delay
7943 slot will be filled during branch shortening. Even when the delay
7944 slot is filled, we may have to add a nop if the delay slot contains
7945 a branch that can't reach its target. Thus, we always have to include
7946 the delay slot in the length estimate. This used to be done in
7947 pa_adjust_insn_length but we do it here now as some sequences always
7948 fill the delay slot and we can save four bytes in the estimate for
7949 these sequences. */
int
pa_attr_length_call (rtx_insn *insn, int sibcall)
7954 int local_call;
7955 rtx call, call_dest;
7956 tree call_decl;
7957 int length = 0;
7958 rtx pat = PATTERN (insn);
7959 unsigned long distance = -1;
7961 gcc_assert (CALL_P (insn));
7963 if (INSN_ADDRESSES_SET_P ())
7965 unsigned long total;
7967 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7968 distance = (total + insn_current_reference_address (insn));
7969 if (distance < total)
7970 distance = -1;
7973 gcc_assert (GET_CODE (pat) == PARALLEL);
7975 /* Get the call rtx. */
7976 call = XVECEXP (pat, 0, 0);
7977 if (GET_CODE (call) == SET)
7978 call = SET_SRC (call);
7980 gcc_assert (GET_CODE (call) == CALL);
7982 /* Determine if this is a local call. */
7983 call_dest = XEXP (XEXP (call, 0), 0);
7984 call_decl = SYMBOL_REF_DECL (call_dest);
7985 local_call = call_decl && targetm.binds_local_p (call_decl);
7987 /* pc-relative branch. */
7988 if (!TARGET_LONG_CALLS
7989 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7990 || distance < MAX_PCREL17F_OFFSET))
7991 length += 8;
7993 /* 64-bit plabel sequence. */
7994 else if (TARGET_64BIT && !local_call)
7995 length += 24;
7997 /* non-pic long absolute branch sequence. */
7998 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7999 length += 12;
8001 /* long pc-relative branch sequence. */
8002 else if (TARGET_LONG_PIC_SDIFF_CALL
8003 || (TARGET_GAS && !TARGET_SOM && local_call))
8005 length += 20;
8007 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8008 length += 8;
8011 /* 32-bit plabel sequence. */
8012 else
8014 length += 32;
8016 if (TARGET_SOM)
8017 length += length_fp_args (insn);
8019 if (flag_pic)
8020 length += 4;
8022 if (!TARGET_PA_20)
8024 if (!sibcall)
8025 length += 8;
8027 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8028 length += 8;
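  /* For example, assuming no FP arguments are in flight, an external PIC
     call on 32-bit PA 1.x with space registers in use sums to
     32 + 4 + 8 + 8 = 52 bytes, which covers the thirteen-instruction
     plabel sequence emitted by pa_output_call.  */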
8032 return length;
8035 /* INSN is a function call.
8037 CALL_DEST is the routine we are calling. */
8039 const char *
8040 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8042 int seq_length = dbr_sequence_length ();
8043 tree call_decl = SYMBOL_REF_DECL (call_dest);
8044 int local_call = call_decl && targetm.binds_local_p (call_decl);
8045 rtx xoperands[4];
8047 xoperands[0] = call_dest;
8049 /* Handle the common case where we're sure that the branch will reach
8050 the beginning of the "$CODE$" subspace. This is the beginning of
8051 the current function if we are in a named section. */
8052 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8054 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8055 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8057 else
8059 if (TARGET_64BIT && !local_call)
8061 /* ??? As far as I can tell, the HP linker doesn't support the
8062 long pc-relative sequence described in the 64-bit runtime
8063 architecture. So, we use a slightly longer indirect call. */
8064 xoperands[0] = pa_get_deferred_plabel (call_dest);
8065 xoperands[1] = gen_label_rtx ();
8067 /* Put the load of %r27 into the delay slot. We don't need to
8068 do anything when generating fast indirect calls. */
8069 if (seq_length != 0)
8071 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8072 optimize, 0, NULL);
8074 /* Now delete the delay insn. */
8075 SET_INSN_DELETED (NEXT_INSN (insn));
8078 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8079 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8080 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8081 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8082 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8083 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8084 seq_length = 1;
8086 else
8088 int indirect_call = 0;
8090 /* Emit a long call. There are several different sequences
8091 of increasing length and complexity. In most cases,
8092 they don't allow an instruction in the delay slot. */
8093 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8094 && !TARGET_LONG_PIC_SDIFF_CALL
8095 && !(TARGET_GAS && !TARGET_SOM && local_call)
8096 && !TARGET_64BIT)
8097 indirect_call = 1;
8099 if (seq_length != 0
8100 && !sibcall
8101 && (!TARGET_PA_20
8102 || indirect_call
8103 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8105 /* A non-jump insn in the delay slot. By definition we can
8106 emit this insn before the call (and in fact before argument
	     relocating).  */
8108 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8109 NULL);
8111 /* Now delete the delay insn. */
8112 SET_INSN_DELETED (NEXT_INSN (insn));
8113 seq_length = 0;
8116 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8118 /* This is the best sequence for making long calls in
8119 non-pic code. Unfortunately, GNU ld doesn't provide
8120 the stub needed for external calls, and GAS's support
8121 for this with the SOM linker is buggy. It is safe
8122 to use this for local calls. */
8123 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8124 if (sibcall)
8125 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8126 else
8128 if (TARGET_PA_20)
8129 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8130 xoperands);
8131 else
8132 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8134 output_asm_insn ("copy %%r31,%%r2", xoperands);
8135 seq_length = 1;
8138 else
8140 /* The HP assembler and linker can handle relocations for
8141 the difference of two symbols. The HP assembler
8142 recognizes the sequence as a pc-relative call and
8143 the linker provides stubs when needed. */
8145 /* GAS currently can't generate the relocations that
8146 are needed for the SOM linker under HP-UX using this
8147 sequence. The GNU linker doesn't generate the stubs
8148 that are needed for external calls on TARGET_ELF32
8149 with this sequence. For now, we have to use a longer
8150 plabel sequence when using GAS for non local calls. */
8151 if (TARGET_LONG_PIC_SDIFF_CALL
8152 || (TARGET_GAS && !TARGET_SOM && local_call))
8154 xoperands[1] = gen_rtx_REG (Pmode, 1);
8155 xoperands[2] = xoperands[1];
8156 pa_output_pic_pcrel_sequence (xoperands);
8158 else
8160 /* Emit a long plabel-based call sequence. This is
8161 essentially an inline implementation of $$dyncall.
8162 We don't actually try to call $$dyncall as this is
8163 as difficult as calling the function itself. */
8164 xoperands[0] = pa_get_deferred_plabel (call_dest);
8165 xoperands[1] = gen_label_rtx ();
8167 /* Since the call is indirect, FP arguments in registers
8168 need to be copied to the general registers. Then, the
8169 argument relocation stub will copy them back. */
8170 if (TARGET_SOM)
8171 copy_fp_args (insn);
8173 if (flag_pic)
8175 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8176 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8177 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8179 else
8181 output_asm_insn ("addil LR'%0-$global$,%%r27",
8182 xoperands);
8183 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8184 xoperands);
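	  /* %r22 now holds the function pointer.  If bit 30 is set, it is
	     a plabel rather than a code address: mask off the low two bits
	     and fetch the actual target address and new global pointer
	     from the words it points to.  */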
8187 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8188 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8189 /* Should this be an ordered load to ensure the target
8190 address is loaded before the global pointer? */
8191 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8192 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8194 if (!sibcall && !TARGET_PA_20)
8196 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8197 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8198 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8199 else
8200 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8204 if (TARGET_PA_20)
8206 if (sibcall)
8207 output_asm_insn ("bve (%%r1)", xoperands);
8208 else
8210 if (indirect_call)
8212 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8213 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8214 seq_length = 1;
8216 else
8217 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8220 else
8222 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8223 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8224 xoperands);
8226 if (sibcall)
8228 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8229 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8230 else
8231 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8233 else
8235 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8236 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8237 else
8238 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8240 if (indirect_call)
8241 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8242 else
8243 output_asm_insn ("copy %%r31,%%r2", xoperands);
8244 seq_length = 1;
8251 if (seq_length == 0)
8252 output_asm_insn ("nop", xoperands);
8254 return "";
8257 /* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
8259 The returned length includes the delay slot. Currently, the delay
8260 slot of an indirect call sequence is not exposed and it is used by
8261 the sequence itself. */
int
pa_attr_length_indirect_call (rtx_insn *insn)
8266 unsigned long distance = -1;
8267 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8269 if (INSN_ADDRESSES_SET_P ())
8271 distance = (total + insn_current_reference_address (insn));
8272 if (distance < total)
8273 distance = -1;
8276 if (TARGET_64BIT)
8277 return 12;
8279 if (TARGET_FAST_INDIRECT_CALLS)
8280 return 8;
8282 if (TARGET_PORTABLE_RUNTIME)
8283 return 16;
8285 if (!TARGET_LONG_CALLS
8286 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8287 || distance < MAX_PCREL17F_OFFSET))
8288 return 8;
8290 /* Out of reach, can use ble. */
8291 if (!flag_pic)
8292 return 12;
8294 /* Inline versions of $$dyncall. */
8295 if (!optimize_size)
8297 if (TARGET_NO_SPACE_REGS)
8298 return 28;
8300 if (TARGET_PA_20)
8301 return 32;
8304 /* Long PIC pc-relative call. */
8305 return 20;
8308 const char *
8309 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8311 rtx xoperands[4];
8312 int length;
8314 if (TARGET_64BIT)
8316 xoperands[0] = call_dest;
8317 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8318 "bve,l (%%r2),%%r2\n\t"
8319 "ldd 24(%0),%%r27", xoperands);
8320 return "";
8323 /* First the special case for kernels, level 0 systems, etc. */
8324 if (TARGET_FAST_INDIRECT_CALLS)
8326 pa_output_arg_descriptor (insn);
8327 if (TARGET_PA_20)
8328 return "bve,l,n (%%r22),%%r2\n\tnop";
8329 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8332 if (TARGET_PORTABLE_RUNTIME)
8334 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8335 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8336 pa_output_arg_descriptor (insn);
8337 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8340 /* Now the normal case -- we can reach $$dyncall directly or
8341 we're sure that we can get there via a long-branch stub.
8343 No need to check target flags as the length uniquely identifies
8344 the remaining cases. */
8345 length = pa_attr_length_indirect_call (insn);
8346 if (length == 8)
8348 pa_output_arg_descriptor (insn);
8350 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8351 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8352 variant of the B,L instruction can't be used on the SOM target. */
8353 if (TARGET_PA_20 && !TARGET_SOM)
8354 return "b,l,n $$dyncall,%%r2\n\tnop";
8355 else
8356 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8359 /* Long millicode call, but we are not generating PIC or portable runtime
8360 code. */
8361 if (length == 12)
8363 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8364 pa_output_arg_descriptor (insn);
8365 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8368 /* The long PIC pc-relative call sequence is five instructions. So,
8369 let's use an inline version of $$dyncall when the calling sequence
8370 has a roughly similar number of instructions and we are not optimizing
8371 for size. We need two instructions to load the return pointer plus
8372 the $$dyncall implementation. */
8373 if (!optimize_size)
8375 if (TARGET_NO_SPACE_REGS)
8377 pa_output_arg_descriptor (insn);
8378 output_asm_insn ("bl .+8,%%r2\n\t"
8379 "ldo 20(%%r2),%%r2\n\t"
8380 "extru,<> %%r22,30,1,%%r0\n\t"
8381 "bv,n %%r0(%%r22)\n\t"
8382 "ldw -2(%%r22),%%r21\n\t"
8383 "bv %%r0(%%r21)\n\t"
8384 "ldw 2(%%r22),%%r19", xoperands);
8385 return "";
8387 if (TARGET_PA_20)
8389 pa_output_arg_descriptor (insn);
8390 output_asm_insn ("bl .+8,%%r2\n\t"
8391 "ldo 24(%%r2),%%r2\n\t"
8392 "stw %%r2,-24(%%sp)\n\t"
8393 "extru,<> %r22,30,1,%%r0\n\t"
8394 "bve,n (%%r22)\n\t"
8395 "ldw -2(%%r22),%%r21\n\t"
8396 "bve (%%r21)\n\t"
8397 "ldw 2(%%r22),%%r19", xoperands);
8398 return "";
8402 /* We need a long PIC call to $$dyncall. */
8403 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8404 xoperands[1] = gen_rtx_REG (Pmode, 2);
8405 xoperands[2] = gen_rtx_REG (Pmode, 1);
8406 pa_output_pic_pcrel_sequence (xoperands);
8407 pa_output_arg_descriptor (insn);
8408 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8411 /* In HPUX 8.0's shared library scheme, special relocations are needed
8412 for function labels if they might be passed to a function
8413 in a shared library (because shared libraries don't live in code
8414 space), and special magic is needed to construct their address. */
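/* The encoding simply prefixes the assembler name with '@'; for example,
   "foo" becomes "@foo".  */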
8416 void
8417 pa_encode_label (rtx sym)
8419 const char *str = XSTR (sym, 0);
8420 int len = strlen (str) + 1;
8421 char *newstr, *p;
8423 p = newstr = XALLOCAVEC (char, len + 1);
8424 *p++ = '@';
8425 strcpy (p, str);
8427 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8430 static void
8431 pa_encode_section_info (tree decl, rtx rtl, int first)
8433 int old_referenced = 0;
8435 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8436 old_referenced
8437 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8439 default_encode_section_info (decl, rtl, first);
8441 if (first && TEXT_SPACE_P (decl))
8443 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8444 if (TREE_CODE (decl) == FUNCTION_DECL)
8445 pa_encode_label (XEXP (rtl, 0));
8447 else if (old_referenced)
8448 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
/* This is sort of the inverse of pa_encode_section_info.  */
8453 static const char *
8454 pa_strip_name_encoding (const char *str)
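  /* Drop the '@' added by pa_encode_label and then any '*' user-label
     prefix marker.  */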
8456 str += (*str == '@');
8457 str += (*str == '*');
8458 return str;
8461 /* Returns 1 if OP is a function label involved in a simple addition
8462 with a constant. Used to keep certain patterns from matching
8463 during instruction combination. */
int
pa_is_function_label_plus_const (rtx op)
8467 /* Strip off any CONST. */
8468 if (GET_CODE (op) == CONST)
8469 op = XEXP (op, 0);
8471 return (GET_CODE (op) == PLUS
8472 && function_label_operand (XEXP (op, 0), VOIDmode)
8473 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8476 /* Output the assembler code for a thunk function. THUNK_DECL is the
8477 declaration for the thunk function itself, FUNCTION is the decl for
8478 the target function. DELTA is an immediate constant offset to be
8479 added to THIS. If VCALL_OFFSET is nonzero, the word at
8480 *(*this + vcall_offset) should be added to THIS. */
8482 static void
8483 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8484 HOST_WIDE_INT vcall_offset, tree function)
8486 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8487 static unsigned int current_thunk_number;
8488 int val_14 = VAL_14_BITS_P (delta);
8489 unsigned int old_last_address = last_address, nbytes = 0;
8490 char label[17];
8491 rtx xoperands[4];
8493 xoperands[0] = XEXP (DECL_RTL (function), 0);
8494 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8495 xoperands[2] = GEN_INT (delta);
8497 assemble_start_function (thunk_fndecl, fnname);
8498 final_start_function (emit_barrier (), file, 1);
8500 if (!vcall_offset)
8502 /* Output the thunk. We know that the function is in the same
8503 translation unit (i.e., the same space) as the thunk, and that
8504 thunks are output after their method. Thus, we don't need an
8505 external branch to reach the function. With SOM and GAS,
8506 functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
8508 will add a long branch stub if necessary.
8510 However, we have to be careful when generating PIC code on the
8511 SOM port to ensure that the sequence does not transfer to an
8512 import stub for the target function as this could clobber the
8513 return value saved at SP-24. This would also apply to the
8514 32-bit linux port if the multi-space model is implemented. */
8515 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8516 && !(flag_pic && TREE_PUBLIC (function))
8517 && (TARGET_GAS || last_address < 262132))
8518 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8519 && ((targetm_common.have_named_sections
8520 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8521 /* The GNU 64-bit linker has rather poor stub management.
8522 So, we use a long branch from thunks that aren't in
8523 the same section as the target function. */
8524 && ((!TARGET_64BIT
8525 && (DECL_SECTION_NAME (thunk_fndecl)
8526 != DECL_SECTION_NAME (function)))
8527 || ((DECL_SECTION_NAME (thunk_fndecl)
8528 == DECL_SECTION_NAME (function))
8529 && last_address < 262132)))
8530 /* In this case, we need to be able to reach the start of
8531 the stub table even though the function is likely closer
8532 and can be jumped to directly. */
8533 || (targetm_common.have_named_sections
8534 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8535 && DECL_SECTION_NAME (function) == NULL
8536 && total_code_bytes < MAX_PCREL17F_OFFSET)
8537 /* Likewise. */
8538 || (!targetm_common.have_named_sections
8539 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8541 if (!val_14)
8542 output_asm_insn ("addil L'%2,%%r26", xoperands);
8544 output_asm_insn ("b %0", xoperands);
8546 if (val_14)
8548 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8549 nbytes += 8;
8551 else
8553 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8554 nbytes += 12;
8557 else if (TARGET_64BIT)
8559 rtx xop[4];
8561 /* We only have one call-clobbered scratch register, so we can't
8562 make use of the delay slot if delta doesn't fit in 14 bits. */
8563 if (!val_14)
8565 output_asm_insn ("addil L'%2,%%r26", xoperands);
8566 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8569 /* Load function address into %r1. */
8570 xop[0] = xoperands[0];
8571 xop[1] = gen_rtx_REG (Pmode, 1);
8572 xop[2] = xop[1];
8573 pa_output_pic_pcrel_sequence (xop);
8575 if (val_14)
8577 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8578 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8579 nbytes += 20;
8581 else
8583 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8584 nbytes += 24;
8587 else if (TARGET_PORTABLE_RUNTIME)
8589 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8590 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8592 if (!val_14)
8593 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8595 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8597 if (val_14)
8599 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8600 nbytes += 16;
8602 else
8604 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8605 nbytes += 20;
8608 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8610 /* The function is accessible from outside this module. The only
8611 way to avoid an import stub between the thunk and function is to
8612 call the function directly with an indirect sequence similar to
8613 that used by $$dyncall. This is possible because $$dyncall acts
8614 as the import stub in an indirect call. */
8615 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8616 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8617 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8618 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8619 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8620 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8621 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8622 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8623 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8625 if (!val_14)
8627 output_asm_insn ("addil L'%2,%%r26", xoperands);
8628 nbytes += 4;
8631 if (TARGET_PA_20)
8633 output_asm_insn ("bve (%%r22)", xoperands);
8634 nbytes += 36;
8636 else if (TARGET_NO_SPACE_REGS)
8638 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8639 nbytes += 36;
8641 else
8643 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8644 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8645 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8646 nbytes += 44;
8649 if (val_14)
8650 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8651 else
8652 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8654 else if (flag_pic)
8656 rtx xop[4];
8658 /* Load function address into %r22. */
8659 xop[0] = xoperands[0];
8660 xop[1] = gen_rtx_REG (Pmode, 1);
8661 xop[2] = gen_rtx_REG (Pmode, 22);
8662 pa_output_pic_pcrel_sequence (xop);
8664 if (!val_14)
8665 output_asm_insn ("addil L'%2,%%r26", xoperands);
8667 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8669 if (val_14)
8671 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8672 nbytes += 20;
8674 else
8676 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8677 nbytes += 24;
8680 else
8682 if (!val_14)
8683 output_asm_insn ("addil L'%2,%%r26", xoperands);
8685 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8686 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8688 if (val_14)
8690 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8691 nbytes += 12;
8693 else
8695 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8696 nbytes += 16;
8700 else
8702 rtx xop[4];
8704 /* Add DELTA to THIS. */
8705 if (val_14)
8707 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8708 nbytes += 4;
8710 else
8712 output_asm_insn ("addil L'%2,%%r26", xoperands);
8713 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8714 nbytes += 8;
8717 if (TARGET_64BIT)
8719 /* Load *(THIS + DELTA) to %r1. */
8720 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8722 val_14 = VAL_14_BITS_P (vcall_offset);
8723 xoperands[2] = GEN_INT (vcall_offset);
8725 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8726 if (val_14)
8728 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8729 nbytes += 8;
8731 else
8733 output_asm_insn ("addil L'%2,%%r1", xoperands);
8734 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8735 nbytes += 12;
8738 else
8740 /* Load *(THIS + DELTA) to %r1. */
8741 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8743 val_14 = VAL_14_BITS_P (vcall_offset);
8744 xoperands[2] = GEN_INT (vcall_offset);
8746 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8747 if (val_14)
8749 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8750 nbytes += 8;
8752 else
8754 output_asm_insn ("addil L'%2,%%r1", xoperands);
8755 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8756 nbytes += 12;
8760 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8761 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8762 && !(flag_pic && TREE_PUBLIC (function))
8763 && (TARGET_GAS || last_address < 262132))
8764 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8765 && ((targetm_common.have_named_sections
8766 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8767 /* The GNU 64-bit linker has rather poor stub management.
8768 So, we use a long branch from thunks that aren't in
8769 the same section as the target function. */
8770 && ((!TARGET_64BIT
8771 && (DECL_SECTION_NAME (thunk_fndecl)
8772 != DECL_SECTION_NAME (function)))
8773 || ((DECL_SECTION_NAME (thunk_fndecl)
8774 == DECL_SECTION_NAME (function))
8775 && last_address < 262132)))
8776 /* In this case, we need to be able to reach the start of
8777 the stub table even though the function is likely closer
8778 and can be jumped to directly. */
8779 || (targetm_common.have_named_sections
8780 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8781 && DECL_SECTION_NAME (function) == NULL
8782 && total_code_bytes < MAX_PCREL17F_OFFSET)
8783 /* Likewise. */
8784 || (!targetm_common.have_named_sections
8785 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8787 nbytes += 4;
8788 output_asm_insn ("b %0", xoperands);
8790 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8791 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8793 else if (TARGET_64BIT)
8795 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8796 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8798 /* Load function address into %r1. */
8799 nbytes += 16;
8800 xop[0] = xoperands[0];
8801 xop[1] = gen_rtx_REG (Pmode, 1);
8802 xop[2] = xop[1];
8803 pa_output_pic_pcrel_sequence (xop);
8805 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8807 else if (TARGET_PORTABLE_RUNTIME)
8809 /* Load function address into %r22. */
8810 nbytes += 12;
8811 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8812 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8814 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8816 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8817 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8819 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8821 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8822 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8824 /* The function is accessible from outside this module. The only
8825 way to avoid an import stub between the thunk and function is to
8826 call the function directly with an indirect sequence similar to
8827 that used by $$dyncall. This is possible because $$dyncall acts
8828 as the import stub in an indirect call. */
8829 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8830 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8831 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8832 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8833 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8834 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8835 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8836 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8837 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8839 if (TARGET_PA_20)
8841 output_asm_insn ("bve,n (%%r22)", xoperands);
8842 nbytes += 32;
8844 else if (TARGET_NO_SPACE_REGS)
8846 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8847 nbytes += 32;
8849 else
8851 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8852 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8853 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8854 nbytes += 40;
8857 else if (flag_pic)
8859 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8860 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8862 /* Load function address into %r1. */
8863 nbytes += 16;
8864 xop[0] = xoperands[0];
8865 xop[1] = gen_rtx_REG (Pmode, 1);
8866 xop[2] = xop[1];
8867 pa_output_pic_pcrel_sequence (xop);
8869 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8871 else
8873 /* Load function address into %r22. */
8874 nbytes += 8;
8875 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8876 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8878 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8879 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8883 final_end_function ();
8885 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8887 switch_to_section (data_section);
8888 output_asm_insn (".align 4", xoperands);
8889 ASM_OUTPUT_LABEL (file, label);
8890 output_asm_insn (".word P'%0", xoperands);
8893 current_thunk_number++;
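  /* Round the thunk size up to a multiple of the function alignment so
     that LAST_ADDRESS remains an estimate of where the next function can
     start.  */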
8894 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8895 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8896 last_address += nbytes;
8897 if (old_last_address > last_address)
8898 last_address = UINT_MAX;
8899 update_total_code_bytes (nbytes);
8900 assemble_end_function (thunk_fndecl, fnname);
8903 /* Only direct calls to static functions are allowed to be sibling (tail)
8904 call optimized.
   This restriction is necessary because some linker-generated stubs will
   store return pointers into rp' in some cases, which might clobber a
   live value already in rp'.
8910 In a sibcall the current function and the target function share stack
8911 space. Thus if the path to the current function and the path to the
8912 target function save a value in rp', they save the value into the
8913 same stack slot, which has undesirable consequences.
8915 Because of the deferred binding nature of shared libraries any function
8916 with external scope could be in a different load module and thus require
8917 rp' to be saved when calling that function. So sibcall optimizations
   can only be safe for static functions.
8920 Note that GCC never needs return value relocations, so we don't have to
8921 worry about static calls with return value relocations (which require
8922 saving rp').
8924 It is safe to perform a sibcall optimization when the target function
8925 will never return. */
8926 static bool
8927 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8929 /* Sibcalls are not ok because the arg pointer register is not a fixed
8930 register. This prevents the sibcall optimization from occurring. In
8931 addition, there are problems with stub placement using GNU ld. This
8932 is because a normal sibcall branch uses a 17-bit relocation while
8933 a regular call branch uses a 22-bit relocation. As a result, more
8934 care needs to be taken in the placement of long-branch stubs. */
8935 if (TARGET_64BIT)
8936 return false;
8938 if (TARGET_PORTABLE_RUNTIME)
8939 return false;
8941 /* Sibcalls are only ok within a translation unit. */
8942 return decl && targetm.binds_local_p (decl);
8945 /* ??? Addition is not commutative on the PA due to the weird implicit
8946 space register selection rules for memory addresses. Therefore, we
8947 don't consider a + b == b + a, as this might be inside a MEM. */
8948 static bool
8949 pa_commutative_p (const_rtx x, int outer_code)
8951 return (COMMUTATIVE_P (x)
8952 && (TARGET_NO_SPACE_REGS
8953 || (outer_code != UNKNOWN && outer_code != MEM)
8954 || GET_CODE (x) != PLUS));
8957 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8958 use in fmpyadd instructions. */
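/* Operand layout: 0 = multiply destination, 1 and 2 = multiply sources,
   3 = add destination, 4 and 5 = add sources.  The checks below require
   one add source to equal the add destination and the two operations to
   be independent.  */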
int
pa_fmpyaddoperands (rtx *operands)
8962 machine_mode mode = GET_MODE (operands[0]);
8964 /* Must be a floating point mode. */
8965 if (mode != SFmode && mode != DFmode)
8966 return 0;
8968 /* All modes must be the same. */
8969 if (! (mode == GET_MODE (operands[1])
8970 && mode == GET_MODE (operands[2])
8971 && mode == GET_MODE (operands[3])
8972 && mode == GET_MODE (operands[4])
8973 && mode == GET_MODE (operands[5])))
8974 return 0;
8976 /* All operands must be registers. */
8977 if (! (GET_CODE (operands[1]) == REG
8978 && GET_CODE (operands[2]) == REG
8979 && GET_CODE (operands[3]) == REG
8980 && GET_CODE (operands[4]) == REG
8981 && GET_CODE (operands[5]) == REG))
8982 return 0;
8984 /* Only 2 real operands to the addition. One of the input operands must
8985 be the same as the output operand. */
8986 if (! rtx_equal_p (operands[3], operands[4])
8987 && ! rtx_equal_p (operands[3], operands[5]))
8988 return 0;
8990 /* Inout operand of add cannot conflict with any operands from multiply. */
8991 if (rtx_equal_p (operands[3], operands[0])
8992 || rtx_equal_p (operands[3], operands[1])
8993 || rtx_equal_p (operands[3], operands[2]))
8994 return 0;
8996 /* multiply cannot feed into addition operands. */
8997 if (rtx_equal_p (operands[4], operands[0])
8998 || rtx_equal_p (operands[5], operands[0]))
8999 return 0;
9001 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9002 if (mode == SFmode
9003 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9004 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9005 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9006 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9007 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9008 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9009 return 0;
9011 /* Passed. Operands are suitable for fmpyadd. */
9012 return 1;
9015 #if !defined(USE_COLLECT2)
9016 static void
9017 pa_asm_out_constructor (rtx symbol, int priority)
9019 if (!function_label_operand (symbol, VOIDmode))
9020 pa_encode_label (symbol);
9022 #ifdef CTORS_SECTION_ASM_OP
9023 default_ctor_section_asm_out_constructor (symbol, priority);
9024 #else
9025 # ifdef TARGET_ASM_NAMED_SECTION
9026 default_named_section_asm_out_constructor (symbol, priority);
9027 # else
9028 default_stabs_asm_out_constructor (symbol, priority);
9029 # endif
9030 #endif
9033 static void
9034 pa_asm_out_destructor (rtx symbol, int priority)
9036 if (!function_label_operand (symbol, VOIDmode))
9037 pa_encode_label (symbol);
9039 #ifdef DTORS_SECTION_ASM_OP
9040 default_dtor_section_asm_out_destructor (symbol, priority);
9041 #else
9042 # ifdef TARGET_ASM_NAMED_SECTION
9043 default_named_section_asm_out_destructor (symbol, priority);
9044 # else
9045 default_stabs_asm_out_destructor (symbol, priority);
9046 # endif
9047 #endif
9049 #endif
9051 /* This function places uninitialized global data in the bss section.
9052 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9053 function on the SOM port to prevent uninitialized global data from
9054 being placed in the data section. */
9056 void
9057 pa_asm_output_aligned_bss (FILE *stream,
9058 const char *name,
9059 unsigned HOST_WIDE_INT size,
9060 unsigned int align)
9062 switch_to_section (bss_section);
9064 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9065 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9066 #endif
9068 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9069 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9070 #endif
9072 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9073 ASM_OUTPUT_LABEL (stream, name);
9074 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9077 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9078 that doesn't allow the alignment of global common storage to be directly
9079 specified. The SOM linker aligns common storage based on the rounded
9080 value of the NUM_BYTES parameter in the .comm directive. It's not
9081 possible to use the .align directive as it doesn't affect the alignment
9082 of the label associated with a .comm directive. */
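/* For example, under this scheme a 4-byte variable that needs 16-byte
   alignment must be emitted as ".comm 16": the linker derives the
   alignment from the size, so the size is bumped up to the alignment
   below when necessary.  */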
9084 void
9085 pa_asm_output_aligned_common (FILE *stream,
9086 const char *name,
9087 unsigned HOST_WIDE_INT size,
9088 unsigned int align)
9090 unsigned int max_common_align;
9092 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9093 if (align > max_common_align)
9095 /* Alignment exceeds maximum alignment for global common data. */
9096 align = max_common_align;
9099 switch_to_section (bss_section);
9101 assemble_name (stream, name);
9102 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9103 MAX (size, align / BITS_PER_UNIT));
9106 /* We can't use .comm for local common storage as the SOM linker effectively
9107 treats the symbol as universal and uses the same storage for local symbols
9108 with the same name in different object files. The .block directive
9109 reserves an uninitialized block of storage. However, it's not common
9110 storage. Fortunately, GCC never requests common storage with the same
9111 name in any given translation unit. */
9113 void
9114 pa_asm_output_aligned_local (FILE *stream,
9115 const char *name,
9116 unsigned HOST_WIDE_INT size,
9117 unsigned int align)
9119 switch_to_section (bss_section);
9120 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9122 #ifdef LOCAL_ASM_OP
9123 fprintf (stream, "%s", LOCAL_ASM_OP);
9124 assemble_name (stream, name);
9125 fprintf (stream, "\n");
9126 #endif
9128 ASM_OUTPUT_LABEL (stream, name);
9129 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9132 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9133 use in fmpysub instructions. */
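/* Operand layout matches pa_fmpyaddoperands above, with operands 3-5
   describing the subtraction; since subtraction is not commutative, only
   operands[4] may be the same as the destination operands[3].  */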
int
pa_fmpysuboperands (rtx *operands)
9137 machine_mode mode = GET_MODE (operands[0]);
9139 /* Must be a floating point mode. */
9140 if (mode != SFmode && mode != DFmode)
9141 return 0;
9143 /* All modes must be the same. */
9144 if (! (mode == GET_MODE (operands[1])
9145 && mode == GET_MODE (operands[2])
9146 && mode == GET_MODE (operands[3])
9147 && mode == GET_MODE (operands[4])
9148 && mode == GET_MODE (operands[5])))
9149 return 0;
9151 /* All operands must be registers. */
9152 if (! (GET_CODE (operands[1]) == REG
9153 && GET_CODE (operands[2]) == REG
9154 && GET_CODE (operands[3]) == REG
9155 && GET_CODE (operands[4]) == REG
9156 && GET_CODE (operands[5]) == REG))
9157 return 0;
9159 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
   operation, so operands[4] must be the same as operands[3].  */
9161 if (! rtx_equal_p (operands[3], operands[4]))
9162 return 0;
9164 /* multiply cannot feed into subtraction. */
9165 if (rtx_equal_p (operands[5], operands[0]))
9166 return 0;
9168 /* Inout operand of sub cannot conflict with any operands from multiply. */
9169 if (rtx_equal_p (operands[3], operands[0])
9170 || rtx_equal_p (operands[3], operands[1])
9171 || rtx_equal_p (operands[3], operands[2]))
9172 return 0;
9174 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9175 if (mode == SFmode
9176 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9177 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9178 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9179 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9180 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9181 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9182 return 0;
9184 /* Passed. Operands are suitable for fmpysub. */
9185 return 1;
9188 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9189 constants for a MULT embedded inside a memory address. */
int
pa_mem_shadd_constant_p (int val)
9193 if (val == 2 || val == 4 || val == 8)
9194 return 1;
9195 else
9196 return 0;
9199 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9200 constants for shadd instructions. */
int
pa_shadd_constant_p (int val)
9204 if (val == 1 || val == 2 || val == 3)
9205 return 1;
9206 else
9207 return 0;
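/* Note the correspondence between the two predicates above: the valid
   MULT factors 2, 4 and 8 are exactly 1 << N for the valid shadd shift
   counts N = 1, 2 and 3.  */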
9210 /* Return TRUE if INSN branches forward. */
9212 static bool
9213 forward_branch_p (rtx_insn *insn)
9215 rtx lab = JUMP_LABEL (insn);
9217 /* The INSN must have a jump label. */
9218 gcc_assert (lab != NULL_RTX);
9220 if (INSN_ADDRESSES_SET_P ())
9221 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
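  /* Insn addresses are not available; scan forward from INSN and declare
     the branch forward if we reach LAB before the end of the chain.  */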
9223 while (insn)
9225 if (insn == lab)
9226 return true;
9227 else
9228 insn = NEXT_INSN (insn);
9231 return false;
9234 /* Output an unconditional move and branch insn. */
9236 const char *
9237 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9239 int length = get_attr_length (insn);
9241 /* These are the cases in which we win. */
9242 if (length == 4)
9243 return "mov%I1b,tr %1,%0,%2";
9245 /* None of the following cases win, but they don't lose either. */
9246 if (length == 8)
9248 if (dbr_sequence_length () == 0)
9250 /* Nothing in the delay slot, fake it by putting the combined
9251 insn (the copy or add) in the delay slot of a bl. */
9252 if (GET_CODE (operands[1]) == CONST_INT)
9253 return "b %2\n\tldi %1,%0";
9254 else
9255 return "b %2\n\tcopy %1,%0";
9257 else
9259 /* Something in the delay slot, but we've got a long branch. */
9260 if (GET_CODE (operands[1]) == CONST_INT)
9261 return "ldi %1,%0\n\tb %2";
9262 else
9263 return "copy %1,%0\n\tb %2";
9267 if (GET_CODE (operands[1]) == CONST_INT)
9268 output_asm_insn ("ldi %1,%0", operands);
9269 else
9270 output_asm_insn ("copy %1,%0", operands);
9271 return pa_output_lbranch (operands[2], insn, 1);
9274 /* Output an unconditional add and branch insn. */
9276 const char *
9277 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9279 int length = get_attr_length (insn);
9281 /* To make life easy we want operand0 to be the shared input/output
9282 operand and operand1 to be the readonly operand. */
9283 if (operands[0] == operands[1])
9284 operands[1] = operands[2];
9286 /* These are the cases in which we win. */
9287 if (length == 4)
9288 return "add%I1b,tr %1,%0,%3";
9290 /* None of the following cases win, but they don't lose either. */
9291 if (length == 8)
9293 if (dbr_sequence_length () == 0)
9294 /* Nothing in the delay slot, fake it by putting the combined
9295 insn (the copy or add) in the delay slot of a bl. */
9296 return "b %3\n\tadd%I1 %1,%0,%0";
9297 else
9298 /* Something in the delay slot, but we've got a long branch. */
9299 return "add%I1 %1,%0,%0\n\tb %3";
9302 output_asm_insn ("add%I1 %1,%0,%0", operands);
9303 return pa_output_lbranch (operands[3], insn, 1);
9306 /* We use this hook to perform a PA specific optimization which is difficult
9307 to do in earlier passes. */
9309 static void
9310 pa_reorg (void)
9312 remove_useless_addtr_insns (1);
9314 if (pa_cpu < PROCESSOR_8000)
9315 pa_combine_instructions ();
9318 /* The PA has a number of odd instructions which can perform multiple
9319 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9320 it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
9322 two outputs would take two slots in the reorder buffers.
9324 This routine finds instructions which can be combined and combines
9325 them. We only support some of the potential combinations, and we
9326 only try common ways to find suitable instructions.
9328 * addb can add two registers or a register and a small integer
9329 and jump to a nearby (+-8k) location. Normally the jump to the
9330 nearby location is conditional on the result of the add, but by
9331 using the "true" condition we can make the jump unconditional.
9332 Thus addb can perform two independent operations in one insn.
9334 * movb is similar to addb in that it can perform a reg->reg
   or small immediate->reg copy and jump to a nearby (+-8k) location.
9337 * fmpyadd and fmpysub can perform a FP multiply and either an
9338 FP add or FP sub if the operands of the multiply and add/sub are
9339 independent (there are other minor restrictions). Note both
9340 the fmpy and fadd/fsub can in theory move to better spots according
9341 to data dependencies, but for now we require the fmpy stay at a
9342 fixed location.
9344 * Many of the memory operations can perform pre & post updates
9345 of index registers. GCC's pre/post increment/decrement addressing
9346 is far too simple to take advantage of all the possibilities. This
9347 pass may not be suitable since those insns may not be independent.
9349 * comclr can compare two ints or an int and a register, nullify
9350 the following instruction and zero some other register. This
9351 is more difficult to use as it's harder to find an insn which
9352 will generate a comclr than finding something like an unconditional
   branch.  (Conditional moves and long branches create comclr insns.)
9355 * Most arithmetic operations can conditionally skip the next
9356 instruction. They can be viewed as "perform this operation
9357 and conditionally jump to this nearby location" (where nearby
   is an insn away).  These are difficult to use due to the
9359 branch length restrictions. */
9361 static void
9362 pa_combine_instructions (void)
9364 rtx_insn *anchor;
9366 /* This can get expensive since the basic algorithm is on the
9367 order of O(n^2) (or worse). Only do it for -O2 or higher
9368 levels of optimization. */
9369 if (optimize < 2)
9370 return;
9372 /* Walk down the list of insns looking for "anchor" insns which
9373 may be combined with "floating" insns. As the name implies,
9374 "anchor" instructions don't move, while "floating" insns may
9375 move around. */
9376 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9377 rtx_insn *new_rtx = make_insn_raw (par);
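  /* NEW_RTX is a scratch two-element PARALLEL; pa_can_combine_p plugs the
     anchor and floater patterns into it and asks recog whether the
     combination matches a real machine pattern.  */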
9379 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9381 enum attr_pa_combine_type anchor_attr;
9382 enum attr_pa_combine_type floater_attr;
9384 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9385 Also ignore any special USE insns. */
9386 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9387 || GET_CODE (PATTERN (anchor)) == USE
9388 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9389 continue;
9391 anchor_attr = get_attr_pa_combine_type (anchor);
9392 /* See if anchor is an insn suitable for combination. */
9393 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9394 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9395 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9396 && ! forward_branch_p (anchor)))
9398 rtx_insn *floater;
9400 for (floater = PREV_INSN (anchor);
9401 floater;
9402 floater = PREV_INSN (floater))
9404 if (NOTE_P (floater)
9405 || (NONJUMP_INSN_P (floater)
9406 && (GET_CODE (PATTERN (floater)) == USE
9407 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9408 continue;
9410 /* Anything except a regular INSN will stop our search. */
9411 if (! NONJUMP_INSN_P (floater))
9413 floater = NULL;
9414 break;
9417 /* See if FLOATER is suitable for combination with the
9418 anchor. */
9419 floater_attr = get_attr_pa_combine_type (floater);
9420 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9421 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9422 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9423 && floater_attr == PA_COMBINE_TYPE_FMPY))
9425 /* If ANCHOR and FLOATER can be combined, then we're
9426 done with this pass. */
9427 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9428 SET_DEST (PATTERN (floater)),
9429 XEXP (SET_SRC (PATTERN (floater)), 0),
9430 XEXP (SET_SRC (PATTERN (floater)), 1)))
9431 break;
9434 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9435 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9437 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9439 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9440 SET_DEST (PATTERN (floater)),
9441 XEXP (SET_SRC (PATTERN (floater)), 0),
9442 XEXP (SET_SRC (PATTERN (floater)), 1)))
9443 break;
9445 else
9447 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9448 SET_DEST (PATTERN (floater)),
9449 SET_SRC (PATTERN (floater)),
9450 SET_SRC (PATTERN (floater))))
9451 break;
	  /* If we didn't find anything on the backwards scan, try forwards.  */
9457 if (!floater
9458 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9459 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9461 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9463 if (NOTE_P (floater)
9464 || (NONJUMP_INSN_P (floater)
9465 && (GET_CODE (PATTERN (floater)) == USE
9466 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9468 continue;
9470 /* Anything except a regular INSN will stop our search. */
9471 if (! NONJUMP_INSN_P (floater))
9473 floater = NULL;
9474 break;
9477 /* See if FLOATER is suitable for combination with the
9478 anchor. */
9479 floater_attr = get_attr_pa_combine_type (floater);
9480 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9481 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9482 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9483 && floater_attr == PA_COMBINE_TYPE_FMPY))
9485 /* If ANCHOR and FLOATER can be combined, then we're
9486 done with this pass. */
9487 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9488 SET_DEST (PATTERN (floater)),
				      XEXP (SET_SRC (PATTERN (floater)), 0),
				      XEXP (SET_SRC (PATTERN (floater)), 1)))
9493 break;
9498 /* FLOATER will be nonzero if we found a suitable floating
9499 insn for combination with ANCHOR. */
9500 if (floater
9501 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9502 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9504 /* Emit the new instruction and delete the old anchor. */
9505 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9506 copy_rtx (PATTERN (floater)));
9507 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9508 emit_insn_before (temp, anchor);
9510 SET_INSN_DELETED (anchor);
9512 /* Emit a special USE insn for FLOATER, then delete
9513 the floating insn. */
9514 temp = copy_rtx (PATTERN (floater));
9515 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9516 delete_insn (floater);
9518 continue;
9520 else if (floater
9521 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9523 /* Emit the new_jump instruction and delete the old anchor. */
9524 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9525 copy_rtx (PATTERN (floater)));
9526 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9527 temp = emit_jump_insn_before (temp, anchor);
9529 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9530 SET_INSN_DELETED (anchor);
9532 /* Emit a special USE insn for FLOATER, then delete
9533 the floating insn. */
9534 temp = copy_rtx (PATTERN (floater));
9535 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9536 delete_insn (floater);
9537 continue;
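/* Each PARALLEL recognized above corresponds to a combined pattern in
   pa.md; e.g., the FMPY/FADDSUB pairs are expected to match the two-op
   fmpyadd/fmpysub insns available on PA 1.1 and later, and the
   branch/ADDMOVE pair a "move and branch" insn.  */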
9543 static int
9544 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9545 int reversed, rtx dest,
9546 rtx src1, rtx src2)
9548 int insn_code_number;
9549 rtx_insn *start, *end;
9551 /* Create a PARALLEL with the patterns of ANCHOR and
9552 FLOATER, try to recognize it, then test constraints
9553 for the resulting pattern.
9555 If the pattern doesn't match or the constraints
9556 aren't met keep searching for a suitable floater
9557 insn. */
9558 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9559 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9560 INSN_CODE (new_rtx) = -1;
9561 insn_code_number = recog_memoized (new_rtx);
9562 basic_block bb = BLOCK_FOR_INSN (anchor);
9563 if (insn_code_number < 0
9564 || (extract_insn (new_rtx),
9565 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9566 return 0;
9568 if (reversed)
9570 start = anchor;
9571 end = floater;
9573 else
9575 start = floater;
9576 end = anchor;
9579 /* There are up to three operands to consider: one
9580 output and two inputs.
9582 The output must not be used between FLOATER & ANCHOR
9583 exclusive. The inputs must not be set between
9584 FLOATER and ANCHOR exclusive. */
9586 if (reg_used_between_p (dest, start, end))
9587 return 0;
9589 if (reg_set_between_p (src1, start, end))
9590 return 0;
9592 if (reg_set_between_p (src2, start, end))
9593 return 0;
9595 /* If we get here, then everything is good. */
9596 return 1;
9599 /* Return nonzero if references for INSN are delayed.
9601 Millicode insns are actually function calls with some special
9602 constraints on arguments and register usage.
9604 Millicode calls always expect their arguments in the integer argument
9605 registers, and always return their result in %r29 (ret1). They
9606 are expected to clobber their arguments, %r1, %r29, and the return
9607 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9609 This function tells reorg that the references to arguments and
9610 millicode calls do not appear to happen until after the millicode call.
9611 This allows reorg to put insns which set the argument registers into the
9612 delay slot of the millicode call -- thus they act more like traditional
9613 CALL_INSNs.
9615 Note we cannot consider side effects of the insn to be delayed because
9616 the branch and link insn will clobber the return pointer. If we happened
9617 to use the return pointer in the delay slot of the call, then we lose.
9619 get_attr_type will try to recognize the given insn, so make sure to
9620 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9621 in particular. */
9622 int
9623 pa_insn_refs_are_delayed (rtx_insn *insn)
9625 return ((NONJUMP_INSN_P (insn)
9626 && GET_CODE (PATTERN (insn)) != SEQUENCE
9627 && GET_CODE (PATTERN (insn)) != USE
9628 && GET_CODE (PATTERN (insn)) != CLOBBER
9629 && get_attr_type (insn) == TYPE_MILLI));
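/* For instance, on targets that cannot do the multiply in hardware,
   mulsi3 is expanded as a millicode call: the operands are moved to
   %r26 and %r25, $$mulI is invoked, and the product is read from
   %r29.  An insn setting %r26 can then be placed in the call's delay
   slot.  */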
9632 /* Promote the return value, but not the arguments. */
9634 static machine_mode
9635 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9636 machine_mode mode,
9637 int *punsignedp ATTRIBUTE_UNUSED,
9638 const_tree fntype ATTRIBUTE_UNUSED,
9639 int for_return)
9641 if (for_return == 0)
9642 return mode;
9643 return promote_mode (type, mode, punsignedp);
9646 /* On the HP-PA the value is found in register(s) 28(-29), unless
9647 the mode is SF or DF. Then the value is returned in fr4 (32).
9649 This must perform the same promotions as PROMOTE_MODE, else promoting
9650 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9652 Small structures must be returned in a PARALLEL on PA64 in order
9653 to match the HP Compiler ABI. */
9655 static rtx
9656 pa_function_value (const_tree valtype,
9657 const_tree func ATTRIBUTE_UNUSED,
9658 bool outgoing ATTRIBUTE_UNUSED)
9660 machine_mode valmode;
9662 if (AGGREGATE_TYPE_P (valtype)
9663 || TREE_CODE (valtype) == COMPLEX_TYPE
9664 || VECTOR_TYPE_P (valtype))
9666 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9668 /* Handle aggregates that fit exactly in a word or double word. */
9669 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9670 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9672 if (TARGET_64BIT)
9674 /* Aggregates with a size less than or equal to 128 bits are
9675 returned in GR 28(-29). They are left justified. The pad
9676 bits are undefined. Larger aggregates are returned in
9677 memory. */
9678 rtx loc[2];
9679 int i, offset = 0;
9680 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9682 for (i = 0; i < ub; i++)
9684 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9685 gen_rtx_REG (DImode, 28 + i),
9686 GEN_INT (offset));
9687 offset += 8;
9690 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9692 else if (valsize > UNITS_PER_WORD)
9694 /* Aggregates 5 to 8 bytes in size are returned in general
9695 registers r28-r29 in the same manner as other non
9696 floating-point objects. The data is right-justified and
9697 zero-extended to 64 bits. This is opposite to the normal
9698 justification used on big endian targets and requires
9699 special treatment. */
9700 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9701 gen_rtx_REG (DImode, 28), const0_rtx);
9702 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9706 if ((INTEGRAL_TYPE_P (valtype)
9707 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9708 || POINTER_TYPE_P (valtype))
9709 valmode = word_mode;
9710 else
9711 valmode = TYPE_MODE (valtype);
9713 if (SCALAR_FLOAT_TYPE_P (valtype)
9714 && !AGGREGATE_TYPE_P (valtype)
9715 && TYPE_MODE (valtype) != TFmode
9716 && !TARGET_SOFT_FLOAT)
9717 return gen_rtx_REG (valmode, 32);
9719 return gen_rtx_REG (valmode, 28);
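/* As a concrete illustration: on the 64-bit target a 12-byte aggregate
   is returned as
     (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                    (expr_list (reg:DI 29) (const_int 8))])
   while in the 32-bit runtime a 6-byte aggregate yields a one-element
   PARALLEL placing the right-justified value in (reg:DI 28).  */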
9722 /* Implement the TARGET_LIBCALL_VALUE hook. */
9724 static rtx
9725 pa_libcall_value (machine_mode mode,
9726 const_rtx fun ATTRIBUTE_UNUSED)
9728 if (! TARGET_SOFT_FLOAT
9729 && (mode == SFmode || mode == DFmode))
9730 return gen_rtx_REG (mode, 32);
9731 else
9732 return gen_rtx_REG (mode, 28);
9735 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9737 static bool
9738 pa_function_value_regno_p (const unsigned int regno)
9740 if (regno == 28
9741 || (! TARGET_SOFT_FLOAT && regno == 32))
9742 return true;
9744 return false;
9747 /* Update the data in CUM to advance over argument ARG. */
9749 static void
9750 pa_function_arg_advance (cumulative_args_t cum_v,
9751 const function_arg_info &arg)
9753 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9754 int arg_size = pa_function_arg_size (arg.mode, arg.type);
9756 cum->nargs_prototype--;
9757 cum->words += (arg_size
9758 + ((cum->words & 01)
9759 && arg.type != NULL_TREE
9760 && arg_size > 1));
9763 /* Return the location of a parameter that is passed in a register or NULL
9764 if the parameter has any component that is passed in memory.
9766 This is new code and will be pushed into the net sources after
9767 further testing.
9769 ??? We might want to restructure this so that it looks more like other
9770 ports. */
9771 static rtx
9772 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9774 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9775 tree type = arg.type;
9776 machine_mode mode = arg.mode;
9777 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9778 int alignment = 0;
9779 int arg_size;
9780 int fpr_reg_base;
9781 int gpr_reg_base;
9782 rtx retval;
9784 if (arg.end_marker_p ())
9785 return NULL_RTX;
9787 arg_size = pa_function_arg_size (mode, type);
9788 if (!arg_size)
9789 return NULL_RTX;
9791 /* If this arg would be passed partially or totally on the stack, then
9792 this routine should return zero. pa_arg_partial_bytes will
9793 handle arguments which are split between regs and stack slots if
9794 the ABI mandates split arguments. */
9795 if (!TARGET_64BIT)
9797 /* The 32-bit ABI does not split arguments. */
9798 if (cum->words + arg_size > max_arg_words)
9799 return NULL_RTX;
9801 else
9803 if (arg_size > 1)
9804 alignment = cum->words & 1;
9805 if (cum->words + alignment >= max_arg_words)
9806 return NULL_RTX;
9809 /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9810 particularly in their handling of FP registers. We might
9811 be able to cleverly share code between them, but I'm not
9812 going to bother in the hope that splitting them up results
9813 in code that is more easily understood. */
9815 if (TARGET_64BIT)
9817 /* Advance the base registers to their current locations.
9819 Remember, gprs grow towards smaller register numbers while
9820 fprs grow to higher register numbers. Also remember that
9821 although FP regs are 32-bit addressable, we pretend that
9822 the registers are 64 bits wide. */
9823 gpr_reg_base = 26 - cum->words;
9824 fpr_reg_base = 32 + cum->words;
9826 /* Arguments wider than one word and small aggregates need special
9827 treatment. */
9828 if (arg_size > 1
9829 || mode == BLKmode
9830 || (type && (AGGREGATE_TYPE_P (type)
9831 || TREE_CODE (type) == COMPLEX_TYPE
9832 || VECTOR_TYPE_P (type))))
9834 /* Double-extended precision (80-bit), quad-precision (128-bit)
9835 and aggregates including complex numbers are aligned on
9836 128-bit boundaries. The first eight 64-bit argument slots
9837 are associated one-to-one, with general registers r26
9838 through r19, and also with floating-point registers fr4
9839 through fr11. Arguments larger than one word are always
9840 passed in general registers.
9842 Using a PARALLEL with a word mode register results in left
9843 justified data on a big-endian target. */
9845 rtx loc[8];
9846 int i, offset = 0, ub = arg_size;
9848 /* Align the base register. */
9849 gpr_reg_base -= alignment;
9851 ub = MIN (ub, max_arg_words - cum->words - alignment);
9852 for (i = 0; i < ub; i++)
9854 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9855 gen_rtx_REG (DImode, gpr_reg_base),
9856 GEN_INT (offset));
9857 gpr_reg_base -= 1;
9858 offset += 8;
9861 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9864 else
9866 /* If the argument is larger than a word, then we know precisely
9867 which registers we must use. */
9868 if (arg_size > 1)
9870 if (cum->words)
9872 gpr_reg_base = 23;
9873 fpr_reg_base = 38;
9875 else
9877 gpr_reg_base = 25;
9878 fpr_reg_base = 34;
9881 /* Structures 5 to 8 bytes in size are passed in the general
9882 registers in the same manner as other non floating-point
9883 objects. The data is right-justified and zero-extended
9884 to 64 bits. This is opposite to the normal justification
9885 used on big endian targets and requires special treatment.
9886 We now define BLOCK_REG_PADDING to pad these objects.
9887 Aggregates, complex and vector types are passed in the same
9888 manner as structures. */
9889 if (mode == BLKmode
9890 || (type && (AGGREGATE_TYPE_P (type)
9891 || TREE_CODE (type) == COMPLEX_TYPE
9892 || VECTOR_TYPE_P (type))))
9894 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9895 gen_rtx_REG (DImode, gpr_reg_base),
9896 const0_rtx);
9897 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9900 else
9902 /* We have a single word (32 bits). A simple computation
9903 will get us the register #s we need. */
9904 gpr_reg_base = 26 - cum->words;
9905 fpr_reg_base = 32 + 2 * cum->words;
9909 /* Determine if the argument needs to be passed in both general and
9910 floating point registers. */
9911 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9912 /* If we are doing soft-float with portable runtime, then there
9913 is no need to worry about FP regs. */
9914 && !TARGET_SOFT_FLOAT
9915 /* The parameter must be some kind of scalar float, else we just
9916 pass it in integer registers. */
9917 && GET_MODE_CLASS (mode) == MODE_FLOAT
9918 /* The target function must not have a prototype. */
9919 && cum->nargs_prototype <= 0
9920 /* libcalls do not need to pass items in both FP and general
9921 registers. */
9922 && type != NULL_TREE
9923 /* All this hair applies to "outgoing" args only. This includes
9924 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9925 && !cum->incoming)
9926 /* Also pass outgoing floating arguments in both registers in indirect
9927 calls with the 32-bit ABI and the HP assembler since there is no
9928 way to specify argument locations in static functions. */
9929 || (!TARGET_64BIT
9930 && !TARGET_GAS
9931 && !cum->incoming
9932 && cum->indirect
9933 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9935 retval
9936 = gen_rtx_PARALLEL
9937 (mode,
9938 gen_rtvec (2,
9939 gen_rtx_EXPR_LIST (VOIDmode,
9940 gen_rtx_REG (mode, fpr_reg_base),
9941 const0_rtx),
9942 gen_rtx_EXPR_LIST (VOIDmode,
9943 gen_rtx_REG (mode, gpr_reg_base),
9944 const0_rtx)));
9946 else
9948 /* See if we should pass this parameter in a general register. */
9949 if (TARGET_SOFT_FLOAT
9950 /* Indirect calls in the normal 32-bit ABI require all arguments
9951 to be passed in general registers. */
9952 || (!TARGET_PORTABLE_RUNTIME
9953 && !TARGET_64BIT
9954 && !TARGET_ELF32
9955 && cum->indirect)
9956 /* If the parameter is not a scalar floating-point parameter,
9957 then it belongs in GPRs. */
9958 || GET_MODE_CLASS (mode) != MODE_FLOAT
9959 /* Structure with single SFmode field belongs in GPR. */
9960 || (type && AGGREGATE_TYPE_P (type)))
9961 retval = gen_rtx_REG (mode, gpr_reg_base);
9962 else
9963 retval = gen_rtx_REG (mode, fpr_reg_base);
9965 return retval;
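/* Example of the dual-register case: with no prototype in scope, the
   first double argument in the 32-bit runtime is passed in both %fr5
   (register 34 here) and the %r25/%r26 pair, as a two-element
   PARALLEL, since it is unknown whether the callee expects it in
   general or floating-point registers.  */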
9968 /* Arguments larger than one word are double word aligned. */
9970 static unsigned int
9971 pa_function_arg_boundary (machine_mode mode, const_tree type)
9973 bool singleword = (type
9974 ? (integer_zerop (TYPE_SIZE (type))
9975 || !TREE_CONSTANT (TYPE_SIZE (type))
9976 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9977 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9979 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9982 /* If this arg would be passed totally in registers or totally on the stack,
9983 then this routine should return zero. */
9985 static int
9986 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9988 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9989 unsigned int max_arg_words = 8;
9990 unsigned int offset = 0;
9991 int arg_size;
9993 if (!TARGET_64BIT)
9994 return 0;
9996 arg_size = pa_function_arg_size (arg.mode, arg.type);
9997 if (arg_size > 1 && (cum->words & 1))
9998 offset = 1;
10000 if (cum->words + offset + arg_size <= max_arg_words)
10001 /* Arg fits fully into registers. */
10002 return 0;
10003 else if (cum->words + offset >= max_arg_words)
10004 /* Arg fully on the stack. */
10005 return 0;
10006 else
10007 /* Arg is split. */
10008 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
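/* Worked example: on the 64-bit port with cum->words == 6, a 32-byte
   aggregate (arg_size == 4) neither fits fully in registers nor lands
   fully on the stack, so (8 - 6 - 0) * 8 = 16 bytes are passed in
   registers and the rest on the stack.  */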
10012 /* A get_unnamed_section callback for switching to the text section.
10014 This function is only used with SOM. Because we don't support
10015 named subspaces, we can only create a new subspace or switch back
10016 to the default text subspace. */
10018 static void
10019 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10021 gcc_assert (TARGET_SOM);
10022 if (TARGET_GAS)
10024 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10026 /* We only want to emit a .nsubspa directive once at the
10027 start of the function. */
10028 cfun->machine->in_nsubspa = 1;
10030 /* Create a new subspace for the text. This provides
10031 better stub placement and one-only functions. */
10032 if (cfun->decl
10033 && DECL_ONE_ONLY (cfun->decl)
10034 && !DECL_WEAK (cfun->decl))
10036 output_section_asm_op ("\t.SPACE $TEXT$\n"
10037 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10038 "ACCESS=44,SORT=24,COMDAT");
10039 return;
10042 else
10044 /* There isn't a current function or the body of the current
10045 function has been completed. So, we are changing to the
10046 text section to output debugging information. Thus, we
10047 need to forget that we are in the text section so that
10048 varasm.cc will call us when text_section is selected again. */
10049 gcc_assert (!cfun || !cfun->machine
10050 || cfun->machine->in_nsubspa == 2);
10051 in_section = NULL;
10053 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10054 return;
10056 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10059 /* A get_unnamed_section callback for switching to comdat data
10060 sections. This function is only used with SOM. */
10062 static void
10063 som_output_comdat_data_section_asm_op (const char *data)
10065 in_section = NULL;
10066 output_section_asm_op (data);
10069 /* Implement TARGET_ASM_INIT_SECTIONS. */
10071 static void
10072 pa_som_asm_init_sections (void)
10074 text_section
10075 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10077 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10078 is not being generated. */
10079 som_readonly_data_section
10080 = get_unnamed_section (0, output_section_asm_op,
10081 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10083 /* When secondary definitions are not supported, SOM makes readonly
10084 data one-only by creating a new $LIT$ subspace in $TEXT$ with
10085 the comdat flag. */
10086 som_one_only_readonly_data_section
10087 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10088 "\t.SPACE $TEXT$\n"
10089 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10090 "ACCESS=0x2c,SORT=16,COMDAT");
10093 /* When secondary definitions are not supported, SOM makes data one-only
10094 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
10095 som_one_only_data_section
10096 = get_unnamed_section (SECTION_WRITE,
10097 som_output_comdat_data_section_asm_op,
10098 "\t.SPACE $PRIVATE$\n"
10099 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10100 "ACCESS=31,SORT=24,COMDAT");
10102 if (flag_tm)
10103 som_tm_clone_table_section
10104 = get_unnamed_section (0, output_section_asm_op,
10105 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10107 /* HPUX ld generates incorrect GOT entries for "T" fixups which
10108 reference data within the $TEXT$ space (for example constant
10109 strings in the $LIT$ subspace).
10111 The assemblers (GAS and HP as) both have problems with handling
10112 the difference of two symbols, which is the other correct way to
10113 reference constant data during PIC code generation.
10115 Thus, we can't put constant data needing relocation in the $TEXT$
10116 space during PIC generation.
10118 Previously, we placed all constant data into the $DATA$ subspace
10119 when generating PIC code. This reduces sharing, but it works
10120 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
10121 This puts constant data not needing relocation into the $TEXT$ space. */
10122 readonly_data_section = som_readonly_data_section;
10124 /* We must not have a reference to an external symbol defined in a
10125 shared library in a readonly section, else the SOM linker will
10126 complain.
10128 So, we force exception information into the data section. */
10129 exception_section = data_section;
10132 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
10134 static section *
10135 pa_som_tm_clone_table_section (void)
10137 return som_tm_clone_table_section;
10140 /* On hpux10, the linker will give an error if we have a reference
10141 in the read-only data section to a symbol defined in a shared
10142 library. Therefore, expressions that might require a reloc
10143 cannot be placed in the read-only data section. */
10145 static section *
10146 pa_select_section (tree exp, int reloc,
10147 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10149 if (TREE_CODE (exp) == VAR_DECL
10150 && TREE_READONLY (exp)
10151 && !TREE_THIS_VOLATILE (exp)
10152 && DECL_INITIAL (exp)
10153 && (DECL_INITIAL (exp) == error_mark_node
10154 || TREE_CONSTANT (DECL_INITIAL (exp)))
10155 && !(reloc & pa_reloc_rw_mask ()))
10157 if (TARGET_SOM
10158 && DECL_ONE_ONLY (exp)
10159 && !DECL_WEAK (exp))
10160 return som_one_only_readonly_data_section;
10161 else
10162 return readonly_data_section;
10164 else if (CONSTANT_CLASS_P (exp)
10165 && !(reloc & pa_reloc_rw_mask ()))
10166 return readonly_data_section;
10167 else if (TARGET_SOM
10168 && TREE_CODE (exp) == VAR_DECL
10169 && DECL_ONE_ONLY (exp)
10170 && !DECL_WEAK (exp))
10171 return som_one_only_data_section;
10172 else
10173 return data_section;
10176 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10177 and the function is in a COMDAT group, place the plabel reference in the
10178 .data.rel.ro.local section. The linker ignores references to symbols in
10179 discarded sections from this section. */
10181 static section *
10182 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10183 unsigned HOST_WIDE_INT align)
10185 if (function_label_operand (x, VOIDmode))
10187 tree decl = SYMBOL_REF_DECL (x);
10189 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10190 return get_named_section (NULL, ".data.rel.ro.local", 1);
10193 return default_elf_select_rtx_section (mode, x, align);
10196 /* Implement pa_reloc_rw_mask. */
10198 static int
10199 pa_reloc_rw_mask (void)
10201 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10202 return 3;
10204 /* HP linker does not support global relocs in readonly memory. */
10205 return TARGET_SOM ? 2 : 0;
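/* The value returned above is ANDed in varasm against the reloc kind
   of the data being placed: roughly, the low bit covers relocations to
   local symbols and the next bit relocations to global symbols.  Thus
   3 keeps all relocated data out of read-only sections, while 2 only
   excludes data with global relocations.  */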
10208 static void
10209 pa_globalize_label (FILE *stream, const char *name)
10211 /* We only handle DATA objects here; functions are globalized in
10212 ASM_DECLARE_FUNCTION_NAME. */
10213 if (! FUNCTION_NAME_P (name))
10215 fputs ("\t.EXPORT ", stream);
10216 assemble_name (stream, name);
10217 fputs (",DATA\n", stream);
10221 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10223 static rtx
10224 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10225 int incoming ATTRIBUTE_UNUSED)
10227 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10230 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10232 bool
10233 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10235 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10236 PA64 ABI says that objects larger than 128 bits are returned in memory.
10237 Note, int_size_in_bytes can return -1 if the size of the object is
10238 variable or larger than the maximum value that can be expressed as
10239 a HOST_WIDE_INT. It can also return zero for an empty type. The
10240 simplest way to handle variable and empty types is to pass them in
10241 memory. This avoids problems in defining the boundaries of argument
10242 slots, allocating registers, etc. */
10243 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10244 || int_size_in_bytes (type) <= 0);
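/* E.g., a 12-byte struct is returned in memory by the 32-bit runtime
   (12 > 8) but in GR 28-29 on PA64 (12 <= 16).  A variable-size object
   makes int_size_in_bytes return -1, so it too is returned in
   memory.  */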
10247 /* Structure to hold declaration and name of external symbols that are
10248 emitted by GCC. We generate a vector of these symbols and output them
10249 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10250 This avoids putting out names that are never really used. */
10252 typedef struct GTY(()) extern_symbol
10254 tree decl;
10255 const char *name;
10256 } extern_symbol;
10258 /* Define gc'd vector type for extern_symbol. */
10260 /* Vector of extern_symbol pointers. */
10261 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10263 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10264 /* Mark DECL (name NAME) as an external reference (assembler output
10265 file FILE). This saves the names to output at the end of the file
10266 if actually referenced. */
10268 void
10269 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10271 gcc_assert (file == asm_out_file);
10272 extern_symbol p = {decl, name};
10273 vec_safe_push (extern_symbols, p);
10275 #endif
10277 /* Output text required at the end of an assembler file.
10278 This includes deferred plabels and .import directives for
10279 all external symbols that were actually referenced. */
10281 static void
10282 pa_file_end (void)
10284 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10285 unsigned int i;
10286 extern_symbol *p;
10288 if (!NO_DEFERRED_PROFILE_COUNTERS)
10289 output_deferred_profile_counters ();
10290 #endif
10292 output_deferred_plabels ();
10294 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10295 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10297 tree decl = p->decl;
10299 if (!TREE_ASM_WRITTEN (decl)
10300 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10301 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10304 vec_free (extern_symbols);
10305 #endif
10307 if (NEED_INDICATE_EXEC_STACK)
10308 file_end_indicate_exec_stack ();
10311 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10313 static bool
10314 pa_can_change_mode_class (machine_mode from, machine_mode to,
10315 reg_class_t rclass)
10317 if (from == to)
10318 return true;
10320 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10321 return true;
10323 /* Reject changes to/from modes with zero size. */
10324 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10325 return false;
10327 /* Reject changes to/from complex and vector modes. */
10328 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10329 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10330 return false;
10332 /* There is no way to load QImode or HImode values directly from memory
10333 to a FP register. SImode loads to the FP registers are not zero
10334 extended. On the 64-bit target, this conflicts with the definition
10335 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10336 except for DImode to SImode on the 64-bit target. It is handled by
10337 register renaming in pa_print_operand. */
10338 if (MAYBE_FP_REG_CLASS_P (rclass))
10339 return TARGET_64BIT && from == DImode && to == SImode;
10341 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10342 in specific sets of registers. Thus, we cannot allow changing
10343 to a larger mode when it's larger than a word. */
10344 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10345 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10346 return false;
10348 return true;
10351 /* Implement TARGET_MODES_TIEABLE_P.
10353 We should return FALSE for QImode and HImode because these modes
10354 are not ok in the floating-point registers. However, this prevents
10355 tying these modes to SImode and DImode in the general registers.
10356 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10357 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10358 in the floating-point registers. */
10360 static bool
10361 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10363 /* Don't tie modes in different classes. */
10364 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10365 return false;
10367 return true;
10371 /* Length in units of the trampoline instruction code. */
10373 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
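/* These sizes follow directly from the templates below: 6 four-byte
   instructions on the 64-bit port, 9 for PA 2.0 and 12 for PA 1.x.  */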
10376 /* Output assembler code for a block containing the constant parts
10377 of a trampoline, leaving space for the variable parts.
10379 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10380 and then branches to the specified routine.
10382 This code template is copied from the text segment to the stack
10383 location and then patched by pa_trampoline_init to contain valid
10384 values, and finally entered as a subroutine.
10386 It is best to keep this as small as possible to avoid having to
10387 flush multiple lines in the cache. */
10389 static void
10390 pa_asm_trampoline_template (FILE *f)
10392 if (!TARGET_64BIT)
10394 if (TARGET_PA_20)
10396 fputs ("\tmfia %r20\n", f);
10397 fputs ("\tldw 48(%r20),%r22\n", f);
10398 fputs ("\tcopy %r22,%r21\n", f);
10399 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10400 fputs ("\tdepwi 0,31,2,%r22\n", f);
10401 fputs ("\tldw 0(%r22),%r21\n", f);
10402 fputs ("\tldw 4(%r22),%r19\n", f);
10403 fputs ("\tbve (%r21)\n", f);
10404 fputs ("\tldw 52(%r20),%r29\n", f);
10405 fputs ("\t.word 0\n", f);
10406 fputs ("\t.word 0\n", f);
10407 fputs ("\t.word 0\n", f);
10409 else
10411 if (ASSEMBLER_DIALECT == 0)
10413 fputs ("\tbl .+8,%r20\n", f);
10414 fputs ("\tdepi 0,31,2,%r20\n", f);
10416 else
10418 fputs ("\tb,l .+8,%r20\n", f);
10419 fputs ("\tdepwi 0,31,2,%r20\n", f);
10421 fputs ("\tldw 40(%r20),%r22\n", f);
10422 fputs ("\tcopy %r22,%r21\n", f);
10423 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10424 if (ASSEMBLER_DIALECT == 0)
10425 fputs ("\tdepi 0,31,2,%r22\n", f);
10426 else
10427 fputs ("\tdepwi 0,31,2,%r22\n", f);
10428 fputs ("\tldw 0(%r22),%r21\n", f);
10429 fputs ("\tldw 4(%r22),%r19\n", f);
10430 fputs ("\tldsid (%r21),%r1\n", f);
10431 fputs ("\tmtsp %r1,%sr0\n", f);
10432 fputs ("\tbe 0(%sr0,%r21)\n", f);
10433 fputs ("\tldw 44(%r20),%r29\n", f);
10435 fputs ("\t.word 0\n", f);
10436 fputs ("\t.word 0\n", f);
10437 fputs ("\t.word 0\n", f);
10438 fputs ("\t.word 0\n", f);
10440 else
10442 fputs ("\t.dword 0\n", f);
10443 fputs ("\t.dword 0\n", f);
10444 fputs ("\t.dword 0\n", f);
10445 fputs ("\t.dword 0\n", f);
10446 fputs ("\tmfia %r31\n", f);
10447 fputs ("\tldd 24(%r31),%r27\n", f);
10448 fputs ("\tldd 32(%r31),%r31\n", f);
10449 fputs ("\tldd 16(%r27),%r1\n", f);
10450 fputs ("\tbve (%r1)\n", f);
10451 fputs ("\tldd 24(%r27),%r27\n", f);
10452 fputs ("\t.dword 0 ; fptr\n", f);
10453 fputs ("\t.dword 0 ; static link\n", f);
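/* In the 64-bit template above, mfia leaves the address of the mfia
   instruction (trampoline offset 32) in %r31; the function descriptor
   pointer is then loaded from offset 56 and the static chain from
   offset 64, matching the words stored by pa_trampoline_init below.  */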
10457 /* Emit RTL insns to initialize the variable parts of a trampoline.
10458 FNADDR is an RTX for the address of the function's pure code.
10459 CXT is an RTX for the static chain value for the function.
10461 Move the function address to the trampoline template at offset 48.
10462 Move the static chain value to trampoline template at offset 52.
10463 Move the trampoline address to trampoline template at offset 56.
10464 Move r19 to trampoline template at offset 60. The latter two
10465 words create a plabel for the indirect call to the trampoline.
10467 A similar sequence is used for the 64-bit port but the plabel is
10468 at the beginning of the trampoline.
10470 Finally, the cache entries for the trampoline code are flushed.
10471 This is necessary to ensure that the trampoline instruction sequence
10472 is written to memory prior to any attempts at prefetching the code
10473 sequence. */
10475 static void
10476 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10478 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10479 rtx start_addr = gen_reg_rtx (Pmode);
10480 rtx end_addr = gen_reg_rtx (Pmode);
10481 rtx line_length = gen_reg_rtx (Pmode);
10482 rtx r_tramp, tmp;
10484 emit_block_move (m_tramp, assemble_trampoline_template (),
10485 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10486 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10488 if (!TARGET_64BIT)
10490 tmp = adjust_address (m_tramp, Pmode, 48);
10491 emit_move_insn (tmp, fnaddr);
10492 tmp = adjust_address (m_tramp, Pmode, 52);
10493 emit_move_insn (tmp, chain_value);
10495 /* Create a fat pointer for the trampoline. */
10496 tmp = adjust_address (m_tramp, Pmode, 56);
10497 emit_move_insn (tmp, r_tramp);
10498 tmp = adjust_address (m_tramp, Pmode, 60);
10499 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10501 /* fdc and fic only use registers for the address to flush;
10502 they do not accept integer displacements. We align the
10503 start and end addresses to the beginning of their respective
10504 cache lines to minimize the number of lines flushed. */
10505 emit_insn (gen_andsi3 (start_addr, r_tramp,
10506 GEN_INT (-MIN_CACHELINE_SIZE)));
10507 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10508 TRAMPOLINE_CODE_SIZE-1));
10509 emit_insn (gen_andsi3 (end_addr, tmp,
10510 GEN_INT (-MIN_CACHELINE_SIZE)));
10511 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10512 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10513 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10514 gen_reg_rtx (Pmode),
10515 gen_reg_rtx (Pmode)));
10517 else
10519 tmp = adjust_address (m_tramp, Pmode, 56);
10520 emit_move_insn (tmp, fnaddr);
10521 tmp = adjust_address (m_tramp, Pmode, 64);
10522 emit_move_insn (tmp, chain_value);
10524 /* Create a fat pointer for the trampoline. */
10525 tmp = adjust_address (m_tramp, Pmode, 16);
10526 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10527 r_tramp, 32)));
10528 tmp = adjust_address (m_tramp, Pmode, 24);
10529 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10531 /* fdc and fic only use registers for the address to flush;
10532 they do not accept integer displacements. We align the
10533 start and end addresses to the beginning of their respective
10534 cache lines to minimize the number of lines flushed. */
10535 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10536 emit_insn (gen_anddi3 (start_addr, tmp,
10537 GEN_INT (-MIN_CACHELINE_SIZE)));
10538 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10539 TRAMPOLINE_CODE_SIZE - 1));
10540 emit_insn (gen_anddi3 (end_addr, tmp,
10541 GEN_INT (-MIN_CACHELINE_SIZE)));
10542 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10543 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10544 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10545 gen_reg_rtx (Pmode),
10546 gen_reg_rtx (Pmode)));
10549 #ifdef HAVE_ENABLE_EXECUTE_STACK
10550 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10551 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10552 #endif
10555 /* Perform any machine-specific adjustment in the address of the trampoline.
10556 ADDR contains the address that was passed to pa_trampoline_init.
10557 Adjust the trampoline address to point to the plabel at offset 56; the extra 2 added below sets the plabel bit (bit 30) that marks the pointer as a plabel for $$dyncall. */
10559 static rtx
10560 pa_trampoline_adjust_address (rtx addr)
10562 if (!TARGET_64BIT)
10563 addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10564 return addr;
10567 static rtx
10568 pa_delegitimize_address (rtx orig_x)
10570 rtx x = delegitimize_mem_from_attrs (orig_x);
10572 if (GET_CODE (x) == LO_SUM
10573 && GET_CODE (XEXP (x, 1)) == UNSPEC
10574 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10575 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10576 return x;
10579 static rtx
10580 pa_internal_arg_pointer (void)
10582 /* The argument pointer and the hard frame pointer are the same in
10583 the 32-bit runtime, so we don't need a copy. */
10584 if (TARGET_64BIT)
10585 return copy_to_reg (virtual_incoming_args_rtx);
10586 else
10587 return virtual_incoming_args_rtx;
10590 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10591 Frame pointer elimination is automatically handled. */
10593 static bool
10594 pa_can_eliminate (const int from, const int to)
10596 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10597 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10598 return false;
10600 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10601 ? ! frame_pointer_needed
10602 : true);
10605 /* Define the offset between two registers, FROM to be eliminated and its
10606 replacement TO, at the start of a routine. */
10607 HOST_WIDE_INT
10608 pa_initial_elimination_offset (int from, int to)
10610 HOST_WIDE_INT offset;
10612 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10613 && to == STACK_POINTER_REGNUM)
10614 offset = -pa_compute_frame_size (get_frame_size (), 0);
10615 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10616 offset = 0;
10617 else
10618 gcc_unreachable ();
10620 return offset;
10623 static void
10624 pa_conditional_register_usage (void)
10626 int i;
10628 if (!TARGET_64BIT && !TARGET_PA_11)
10630 for (i = 56; i <= FP_REG_LAST; i++)
10631 fixed_regs[i] = call_used_regs[i] = 1;
10632 for (i = 33; i < 56; i += 2)
10633 fixed_regs[i] = call_used_regs[i] = 1;
10635 if (TARGET_SOFT_FLOAT)
10637 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10638 fixed_regs[i] = call_used_regs[i] = 1;
10640 if (flag_pic)
10641 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10644 /* Target hook for c_mode_for_suffix. */
10646 static machine_mode
10647 pa_c_mode_for_suffix (char suffix)
10649 if (HPUX_LONG_DOUBLE_LIBRARY)
10651 if (suffix == 'q')
10652 return TFmode;
10655 return VOIDmode;
10658 /* Target hook for function_section. */
10660 static section *
10661 pa_function_section (tree decl, enum node_frequency freq,
10662 bool startup, bool exit)
10664 /* Put functions in text section if target doesn't have named sections. */
10665 if (!targetm_common.have_named_sections)
10666 return text_section;
10668 /* Force nested functions into the same section as the containing
10669 function. */
10670 if (decl
10671 && DECL_SECTION_NAME (decl) == NULL
10672 && DECL_CONTEXT (decl) != NULL_TREE
10673 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10674 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10675 return function_section (DECL_CONTEXT (decl));
10677 /* Otherwise, use the default function section. */
10678 return default_function_section (decl, freq, startup, exit);
10681 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10683 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10684 that need more than three instructions to load prior to reload. This
10685 limit is somewhat arbitrary. It takes three instructions to load a
10686 CONST_INT from memory but two are memory accesses. It may be better
10687 to increase the allowed range for CONST_INTS. We may also be able
10688 to handle CONST_DOUBLES. */
10690 static bool
10691 pa_legitimate_constant_p (machine_mode mode, rtx x)
10693 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10694 return false;
10696 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10697 return false;
10699 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10700 legitimate constants. The other variants can't be handled by
10701 the move patterns after reload starts. */
10702 if (tls_referenced_p (x))
10703 return false;
10705 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10706 return false;
10708 if (TARGET_64BIT
10709 && HOST_BITS_PER_WIDE_INT > 32
10710 && GET_CODE (x) == CONST_INT
10711 && !reload_in_progress
10712 && !reload_completed
10713 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10714 && !pa_cint_ok_for_move (UINTVAL (x)))
10715 return false;
10717 if (function_label_operand (x, mode))
10718 return false;
10720 return true;
10723 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10725 static unsigned int
10726 pa_section_type_flags (tree decl, const char *name, int reloc)
10728 unsigned int flags;
10730 flags = default_section_type_flags (decl, name, reloc);
10732 /* Function labels are placed in the constant pool. This can
10733 cause a section conflict if decls are put in ".data.rel.ro"
10734 or ".data.rel.ro.local" using the __attribute__ construct. */
10735 if (strcmp (name, ".data.rel.ro") == 0
10736 || strcmp (name, ".data.rel.ro.local") == 0)
10737 flags |= SECTION_WRITE | SECTION_RELRO;
10739 return flags;
10742 /* pa_legitimate_address_p recognizes an RTL expression that is a
10743 valid memory address for an instruction. The MODE argument is the
10744 machine mode for the MEM expression that wants to use this address.
10746 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10747 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10748 available with floating point loads and stores, and integer loads.
10749 We get better code by allowing indexed addresses in the initial
10750 RTL generation.
10752 The acceptance of indexed addresses as legitimate implies that we
10753 must provide patterns for doing indexed integer stores, or the move
10754 expanders must force the address of an indexed store to a register.
10755 We have adopted the latter approach.
10757 Another function of pa_legitimate_address_p is to ensure that
10758 the base register is a valid pointer for indexed instructions.
10759 On targets that have non-equivalent space registers, we have to
10760 know at the time of assembler output which register in a REG+REG
10761 pair is the base register. The REG_POINTER flag is sometimes lost
10762 in reload and the following passes, so it can't be relied on during
10763 code generation. Thus, we either have to canonicalize the order
10764 of the registers in REG+REG indexed addresses, or treat REG+REG
10765 addresses separately and provide patterns for both permutations.
10767 The latter approach requires several hundred additional lines of
10768 code in pa.md. The downside to canonicalizing is that a PLUS
10769 in the wrong order can't combine to form a scaled indexed
10770 memory operand. As we won't need to canonicalize the operands if
10771 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10773 We initially break out scaled indexed addresses in canonical order
10774 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10775 scaled indexed addresses during RTL generation. However, fold_rtx
10776 has its own opinion on how the operands of a PLUS should be ordered.
10777 If one of the operands is equivalent to a constant, it will make
10778 that operand the second operand. As the base register is likely to
10779 be equivalent to a SYMBOL_REF, we have made it the second operand.
10781 pa_legitimate_address_p accepts REG+REG as legitimate when the
10782 operands are in the order INDEX+BASE on targets with non-equivalent
10783 space registers, and in any order on targets with equivalent space
10784 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10786 We treat a SYMBOL_REF as legitimate if it is part of the current
10787 function's constant-pool, because such addresses can actually be
10788 output as REG+SMALLINT. */
10790 static bool
10791 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
10793 if ((REG_P (x)
10794 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10795 : REG_OK_FOR_BASE_P (x)))
10796 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10797 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10798 && REG_P (XEXP (x, 0))
10799 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10800 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10801 return true;
10803 if (GET_CODE (x) == PLUS)
10805 rtx base, index;
10807 /* For REG+REG, the base register should be in XEXP (x, 1),
10808 so check it first. */
10809 if (REG_P (XEXP (x, 1))
10810 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10811 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10812 base = XEXP (x, 1), index = XEXP (x, 0);
10813 else if (REG_P (XEXP (x, 0))
10814 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10815 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10816 base = XEXP (x, 0), index = XEXP (x, 1);
10817 else
10818 return false;
10820 if (GET_CODE (index) == CONST_INT)
10822 if (INT_5_BITS (index))
10823 return true;
10825 /* When INT14_OK_STRICT is false, a secondary reload is needed
10826 to adjust the displacement of SImode and DImode floating point
10827 instructions but this may fail when the register also needs
10828 reloading. So, we return false when STRICT is true. We
10829 also reject long displacements for float mode addresses since
10830 the majority of accesses will use floating point instructions
10831 that don't support 14-bit offsets. */
10832 if (!INT14_OK_STRICT
10833 && (strict || !(reload_in_progress || reload_completed))
10834 && mode != QImode
10835 && mode != HImode)
10836 return false;
10838 return base14_operand (index, mode);
10841 if (!TARGET_DISABLE_INDEXING
10842 /* Only accept the "canonical" INDEX+BASE operand order
10843 on targets with non-equivalent space registers. */
10844 && (TARGET_NO_SPACE_REGS
10845 ? REG_P (index)
10846 : (base == XEXP (x, 1) && REG_P (index)
10847 && (reload_completed
10848 || (reload_in_progress && HARD_REGISTER_P (base))
10849 || REG_POINTER (base))
10850 && (reload_completed
10851 || (reload_in_progress && HARD_REGISTER_P (index))
10852 || !REG_POINTER (index))))
10853 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10854 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10855 : REG_OK_FOR_INDEX_P (index))
10856 && borx_reg_operand (base, Pmode)
10857 && borx_reg_operand (index, Pmode))
10858 return true;
10860 if (!TARGET_DISABLE_INDEXING
10861 && GET_CODE (index) == MULT
10862 /* Only accept base operands with the REG_POINTER flag prior to
10863 reload on targets with non-equivalent space registers. */
10864 && (TARGET_NO_SPACE_REGS
10865 || (base == XEXP (x, 1)
10866 && (reload_completed
10867 || (reload_in_progress && HARD_REGISTER_P (base))
10868 || REG_POINTER (base))))
10869 && REG_P (XEXP (index, 0))
10870 && GET_MODE (XEXP (index, 0)) == Pmode
10871 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10872 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10873 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10874 && GET_CODE (XEXP (index, 1)) == CONST_INT
10875 && INTVAL (XEXP (index, 1))
10876 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10877 && borx_reg_operand (base, Pmode))
10878 return true;
10880 return false;
10883 if (GET_CODE (x) == LO_SUM)
10885 rtx y = XEXP (x, 0);
10887 if (GET_CODE (y) == SUBREG)
10888 y = SUBREG_REG (y);
10890 if (REG_P (y)
10891 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10892 : REG_OK_FOR_BASE_P (y)))
10894 /* Needed for -fPIC */
10895 if (mode == Pmode
10896 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10897 return true;
10899 if (!INT14_OK_STRICT
10900 && (strict || !(reload_in_progress || reload_completed))
10901 && mode != QImode
10902 && mode != HImode)
10903 return false;
10905 if (CONSTANT_P (XEXP (x, 1)))
10906 return true;
10908 return false;
10911 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10912 return true;
10914 return false;
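/* For example, the following are accepted (given suitable registers):
     (reg)                                    plain base
     (plus (reg) (const_int 12))              short displacement
     (plus (mult (reg) (const_int 4)) (reg))  scaled index for SImode
     (lo_sum (reg) (symbol_ref))              after a high/lo_sum split
   subject to the INDEX+BASE ordering rules described above.  */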
10917 /* Look for machine dependent ways to make the invalid address AD a
10918 valid address.
10920 For the PA, transform:
10922 memory(X + <large int>)
10924 into:
10926 if (<large int> & mask) >= (mask + 1) / 2
10927 Y = (<large int> & ~mask) + mask + 1 Round up.
10928 else
10929 Y = (<large int> & ~mask) Round down.
10930 Z = X + Y
10931 memory (Z + (<large int> - Y));
10933 This makes reload inheritance and reload_cse work better since Z
10934 can be reused.
10936 There may be more opportunities to improve code with this hook. */
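/* A worked case with the 5-bit mask used for float modes when
   INT14_OK_STRICT is false: for memory (X + 147), 147 & 0x1f = 19 is
   at least 16, so Y rounds up to 160.  Reload then materializes
   Z = X + 160 and rewrites the access as memory (Z - 13), a
   displacement any FP load or store can encode.  */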
10938 rtx
10939 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10940 int opnum, int type,
10941 int ind_levels ATTRIBUTE_UNUSED)
10943 long offset, newoffset, mask;
10944 rtx new_rtx, temp = NULL_RTX;
10946 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10947 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10949 if (optimize && GET_CODE (ad) == PLUS)
10950 temp = simplify_binary_operation (PLUS, Pmode,
10951 XEXP (ad, 0), XEXP (ad, 1));
10953 new_rtx = temp ? temp : ad;
10955 if (optimize
10956 && GET_CODE (new_rtx) == PLUS
10957 && GET_CODE (XEXP (new_rtx, 0)) == REG
10958 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10960 offset = INTVAL (XEXP ((new_rtx), 1));
10962 /* Choose rounding direction. Round up if we are >= halfway. */
10963 if ((offset & mask) >= ((mask + 1) / 2))
10964 newoffset = (offset & ~mask) + mask + 1;
10965 else
10966 newoffset = offset & ~mask;
10968 /* Ensure that long displacements are aligned. */
10969 if (mask == 0x3fff
10970 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10971 || (TARGET_64BIT && (mode) == DImode)))
10972 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10974 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10976 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10977 GEN_INT (newoffset));
10978 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10979 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10980 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10981 opnum, (enum reload_type) type);
10982 return ad;
10986 return NULL_RTX;
10989 /* Output address vector. */
10991 void
10992 pa_output_addr_vec (rtx lab, rtx body)
10994 int idx, vlen = XVECLEN (body, 0);
10996 if (!TARGET_SOM)
10997 fputs ("\t.align 4\n", asm_out_file);
10998 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10999 if (TARGET_GAS)
11000 fputs ("\t.begin_brtab\n", asm_out_file);
11001 for (idx = 0; idx < vlen; idx++)
11003 ASM_OUTPUT_ADDR_VEC_ELT
11004 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
11006 if (TARGET_GAS)
11007 fputs ("\t.end_brtab\n", asm_out_file);
11010 /* Output address difference vector. */
11012 void
11013 pa_output_addr_diff_vec (rtx lab, rtx body)
11015 rtx base = XEXP (XEXP (body, 0), 0);
11016 int idx, vlen = XVECLEN (body, 1);
11018 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11019 if (TARGET_GAS)
11020 fputs ("\t.begin_brtab\n", asm_out_file);
11021 for (idx = 0; idx < vlen; idx++)
11023 ASM_OUTPUT_ADDR_DIFF_ELT
11024 (asm_out_file,
11025 body,
11026 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11027 CODE_LABEL_NUMBER (base));
11029 if (TARGET_GAS)
11030 fputs ("\t.end_brtab\n", asm_out_file);
11033 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
11034 arguments passed by hidden reference in the 32-bit HP runtime. Users
11035 can override this behavior for better compatibility with OpenMP at the
11036 risk of library incompatibilities. Arguments are always passed by value
11037 in the 64-bit HP runtime. */
11039 static bool
11040 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11042 return !TARGET_CALLER_COPIES;
11045 /* Implement TARGET_HARD_REGNO_NREGS. */
11047 static unsigned int
11048 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11050 return PA_HARD_REGNO_NREGS (regno, mode);
11053 /* Implement TARGET_HARD_REGNO_MODE_OK. */
11055 static bool
11056 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11058 return PA_HARD_REGNO_MODE_OK (regno, mode);
11061 /* Implement TARGET_STARTING_FRAME_OFFSET.
11063 On the 32-bit ports, we reserve one slot for the previous frame
11064 pointer and one fill slot. The fill slot is for compatibility
11065 with HP compiled programs. On the 64-bit ports, we reserve one
11066 slot for the previous frame pointer. */
11068 static HOST_WIDE_INT
11069 pa_starting_frame_offset (void)
11071 return 8;
11074 /* Figure out the size in words of the function argument. */
11076 int
11077 pa_function_arg_size (machine_mode mode, const_tree type)
11079 HOST_WIDE_INT size;
11081 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11083 /* The 64-bit runtime does not restrict the size of stack frames,
11084 but the gcc calling conventions limit argument sizes to 1G. Our
11085 prologue/epilogue code limits frame sizes to just under 32 bits.
11086 1G is also the maximum frame size that can be handled by the HPUX
11087 unwind descriptor. Since very large TYPE_SIZE_UNIT values can
11088 occur for (parallel:BLK []), we need to ignore large arguments
11089 passed by value. */
11090 if (size >= (1 << (HOST_BITS_PER_INT - 2)))
11091 size = 0;
11092 return (int) CEIL (size, UNITS_PER_WORD);
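/* For example, DFmode occupies 8 bytes: two words on the 32-bit port
   and one on the 64-bit port.  A 10-byte BLKmode aggregate rounds up
   to three 32-bit words or two 64-bit words.  */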
11095 #include "gt-pa.h"