/* Subroutines for insn-output.cc for HPPA.
   Copyright (C) 1992-2024 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
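
/* Illustrative note: the bypass applies, for example, when a DFmode
   computation feeds a DFmode floating point store; both sides then have
   the same mode size, so the store can pick up the result directly.  A
   DFmode producer feeding an SFmode store does not qualify, since the
   mode sizes differ.  */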

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool,
				     code_helper = ERROR_MARK);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
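
/* Usage sketch (illustrative, not from the original source): an option
   such as -mfixed-range=%fr12-%fr15,%fr19-%fr22 (register names as they
   appear in reg_names) marks each named range fixed, so the register
   allocator will not use those registers.  If every register in
   FP_REG_FIRST..FP_REG_LAST ends up fixed, the loop above notices this
   and enables soft-float code generation via MASK_SOFT_FLOAT.  */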

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Disable -fstack-protector to suppress warning.  */
  flag_stack_protect = 0;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
bool
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
bool
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
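
/* Worked example (illustrative): ival = 0x12345800 has its low 11 bits
   clear and bits 31 and above all zero, so both tests pass and ldil can
   load it.  ival = 0x12345678 fails because its low 11 bits are nonzero,
   as does a zero-extended 0x80000000 (bit 31 set but bits 32..63 clear),
   since that value would change sign when extended from 32 to 64 bits.  */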

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
bool
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
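
/* Worked example (illustrative): for x = 0x30 (0b110000), lsb_mask is
   0x10 and t = ((0x30 >> 4) + 0x10) & ~0xf = 0x10, a power of two, so
   zdepi can deposit the 5-bit immediate 3 shifted left by 4.  For
   x = 0x108 (0b100001000) the set bits are too far apart for a single
   sign-extended 5-bit field: t = (0x10 + 0x8) & ~0x7 = 0x18, which is
   not a power of two, so the value is rejected.  */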

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
bool
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
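
/* Worked example (illustrative): mask = ~0xf (1...10000) complements to
   0xf; adding its lowest set bit gives 0x10, a power of two, so the
   mask is accepted.  mask = 0xf0, a run of ones in the middle of the
   word, complements to 1...100001111; adding 1 leaves more than one bit
   set, so it is rejected.  */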

/* True iff depi can be used to compute (reg | MASK).  */
bool
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
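
/* Worked example (illustrative): mask = 0x1e (0b11110) is a contiguous
   run of ones; adding its lowest set bit gives 0x20, a power of two, so
   depi can set those bits.  mask = 0x1a (0b11010) becomes 0x1c, which
   still has more than one bit set, so it is rejected.  */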

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
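
/* Worked example (illustrative): with the MODE_INT mask 0x3fff, a
   reference to memory (X + 0x6004) rounds up (0x6004 & 0x3fff is
   0x2004, at least halfway), giving Y = 0x8000, Z = X + 0x8000 and
   memory (Z - 0x1ffc), whose displacement fits in 14 bits.  A nearby
   reference to memory (X + 0x7008) becomes memory (Z - 0xff8) with
   the same Z, which CSE can then share.  */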

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cost 1, while
   addresses involving symbolic constants (HIGH) are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
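
/* Illustrative examples: both (plus (ashift (reg) (const_int 2)) (reg))
   and its non-canonical multiply form (plus (mult (reg) (const_int 4))
   (reg)) are accepted here, since either can be implemented with a
   single sh2add instruction.  */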

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (outer_code == SET)
	*total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	}
      else if (mode == DImode)
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (25);
	  else
	    *total = COSTS_N_INSNS (80);
	}
      else
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (20);
	}
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
	*total = COSTS_N_INSNS (240);
      else
	*total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (mode == DImode)
	{
	  if (TARGET_64BIT)
	    {
	      *total = COSTS_N_INSNS (1);
	      /* Handle shladd,l instructions.  */
	      if (hppa_rtx_costs_shadd_p (x))
		return true;
	    }
	  else
	    *total = COSTS_N_INSNS (2);
	}
      else
	{
	  *total = COSTS_N_INSNS (1);
	  /* Handle shNadd instructions.  */
	  if (hppa_rtx_costs_shadd_p (x))
	    return true;
	}
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (13);
	  else
	    *total = COSTS_N_INSNS (18);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (14);
	  else
	    *total = COSTS_N_INSNS (19);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else if (speed)
	    *total = COSTS_N_INSNS (12);
	  else
	    *total = COSTS_N_INSNS (15);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  rtx operand0 = operands[0];
  rtx operand1 = operands[1];
  rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);
1858 /* Handle secondary reloads for loads/stores of FP registers from
1859 REG+D addresses where D does not fit in 5 or 14 bits, including
1860 (subreg (mem (addr))) cases, and reloads for other unsupported
1861 memory operands. */
1862 if (scratch_reg
1863 && FP_REG_P (operand0)
1864 && (MEM_P (operand1)
1865 || (GET_CODE (operand1) == SUBREG
1866 && MEM_P (XEXP (operand1, 0)))))
1868 rtx op1 = operand1;
1870 if (GET_CODE (op1) == SUBREG)
1871 op1 = XEXP (op1, 0);
1873 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1875 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1876 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1878 /* SCRATCH_REG will hold an address and maybe the actual data.
1879 We want it in WORD_MODE regardless of what mode it was
1880 originally given to us. */
1881 scratch_reg = force_mode (word_mode, scratch_reg);
1883 /* D might not fit in 14 bits either; for such cases load D
1884 into scratch reg. */
1885 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1887 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1888 emit_move_insn (scratch_reg,
1889 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1890 Pmode,
1891 XEXP (XEXP (op1, 0), 0),
1892 scratch_reg));
1894 else
1895 emit_move_insn (scratch_reg, XEXP (op1, 0));
1896 op1 = replace_equiv_address (op1, scratch_reg);
1899 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1900 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1901 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1903 /* Load memory address into SCRATCH_REG. */
1904 scratch_reg = force_mode (word_mode, scratch_reg);
1905 emit_move_insn (scratch_reg, XEXP (op1, 0));
1906 op1 = replace_equiv_address (op1, scratch_reg);
1908 emit_insn (gen_rtx_SET (operand0, op1));
1909 return 1;
1911 else if (scratch_reg
1912 && FP_REG_P (operand1)
1913 && (MEM_P (operand0)
1914 || (GET_CODE (operand0) == SUBREG
1915 && MEM_P (XEXP (operand0, 0)))))
1917 rtx op0 = operand0;
1919 if (GET_CODE (op0) == SUBREG)
1920 op0 = XEXP (op0, 0);
1922 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1924 if (!(INT14_OK_STRICT && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1925 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1927 /* SCRATCH_REG will hold an address and maybe the actual data.
1928 We want it in WORD_MODE regardless of what mode it was
1929 originally given to us. */
1930 scratch_reg = force_mode (word_mode, scratch_reg);
1932 /* D might not fit in 14 bits either; for such cases load D
1933 into scratch reg. */
1934 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1936 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1937 emit_move_insn (scratch_reg,
1938 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1939 Pmode,
1940 XEXP (XEXP (op0, 0), 0),
1941 scratch_reg));
1943 else
1944 emit_move_insn (scratch_reg, XEXP (op0, 0));
1945 op0 = replace_equiv_address (op0, scratch_reg);
1948 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1949 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1950 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1952 /* Load memory address into SCRATCH_REG. */
1953 scratch_reg = force_mode (word_mode, scratch_reg);
1954 emit_move_insn (scratch_reg, XEXP (op0, 0));
1955 op0 = replace_equiv_address (op0, scratch_reg);
1957 emit_insn (gen_rtx_SET (op0, operand1));
1958 return 1;
1960 /* Handle secondary reloads for loads of FP registers from constant
1961 expressions by forcing the constant into memory. For the most part,
1962 this is only necessary for SImode and DImode.
1964 Use scratch_reg to hold the address of the memory location. */
1965 else if (scratch_reg
1966 && CONSTANT_P (operand1)
1967 && FP_REG_P (operand0))
1969 rtx const_mem, xoperands[2];
1971 if (operand1 == CONST0_RTX (mode))
1973 emit_insn (gen_rtx_SET (operand0, operand1));
1974 return 1;
1977 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1978 it in WORD_MODE regardless of what mode it was originally given
1979 to us. */
1980 scratch_reg = force_mode (word_mode, scratch_reg);
1982 /* Force the constant into memory and put the address of the
1983 memory location into scratch_reg. */
1984 const_mem = force_const_mem (mode, operand1);
1985 xoperands[0] = scratch_reg;
1986 xoperands[1] = XEXP (const_mem, 0);
1987 pa_emit_move_sequence (xoperands, Pmode, 0);
1989 /* Now load the destination register. */
1990 emit_insn (gen_rtx_SET (operand0,
1991 replace_equiv_address (const_mem, scratch_reg)));
1992 return 1;
1994 /* Handle secondary reloads for SAR. These occur when trying to load
1995 the SAR from memory or a constant. */
1996 else if (scratch_reg
1997 && GET_CODE (operand0) == REG
1998 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1999 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2000 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2002 /* D might not fit in 14 bits either; for such cases load D into
2003 scratch reg. */
2004 if (GET_CODE (operand1) == MEM
2005 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2007 /* We are reloading the address into the scratch register, so we
2008 want to make sure the scratch register is a full register. */
2009 scratch_reg = force_mode (word_mode, scratch_reg);
2011 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2012 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2013 0)),
2014 Pmode,
2015 XEXP (XEXP (operand1, 0),
2016 0),
2017 scratch_reg));
2019 /* Now we are going to load the scratch register from memory,
2020 so we want to load it in the same width as the original MEM,
2021 which must be the same as the width of the ultimate destination,
2022 OPERAND0. */
2023 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2025 emit_move_insn (scratch_reg,
2026 replace_equiv_address (operand1, scratch_reg));
2028 else
2030 /* We want to load the scratch register using the same mode as
2031 the ultimate destination. */
2032 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2034 emit_move_insn (scratch_reg, operand1);
2037 /* And emit the insn to set the ultimate destination. We know that
2038 the scratch register has the same mode as the destination at this
2039 point. */
2040 emit_move_insn (operand0, scratch_reg);
2041 return 1;
2044 /* Handle the most common case: storing into a register. */
2045 if (register_operand (operand0, mode))
2047 /* Legitimize TLS symbol references. This happens for references
2048 that aren't a legitimate constant. */
2049 if (PA_SYMBOL_REF_TLS_P (operand1))
2050 operand1 = legitimize_tls_address (operand1);
2052 if (register_operand (operand1, mode)
2053 || (GET_CODE (operand1) == CONST_INT
2054 && pa_cint_ok_for_move (UINTVAL (operand1)))
2055 || (operand1 == CONST0_RTX (mode))
2056 || (GET_CODE (operand1) == HIGH
2057 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2058 /* Only `general_operands' can come here, so MEM is ok. */
2059 || GET_CODE (operand1) == MEM)
2061 /* Various sets are created during RTL generation which don't
2062 have the REG_POINTER flag correctly set. After the CSE pass,
2063 instruction recognition can fail if we don't consistently
2064 set this flag when performing register copies. This should
2065 also improve the opportunities for creating insns that use
2066 unscaled indexing. */
2067 if (REG_P (operand0) && REG_P (operand1))
2069 if (REG_POINTER (operand1)
2070 && !REG_POINTER (operand0)
2071 && !HARD_REGISTER_P (operand0))
2072 copy_reg_pointer (operand0, operand1);
2075 /* When MEMs are broken out, the REG_POINTER flag doesn't
2076 get set. In some cases, we can set the REG_POINTER flag
2077 from the declaration for the MEM. */
2078 if (REG_P (operand0)
2079 && GET_CODE (operand1) == MEM
2080 && !REG_POINTER (operand0))
2082 tree decl = MEM_EXPR (operand1);
2084 /* Set the register pointer flag and register alignment
2085 if the declaration for this memory reference is a
2086 pointer type. */
2087 if (decl)
2089 tree type;
2091 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2092 tree operand 1. */
2093 if (TREE_CODE (decl) == COMPONENT_REF)
2094 decl = TREE_OPERAND (decl, 1);
2096 type = TREE_TYPE (decl);
2097 type = strip_array_types (type);
2099 if (POINTER_TYPE_P (type))
2100 mark_reg_pointer (operand0, BITS_PER_UNIT);
2104 emit_insn (gen_rtx_SET (operand0, operand1));
2105 return 1;
2108 else if (GET_CODE (operand0) == MEM)
2110 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2111 && !(reload_in_progress || reload_completed))
2113 rtx temp = gen_reg_rtx (DFmode);
2115 emit_insn (gen_rtx_SET (temp, operand1));
2116 emit_insn (gen_rtx_SET (operand0, temp));
2117 return 1;
2119 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2121 /* Run this case quickly. */
2122 emit_insn (gen_rtx_SET (operand0, operand1));
2123 return 1;
2125 if (! (reload_in_progress || reload_completed))
2127 operands[0] = validize_mem (operand0);
2128 operands[1] = operand1 = force_reg (mode, operand1);
2132 /* Simplify the source if we need to.
2133 Note we do have to handle function labels here, even though we do
2134 not consider them legitimate constants. Loop optimizations can
2135 call the emit_move_xxx with one as a source. */
2136 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2137 || (GET_CODE (operand1) == HIGH
2138 && symbolic_operand (XEXP (operand1, 0), mode))
2139 || function_label_operand (operand1, VOIDmode)
2140 || tls_referenced_p (operand1))
2142 int ishighonly = 0;
2144 if (GET_CODE (operand1) == HIGH)
2146 ishighonly = 1;
2147 operand1 = XEXP (operand1, 0);
2149 if (symbolic_operand (operand1, mode))
2151 /* Argh. The assembler and linker can't handle arithmetic
2152 involving plabels.
2154 So we force the plabel into memory, load operand0 from
2155 the memory location, then add in the constant part. */
2156 if ((GET_CODE (operand1) == CONST
2157 && GET_CODE (XEXP (operand1, 0)) == PLUS
2158 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2159 VOIDmode))
2160 || function_label_operand (operand1, VOIDmode))
2162 rtx temp, const_part;
2164 /* Figure out what (if any) scratch register to use. */
2165 if (reload_in_progress || reload_completed)
2167 scratch_reg = scratch_reg ? scratch_reg : operand0;
2168 /* SCRATCH_REG will hold an address and maybe the actual
2169 data. We want it in WORD_MODE regardless of what mode it
2170 was originally given to us. */
2171 scratch_reg = force_mode (word_mode, scratch_reg);
2173 else if (flag_pic)
2174 scratch_reg = gen_reg_rtx (Pmode);
2176 if (GET_CODE (operand1) == CONST)
2178 /* Save away the constant part of the expression. */
2179 const_part = XEXP (XEXP (operand1, 0), 1);
2180 gcc_assert (GET_CODE (const_part) == CONST_INT);
2182 /* Force the function label into memory. */
2183 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2185 else
2187 /* No constant part. */
2188 const_part = NULL_RTX;
2190 /* Force the function label into memory. */
2191 temp = force_const_mem (mode, operand1);
2195 /* Get the address of the memory location. PIC-ify it if
2196 necessary. */
2197 temp = XEXP (temp, 0);
2198 if (flag_pic)
2199 temp = legitimize_pic_address (temp, mode, scratch_reg);
2201 /* Put the address of the memory location into our destination
2202 register. */
2203 operands[1] = temp;
2204 pa_emit_move_sequence (operands, mode, scratch_reg);
2206 /* Now load from the memory location into our destination
2207 register. */
2208 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2209 pa_emit_move_sequence (operands, mode, scratch_reg);
2211 /* And add back in the constant part. */
2212 if (const_part != NULL_RTX)
2213 expand_inc (operand0, const_part);
2215 return 1;
2218 if (flag_pic)
2220 rtx_insn *insn;
2221 rtx temp;
2223 if (reload_in_progress || reload_completed)
2225 temp = scratch_reg ? scratch_reg : operand0;
2226 /* TEMP will hold an address and maybe the actual
2227 data. We want it in WORD_MODE regardless of what mode it
2228 was originally given to us. */
2229 temp = force_mode (word_mode, temp);
2231 else
2232 temp = gen_reg_rtx (Pmode);
2234 /* Force (const (plus (symbol) (const_int))) to memory
2235 if the const_int will not fit in 14 bits. Although
2236 this requires a relocation, the instruction sequence
2237 needed to load the value is shorter. */
2238 if (GET_CODE (operand1) == CONST
2239 && GET_CODE (XEXP (operand1, 0)) == PLUS
2240 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2241 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2243 rtx x, m = force_const_mem (mode, operand1);
2245 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2246 x = replace_equiv_address (m, x);
2247 insn = emit_move_insn (operand0, x);
2249 else
2251 operands[1] = legitimize_pic_address (operand1, mode, temp);
2252 if (REG_P (operand0) && REG_P (operands[1]))
2253 copy_reg_pointer (operand0, operands[1]);
2254 insn = emit_move_insn (operand0, operands[1]);
2257 /* Put a REG_EQUAL note on this insn. */
2258 set_unique_reg_note (insn, REG_EQUAL, operand1);
2260 /* On the HPPA, references to data space are supposed to use dp,
2261 register 27, but showing it in the RTL inhibits various cse
2262 and loop optimizations. */
2263 else
2265 rtx temp, set;
2267 if (reload_in_progress || reload_completed)
2269 temp = scratch_reg ? scratch_reg : operand0;
2270 /* TEMP will hold an address and maybe the actual
2271 data. We want it in WORD_MODE regardless of what mode it
2272 was originally given to us. */
2273 temp = force_mode (word_mode, temp);
2275 else
2276 temp = gen_reg_rtx (mode);
2278 /* Loading a SYMBOL_REF into a register makes that register
2279 safe to be used as the base in an indexed address.
2281 Don't mark hard registers though. That loses. */
2282 if (GET_CODE (operand0) == REG
2283 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2284 mark_reg_pointer (operand0, BITS_PER_UNIT);
2285 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2286 mark_reg_pointer (temp, BITS_PER_UNIT);
2288 if (ishighonly)
2289 set = gen_rtx_SET (operand0, temp);
2290 else
2291 set = gen_rtx_SET (operand0,
2292 gen_rtx_LO_SUM (mode, temp, operand1));
2294 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2295 emit_insn (set);
2298 return 1;
2300 else if (tls_referenced_p (operand1))
2302 rtx tmp = operand1;
2303 rtx addend = NULL;
2305 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2307 addend = XEXP (XEXP (tmp, 0), 1);
2308 tmp = XEXP (XEXP (tmp, 0), 0);
2311 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2312 tmp = legitimize_tls_address (tmp);
2313 if (addend)
2315 tmp = gen_rtx_PLUS (mode, tmp, addend);
2316 tmp = force_operand (tmp, operands[0]);
2318 operands[1] = tmp;
2320 else if (GET_CODE (operand1) != CONST_INT
2321 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2323 rtx temp;
2324 rtx_insn *insn;
2325 rtx op1 = operand1;
2326 HOST_WIDE_INT value = 0;
2327 HOST_WIDE_INT insv = 0;
2328 int insert = 0;
2330 if (GET_CODE (operand1) == CONST_INT)
2331 value = INTVAL (operand1);
2333 if (TARGET_64BIT
2334 && GET_CODE (operand1) == CONST_INT
2335 && HOST_BITS_PER_WIDE_INT > 32
2336 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2338 HOST_WIDE_INT nval;
2340 /* Extract the low order 32 bits of the value and sign extend.
2341 If the new value is the same as the original value, we can
2342 use the original value as-is. If the new value is
2343 different, we use it and insert the most-significant 32-bits
2344 of the original value into the final result. */
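/* A worked example of the check below: for value = 0x123456789 the
   low-order 32 bits are 0x23456789 with bit 31 clear, so nval is
   0x23456789.  Since nval != value, the code keeps nval as the low
   part and later inserts insv = 0x1, the upper 32 bits, into the
   final result.  */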
2345 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2346 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2347 if (value != nval)
2349 #if HOST_BITS_PER_WIDE_INT > 32
2350 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2351 #endif
2352 insert = 1;
2353 value = nval;
2354 operand1 = GEN_INT (nval);
2358 if (reload_in_progress || reload_completed)
2359 temp = scratch_reg ? scratch_reg : operand0;
2360 else
2361 temp = gen_reg_rtx (mode);
2363 /* We don't directly split DImode constants on 32-bit targets
2364 because PLUS uses an 11-bit immediate and the insn sequence
2365 generated is not as efficient as the one using HIGH/LO_SUM. */
2366 if (GET_CODE (operand1) == CONST_INT
2367 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2368 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2369 && !insert)
2371 /* Directly break constant into high and low parts. This
2372 provides better optimization opportunities because various
2373 passes recognize constants split with PLUS but not LO_SUM.
2374 We use a 14-bit signed low part except when the addition
2375 of 0x4000 to the high part might change the sign of the
2376 high part. */
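/* A worked example: for value = 0x12345, low is 0x2345, which is
   >= 0x2000, so high becomes 0x10000 + 0x4000 = 0x14000 and low
   becomes 0x12345 - 0x14000 = -7355, still a valid 14-bit signed
   displacement.  The adjustment keeps the high part loadable in one
   insn since its low 11 bits remain zero.  */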
2377 HOST_WIDE_INT low = value & 0x3fff;
2378 HOST_WIDE_INT high = value & ~ 0x3fff;
2380 if (low >= 0x2000)
2382 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2383 high += 0x2000;
2384 else
2385 high += 0x4000;
2388 low = value - high;
2390 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2391 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2393 else
2395 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2396 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2399 insn = emit_move_insn (operands[0], operands[1]);
2401 /* Now insert the most significant 32 bits of the value
2402 into the register. When we don't have a second register
2403 available, it could take up to nine instructions to load
2404 a 64-bit integer constant. Prior to reload, we force
2405 constants that would take more than three instructions
2406 to load to the constant pool. During and after reload,
2407 we have to handle all possible values. */
2408 if (insert)
2410 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2411 register and the value to be inserted is outside the
2412 range that can be loaded with three depdi instructions. */
2413 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2415 operand1 = GEN_INT (insv);
2417 emit_insn (gen_rtx_SET (temp,
2418 gen_rtx_HIGH (mode, operand1)));
2419 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2420 if (mode == DImode)
2421 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2422 const0_rtx, temp));
2423 else
2424 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2425 const0_rtx, temp));
2427 else
2429 int len = 5, pos = 27;
2431 /* Insert the bits using the depdi instruction. */
2432 while (pos >= 0)
2434 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2435 HOST_WIDE_INT sign = v5 < 0;
2437 /* Left extend the insertion. */
2438 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2439 while (pos > 0 && (insv & 1) == sign)
2441 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2442 len += 1;
2443 pos -= 1;
2446 if (mode == DImode)
2447 insn = emit_insn (gen_insvdi (operand0,
2448 GEN_INT (len),
2449 GEN_INT (pos),
2450 GEN_INT (v5)));
2451 else
2452 insn = emit_insn (gen_insvsi (operand0,
2453 GEN_INT (len),
2454 GEN_INT (pos),
2455 GEN_INT (v5)));
2457 len = pos > 0 && pos < 5 ? pos : 5;
2458 pos -= len;
2463 set_unique_reg_note (insn, REG_EQUAL, op1);
2465 return 1;
2468 /* Now have insn-emit do whatever it normally does. */
2469 return 0;
2472 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2473 it will need a link/runtime reloc). */
2475 int
2476 pa_reloc_needed (tree exp)
2478 int reloc = 0;
2480 switch (TREE_CODE (exp))
2482 case ADDR_EXPR:
2483 return 1;
2485 case POINTER_PLUS_EXPR:
2486 case PLUS_EXPR:
2487 case MINUS_EXPR:
2488 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2489 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2490 break;
2492 CASE_CONVERT:
2493 case NON_LVALUE_EXPR:
2494 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2495 break;
2497 case CONSTRUCTOR:
2499 tree value;
2500 unsigned HOST_WIDE_INT ix;
2502 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2503 if (value)
2504 reloc |= pa_reloc_needed (value);
2506 break;
2508 case ERROR_MARK:
2509 break;
2511 default:
2512 break;
2514 return reloc;
2518 /* Return the best assembler insn template
2519 for moving operands[1] into operands[0] as a fullword. */
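/* For instance, a 14-bit constant such as 42 gives "ldi 42,%0"; a
   constant whose low 11 bits are zero, such as 0x12345800, gives a
   single ldil; a short deposited bit field such as 0x3c00 uses the
   {zdepi|depwi,z} form; any other constant needs the two-insn
   ldil/ldo pair.  */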
2520 const char *
2521 pa_singlemove_string (rtx *operands)
2523 HOST_WIDE_INT intval;
2525 if (GET_CODE (operands[0]) == MEM)
2526 return "stw %r1,%0";
2527 if (GET_CODE (operands[1]) == MEM)
2528 return "ldw %1,%0";
2529 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2531 long i;
2533 gcc_assert (GET_MODE (operands[1]) == SFmode);
2535 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2536 bit pattern. */
2537 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2539 operands[1] = GEN_INT (i);
2540 /* Fall through to CONST_INT case. */
2542 if (GET_CODE (operands[1]) == CONST_INT)
2544 intval = INTVAL (operands[1]);
2546 if (VAL_14_BITS_P (intval))
2547 return "ldi %1,%0";
2548 else if ((intval & 0x7ff) == 0)
2549 return "ldil L'%1,%0";
2550 else if (pa_zdepi_cint_p (intval))
2551 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2552 else
2553 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2555 return "copy %1,%0";
2559 /* Compute position (in OP[1]) and width (in OP[2])
2560 useful for copying IMM to a register using the zdepi
2561 instructions. Store the immediate value to insert in OP[0]. */
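/* A worked example: for IMM = 0x3c00 the least significant set bit
   is bit 10, the shifted field is 0xf with bit 4 clear, so OP
   becomes {15, 21, 4}; depositing 15 at position 21 with length 4
   regenerates 0xf << 10 = 0x3c00.  */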
2562 static void
2563 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2565 int lsb, len;
2567 /* Find the least significant set bit in IMM. */
2568 for (lsb = 0; lsb < 32; lsb++)
2570 if ((imm & 1) != 0)
2571 break;
2572 imm >>= 1;
2575 /* Choose variants based on *sign* of the 5-bit field. */
2576 if ((imm & 0x10) == 0)
2577 len = (lsb <= 28) ? 4 : 32 - lsb;
2578 else
2580 /* Find the width of the bitstring in IMM. */
2581 for (len = 5; len < 32 - lsb; len++)
2583 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2584 break;
2587 /* Sign extend IMM as a 5-bit value. */
2588 imm = (imm & 0xf) - 0x10;
2591 op[0] = imm;
2592 op[1] = 31 - lsb;
2593 op[2] = len;
2596 /* Compute position (in OP[1]) and width (in OP[2])
2597 useful for copying IMM to a register using the depdi,z
2598 instructions. Store the immediate value to insert in OP[0]. */
2600 static void
2601 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2603 int lsb, len, maxlen;
2605 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2607 /* Find the least significant set bit in IMM. */
2608 for (lsb = 0; lsb < maxlen; lsb++)
2610 if ((imm & 1) != 0)
2611 break;
2612 imm >>= 1;
2615 /* Choose variants based on *sign* of the 5-bit field. */
2616 if ((imm & 0x10) == 0)
2617 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2618 else
2620 /* Find the width of the bitstring in IMM. */
2621 for (len = 5; len < maxlen - lsb; len++)
2623 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2624 break;
2627 /* Extend length if host is narrow and IMM is negative. */
2628 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2629 len += 32;
2631 /* Sign extend IMM as a 5-bit value. */
2632 imm = (imm & 0xf) - 0x10;
2635 op[0] = imm;
2636 op[1] = 63 - lsb;
2637 op[2] = len;
2640 /* Output assembler code to perform a doubleword move insn
2641 with operands OPERANDS. */
2643 const char *
2644 pa_output_move_double (rtx *operands)
2646 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2647 rtx latehalf[2];
2648 rtx addreg0 = 0, addreg1 = 0;
2649 int highonly = 0;
2651 /* First classify both operands. */
2653 if (REG_P (operands[0]))
2654 optype0 = REGOP;
2655 else if (offsettable_memref_p (operands[0]))
2656 optype0 = OFFSOP;
2657 else if (GET_CODE (operands[0]) == MEM)
2658 optype0 = MEMOP;
2659 else
2660 optype0 = RNDOP;
2662 if (REG_P (operands[1]))
2663 optype1 = REGOP;
2664 else if (CONSTANT_P (operands[1]))
2665 optype1 = CNSTOP;
2666 else if (offsettable_memref_p (operands[1]))
2667 optype1 = OFFSOP;
2668 else if (GET_CODE (operands[1]) == MEM)
2669 optype1 = MEMOP;
2670 else
2671 optype1 = RNDOP;
2673 /* Check for the cases that the operand constraints are not
2674 supposed to allow to happen. */
2675 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2677 /* Handle copies between general and floating registers. */
2679 if (optype0 == REGOP && optype1 == REGOP
2680 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2682 if (FP_REG_P (operands[0]))
2684 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2685 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2686 return "{fldds|fldd} -16(%%sp),%0";
2688 else
2690 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2691 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2692 return "{ldws|ldw} -12(%%sp),%R0";
2696 /* Handle auto decrementing and incrementing loads and stores
2697 specifically, since the structure of the function doesn't work
2698 for them without major modification. Do this better once we teach
2699 this port about the general inc/dec addressing of the PA.
2700 (This was written by tege. Chide him if it doesn't work.) */
2702 if (optype0 == MEMOP)
2704 /* We have to output the address syntax ourselves, since print_operand
2705 doesn't deal with the addresses we want to use. Fix this later. */
2707 rtx addr = XEXP (operands[0], 0);
2708 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2710 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2712 operands[0] = XEXP (addr, 0);
2713 gcc_assert (GET_CODE (operands[1]) == REG
2714 && GET_CODE (operands[0]) == REG);
2716 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2718 /* No overlap between high target register and address
2719 register. (We do this in a non-obvious way to
2720 save a register file writeback) */
2721 if (GET_CODE (addr) == POST_INC)
2722 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2723 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2725 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2727 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2729 operands[0] = XEXP (addr, 0);
2730 gcc_assert (GET_CODE (operands[1]) == REG
2731 && GET_CODE (operands[0]) == REG);
2733 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2734 /* No overlap between high target register and address
2735 register. (We do this in a non-obvious way to save a
2736 register file writeback) */
2737 if (GET_CODE (addr) == PRE_INC)
2738 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2739 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2742 if (optype1 == MEMOP)
2744 /* We have to output the address syntax ourselves, since print_operand
2745 doesn't deal with the addresses we want to use. Fix this later. */
2747 rtx addr = XEXP (operands[1], 0);
2748 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2750 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2752 operands[1] = XEXP (addr, 0);
2753 gcc_assert (GET_CODE (operands[0]) == REG
2754 && GET_CODE (operands[1]) == REG);
2756 if (!reg_overlap_mentioned_p (high_reg, addr))
2758 /* No overlap between high target register and address
2759 register. (We do this in a non-obvious way to
2760 save a register file writeback) */
2761 if (GET_CODE (addr) == POST_INC)
2762 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2763 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2765 else
2767 /* This is an undefined situation. We should load into the
2768 address register *and* update that register. Probably
2769 we don't need to handle this at all. */
2770 if (GET_CODE (addr) == POST_INC)
2771 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2772 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2775 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2777 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2779 operands[1] = XEXP (addr, 0);
2780 gcc_assert (GET_CODE (operands[0]) == REG
2781 && GET_CODE (operands[1]) == REG);
2783 if (!reg_overlap_mentioned_p (high_reg, addr))
2785 /* No overlap between high target register and address
2786 register. (We do this in a non-obvious way to
2787 save a register file writeback) */
2788 if (GET_CODE (addr) == PRE_INC)
2789 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2790 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2792 else
2794 /* This is an undefined situation. We should load into the
2795 address register *and* update that register. Probably
2796 we don't need to handle this at all. */
2797 if (GET_CODE (addr) == PRE_INC)
2798 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2799 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2802 else if (GET_CODE (addr) == PLUS
2803 && GET_CODE (XEXP (addr, 0)) == MULT)
2805 rtx xoperands[4];
2807 /* Load address into left half of destination register. */
2808 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2809 xoperands[1] = XEXP (addr, 1);
2810 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2811 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2812 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2813 xoperands);
2814 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2816 else if (GET_CODE (addr) == PLUS
2817 && REG_P (XEXP (addr, 0))
2818 && REG_P (XEXP (addr, 1)))
2820 rtx xoperands[3];
2822 /* Load address into left half of destination register. */
2823 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2824 xoperands[1] = XEXP (addr, 0);
2825 xoperands[2] = XEXP (addr, 1);
2826 output_asm_insn ("{addl|add,l} %1,%2,%0",
2827 xoperands);
2828 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2832 /* If an operand is an unoffsettable memory ref, find a register
2833 we can increment temporarily to make it refer to the second word. */
2835 if (optype0 == MEMOP)
2836 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2838 if (optype1 == MEMOP)
2839 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2841 /* Ok, we can do one word at a time.
2842 Normally we do the low-numbered word first.
2844 In either case, set up in LATEHALF the operands to use
2845 for the high-numbered word and in some cases alter the
2846 operands in OPERANDS to be suitable for the low-numbered word. */
2848 if (optype0 == REGOP)
2849 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2850 else if (optype0 == OFFSOP)
2851 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2852 else
2853 latehalf[0] = operands[0];
2855 if (optype1 == REGOP)
2856 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2857 else if (optype1 == OFFSOP)
2858 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2859 else if (optype1 == CNSTOP)
2861 if (GET_CODE (operands[1]) == HIGH)
2863 operands[1] = XEXP (operands[1], 0);
2864 highonly = 1;
2866 split_double (operands[1], &operands[1], &latehalf[1]);
2868 else
2869 latehalf[1] = operands[1];
2871 /* If the first move would clobber the source of the second one,
2872 do them in the other order.
2874 This can happen in two cases:
2876 mem -> register where the first half of the destination register
2877 is the same register used in the memory's address. Reload
2878 can create such insns.
2880 mem in this case will be either register indirect or register
2881 indirect plus a valid offset.
2883 register -> register move where REGNO(dst) == REGNO(src) + 1;
2884 someone (Tim/Tege?) claimed this can happen for parameter loads.
2886 Handle mem -> register case first. */
2887 if (optype0 == REGOP
2888 && (optype1 == MEMOP || optype1 == OFFSOP)
2889 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2891 /* Do the late half first. */
2892 if (addreg1)
2893 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2894 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2896 /* Then clobber. */
2897 if (addreg1)
2898 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2899 return pa_singlemove_string (operands);
2902 /* Now handle register -> register case. */
2903 if (optype0 == REGOP && optype1 == REGOP
2904 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2906 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2907 return pa_singlemove_string (operands);
2910 /* Normal case: do the two words, low-numbered first. */
2912 output_asm_insn (pa_singlemove_string (operands), operands);
2914 /* Make any unoffsettable addresses point at high-numbered word. */
2915 if (addreg0)
2916 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2917 if (addreg1)
2918 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2920 /* Do high-numbered word. */
2921 if (highonly)
2922 output_asm_insn ("ldil L'%1,%0", latehalf);
2923 else
2924 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2926 /* Undo the adds we just did. */
2927 if (addreg0)
2928 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2929 if (addreg1)
2930 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2932 return "";
2935 const char *
2936 pa_output_fp_move_double (rtx *operands)
2938 if (FP_REG_P (operands[0]))
2940 if (FP_REG_P (operands[1])
2941 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2942 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2943 else
2944 output_asm_insn ("fldd%F1 %1,%0", operands);
2946 else if (FP_REG_P (operands[1]))
2948 output_asm_insn ("fstd%F0 %1,%0", operands);
2950 else
2952 rtx xoperands[2];
2954 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2956 /* This is a pain. You have to be prepared to deal with an
2957 arbitrary address here including pre/post increment/decrement,
2959 so avoid this in the MD. */
2960 gcc_assert (GET_CODE (operands[0]) == REG);
2962 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2963 xoperands[0] = operands[0];
2964 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2966 return "";
2969 /* Return a REG that occurs in ADDR with coefficient 1.
2970 ADDR can be effectively incremented by incrementing REG. */
2972 static rtx
2973 find_addr_reg (rtx addr)
2975 while (GET_CODE (addr) == PLUS)
2977 if (GET_CODE (XEXP (addr, 0)) == REG)
2978 addr = XEXP (addr, 0);
2979 else if (GET_CODE (XEXP (addr, 1)) == REG)
2980 addr = XEXP (addr, 1);
2981 else if (CONSTANT_P (XEXP (addr, 0)))
2982 addr = XEXP (addr, 1);
2983 else if (CONSTANT_P (XEXP (addr, 1)))
2984 addr = XEXP (addr, 0);
2985 else
2986 gcc_unreachable ();
2988 gcc_assert (GET_CODE (addr) == REG);
2989 return addr;
2992 /* Emit code to perform a block move.
2994 OPERANDS[0] is the destination pointer as a REG, clobbered.
2995 OPERANDS[1] is the source pointer as a REG, clobbered.
2996 OPERANDS[2] is a register for temporary storage.
2997 OPERANDS[3] is a register for temporary storage.
2998 OPERANDS[4] is the size as a CONST_INT
2999 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3000 OPERANDS[6] is another temporary register. */
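/* A worked example: with align = 4 and n_bytes = 23, the counter is
   preloaded with 15, the loop runs twice and copies 16 bytes, the
   word residual copies 4 more, and the final store-bytes insn
   covers the remaining 3 bytes.  */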
3002 const char *
3003 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3005 int align = INTVAL (operands[5]);
3006 unsigned long n_bytes = INTVAL (operands[4]);
3008 /* We can't move more than a word at a time because the PA
3009 has no integer move insns longer than a word. (Could use fp mem ops?)
3010 if (align > (TARGET_64BIT ? 8 : 4))
3011 align = (TARGET_64BIT ? 8 : 4);
3013 /* Note that we know each loop below will execute at least twice
3014 (else we would have open-coded the copy). */
3015 switch (align)
3017 case 8:
3018 /* Pre-adjust the loop counter. */
3019 operands[4] = GEN_INT (n_bytes - 16);
3020 output_asm_insn ("ldi %4,%2", operands);
3022 /* Copying loop. */
3023 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3024 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3025 output_asm_insn ("std,ma %3,8(%0)", operands);
3026 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3027 output_asm_insn ("std,ma %6,8(%0)", operands);
3029 /* Handle the residual. There could be up to 15 bytes of
3030 residual to copy! */
3031 if (n_bytes % 16 != 0)
3033 operands[4] = GEN_INT (n_bytes % 8);
3034 if (n_bytes % 16 >= 8)
3035 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3036 if (n_bytes % 8 != 0)
3037 output_asm_insn ("ldd 0(%1),%6", operands);
3038 if (n_bytes % 16 >= 8)
3039 output_asm_insn ("std,ma %3,8(%0)", operands);
3040 if (n_bytes % 8 != 0)
3041 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3043 return "";
3045 case 4:
3046 /* Pre-adjust the loop counter. */
3047 operands[4] = GEN_INT (n_bytes - 8);
3048 output_asm_insn ("ldi %4,%2", operands);
3050 /* Copying loop. */
3051 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3052 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3053 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3054 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3055 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3057 /* Handle the residual. There could be up to 7 bytes of
3058 residual to copy! */
3059 if (n_bytes % 8 != 0)
3061 operands[4] = GEN_INT (n_bytes % 4);
3062 if (n_bytes % 8 >= 4)
3063 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3064 if (n_bytes % 4 != 0)
3065 output_asm_insn ("ldw 0(%1),%6", operands);
3066 if (n_bytes % 8 >= 4)
3067 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3068 if (n_bytes % 4 != 0)
3069 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3071 return "";
3073 case 2:
3074 /* Pre-adjust the loop counter. */
3075 operands[4] = GEN_INT (n_bytes - 4);
3076 output_asm_insn ("ldi %4,%2", operands);
3078 /* Copying loop. */
3079 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3080 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3081 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3082 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3083 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3085 /* Handle the residual. */
3086 if (n_bytes % 4 != 0)
3088 if (n_bytes % 4 >= 2)
3089 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3090 if (n_bytes % 2 != 0)
3091 output_asm_insn ("ldb 0(%1),%6", operands);
3092 if (n_bytes % 4 >= 2)
3093 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3094 if (n_bytes % 2 != 0)
3095 output_asm_insn ("stb %6,0(%0)", operands);
3097 return "";
3099 case 1:
3100 /* Pre-adjust the loop counter. */
3101 operands[4] = GEN_INT (n_bytes - 2);
3102 output_asm_insn ("ldi %4,%2", operands);
3104 /* Copying loop. */
3105 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3106 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3107 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3108 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3109 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3111 /* Handle the residual. */
3112 if (n_bytes % 2 != 0)
3114 output_asm_insn ("ldb 0(%1),%3", operands);
3115 output_asm_insn ("stb %3,0(%0)", operands);
3117 return "";
3119 default:
3120 gcc_unreachable ();
3124 /* Count the number of insns necessary to handle this block move.
3126 Basic structure is the same as emit_block_move, except that we
3127 count insns rather than emit them. */
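/* A worked example: for align = 4 and n_bytes = 23, the copying loop
   accounts for 6 insns, the word residual (23 % 8 = 7 >= 4) adds 2,
   and the byte residual (23 % 4 = 3) adds 2 more, giving 10 insns or
   40 bytes.  */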
3129 static int
3130 compute_cpymem_length (rtx_insn *insn)
3132 rtx pat = PATTERN (insn);
3133 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3134 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3135 unsigned int n_insns = 0;
3137 /* We can't move more than a word at a time because the PA
3138 has no integer move insns longer than a word. (Could use fp mem ops?)
3139 if (align > (TARGET_64BIT ? 8 : 4))
3140 align = (TARGET_64BIT ? 8 : 4);
3142 /* The basic copying loop. */
3143 n_insns = 6;
3145 /* Residuals. */
3146 if (n_bytes % (2 * align) != 0)
3148 if ((n_bytes % (2 * align)) >= align)
3149 n_insns += 2;
3151 if ((n_bytes % align) != 0)
3152 n_insns += 2;
3155 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3156 return n_insns * 4;
3159 /* Emit code to perform a block clear.
3161 OPERANDS[0] is the destination pointer as a REG, clobbered.
3162 OPERANDS[1] is a register for temporary storage.
3163 OPERANDS[2] is the size as a CONST_INT
3164 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3166 const char *
3167 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3169 int align = INTVAL (operands[3]);
3170 unsigned long n_bytes = INTVAL (operands[2]);
3172 /* We can't clear more than a word at a time because the PA
3173 has no integer move insns longer than a word.
3174 if (align > (TARGET_64BIT ? 8 : 4))
3175 align = (TARGET_64BIT ? 8 : 4);
3177 /* Note that we know each loop below will execute at least twice
3178 (else we would have open-coded the copy). */
3179 switch (align)
3181 case 8:
3182 /* Pre-adjust the loop counter. */
3183 operands[2] = GEN_INT (n_bytes - 16);
3184 output_asm_insn ("ldi %2,%1", operands);
3186 /* Loop. */
3187 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3188 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3189 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3191 /* Handle the residual. There could be up to 15 bytes of
3192 residual to clear! */
3193 if (n_bytes % 16 != 0)
3195 operands[2] = GEN_INT (n_bytes % 8);
3196 if (n_bytes % 16 >= 8)
3197 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3198 if (n_bytes % 8 != 0)
3199 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3201 return "";
3203 case 4:
3204 /* Pre-adjust the loop counter. */
3205 operands[2] = GEN_INT (n_bytes - 8);
3206 output_asm_insn ("ldi %2,%1", operands);
3208 /* Loop. */
3209 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3210 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3211 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3213 /* Handle the residual. There could be up to 7 bytes of
3214 residual to clear! */
3215 if (n_bytes % 8 != 0)
3217 operands[2] = GEN_INT (n_bytes % 4);
3218 if (n_bytes % 8 >= 4)
3219 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3220 if (n_bytes % 4 != 0)
3221 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3223 return "";
3225 case 2:
3226 /* Pre-adjust the loop counter. */
3227 operands[2] = GEN_INT (n_bytes - 4);
3228 output_asm_insn ("ldi %2,%1", operands);
3230 /* Loop. */
3231 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3232 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3233 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3235 /* Handle the residual. */
3236 if (n_bytes % 4 != 0)
3238 if (n_bytes % 4 >= 2)
3239 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3240 if (n_bytes % 2 != 0)
3241 output_asm_insn ("stb %%r0,0(%0)", operands);
3243 return "";
3245 case 1:
3246 /* Pre-adjust the loop counter. */
3247 operands[2] = GEN_INT (n_bytes - 2);
3248 output_asm_insn ("ldi %2,%1", operands);
3250 /* Loop. */
3251 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3252 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3253 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3255 /* Handle the residual. */
3256 if (n_bytes % 2 != 0)
3257 output_asm_insn ("stb %%r0,0(%0)", operands);
3259 return "";
3261 default:
3262 gcc_unreachable ();
3266 /* Count the number of insns necessary to handle this block clear.
3268 Basic structure is the same as emit_block_move, except that we
3269 count insns rather than emit them. */
3271 static int
3272 compute_clrmem_length (rtx_insn *insn)
3274 rtx pat = PATTERN (insn);
3275 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3276 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3277 unsigned int n_insns = 0;
3279 /* We can't clear more than a word at a time because the PA
3280 has no integer move insns longer than a word.
3281 if (align > (TARGET_64BIT ? 8 : 4))
3282 align = (TARGET_64BIT ? 8 : 4);
3284 /* The basic loop. */
3285 n_insns = 4;
3287 /* Residuals. */
3288 if (n_bytes % (2 * align) != 0)
3290 if ((n_bytes % (2 * align)) >= align)
3291 n_insns++;
3293 if ((n_bytes % align) != 0)
3294 n_insns++;
3297 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3298 return n_insns * 4;
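/* Return the assembler template for an AND of operands[1] with the
   constant in operands[2], storing the result in operands[0].  For
   example, a mask of 0x000000ff yields "{extru|extrw,u} %1,31,8,%0",
   while 0xffff00ff, a mask with a single hole of zeros, yields
   "{depi|depwi} 0,23,8,%0"; other masks fall back to a plain and.  */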
3302 const char *
3303 pa_output_and (rtx *operands)
3305 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3307 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3308 int ls0, ls1, ms0, p, len;
3310 for (ls0 = 0; ls0 < 32; ls0++)
3311 if ((mask & (1 << ls0)) == 0)
3312 break;
3314 for (ls1 = ls0; ls1 < 32; ls1++)
3315 if ((mask & (1 << ls1)) != 0)
3316 break;
3318 for (ms0 = ls1; ms0 < 32; ms0++)
3319 if ((mask & (1 << ms0)) == 0)
3320 break;
3322 gcc_assert (ms0 == 32);
3324 if (ls1 == 32)
3326 len = ls0;
3328 gcc_assert (len);
3330 operands[2] = GEN_INT (len);
3331 return "{extru|extrw,u} %1,31,%2,%0";
3333 else
3335 /* We could use this `depi' for the case above as well, but `depi'
3336 requires one more register file access than an `extru'. */
3338 p = 31 - ls0;
3339 len = ls1 - ls0;
3341 operands[2] = GEN_INT (p);
3342 operands[3] = GEN_INT (len);
3343 return "{depi|depwi} 0,%2,%3,%0";
3346 else
3347 return "and %1,%2,%0";
3350 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3351 storing the result in operands[0]. */
3352 const char *
3353 pa_output_64bit_and (rtx *operands)
3355 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3357 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3358 int ls0, ls1, ms0, p, len;
3360 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3361 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3362 break;
3364 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3365 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3366 break;
3368 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3369 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3370 break;
3372 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3374 if (ls1 == HOST_BITS_PER_WIDE_INT)
3376 len = ls0;
3378 gcc_assert (len);
3380 operands[2] = GEN_INT (len);
3381 return "extrd,u %1,63,%2,%0";
3383 else
3385 /* We could use this `depi' for the case above as well, but `depi'
3386 requires one more register file access than an `extru'. */
3388 p = 63 - ls0;
3389 len = ls1 - ls0;
3391 operands[2] = GEN_INT (p);
3392 operands[3] = GEN_INT (len);
3393 return "depdi 0,%2,%3,%0";
3396 else
3397 return "and %1,%2,%0";
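/* A worked example for pa_output_ior below: a mask of 0x00000ff0
   gives bs0 = 4 and bs1 = 12, hence p = 27 and len = 8, producing
   "{depi|depwi} -1,27,8,%0", which sets bits 4 through 11.  */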
3400 const char *
3401 pa_output_ior (rtx *operands)
3403 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3404 int bs0, bs1, p, len;
3406 if (INTVAL (operands[2]) == 0)
3407 return "copy %1,%0";
3409 for (bs0 = 0; bs0 < 32; bs0++)
3410 if ((mask & (1 << bs0)) != 0)
3411 break;
3413 for (bs1 = bs0; bs1 < 32; bs1++)
3414 if ((mask & (1 << bs1)) == 0)
3415 break;
3417 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3419 p = 31 - bs0;
3420 len = bs1 - bs0;
3422 operands[2] = GEN_INT (p);
3423 operands[3] = GEN_INT (len);
3424 return "{depi|depwi} -1,%2,%3,%0";
3427 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
3428 storing the result in operands[0]. */
3429 const char *
3430 pa_output_64bit_ior (rtx *operands)
3432 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3433 int bs0, bs1, p, len;
3435 if (INTVAL (operands[2]) == 0)
3436 return "copy %1,%0";
3438 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3439 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3440 break;
3442 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3443 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3444 break;
3446 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3447 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3449 p = 63 - bs0;
3450 len = bs1 - bs0;
3452 operands[2] = GEN_INT (p);
3453 operands[3] = GEN_INT (len);
3454 return "depdi -1,%2,%3,%0";
3457 /* Target hook for assembling integer objects. This code handles
3458 aligned SI and DI integers specially since function references
3459 must be preceded by P%. */
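/* For example, an aligned word-sized reference to a function foo
   (a placeholder name) is emitted as "\t.word\tP%foo", or with
   ".dword" on 64-bit targets, unless fast indirect calls are in
   use.  */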
3461 static bool
3462 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3464 bool result;
3465 tree decl = NULL;
3467 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3468 assemble_external and set the SYMBOL_REF_DECL to NULL before
3469 calling output_addr_const. Otherwise, it may call assemble_external
3470 in the midst of outputting the assembler code for the SYMBOL_REF.
3471 We restore the SYMBOL_REF_DECL after the output is done. */
3472 if (GET_CODE (x) == SYMBOL_REF)
3474 decl = SYMBOL_REF_DECL (x);
3475 if (decl)
3477 assemble_external (decl);
3478 SET_SYMBOL_REF_DECL (x, NULL);
3482 if (size == UNITS_PER_WORD
3483 && aligned_p
3484 && function_label_operand (x, VOIDmode))
3486 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3488 /* We don't want an OPD when generating fast indirect calls. */
3489 if (!TARGET_FAST_INDIRECT_CALLS)
3490 fputs ("P%", asm_out_file);
3492 output_addr_const (asm_out_file, x);
3493 fputc ('\n', asm_out_file);
3494 result = true;
3496 else
3497 result = default_assemble_integer (x, size, aligned_p);
3499 if (decl)
3500 SET_SYMBOL_REF_DECL (x, decl);
3502 return result;
3505 /* Output an ascii string. */
3506 void
3507 pa_output_ascii (FILE *file, const char *p, int size)
3509 int i;
3510 int chars_output;
3511 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3513 /* The HP assembler can only take strings of 256 characters at one
3514 time. This is a limitation on input line length, *not* the
3515 length of the string. Sigh. Even worse, it seems that the
3516 restriction is in number of input characters (see \xnn &
3517 \whatever). So we have to do this very carefully. */
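/* For example, the escape byte 0x1b is emitted as "\x1b" and a
   quote as "\"", and a fresh ".STRING" directive is started once
   the current one reaches 243 output characters, comfortably below
   the 256-character input limit.  */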
3519 fputs ("\t.STRING \"", file);
3521 chars_output = 0;
3522 for (i = 0; i < size; i += 4)
3524 int co = 0;
3525 int io = 0;
3526 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3528 unsigned int c = (unsigned char) p[i + io];
3530 if (c == '\"' || c == '\\')
3531 partial_output[co++] = '\\';
3532 if (c >= ' ' && c < 0177)
3533 partial_output[co++] = c;
3534 else
3536 unsigned int hexd;
3537 partial_output[co++] = '\\';
3538 partial_output[co++] = 'x';
3539 hexd = c / 16 - 0 + '0';
3540 if (hexd > '9')
3541 hexd -= '9' - 'a' + 1;
3542 partial_output[co++] = hexd;
3543 hexd = c % 16 - 0 + '0';
3544 if (hexd > '9')
3545 hexd -= '9' - 'a' + 1;
3546 partial_output[co++] = hexd;
3549 if (chars_output + co > 243)
3551 fputs ("\"\n\t.STRING \"", file);
3552 chars_output = 0;
3554 fwrite (partial_output, 1, (size_t) co, file);
3555 chars_output += co;
3556 co = 0;
3558 fputs ("\"\n", file);
3561 /* Try to rewrite floating point comparisons & branches to avoid
3562 useless add,tr insns.
3564 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3565 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3566 first attempt to remove useless add,tr insns. It is zero
3567 for the second pass as reorg sometimes leaves bogus REG_DEAD
3568 notes lying around.
3570 When CHECK_NOTES is zero we can only eliminate add,tr insns
3571 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3572 instructions. */
3573 static void
3574 remove_useless_addtr_insns (int check_notes)
3576 rtx_insn *insn;
3577 static int pass = 0;
3579 /* This is fairly cheap, so always run it when optimizing. */
3580 if (optimize > 0)
3582 int fcmp_count = 0;
3583 int fbranch_count = 0;
3585 /* Walk all the insns in this function looking for fcmp & fbranch
3586 instructions. Keep track of how many of each we find. */
3587 for (insn = get_insns (); insn; insn = next_insn (insn))
3589 rtx tmp;
3591 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3592 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3593 continue;
3595 tmp = PATTERN (insn);
3597 /* It must be a set. */
3598 if (GET_CODE (tmp) != SET)
3599 continue;
3601 /* If the destination is CCFP, then we've found an fcmp insn. */
3602 tmp = SET_DEST (tmp);
3603 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3605 fcmp_count++;
3606 continue;
3609 tmp = PATTERN (insn);
3610 /* If this is an fbranch instruction, bump the fbranch counter. */
3611 if (GET_CODE (tmp) == SET
3612 && SET_DEST (tmp) == pc_rtx
3613 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3614 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3615 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3616 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3618 fbranch_count++;
3619 continue;
3624 /* Find all floating point compare + branch insns. If possible,
3625 reverse the comparison & the branch to avoid add,tr insns. */
3626 for (insn = get_insns (); insn; insn = next_insn (insn))
3628 rtx tmp;
3629 rtx_insn *next;
3631 /* Ignore anything that isn't an INSN. */
3632 if (! NONJUMP_INSN_P (insn))
3633 continue;
3635 tmp = PATTERN (insn);
3637 /* It must be a set. */
3638 if (GET_CODE (tmp) != SET)
3639 continue;
3641 /* The destination must be CCFP, which is register zero. */
3642 tmp = SET_DEST (tmp);
3643 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3644 continue;
3646 /* INSN should be a set of CCFP.
3648 See if the result of this insn is used in a reversed FP
3649 conditional branch. If so, reverse our condition and
3650 the branch. Doing so avoids useless add,tr insns. */
3651 next = next_insn (insn);
3652 while (next)
3654 /* Jumps, calls and labels stop our search. */
3655 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3656 break;
3658 /* As does another fcmp insn. */
3659 if (NONJUMP_INSN_P (next)
3660 && GET_CODE (PATTERN (next)) == SET
3661 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3662 && REGNO (SET_DEST (PATTERN (next))) == 0)
3663 break;
3665 next = next_insn (next);
3668 /* Is NEXT_INSN a branch? */
3669 if (next && JUMP_P (next))
3671 rtx pattern = PATTERN (next);
3673 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3674 and CCFP dies, then reverse our conditional and the branch
3675 to avoid the add,tr. */
3676 if (GET_CODE (pattern) == SET
3677 && SET_DEST (pattern) == pc_rtx
3678 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3679 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3680 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3681 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3682 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3683 && (fcmp_count == fbranch_count
3684 || (check_notes
3685 && find_regno_note (next, REG_DEAD, 0))))
3687 /* Reverse the branch. */
3688 tmp = XEXP (SET_SRC (pattern), 1);
3689 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3690 XEXP (SET_SRC (pattern), 2) = tmp;
3691 INSN_CODE (next) = -1;
3693 /* Reverse our condition. */
3694 tmp = PATTERN (insn);
3695 PUT_CODE (XEXP (tmp, 1),
3696 (reverse_condition_maybe_unordered
3697 (GET_CODE (XEXP (tmp, 1)))));
3703 pass = !pass;
3707 /* You may have trouble believing this, but this is the 32 bit HP-PA
3708 stack layout. Wow.
3710 Offset Contents
3712 Variable arguments (optional; any number may be allocated)
3714 SP-(4*(N+9)) arg word N
3716 SP-56 arg word 5
3717 SP-52 arg word 4
3719 Fixed arguments (must be allocated; may remain unused)
3721 SP-48 arg word 3
3722 SP-44 arg word 2
3723 SP-40 arg word 1
3724 SP-36 arg word 0
3726 Frame Marker
3728 SP-32 External Data Pointer (DP)
3729 SP-28 External sr4
3730 SP-24 External/stub RP (RP')
3731 SP-20 Current RP
3732 SP-16 Static Link
3733 SP-12 Clean up
3734 SP-8 Calling Stub RP (RP'')
3735 SP-4 Previous SP
3737 Top of Frame
3739 SP-0 Stack Pointer (points to next available address)
3740 */
3743 /* This function saves registers as follows. Registers marked with ' are
3744 this function's registers (as opposed to the previous function's).
3745 If a frame_pointer isn't needed, r4 is saved as a general register;
3746 the space for the frame pointer is still allocated, though, to keep
3747 things simple.
3750 Top of Frame
3752 SP (FP') Previous FP
3753 SP + 4 Alignment filler (sigh)
3754 SP + 8 Space for locals reserved here.
3758 SP + n All call saved registers used.
3762 SP + o All call saved fp registers used.
3766 SP + p (SP') points to next available address.
3767 */
3770 /* Global variables set by output_function_prologue(). */
3771 /* Size of frame. Need to know this to emit return insns from
3772 leaf procedures. */
3773 static HOST_WIDE_INT actual_fsize, local_fsize;
3774 static int save_fregs;
3776 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3777 Handle case where DISP > 8k by using the add_high_const patterns.
3779 Note in DISP > 8k case, we will leave the high part of the address
3780 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
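/* A worked example: for disp = 70000, which does not fit in 14 bits,
   the high part 70000 & ~0x7ff = 69632 is added to the base register
   into %r1, and the store then uses the remaining offset
   70000 - 69632 = 368 in a lo_sum address.  */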
3782 static void
3783 store_reg (int reg, HOST_WIDE_INT disp, int base)
3785 rtx dest, src, basereg;
3786 rtx_insn *insn;
3788 src = gen_rtx_REG (word_mode, reg);
3789 basereg = gen_rtx_REG (Pmode, base);
3790 if (VAL_14_BITS_P (disp))
3792 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3793 insn = emit_move_insn (dest, src);
3795 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3797 rtx delta = GEN_INT (disp);
3798 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3800 emit_move_insn (tmpreg, delta);
3801 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3802 if (DO_FRAME_NOTES)
3804 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3805 gen_rtx_SET (tmpreg,
3806 gen_rtx_PLUS (Pmode, basereg, delta)));
3807 RTX_FRAME_RELATED_P (insn) = 1;
3809 dest = gen_rtx_MEM (word_mode, tmpreg);
3810 insn = emit_move_insn (dest, src);
3812 else
3814 rtx delta = GEN_INT (disp);
3815 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3816 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3818 emit_move_insn (tmpreg, high);
3819 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3820 insn = emit_move_insn (dest, src);
3821 if (DO_FRAME_NOTES)
3822 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3823 gen_rtx_SET (gen_rtx_MEM (word_mode,
3824 gen_rtx_PLUS (word_mode,
3825 basereg,
3826 delta)),
3827 src));
3830 if (DO_FRAME_NOTES)
3831 RTX_FRAME_RELATED_P (insn) = 1;
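/* Usage sketch (illustrative): the prologue below uses
   store_reg (2, -20, STACK_POINTER_REGNUM) on 32-bit targets to save
   the return pointer, which fits the 14-bit displacement case; a
   hypothetical displacement of 0x20000 would instead take the
   HIGH/LO_SUM arm and leave the high part of the address in %r1.  */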
3834 /* Emit RTL to store REG at the memory location specified by BASE and then
3835 add MOD to BASE. MOD must be <= 8k. */
3837 static void
3838 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3840 rtx basereg, srcreg, delta;
3841 rtx_insn *insn;
3843 gcc_assert (VAL_14_BITS_P (mod));
3845 basereg = gen_rtx_REG (Pmode, base);
3846 srcreg = gen_rtx_REG (word_mode, reg);
3847 delta = GEN_INT (mod);
3849 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3850 if (DO_FRAME_NOTES)
3852 RTX_FRAME_RELATED_P (insn) = 1;
3854 /* RTX_FRAME_RELATED_P must be set on each frame related set
3855 in a parallel with more than one element. */
3856 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3857 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
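/* Usage sketch (illustrative): the large-frame prologue below calls
   store_reg_modify (STACK_POINTER_REGNUM, 1, 8192 - 64), storing the
   frame pointer copy held in %r1 at *sp while bumping sp by 8128
   bytes in a single post-modify store.  */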
3861 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3862 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3863 whether to add a frame note or not.
3865 In the DISP > 8k case, we leave the high part of the address in %r1.
3866 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3868 static void
3869 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3871 rtx_insn *insn;
3873 if (VAL_14_BITS_P (disp))
3875 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3876 plus_constant (Pmode,
3877 gen_rtx_REG (Pmode, base), disp));
3879 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3881 rtx basereg = gen_rtx_REG (Pmode, base);
3882 rtx delta = GEN_INT (disp);
3883 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3885 emit_move_insn (tmpreg, delta);
3886 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3887 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3888 if (DO_FRAME_NOTES)
3889 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3890 gen_rtx_SET (tmpreg,
3891 gen_rtx_PLUS (Pmode, basereg, delta)));
3893 else
3895 rtx basereg = gen_rtx_REG (Pmode, base);
3896 rtx delta = GEN_INT (disp);
3897 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3899 emit_move_insn (tmpreg,
3900 gen_rtx_PLUS (Pmode, basereg,
3901 gen_rtx_HIGH (Pmode, delta)));
3902 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3903 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3906 if (DO_FRAME_NOTES && note)
3907 RTX_FRAME_RELATED_P (insn) = 1;
3910 HOST_WIDE_INT
3911 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3913 int freg_saved = 0;
3914 int i, j;
3916 /* The code in pa_expand_prologue and pa_expand_epilogue must
3917 be consistent with the rounding and size calculation done here.
3918 Change them at the same time. */
3920 /* We do our own stack alignment. First, round the size of the
3921 stack locals up to a word boundary. */
3922 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3924 /* Space for previous frame pointer + filler. If any frame is
3925 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3926 waste some space here for the sake of HP compatibility. The
3927 first slot is only used when the frame pointer is needed. */
3928 if (size || frame_pointer_needed)
3929 size += pa_starting_frame_offset ();
3931 /* If the current function calls __builtin_eh_return, then we need
3932 to allocate stack space for registers that will hold data for
3933 the exception handler. */
3934 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3936 unsigned int i;
3938 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3939 continue;
3940 size += i * UNITS_PER_WORD;
3943 /* Account for space used by the callee general register saves. */
3944 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3945 if (df_regs_ever_live_p (i))
3946 size += UNITS_PER_WORD;
3948 /* Account for space used by the callee floating point register saves. */
3949 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3950 if (df_regs_ever_live_p (i)
3951 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3953 freg_saved = 1;
3955 /* We always save both halves of the FP register, so always
3956 increment the frame size by 8 bytes. */
3957 size += 8;
3960 /* If any of the floating registers are saved, account for the
3961 alignment needed for the floating point register save block. */
3962 if (freg_saved)
3964 size = (size + 7) & ~7;
3965 if (fregs_live)
3966 *fregs_live = 1;
3969 /* The various ABIs include space for the outgoing parameters in the
3970 size of the current function's stack frame. We don't need to align
3971 for the outgoing arguments as their alignment is set by the final
3972 rounding for the frame as a whole. */
3973 size += crtl->outgoing_args_size;
3975 /* Allocate space for the fixed frame marker. This space must be
3976 allocated for any function that makes calls or allocates
3977 stack space. */
3978 if (!crtl->is_leaf || size)
3979 size += TARGET_64BIT ? 48 : 32;
3981 /* Finally, round to the preferred stack boundary. */
3982 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3983 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
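/* Worked example (illustrative, 32-bit, no registers saved, assuming
   an 8-byte starting frame offset and a 64-byte preferred stack
   boundary): 9 bytes of locals round up to 12, the starting frame
   offset makes 20, the 32-byte frame marker makes 52, and the final
   rounding yields a 64-byte frame.  */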
3986 /* Output function label, and associated .PROC and .CALLINFO statements. */
3988 void
3989 pa_output_function_label (FILE *file)
3991 /* The function's label and associated .PROC must never be
3992 separated and must be output *after* any profiling declarations
3993 to avoid changing spaces/subspaces within a procedure. */
3994 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
3995 ASM_OUTPUT_FUNCTION_LABEL (file, name, current_function_decl);
3996 fputs ("\t.PROC\n", file);
3998 /* pa_expand_prologue does the dirty work now. We just need
3999 to output the assembler directives which denote the start
4000 of a function. */
4001 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4002 if (crtl->is_leaf)
4003 fputs (",NO_CALLS", file);
4004 else
4005 fputs (",CALLS", file);
4006 if (rp_saved)
4007 fputs (",SAVE_RP", file);
4009 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4010 at the beginning of the frame and that it is used as the frame
4011 pointer for the frame. We do this because our current frame
4012 layout doesn't conform to that specified in the HP runtime
4013 documentation and we need a way to indicate to programs such as
4014 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4015 isn't used by HP compilers but is supported by the assembler.
4016 However, SAVE_SP is supposed to indicate that the previous stack
4017 pointer has been saved in the frame marker. */
4018 if (frame_pointer_needed)
4019 fputs (",SAVE_SP", file);
4021 /* Pass on information about the number of callee register saves
4022 performed in the prologue.
4024 The compiler is supposed to pass the highest register number
4025 saved, the assembler then has to adjust that number before
4026 entering it into the unwind descriptor (to account for any
4027 caller saved registers with lower register numbers than the
4028 first callee saved register). */
4029 if (gr_saved)
4030 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4032 if (fr_saved)
4033 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4035 fputs ("\n\t.ENTRY\n", file);
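/* Example (illustrative): a function whose callee saves cover
   %r3..%r5 has gr_saved == 3 and advertises ENTRY_GR=5, while a
   single save of %fr12 gives fr_saved == 1 and ENTRY_FR=12.  */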
4038 /* Output function prologue. */
4040 static void
4041 pa_output_function_prologue (FILE *file)
4043 pa_output_function_label (file);
4044 remove_useless_addtr_insns (0);
4047 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4049 static void
4050 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4052 remove_useless_addtr_insns (0);
4055 void
4056 pa_expand_prologue (void)
4058 int merge_sp_adjust_with_store = 0;
4059 HOST_WIDE_INT size = get_frame_size ();
4060 HOST_WIDE_INT offset;
4061 int i;
4062 rtx tmpreg;
4063 rtx_insn *insn;
4065 gr_saved = 0;
4066 fr_saved = 0;
4067 save_fregs = 0;
4069 /* Compute total size for frame pointer, filler, locals and rounding to
4070 the next word boundary. Similar code appears in pa_compute_frame_size
4071 and must be changed in tandem with this code. */
4072 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4073 if (local_fsize || frame_pointer_needed)
4074 local_fsize += pa_starting_frame_offset ();
4076 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4077 if (flag_stack_usage_info)
4078 current_function_static_stack_size = actual_fsize;
4080 /* Compute a few things we will use often. */
4081 tmpreg = gen_rtx_REG (word_mode, 1);
4083 /* Save RP first. The calling conventions manual states RP will
4084 always be stored into the caller's frame at sp - 20 or sp - 16
4085 depending on which ABI is in use. */
4086 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4088 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4089 rp_saved = true;
4091 else
4092 rp_saved = false;
4094 /* Allocate the local frame and set up the frame pointer if needed. */
4095 if (actual_fsize != 0)
4097 if (frame_pointer_needed)
4099 /* Copy the old frame pointer temporarily into %r1. Set up the
4100 new stack pointer, then store away the saved old frame pointer
4101 into the stack at sp and at the same time update the stack
4102 pointer by actual_fsize bytes. Two versions: the first
4103 handles small (<8k) frames, the second handles large (>=8k)
4104 frames. */
4105 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4106 if (DO_FRAME_NOTES)
4107 RTX_FRAME_RELATED_P (insn) = 1;
4109 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4110 if (DO_FRAME_NOTES)
4111 RTX_FRAME_RELATED_P (insn) = 1;
4113 if (VAL_14_BITS_P (actual_fsize))
4114 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4115 else
4117 /* It is incorrect to store the saved frame pointer at *sp,
4118 then increment sp (writes beyond the current stack boundary).
4120 So instead use stwm to store at *sp and post-increment the
4121 stack pointer as an atomic operation. Then increment sp to
4122 finish allocating the new frame. */
4123 HOST_WIDE_INT adjust1 = 8192 - 64;
4124 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4126 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4127 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4128 adjust2, 1);
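/* Worked numbers (illustrative): for actual_fsize == 10000, adjust1
   is 8192 - 64 = 8128 and adjust2 is 1872; the post-modify store
   raises sp by 8128 and the following add completes the
   allocation.  */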
4131 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4132 we need to store the previous stack pointer (frame pointer)
4133 into the frame marker on targets that use the HP unwind
4134 library. This allows the HP unwind library to be used to
4135 unwind GCC frames. However, we are not fully compatible
4136 with the HP library because our frame layout differs from
4137 that specified in the HP runtime specification.
4139 We don't want a frame note on this instruction as the frame
4140 marker moves during dynamic stack allocation.
4142 This instruction also serves as a blockage to prevent
4143 register spills from being scheduled before the stack
4144 pointer is raised. This is necessary as we store
4145 registers using the frame pointer as a base register,
4146 and the frame pointer is set before sp is raised. */
4147 if (TARGET_HPUX_UNWIND_LIBRARY)
4149 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4150 GEN_INT (TARGET_64BIT ? -8 : -4));
4152 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4153 hard_frame_pointer_rtx);
4155 else
4156 emit_insn (gen_blockage ());
4158 /* no frame pointer needed. */
4159 else
4161 /* In some cases we can perform the first callee register save
4162 and allocate the stack frame at the same time. If so, just
4163 make a note of it and defer allocating the frame until saving
4164 the callee registers. */
4165 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4166 merge_sp_adjust_with_store = 1;
4167 /* Cannot optimize. Adjust the stack frame by actual_fsize
4168 bytes. */
4169 else
4170 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4171 actual_fsize, 1);
4175 /* Normal register save.
4177 Do not save the frame pointer in the frame_pointer_needed case. It
4178 was done earlier. */
4179 if (frame_pointer_needed)
4181 offset = local_fsize;
4183 /* Saving the EH return data registers in the frame is the simplest
4184 way to get the frame unwind information emitted. We put them
4185 just before the general registers. */
4186 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4188 unsigned int i, regno;
4190 for (i = 0; ; ++i)
4192 regno = EH_RETURN_DATA_REGNO (i);
4193 if (regno == INVALID_REGNUM)
4194 break;
4196 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4197 offset += UNITS_PER_WORD;
4201 for (i = 18; i >= 4; i--)
4202 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4204 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4205 offset += UNITS_PER_WORD;
4206 gr_saved++;
4208 /* Account for %r3 which is saved in a special place. */
4209 gr_saved++;
4211 /* No frame pointer needed. */
4212 else
4214 offset = local_fsize - actual_fsize;
4216 /* Saving the EH return data registers in the frame is the simplest
4217 way to get the frame unwind information emitted. */
4218 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4220 unsigned int i, regno;
4222 for (i = 0; ; ++i)
4224 regno = EH_RETURN_DATA_REGNO (i);
4225 if (regno == INVALID_REGNUM)
4226 break;
4228 /* If merge_sp_adjust_with_store is nonzero, then we can
4229 optimize the first save. */
4230 if (merge_sp_adjust_with_store)
4232 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4233 merge_sp_adjust_with_store = 0;
4235 else
4236 store_reg (regno, offset, STACK_POINTER_REGNUM);
4237 offset += UNITS_PER_WORD;
4241 for (i = 18; i >= 3; i--)
4242 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4244 /* If merge_sp_adjust_with_store is nonzero, then we can
4245 optimize the first GR save. */
4246 if (merge_sp_adjust_with_store)
4248 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4249 merge_sp_adjust_with_store = 0;
4251 else
4252 store_reg (i, offset, STACK_POINTER_REGNUM);
4253 offset += UNITS_PER_WORD;
4254 gr_saved++;
4257 /* If we wanted to merge the SP adjustment with a GR save, but we never
4258 did any GR saves, then just emit the adjustment here. */
4259 if (merge_sp_adjust_with_store)
4260 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4261 actual_fsize, 1);
4264 /* The hppa calling conventions say that %r19, the pic offset
4265 register, is saved at sp - 32 (in this function's frame)
4266 when generating PIC code. FIXME: What is the correct thing
4267 to do for functions which make no calls and allocate no
4268 frame? Do we need to allocate a frame, or can we just omit
4269 the save? For now we'll just omit the save.
4271 We don't want a note on this insn as the frame marker can
4272 move if there is a dynamic stack allocation. */
4273 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4275 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4277 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4281 /* Align pointer properly (doubleword boundary). */
4282 offset = (offset + 7) & ~7;
4284 /* Floating point register store. */
4285 if (save_fregs)
4287 rtx base;
4289 /* First compute the address of the start of the FP register save
4290 area in %r1, based off the frame or stack pointer. */
4291 if (frame_pointer_needed)
4293 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4294 base = hard_frame_pointer_rtx;
4296 else
4298 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4299 base = stack_pointer_rtx;
4302 /* Now actually save the FP registers. */
4303 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4305 if (df_regs_ever_live_p (i)
4306 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4308 rtx addr, reg;
4309 rtx_insn *insn;
4310 addr = gen_rtx_MEM (DFmode,
4311 gen_rtx_POST_INC (word_mode, tmpreg));
4312 reg = gen_rtx_REG (DFmode, i);
4313 insn = emit_move_insn (addr, reg);
4314 if (DO_FRAME_NOTES)
4316 RTX_FRAME_RELATED_P (insn) = 1;
4317 if (TARGET_64BIT)
4319 rtx mem = gen_rtx_MEM (DFmode,
4320 plus_constant (Pmode, base,
4321 offset));
4322 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4323 gen_rtx_SET (mem, reg));
4325 else
4327 rtx meml = gen_rtx_MEM (SFmode,
4328 plus_constant (Pmode, base,
4329 offset));
4330 rtx memr = gen_rtx_MEM (SFmode,
4331 plus_constant (Pmode, base,
4332 offset + 4));
4333 rtx regl = gen_rtx_REG (SFmode, i);
4334 rtx regr = gen_rtx_REG (SFmode, i + 1);
4335 rtx setl = gen_rtx_SET (meml, regl);
4336 rtx setr = gen_rtx_SET (memr, regr);
4337 rtvec vec;
4339 RTX_FRAME_RELATED_P (setl) = 1;
4340 RTX_FRAME_RELATED_P (setr) = 1;
4341 vec = gen_rtvec (2, setl, setr);
4342 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4343 gen_rtx_SEQUENCE (VOIDmode, vec));
4346 offset += GET_MODE_SIZE (DFmode);
4347 fr_saved++;
4353 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4354 Handle case where DISP > 8k by using the add_high_const patterns. */
4356 static void
4357 load_reg (int reg, HOST_WIDE_INT disp, int base)
4359 rtx dest = gen_rtx_REG (word_mode, reg);
4360 rtx basereg = gen_rtx_REG (Pmode, base);
4361 rtx src;
4363 if (VAL_14_BITS_P (disp))
4364 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4365 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4367 rtx delta = GEN_INT (disp);
4368 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4370 emit_move_insn (tmpreg, delta);
4371 if (TARGET_DISABLE_INDEXING)
4373 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4374 src = gen_rtx_MEM (word_mode, tmpreg);
4376 else
4377 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4379 else
4381 rtx delta = GEN_INT (disp);
4382 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4383 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4385 emit_move_insn (tmpreg, high);
4386 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4389 emit_move_insn (dest, src);
4392 /* Update the total code bytes output to the text section. */
4394 static void
4395 update_total_code_bytes (unsigned int nbytes)
4397 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4398 && !IN_NAMED_SECTION_P (cfun->decl))
4400 unsigned int old_total = total_code_bytes;
4402 total_code_bytes += nbytes;
4404 /* Be prepared to handle overflows. */
4405 if (old_total > total_code_bytes)
4406 total_code_bytes = UINT_MAX;
4410 /* This function generates the assembly code for function exit.
4411 Args are as for output_function_prologue ().
4413 The function epilogue should not depend on the current stack
4414 pointer! It should use the frame pointer only. This is mandatory
4415 because of alloca; we also take advantage of it to omit stack
4416 adjustments before returning. */
4418 static void
4419 pa_output_function_epilogue (FILE *file)
4421 rtx_insn *insn = get_last_insn ();
4422 bool extra_nop;
4424 /* pa_expand_epilogue does the dirty work now. We just need
4425 to output the assembler directives which denote the end
4426 of a function.
4428 To make debuggers happy, emit a nop if the epilogue was completely
4429 eliminated due to a volatile call as the last insn in the
4430 current function. That way the return address (in %r2) will
4431 always point to a valid instruction in the current function. */
4433 /* Get the last real insn. */
4434 if (NOTE_P (insn))
4435 insn = prev_real_insn (insn);
4437 /* If it is a sequence, then look inside. */
4438 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4439 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4441 /* If insn is a CALL_INSN, then it must be a call to a volatile
4442 function (otherwise there would be epilogue insns). */
4443 if (insn && CALL_P (insn))
4445 fputs ("\tnop\n", file);
4446 extra_nop = true;
4448 else
4449 extra_nop = false;
4451 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4453 if (TARGET_SOM && TARGET_GAS)
4455 /* We are done with this subspace except possibly for some additional
4456 debug information. Forget that we are in this subspace to ensure
4457 that the next function is output in its own subspace. */
4458 in_section = NULL;
4459 cfun->machine->in_nsubspa = 2;
4462 /* Thunks do their own insn accounting. */
4463 if (cfun->is_thunk)
4464 return;
4466 if (INSN_ADDRESSES_SET_P ())
4468 last_address = extra_nop ? 4 : 0;
4469 insn = get_last_nonnote_insn ();
4470 if (insn)
4472 last_address += INSN_ADDRESSES (INSN_UID (insn));
4473 if (INSN_P (insn))
4474 last_address += insn_default_length (insn);
4476 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4477 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4479 else
4480 last_address = UINT_MAX;
4482 /* Finally, update the total number of code bytes output so far. */
4483 update_total_code_bytes (last_address);
4486 void
4487 pa_expand_epilogue (void)
4489 rtx tmpreg;
4490 HOST_WIDE_INT offset;
4491 HOST_WIDE_INT ret_off = 0;
4492 int i;
4493 int merge_sp_adjust_with_load = 0;
4495 /* We will use this often. */
4496 tmpreg = gen_rtx_REG (word_mode, 1);
4498 /* Try to restore RP early to avoid load/use interlocks when
4499 RP gets used in the return (bv) instruction. This appears to still
4500 be necessary even when we schedule the prologue and epilogue. */
4501 if (rp_saved)
4503 ret_off = TARGET_64BIT ? -16 : -20;
4504 if (frame_pointer_needed)
4506 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4507 ret_off = 0;
4509 else
4511 /* No frame pointer, and stack is smaller than 8k. */
4512 if (VAL_14_BITS_P (ret_off - actual_fsize))
4514 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4515 ret_off = 0;
4520 /* General register restores. */
4521 if (frame_pointer_needed)
4523 offset = local_fsize;
4525 /* If the current function calls __builtin_eh_return, then we need
4526 to restore the saved EH data registers. */
4527 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4529 unsigned int i, regno;
4531 for (i = 0; ; ++i)
4533 regno = EH_RETURN_DATA_REGNO (i);
4534 if (regno == INVALID_REGNUM)
4535 break;
4537 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4538 offset += UNITS_PER_WORD;
4542 for (i = 18; i >= 4; i--)
4543 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4545 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4546 offset += UNITS_PER_WORD;
4549 else
4551 offset = local_fsize - actual_fsize;
4553 /* If the current function calls __builtin_eh_return, then we need
4554 to restore the saved EH data registers. */
4555 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4557 unsigned int i, regno;
4559 for (i = 0; ; ++i)
4561 regno = EH_RETURN_DATA_REGNO (i);
4562 if (regno == INVALID_REGNUM)
4563 break;
4565 /* Only for the first load.
4566 merge_sp_adjust_with_load holds the register load
4567 with which we will merge the sp adjustment. */
4568 if (merge_sp_adjust_with_load == 0
4569 && local_fsize == 0
4570 && VAL_14_BITS_P (-actual_fsize))
4571 merge_sp_adjust_with_load = regno;
4572 else
4573 load_reg (regno, offset, STACK_POINTER_REGNUM);
4574 offset += UNITS_PER_WORD;
4578 for (i = 18; i >= 3; i--)
4580 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4582 /* Only for the first load.
4583 merge_sp_adjust_with_load holds the register load
4584 with which we will merge the sp adjustment. */
4585 if (merge_sp_adjust_with_load == 0
4586 && local_fsize == 0
4587 && VAL_14_BITS_P (-actual_fsize))
4588 merge_sp_adjust_with_load = i;
4589 else
4590 load_reg (i, offset, STACK_POINTER_REGNUM);
4591 offset += UNITS_PER_WORD;
4596 /* Align pointer properly (doubleword boundary). */
4597 offset = (offset + 7) & ~7;
4599 /* FP register restores. */
4600 if (save_fregs)
4602 /* Adjust the register to index off of. */
4603 if (frame_pointer_needed)
4604 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4605 else
4606 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4608 /* Actually do the restores now. */
4609 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4610 if (df_regs_ever_live_p (i)
4611 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4613 rtx src = gen_rtx_MEM (DFmode,
4614 gen_rtx_POST_INC (word_mode, tmpreg));
4615 rtx dest = gen_rtx_REG (DFmode, i);
4616 emit_move_insn (dest, src);
4620 /* Emit a blockage insn here to keep these insns from being moved to
4621 an earlier spot in the epilogue, or into the main instruction stream.
4623 This is necessary as we must not cut the stack back before all the
4624 restores are finished. */
4625 emit_insn (gen_blockage ());
4627 /* Reset stack pointer (and possibly frame pointer). The stack
4628 pointer is initially set to fp + 64 to avoid a race condition. */
4629 if (frame_pointer_needed)
4631 rtx delta = GEN_INT (-64);
4633 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4634 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4635 stack_pointer_rtx, delta));
4637 /* If we were deferring a callee register restore, do it now. */
4638 else if (merge_sp_adjust_with_load)
4640 rtx delta = GEN_INT (-actual_fsize);
4641 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4643 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4645 else if (actual_fsize != 0)
4646 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4647 - actual_fsize, 0);
4649 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4650 frame greater than 8k), do so now. */
4651 if (ret_off != 0)
4652 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4654 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4656 rtx sa = EH_RETURN_STACKADJ_RTX;
4658 emit_insn (gen_blockage ());
4659 emit_insn (TARGET_64BIT
4660 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4661 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4665 bool
4666 pa_can_use_return_insn (void)
4668 if (!reload_completed)
4669 return false;
4671 if (frame_pointer_needed)
4672 return false;
4674 if (df_regs_ever_live_p (2))
4675 return false;
4677 if (crtl->profile)
4678 return false;
4680 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4683 rtx
4684 hppa_pic_save_rtx (void)
4686 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4689 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4690 #define NO_DEFERRED_PROFILE_COUNTERS 0
4691 #endif
4694 /* Vector of funcdef numbers. */
4695 static vec<int> funcdef_nos;
4697 /* Output deferred profile counters. */
4698 static void
4699 output_deferred_profile_counters (void)
4701 unsigned int i;
4702 int align, n;
4704 if (funcdef_nos.is_empty ())
4705 return;
4707 switch_to_section (data_section);
4708 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4709 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4711 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4713 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4714 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4717 funcdef_nos.release ();
4720 void
4721 hppa_profile_hook (int label_no)
4723 rtx_code_label *label_rtx = gen_label_rtx ();
4724 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4725 rtx arg_bytes, begin_label_rtx, mcount, sym;
4726 rtx_insn *call_insn;
4727 char begin_label_name[16];
4728 bool use_mcount_pcrel_call;
4730 /* Set up call destination. */
4731 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4732 pa_encode_label (sym);
4733 mcount = gen_rtx_MEM (Pmode, sym);
4735 /* If we can reach _mcount with a pc-relative call, we can optimize
4736 loading the address of the current function. This requires linker
4737 long branch stub support. */
4738 if (!TARGET_PORTABLE_RUNTIME
4739 && !TARGET_LONG_CALLS
4740 && (TARGET_SOM || flag_function_sections))
4741 use_mcount_pcrel_call = true;
4742 else
4743 use_mcount_pcrel_call = false;
4745 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4746 label_no);
4747 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4749 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4751 if (!use_mcount_pcrel_call)
4753 /* The address of the function is loaded into %r25 with an instruction-
4754 relative sequence that avoids the use of relocations. We use SImode
4755 for the address of the function in both 32 and 64-bit code to avoid
4756 having to provide DImode versions of the lcla2 pattern. */
4757 if (TARGET_PA_20)
4758 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4759 else
4760 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4763 if (!NO_DEFERRED_PROFILE_COUNTERS)
4765 rtx count_label_rtx, addr, r24;
4766 char count_label_name[16];
4768 funcdef_nos.safe_push (label_no);
4769 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4770 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4771 ggc_strdup (count_label_name));
4773 addr = force_reg (Pmode, count_label_rtx);
4774 r24 = gen_rtx_REG (Pmode, 24);
4775 emit_move_insn (r24, addr);
4777 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4778 if (use_mcount_pcrel_call)
4779 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4780 begin_label_rtx));
4781 else
4782 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4784 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4786 else
4788 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4789 if (use_mcount_pcrel_call)
4790 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4791 begin_label_rtx));
4792 else
4793 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4796 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4797 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4799 /* Indicate the _mcount call cannot throw, nor will it execute a
4800 non-local goto. */
4801 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4803 /* Allocate space for fixed arguments. */
4804 if (reg_parm_stack_space > crtl->outgoing_args_size)
4805 crtl->outgoing_args_size = reg_parm_stack_space;
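/* Example (illustrative): in 32-bit mode with deferred counters the
   _mcount call passes three word-sized arguments (%r26 = caller's
   return pointer, %r25 = address of the current function,
   %r24 = address of the counter), matching arg_bytes == 12 above.  */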
4808 /* Fetch the return address for the frame COUNT steps up from
4809 the current frame, after the prologue. FRAMEADDR is the
4810 frame pointer of the COUNT frame.
4812 We want to ignore any export stub remnants here. To handle this,
4813 we examine the code at the return address, and if it is an export
4814 stub, we return a memory rtx for the stub return address stored
4815 at frame-24.
4817 The value returned is used in two different ways:
4819 1. To find a function's caller.
4821 2. To change the return address for a function.
4823 This function handles most instances of case 1; however, it will
4824 fail if there are two levels of stubs to execute on the return
4825 path. The only way I believe that can happen is if the return value
4826 needs a parameter relocation, which never happens for C code.
4828 This function handles most instances of case 2; however, it will
4829 fail if we did not originally have stub code on the return path
4830 but will need stub code on the new return path. This can happen if
4831 the caller & callee are both in the main program, but the new
4832 return location is in a shared library. */
4834 rtx
4835 pa_return_addr_rtx (int count, rtx frameaddr)
4837 rtx label;
4838 rtx rp;
4839 rtx saved_rp;
4840 rtx ins;
4842 /* The instruction stream at the return address of a PA1.X export stub is:
4844 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4845 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4846 0x00011820 | stub+16: mtsp r1,sr0
4847 0xe0400002 | stub+20: be,n 0(sr0,rp)
4849 0xe0400002 must be specified as -532676606 so that it won't be
4850 rejected as an invalid immediate operand on 64-bit hosts.
4852 The instruction stream at the return address of a PA2.0 export stub is:
4854 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4855 0xe840d002 | stub+12: bve,n (rp)
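   Likewise, 0xe840d002 must be specified as -398405630 for the same
   reason.  */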
4858 HOST_WIDE_INT insns[4];
4859 int i, len;
4861 if (count != 0)
4862 return NULL_RTX;
4864 rp = get_hard_reg_initial_val (Pmode, 2);
4866 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4867 return rp;
4869 /* If there is no export stub then just use the value saved from
4870 the return pointer register. */
4872 saved_rp = gen_reg_rtx (Pmode);
4873 emit_move_insn (saved_rp, rp);
4875 /* Get pointer to the instruction stream. We have to mask out the
4876 privilege level from the two low order bits of the return address
4877 pointer here so that ins will point to the start of the first
4878 instruction that would have been executed if we returned. */
4879 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4880 label = gen_label_rtx ();
4882 if (TARGET_PA_20)
4884 insns[0] = 0x4bc23fd1;
4885 insns[1] = -398405630;
4886 len = 2;
4888 else
4890 insns[0] = 0x4bc23fd1;
4891 insns[1] = 0x004010a1;
4892 insns[2] = 0x00011820;
4893 insns[3] = -532676606;
4894 len = 4;
4897 /* Check the instruction stream at the normal return address for the
4898 export stub. If it is an export stub, then our return address is
4899 really in -24[frameaddr]. */
4901 for (i = 0; i < len; i++)
4903 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4904 rtx op1 = GEN_INT (insns[i]);
4905 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4908 /* Here we know that our return address points to an export
4909 stub. We don't want to return the address of the export stub,
4910 but rather the return address of the export stub. That return
4911 address is stored at -24[frameaddr]. */
4913 emit_move_insn (saved_rp,
4914 gen_rtx_MEM (Pmode,
4915 memory_address (Pmode,
4916 plus_constant (Pmode, frameaddr,
4917 -24))));
4919 emit_label (label);
4921 return saved_rp;
4924 void
4925 pa_emit_bcond_fp (rtx operands[])
4927 enum rtx_code code = GET_CODE (operands[0]);
4928 rtx operand0 = operands[1];
4929 rtx operand1 = operands[2];
4930 rtx label = operands[3];
4932 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4933 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4935 emit_jump_insn (gen_rtx_SET (pc_rtx,
4936 gen_rtx_IF_THEN_ELSE (VOIDmode,
4937 gen_rtx_fmt_ee (NE,
4938 VOIDmode,
4939 gen_rtx_REG (CCFPmode, 0),
4940 const0_rtx),
4941 gen_rtx_LABEL_REF (VOIDmode, label),
4942 pc_rtx)));
4946 /* Adjust the cost of a scheduling dependency. Return the new cost of
4947 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4949 static int
4950 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4951 unsigned int)
4953 enum attr_type attr_type;
4955 /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4956 true dependencies, as they are described with bypasses now. */
4957 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4958 return cost;
4960 if (! recog_memoized (insn))
4961 return 0;
4963 attr_type = get_attr_type (insn);
4965 switch (dep_type)
4967 case REG_DEP_ANTI:
4968 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4969 cycles later. */
4971 if (attr_type == TYPE_FPLOAD)
4973 rtx pat = PATTERN (insn);
4974 rtx dep_pat = PATTERN (dep_insn);
4975 if (GET_CODE (pat) == PARALLEL)
4977 /* This happens for the fldXs,mb patterns. */
4978 pat = XVECEXP (pat, 0, 0);
4980 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4981 /* If this happens, we have to extend this to schedule
4982 optimally. Return 0 for now. */
4983 return 0;
4985 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4987 if (! recog_memoized (dep_insn))
4988 return 0;
4989 switch (get_attr_type (dep_insn))
4991 case TYPE_FPALU:
4992 case TYPE_FPMULSGL:
4993 case TYPE_FPMULDBL:
4994 case TYPE_FPDIVSGL:
4995 case TYPE_FPDIVDBL:
4996 case TYPE_FPSQRTSGL:
4997 case TYPE_FPSQRTDBL:
4998 /* An fpload can't be issued until one cycle before a
4999 preceding arithmetic operation has finished if
5000 the target of the fpload is any of the sources
5001 (or destination) of the arithmetic operation. */
5002 return insn_default_latency (dep_insn) - 1;
5004 default:
5005 return 0;
5009 else if (attr_type == TYPE_FPALU)
5011 rtx pat = PATTERN (insn);
5012 rtx dep_pat = PATTERN (dep_insn);
5013 if (GET_CODE (pat) == PARALLEL)
5015 /* This happens for the fldXs,mb patterns. */
5016 pat = XVECEXP (pat, 0, 0);
5018 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5019 /* If this happens, we have to extend this to schedule
5020 optimally. Return 0 for now. */
5021 return 0;
5023 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5025 if (! recog_memoized (dep_insn))
5026 return 0;
5027 switch (get_attr_type (dep_insn))
5029 case TYPE_FPDIVSGL:
5030 case TYPE_FPDIVDBL:
5031 case TYPE_FPSQRTSGL:
5032 case TYPE_FPSQRTDBL:
5033 /* An ALU flop can't be issued until two cycles before a
5034 preceding divide or sqrt operation has finished if
5035 the target of the ALU flop is any of the sources
5036 (or destination) of the divide or sqrt operation. */
5037 return insn_default_latency (dep_insn) - 2;
5039 default:
5040 return 0;
5045 /* For other anti dependencies, the cost is 0. */
5046 return 0;
5048 case REG_DEP_OUTPUT:
5049 /* Output dependency; DEP_INSN writes a register that INSN writes some
5050 cycles later. */
5051 if (attr_type == TYPE_FPLOAD)
5053 rtx pat = PATTERN (insn);
5054 rtx dep_pat = PATTERN (dep_insn);
5055 if (GET_CODE (pat) == PARALLEL)
5057 /* This happens for the fldXs,mb patterns. */
5058 pat = XVECEXP (pat, 0, 0);
5060 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5061 /* If this happens, we have to extend this to schedule
5062 optimally. Return 0 for now. */
5063 return 0;
5065 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5067 if (! recog_memoized (dep_insn))
5068 return 0;
5069 switch (get_attr_type (dep_insn))
5071 case TYPE_FPALU:
5072 case TYPE_FPMULSGL:
5073 case TYPE_FPMULDBL:
5074 case TYPE_FPDIVSGL:
5075 case TYPE_FPDIVDBL:
5076 case TYPE_FPSQRTSGL:
5077 case TYPE_FPSQRTDBL:
5078 /* An fpload can't be issued until one cycle before a
5079 preceding arithmetic operation has finished if
5080 the target of the fpload is the destination of the
5081 arithmetic operation.
5083 Exception: For PA7100LC, PA7200 and PA7300, the cost
5084 is 3 cycles, unless they bundle together. We also
5085 pay the penalty if the second insn is an fpload. */
5086 return insn_default_latency (dep_insn) - 1;
5088 default:
5089 return 0;
5093 else if (attr_type == TYPE_FPALU)
5095 rtx pat = PATTERN (insn);
5096 rtx dep_pat = PATTERN (dep_insn);
5097 if (GET_CODE (pat) == PARALLEL)
5099 /* This happens for the fldXs,mb patterns. */
5100 pat = XVECEXP (pat, 0, 0);
5102 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5103 /* If this happens, we have to extend this to schedule
5104 optimally. Return 0 for now. */
5105 return 0;
5107 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5109 if (! recog_memoized (dep_insn))
5110 return 0;
5111 switch (get_attr_type (dep_insn))
5113 case TYPE_FPDIVSGL:
5114 case TYPE_FPDIVDBL:
5115 case TYPE_FPSQRTSGL:
5116 case TYPE_FPSQRTDBL:
5117 /* An ALU flop can't be issued until two cycles before a
5118 preceding divide or sqrt operation has finished if
5119 the target of the ALU flop is also the target of
5120 the divide or sqrt operation. */
5121 return insn_default_latency (dep_insn) - 2;
5123 default:
5124 return 0;
5129 /* For other output dependencies, the cost is 0. */
5130 return 0;
5132 default:
5133 gcc_unreachable ();
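/* Example (illustrative): an fpload that overwrites a source register
   of an earlier fpalu insn with a default latency of 3 cycles gets an
   anti-dependency cost of 3 - 1 = 2 from the code above.  */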
5137 /* The 700 can only issue a single insn at a time.
5138 The 7XXX processors can issue two insns at a time.
5139 The 8000 can issue 4 insns at a time. */
5140 static int
5141 pa_issue_rate (void)
5143 switch (pa_cpu)
5145 case PROCESSOR_700: return 1;
5146 case PROCESSOR_7100: return 2;
5147 case PROCESSOR_7100LC: return 2;
5148 case PROCESSOR_7200: return 2;
5149 case PROCESSOR_7300: return 2;
5150 case PROCESSOR_8000: return 4;
5152 default:
5153 gcc_unreachable ();
5159 /* Return the length of INSN, adding any needed adjustment to its
5160 already computed length LENGTH. Return LENGTH if no adjustment is
5161 necessary.
5163 Also compute the length of an inline block move here as it is too
5164 complicated to express as a length attribute in pa.md. */
5165 int
5166 pa_adjust_insn_length (rtx_insn *insn, int length)
5168 rtx pat = PATTERN (insn);
5170 /* If length is negative or undefined, provide initial length. */
5171 if ((unsigned int) length >= INT_MAX)
5173 if (GET_CODE (pat) == SEQUENCE)
5174 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5176 switch (get_attr_type (insn))
5178 case TYPE_MILLI:
5179 length = pa_attr_length_millicode_call (insn);
5180 break;
5181 case TYPE_CALL:
5182 length = pa_attr_length_call (insn, 0);
5183 break;
5184 case TYPE_SIBCALL:
5185 length = pa_attr_length_call (insn, 1);
5186 break;
5187 case TYPE_DYNCALL:
5188 length = pa_attr_length_indirect_call (insn);
5189 break;
5190 case TYPE_SH_FUNC_ADRS:
5191 length = pa_attr_length_millicode_call (insn) + 20;
5192 break;
5193 default:
5194 gcc_unreachable ();
5198 /* Block move pattern. */
5199 if (NONJUMP_INSN_P (insn)
5200 && GET_CODE (pat) == PARALLEL
5201 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5202 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5203 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5204 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5205 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5206 length += compute_cpymem_length (insn) - 4;
5207 /* Block clear pattern. */
5208 else if (NONJUMP_INSN_P (insn)
5209 && GET_CODE (pat) == PARALLEL
5210 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5211 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5212 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5213 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5214 length += compute_clrmem_length (insn) - 4;
5215 /* Conditional branch with an unfilled delay slot. */
5216 else if (JUMP_P (insn) && ! simplejump_p (insn))
5218 /* Adjust a short backwards conditional with an unfilled delay slot. */
5219 if (GET_CODE (pat) == SET
5220 && length == 4
5221 && JUMP_LABEL (insn) != NULL_RTX
5222 && ! forward_branch_p (insn))
5223 length += 4;
5224 else if (GET_CODE (pat) == PARALLEL
5225 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5226 && length == 4)
5227 length += 4;
5228 /* Adjust dbra insn with short backwards conditional branch with
5229 unfilled delay slot -- only for the case where the counter is in a
5230 general register. */
5231 else if (GET_CODE (pat) == PARALLEL
5232 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5233 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5234 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5235 && length == 4
5236 && ! forward_branch_p (insn))
5237 length += 4;
5239 return length;
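/* Example (illustrative): a short backwards conditional branch with
   an unfilled delay slot enters with length 4 and is reported as 8
   by the adjustment above.  */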
5242 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5244 static bool
5245 pa_print_operand_punct_valid_p (unsigned char code)
5247 if (code == '@'
5248 || code == '#'
5249 || code == '*'
5250 || code == '^')
5251 return true;
5253 return false;
5256 /* Print operand X (an rtx) in assembler syntax to file FILE.
5257 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5258 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5260 void
5261 pa_print_operand (FILE *file, rtx x, int code)
5263 switch (code)
5265 case '#':
5266 /* Output a 'nop' if there's nothing for the delay slot. */
5267 if (dbr_sequence_length () == 0)
5268 fputs ("\n\tnop", file);
5269 return;
5270 case '*':
5271 /* Output a nullification completer if there's nothing for the
5272 delay slot or nullification is requested. */
5273 if (dbr_sequence_length () == 0
5274 || (final_sequence
5275 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5276 fputs (",n", file);
5277 return;
5278 case 'R':
5279 /* Print out the second register name of a register pair.
5280 I.e., R (6) => 7. */
5281 fputs (reg_names[REGNO (x) + 1], file);
5282 return;
5283 case 'r':
5284 /* A register or zero. */
5285 if (x == const0_rtx
5286 || (x == CONST0_RTX (DFmode))
5287 || (x == CONST0_RTX (SFmode)))
5289 fputs ("%r0", file);
5290 return;
5292 else
5293 break;
5294 case 'f':
5295 /* A register or zero (floating point). */
5296 if (x == const0_rtx
5297 || (x == CONST0_RTX (DFmode))
5298 || (x == CONST0_RTX (SFmode)))
5300 fputs ("%fr0", file);
5301 return;
5303 else
5304 break;
5305 case 'A':
5307 rtx xoperands[2];
5309 xoperands[0] = XEXP (XEXP (x, 0), 0);
5310 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5311 pa_output_global_address (file, xoperands[1], 0);
5312 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5313 return;
5316 case 'C': /* Plain (C)ondition */
5317 case 'X':
5318 switch (GET_CODE (x))
5320 case EQ:
5321 fputs ("=", file); break;
5322 case NE:
5323 fputs ("<>", file); break;
5324 case GT:
5325 fputs (">", file); break;
5326 case GE:
5327 fputs (">=", file); break;
5328 case GEU:
5329 fputs (">>=", file); break;
5330 case GTU:
5331 fputs (">>", file); break;
5332 case LT:
5333 fputs ("<", file); break;
5334 case LE:
5335 fputs ("<=", file); break;
5336 case LEU:
5337 fputs ("<<=", file); break;
5338 case LTU:
5339 fputs ("<<", file); break;
5340 default:
5341 gcc_unreachable ();
5343 return;
5344 case 'N': /* Condition, (N)egated */
5345 switch (GET_CODE (x))
5347 case EQ:
5348 fputs ("<>", file); break;
5349 case NE:
5350 fputs ("=", file); break;
5351 case GT:
5352 fputs ("<=", file); break;
5353 case GE:
5354 fputs ("<", file); break;
5355 case GEU:
5356 fputs ("<<", file); break;
5357 case GTU:
5358 fputs ("<<=", file); break;
5359 case LT:
5360 fputs (">=", file); break;
5361 case LE:
5362 fputs (">", file); break;
5363 case LEU:
5364 fputs (">>", file); break;
5365 case LTU:
5366 fputs (">>=", file); break;
5367 default:
5368 gcc_unreachable ();
5370 return;
5371 /* For floating point comparisons. Note that the output
5372 predicates are the complement of the desired condition. The
5373 conditions for GT, GE, LT, LE and LTGT cause an invalid
5374 operation exception if the result is unordered and this
5375 exception is enabled in the floating-point status register. */
5376 case 'Y':
5377 switch (GET_CODE (x))
5379 case EQ:
5380 fputs ("!=", file); break;
5381 case NE:
5382 fputs ("=", file); break;
5383 case GT:
5384 fputs ("!>", file); break;
5385 case GE:
5386 fputs ("!>=", file); break;
5387 case LT:
5388 fputs ("!<", file); break;
5389 case LE:
5390 fputs ("!<=", file); break;
5391 case LTGT:
5392 fputs ("!<>", file); break;
5393 case UNLE:
5394 fputs ("!?<=", file); break;
5395 case UNLT:
5396 fputs ("!?<", file); break;
5397 case UNGE:
5398 fputs ("!?>=", file); break;
5399 case UNGT:
5400 fputs ("!?>", file); break;
5401 case UNEQ:
5402 fputs ("!?=", file); break;
5403 case UNORDERED:
5404 fputs ("!?", file); break;
5405 case ORDERED:
5406 fputs ("?", file); break;
5407 default:
5408 gcc_unreachable ();
5410 return;
5411 case 'S': /* Condition, operands are (S)wapped. */
5412 switch (GET_CODE (x))
5414 case EQ:
5415 fputs ("=", file); break;
5416 case NE:
5417 fputs ("<>", file); break;
5418 case GT:
5419 fputs ("<", file); break;
5420 case GE:
5421 fputs ("<=", file); break;
5422 case GEU:
5423 fputs ("<<=", file); break;
5424 case GTU:
5425 fputs ("<<", file); break;
5426 case LT:
5427 fputs (">", file); break;
5428 case LE:
5429 fputs (">=", file); break;
5430 case LEU:
5431 fputs (">>=", file); break;
5432 case LTU:
5433 fputs (">>", file); break;
5434 default:
5435 gcc_unreachable ();
5437 return;
5438 case 'B': /* Condition, (B)oth swapped and negate. */
5439 switch (GET_CODE (x))
5441 case EQ:
5442 fputs ("<>", file); break;
5443 case NE:
5444 fputs ("=", file); break;
5445 case GT:
5446 fputs (">=", file); break;
5447 case GE:
5448 fputs (">", file); break;
5449 case GEU:
5450 fputs (">>", file); break;
5451 case GTU:
5452 fputs (">>=", file); break;
5453 case LT:
5454 fputs ("<=", file); break;
5455 case LE:
5456 fputs ("<", file); break;
5457 case LEU:
5458 fputs ("<<", file); break;
5459 case LTU:
5460 fputs ("<<=", file); break;
5461 default:
5462 gcc_unreachable ();
5464 return;
5465 case 'k':
5466 gcc_assert (GET_CODE (x) == CONST_INT);
5467 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5468 return;
5469 case 'Q':
5470 gcc_assert (GET_CODE (x) == CONST_INT);
5471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5472 return;
5473 case 'L':
5474 gcc_assert (GET_CODE (x) == CONST_INT);
5475 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5476 return;
5477 case 'o':
5478 gcc_assert (GET_CODE (x) == CONST_INT
5479 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5480 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5481 return;
5482 case 'O':
5483 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5484 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5485 return;
5486 case 'p':
5487 gcc_assert (GET_CODE (x) == CONST_INT);
5488 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5489 return;
5490 case 'P':
5491 gcc_assert (GET_CODE (x) == CONST_INT);
5492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5493 return;
5494 case 'I':
5495 if (GET_CODE (x) == CONST_INT)
5496 fputs ("i", file);
5497 return;
5498 case 'M':
5499 case 'F':
5500 switch (GET_CODE (XEXP (x, 0)))
5502 case PRE_DEC:
5503 case PRE_INC:
5504 if (ASSEMBLER_DIALECT == 0)
5505 fputs ("s,mb", file);
5506 else
5507 fputs (",mb", file);
5508 break;
5509 case POST_DEC:
5510 case POST_INC:
5511 if (ASSEMBLER_DIALECT == 0)
5512 fputs ("s,ma", file);
5513 else
5514 fputs (",ma", file);
5515 break;
5516 case PLUS:
5517 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5518 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5520 if (ASSEMBLER_DIALECT == 0)
5521 fputs ("x", file);
5523 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5524 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5526 if (ASSEMBLER_DIALECT == 0)
5527 fputs ("x,s", file);
5528 else
5529 fputs (",s", file);
5531 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5532 fputs ("s", file);
5533 break;
5534 default:
5535 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5536 fputs ("s", file);
5537 break;
5539 return;
5540 case 'G':
5541 pa_output_global_address (file, x, 0);
5542 return;
5543 case 'H':
5544 pa_output_global_address (file, x, 1);
5545 return;
5546 case 0: /* Don't do anything special */
5547 break;
5548 case 'Z':
5550 unsigned op[3];
5551 compute_zdepwi_operands (INTVAL (x), op);
5552 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5553 return;
5555 case 'z':
5557 unsigned op[3];
5558 compute_zdepdi_operands (INTVAL (x), op);
5559 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5560 return;
5562 case 'c':
5563 /* We can get here from a .vtable_inherit due to our
5564 CONSTANT_ADDRESS_P rejecting perfectly good constant
5565 addresses. */
5566 break;
5567 default:
5568 gcc_unreachable ();
5570 if (GET_CODE (x) == REG)
5572 fputs (reg_names [REGNO (x)], file);
5573 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5575 fputs ("R", file);
5576 return;
5578 if (FP_REG_P (x)
5579 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5580 && (REGNO (x) & 1) == 0)
5581 fputs ("L", file);
5583 else if (GET_CODE (x) == MEM)
5585 int size = GET_MODE_SIZE (GET_MODE (x));
5586 rtx base = NULL_RTX;
5587 switch (GET_CODE (XEXP (x, 0)))
5589 case PRE_DEC:
5590 case POST_DEC:
5591 base = XEXP (XEXP (x, 0), 0);
5592 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5593 break;
5594 case PRE_INC:
5595 case POST_INC:
5596 base = XEXP (XEXP (x, 0), 0);
5597 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5598 break;
5599 case PLUS:
5600 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5601 fprintf (file, "%s(%s)",
5602 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5603 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5604 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5605 fprintf (file, "%s(%s)",
5606 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5607 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5608 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5609 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5611 /* Because the REG_POINTER flag can get lost during reload,
5612 pa_legitimate_address_p canonicalizes the order of the
5613 index and base registers in the combined move patterns. */
5614 rtx base = XEXP (XEXP (x, 0), 1);
5615 rtx index = XEXP (XEXP (x, 0), 0);
5617 fprintf (file, "%s(%s)",
5618 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5620 else
5621 output_address (GET_MODE (x), XEXP (x, 0));
5622 break;
5623 default:
5624 output_address (GET_MODE (x), XEXP (x, 0));
5625 break;
5628 else
5629 output_addr_const (file, x);
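/* Examples (illustrative): if operands[0] is (gt ...), "%C0" prints
   ">", "%N0" prints "<=" and "%S0" prints "<"; "%R0" applied to a
   register pair starting at %r6 prints the second name, "%r7".  */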
5632 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5634 void
5635 pa_output_global_address (FILE *file, rtx x, int round_constant)
5638 /* Imagine (high (const (plus ...))). */
5639 if (GET_CODE (x) == HIGH)
5640 x = XEXP (x, 0);
5642 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5643 output_addr_const (file, x);
5644 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5646 output_addr_const (file, x);
5647 fputs ("-$global$", file);
5649 else if (GET_CODE (x) == CONST)
5651 const char *sep = "";
5652 int offset = 0; /* assembler wants -$global$ at end */
5653 rtx base = NULL_RTX;
5655 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5657 case LABEL_REF:
5658 case SYMBOL_REF:
5659 base = XEXP (XEXP (x, 0), 0);
5660 output_addr_const (file, base);
5661 break;
5662 case CONST_INT:
5663 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5664 break;
5665 default:
5666 gcc_unreachable ();
5669 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5671 case LABEL_REF:
5672 case SYMBOL_REF:
5673 base = XEXP (XEXP (x, 0), 1);
5674 output_addr_const (file, base);
5675 break;
5676 case CONST_INT:
5677 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5678 break;
5679 default:
5680 gcc_unreachable ();
5683 /* How bogus. The compiler is apparently responsible for
5684 rounding the constant if it uses an LR field selector.
5686 The linker and/or assembler seem like a better place since
5687 they have to do this kind of thing already.
5689 If we fail to do this, HP's optimizing linker may eliminate
5690 an addil, but not update the ldw/stw/ldo instruction that
5691 uses the result of the addil. */
5692 if (round_constant)
5693 offset = ((offset + 0x1000) & ~0x1fff);
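/* E.g. (illustrative): an offset of 0x1234 rounds to 0x2000 and an
   offset of 0xfff rounds to 0, mirroring the assembler's LR field
   selector rounding.  */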
5695 switch (GET_CODE (XEXP (x, 0)))
5697 case PLUS:
5698 if (offset < 0)
5700 offset = -offset;
5701 sep = "-";
5703 else
5704 sep = "+";
5705 break;
5707 case MINUS:
5708 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5709 sep = "-";
5710 break;
5712 default:
5713 gcc_unreachable ();
5716 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5717 fputs ("-$global$", file);
5718 if (offset)
5719 fprintf (file, "%s%d", sep, offset);
5721 else
5722 output_addr_const (file, x);
5725 /* Output boilerplate text to appear at the beginning of the file.
5726 There are several possible versions. */
5727 #define aputs(x) fputs(x, asm_out_file)
5728 static inline void
5729 pa_file_start_level (void)
5731 if (TARGET_64BIT)
5732 aputs ("\t.LEVEL 2.0w\n");
5733 else if (TARGET_PA_20)
5734 aputs ("\t.LEVEL 2.0\n");
5735 else if (TARGET_PA_11)
5736 aputs ("\t.LEVEL 1.1\n");
5737 else
5738 aputs ("\t.LEVEL 1.0\n");
5741 static inline void
5742 pa_file_start_space (int sortspace)
5744 aputs ("\t.SPACE $PRIVATE$");
5745 if (sortspace)
5746 aputs (",SORT=16");
5747 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5748 if (flag_tm)
5749 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5750 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5751 "\n\t.SPACE $TEXT$");
5752 if (sortspace)
5753 aputs (",SORT=8");
5754 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5755 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5758 static inline void
5759 pa_file_start_file (int want_version)
5761 if (write_symbols != NO_DEBUG)
5763 output_file_directive (asm_out_file, main_input_filename);
5764 if (want_version)
5765 aputs ("\t.version\t\"01.01\"\n");
5769 static inline void
5770 pa_file_start_mcount (const char *aswhat)
5772 if (profile_flag)
5773 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5776 static void
5777 pa_elf_file_start (void)
5779 pa_file_start_level ();
5780 pa_file_start_mcount ("ENTRY");
5781 pa_file_start_file (0);
5784 static void
5785 pa_som_file_start (void)
5787 pa_file_start_level ();
5788 pa_file_start_space (0);
5789 aputs ("\t.IMPORT $global$,DATA\n"
5790 "\t.IMPORT $$dyncall,MILLICODE\n");
5791 pa_file_start_mcount ("CODE");
5792 pa_file_start_file (0);
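/* Illustrative SOM prologue for a PA 1.1 target compiled without
   profiling, debug info or -fgnu-tm (a sketch; the exact output
   depends on those options):

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */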
5795 static void
5796 pa_linux_file_start (void)
5798 pa_file_start_file (1);
5799 pa_file_start_level ();
5800 pa_file_start_mcount ("CODE");
5803 static void
5804 pa_hpux64_gas_file_start (void)
5806 pa_file_start_level ();
5807 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5808 if (profile_flag)
5809 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5810 #endif
5811 pa_file_start_file (1);
5814 static void
5815 pa_hpux64_hpas_file_start (void)
5817 pa_file_start_level ();
5818 pa_file_start_space (1);
5819 pa_file_start_mcount ("CODE");
5820 pa_file_start_file (0);
5822 #undef aputs
5824 /* Search the deferred plabel list for SYMBOL and return its internal
5825 label. If an entry for SYMBOL is not found, a new entry is created. */
5828 pa_get_deferred_plabel (rtx symbol)
5830 const char *fname = XSTR (symbol, 0);
5831 size_t i;
5833 /* See if we have already put this function on the list of deferred
5834 plabels. This list is generally small, so a linear search is not
5835 too ugly. If it proves too slow, replace it with something faster. */
5836 for (i = 0; i < n_deferred_plabels; i++)
5837 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5838 break;
5840 /* If the deferred plabel list is empty, or this entry was not found
5841 on the list, create a new entry on the list. */
5842 if (deferred_plabels == NULL || i == n_deferred_plabels)
5844 tree id;
5846 if (deferred_plabels == 0)
5847 deferred_plabels = ggc_alloc<deferred_plabel> ();
5848 else
5849 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5850 deferred_plabels,
5851 n_deferred_plabels + 1);
5853 i = n_deferred_plabels++;
5854 deferred_plabels[i].internal_label = gen_label_rtx ();
5855 deferred_plabels[i].symbol = symbol;
5857 /* Gross. We have just implicitly taken the address of this
5858 function. Mark it in the same manner as assemble_name. */
5859 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5860 if (id)
5861 mark_referenced (id);
5864 return deferred_plabels[i].internal_label;
5867 static void
5868 output_deferred_plabels (void)
5870 size_t i;
5872 /* If we have some deferred plabels, then we need to switch into the
5873 data or readonly data section, and align it to a 4 byte boundary
5874 (8 bytes in the 64-bit runtime) before outputting the deferred plabels. */
5875 if (n_deferred_plabels)
5877 switch_to_section (flag_pic ? data_section : readonly_data_section);
5878 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5881 /* Now output the deferred plabels. */
5882 for (i = 0; i < n_deferred_plabels; i++)
5884 targetm.asm_out.internal_label (asm_out_file, "L",
5885 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5886 assemble_integer (deferred_plabels[i].symbol,
5887 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
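  /* Roughly, each deferred plabel expands to an internal label followed
     by a word-sized pointer to the function, e.g. (label syntax
     approximate):

	L$0042:
		.word	foo  */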
5891 /* Initialize optabs to point to emulation routines. */
5893 static void
5894 pa_init_libfuncs (void)
5896 if (HPUX_LONG_DOUBLE_LIBRARY)
5898 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5899 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5900 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5901 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5902 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5903 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5904 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5905 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5906 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5908 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5909 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5910 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5911 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5912 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5913 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5914 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5916 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5917 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5918 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5919 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5921 set_conv_libfunc (sfix_optab, SImode, TFmode,
5922 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5923 : "_U_Qfcnvfxt_quad_to_sgl");
5924 set_conv_libfunc (sfix_optab, DImode, TFmode,
5925 "_U_Qfcnvfxt_quad_to_dbl");
5926 set_conv_libfunc (ufix_optab, SImode, TFmode,
5927 "_U_Qfcnvfxt_quad_to_usgl");
5928 set_conv_libfunc (ufix_optab, DImode, TFmode,
5929 "_U_Qfcnvfxt_quad_to_udbl");
5931 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5932 "_U_Qfcnvxf_sgl_to_quad");
5933 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5934 "_U_Qfcnvxf_dbl_to_quad");
5935 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5936 "_U_Qfcnvxf_usgl_to_quad");
5937 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5938 "_U_Qfcnvxf_udbl_to_quad");
5941 if (TARGET_SYNC_LIBCALLS)
5942 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
5945 /* HP's millicode routines mean something special to the assembler.
5946 Keep track of which ones we have used. */
5948 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5949 static void import_milli (enum millicodes);
5950 static char imported[(int) end1000];
5951 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5952 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5953 #define MILLI_START 10
5955 static void
5956 import_milli (enum millicodes code)
5958 char str[sizeof (import_string)];
5960 if (!imported[(int) code])
5962 imported[(int) code] = 1;
5963 strcpy (str, import_string);
5964 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5965 output_asm_insn (str, 0);
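/* For example, the first import_milli (mulI) call writes
   ".IMPORT $$mulI,MILLICODE" into the output; later calls for the
   same code are no-ops because of the imported[] array.  */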
5969 /* The register constraints have put the operands and return value in
5970 the proper registers. */
5972 const char *
5973 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5975 import_milli (mulI);
5976 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5979 /* Emit the rtl for doing a division by a constant. */
5981 /* Do magic division millicodes exist for this value? */
5982 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
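/* That is, $$divI_n/$$divU_n millicodes exist for n in
   {3, 5, 6, 7, 9, 10, 12, 14, 15}.  */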
5984 /* We'll use an array to keep track of the magic millicodes and
5985 whether or not we've used them already. [n][0] is signed, [n][1] is
5986 unsigned. */
5988 static int div_milli[16][2];
5991 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5993 if (GET_CODE (operands[2]) == CONST_INT
5994 && INTVAL (operands[2]) > 0
5995 && INTVAL (operands[2]) < 16
5996 && pa_magic_milli[INTVAL (operands[2])])
5998 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6000 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6001 emit
6002 (gen_rtx_PARALLEL
6003 (VOIDmode,
6004 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6005 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6006 SImode,
6007 gen_rtx_REG (SImode, 26),
6008 operands[2])),
6009 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6010 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6011 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6012 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6013 gen_rtx_CLOBBER (VOIDmode, ret))));
6014 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6015 return 1;
6017 return 0;
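/* The PARALLEL built above describes, e.g., "%r29 = %r26 / 5" together
   with clobbers of the two scratch operands, %r26, %r25 and the
   millicode return pointer (%r31, or %r2 in the 64-bit runtime),
   matching the registers the $$divI/$$divU millicode routines use.  */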
6020 const char *
6021 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6023 int divisor;
6025 /* If the divisor is a constant, try to use one of the special
6026 opcodes. */
6027 if (GET_CODE (operands[0]) == CONST_INT)
6029 static char buf[100];
6030 divisor = INTVAL (operands[0]);
6031 if (!div_milli[divisor][unsignedp])
6033 div_milli[divisor][unsignedp] = 1;
6034 if (unsignedp)
6035 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6036 else
6037 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6039 if (unsignedp)
6041 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6042 INTVAL (operands[0]));
6043 return pa_output_millicode_call (insn,
6044 gen_rtx_SYMBOL_REF (SImode, buf));
6046 else
6048 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6049 INTVAL (operands[0]));
6050 return pa_output_millicode_call (insn,
6051 gen_rtx_SYMBOL_REF (SImode, buf));
6054 /* Divisor isn't a special constant. */
6055 else
6057 if (unsignedp)
6059 import_milli (divU);
6060 return pa_output_millicode_call (insn,
6061 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6063 else
6065 import_milli (divI);
6066 return pa_output_millicode_call (insn,
6067 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
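/* Illustrative output for the first signed division by 7 in a unit
   (a sketch; the exact call sequence comes from
   pa_output_millicode_call and varies with target and distance):

	.IMPORT $$divI_7,MILLICODE
	bl $$divI_7,%r31  */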
6072 /* Output a $$rem millicode to do mod. */
6074 const char *
6075 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6077 if (unsignedp)
6079 import_milli (remU);
6080 return pa_output_millicode_call (insn,
6081 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6083 else
6085 import_milli (remI);
6086 return pa_output_millicode_call (insn,
6087 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6091 void
6092 pa_output_arg_descriptor (rtx_insn *call_insn)
6094 const char *arg_regs[4];
6095 machine_mode arg_mode;
6096 rtx link;
6097 int i, output_flag = 0;
6098 int regno;
6100 /* We neither need nor want argument location descriptors for the
6101 64-bit runtime environment or the ELF32 environment. */
6102 if (TARGET_64BIT || TARGET_ELF32)
6103 return;
6105 for (i = 0; i < 4; i++)
6106 arg_regs[i] = 0;
6108 /* Specify explicitly that no argument relocations should take place
6109 if using the portable runtime calling conventions. */
6110 if (TARGET_PORTABLE_RUNTIME)
6112 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6113 asm_out_file);
6114 return;
6117 gcc_assert (CALL_P (call_insn));
6118 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6119 link; link = XEXP (link, 1))
6121 rtx use = XEXP (link, 0);
6123 if (! (GET_CODE (use) == USE
6124 && GET_CODE (XEXP (use, 0)) == REG
6125 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6126 continue;
6128 arg_mode = GET_MODE (XEXP (use, 0));
6129 regno = REGNO (XEXP (use, 0));
6130 if (regno >= 23 && regno <= 26)
6132 arg_regs[26 - regno] = "GR";
6133 if (arg_mode == DImode)
6134 arg_regs[25 - regno] = "GR";
6136 else if (regno >= 32 && regno <= 39)
6138 if (arg_mode == SFmode)
6139 arg_regs[(regno - 32) / 2] = "FR";
6140 else
6142 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6143 arg_regs[(regno - 34) / 2] = "FR";
6144 arg_regs[(regno - 34) / 2 + 1] = "FU";
6145 #else
6146 arg_regs[(regno - 34) / 2] = "FU";
6147 arg_regs[(regno - 34) / 2 + 1] = "FR";
6148 #endif
6152 fputs ("\t.CALL ", asm_out_file);
6153 for (i = 0; i < 4; i++)
6155 if (arg_regs[i])
6157 if (output_flag++)
6158 fputc (',', asm_out_file);
6159 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6162 fputc ('\n', asm_out_file);
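/* For example, in the 32-bit SOM runtime a call passing an int in
   %r26 and a double in %fr7 (argument slots 2 and 3) would typically
   produce

	.CALL ARGW0=GR,ARGW2=FR,ARGW3=FU

   with FR/FU swapped when HP_FP_ARG_DESCRIPTOR_REVERSED is defined.  */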
6165 /* Inform reload about cases where moving X with a mode MODE to or from
6166 a register in RCLASS requires an extra scratch or immediate register.
6167 Return the class needed for the immediate register. */
6169 static reg_class_t
6170 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6171 machine_mode mode, secondary_reload_info *sri)
6173 int regno;
6174 enum reg_class rclass = (enum reg_class) rclass_i;
6176 /* Handle the easy stuff first. */
6177 if (rclass == R1_REGS)
6178 return NO_REGS;
6180 if (REG_P (x))
6182 regno = REGNO (x);
6183 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6184 return NO_REGS;
6186 else
6187 regno = -1;
6189 /* If we have something like (mem (mem (...))), we can safely assume the
6190 inner MEM will end up in a general register after reloading, so there's
6191 no need for a secondary reload. */
6192 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6193 return NO_REGS;
6195 /* Trying to load a constant into a FP register during PIC code
6196 generation requires %r1 as a scratch register. For float modes,
6197 the only legitimate constant is CONST0_RTX. However, there are
6198 a few patterns that accept constant double operands. */
6199 if (flag_pic
6200 && FP_REG_CLASS_P (rclass)
6201 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6203 switch (mode)
6205 case E_SImode:
6206 sri->icode = CODE_FOR_reload_insi_r1;
6207 break;
6209 case E_DImode:
6210 sri->icode = CODE_FOR_reload_indi_r1;
6211 break;
6213 case E_SFmode:
6214 sri->icode = CODE_FOR_reload_insf_r1;
6215 break;
6217 case E_DFmode:
6218 sri->icode = CODE_FOR_reload_indf_r1;
6219 break;
6221 default:
6222 gcc_unreachable ();
6224 return NO_REGS;
6227 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6228 register when we're generating PIC code or when the operand isn't
6229 readonly. */
6230 if (pa_symbolic_expression_p (x))
6232 if (GET_CODE (x) == HIGH)
6233 x = XEXP (x, 0);
6235 if (flag_pic || !read_only_operand (x, VOIDmode))
6237 switch (mode)
6239 case E_SImode:
6240 sri->icode = CODE_FOR_reload_insi_r1;
6241 break;
6243 case E_DImode:
6244 sri->icode = CODE_FOR_reload_indi_r1;
6245 break;
6247 default:
6248 gcc_unreachable ();
6250 return NO_REGS;
6254 /* Profiling showed the PA port spends about 1.3% of its compilation
6255 time in true_regnum from calls inside pa_secondary_reload_class. */
6256 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6257 regno = true_regnum (x);
6259 /* Handle reloads for floating point loads and stores. */
6260 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6261 && FP_REG_CLASS_P (rclass))
6263 if (MEM_P (x))
6265 x = XEXP (x, 0);
6267 /* We don't need a secondary reload for indexed memory addresses.
6269 When INT14_OK_STRICT is true, it might appear that we could
6270 directly allow register indirect memory addresses. However,
6271 this doesn't work because we don't support SUBREGs in
6272 floating-point register copies and reload doesn't tell us
6273 when it's going to use a SUBREG. */
6274 if (IS_INDEX_ADDR_P (x))
6275 return NO_REGS;
6278 /* Request a secondary reload with a general scratch register
6279 for everything else. ??? Could symbolic operands be handled
6280 directly when generating non-pic PA 2.0 code? */
6281 sri->icode = (in_p
6282 ? direct_optab_handler (reload_in_optab, mode)
6283 : direct_optab_handler (reload_out_optab, mode));
6284 return NO_REGS;
6287 /* A SAR<->FP register copy requires an intermediate general register
6288 and secondary memory. We need a secondary reload with a general
6289 scratch register for spills. */
6290 if (rclass == SHIFT_REGS)
6292 /* Handle spill. */
6293 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6295 sri->icode = (in_p
6296 ? direct_optab_handler (reload_in_optab, mode)
6297 : direct_optab_handler (reload_out_optab, mode));
6298 return NO_REGS;
6301 /* Handle FP copy. */
6302 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6303 return GENERAL_REGS;
6306 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6307 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6308 && FP_REG_CLASS_P (rclass))
6309 return GENERAL_REGS;
6311 return NO_REGS;
6314 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6316 static bool
6317 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6318 reg_class_t class1 ATTRIBUTE_UNUSED,
6319 reg_class_t class2 ATTRIBUTE_UNUSED)
6321 #ifdef PA_SECONDARY_MEMORY_NEEDED
6322 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6323 #else
6324 return false;
6325 #endif
6328 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6329 is only marked as live on entry by df-scan when it is a fixed
6330 register. It isn't a fixed register in the 64-bit runtime,
6331 so we need to mark it here. */
6333 static void
6334 pa_extra_live_on_entry (bitmap regs)
6336 if (TARGET_64BIT)
6337 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6340 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6341 to prevent it from being deleted. */
6344 pa_eh_return_handler_rtx (void)
6346 rtx tmp;
6348 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6349 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6350 tmp = gen_rtx_MEM (word_mode, tmp);
6351 tmp->volatil = 1;
6352 return tmp;
6355 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6356 by invisible reference. As a GCC extension, we also pass anything
6357 with a zero or variable size by reference.
6359 The 64-bit runtime does not describe passing any types by invisible
6360 reference. The internals of GCC can't currently handle passing
6361 empty structures, and zero or variable length arrays when they are
6362 not passed entirely on the stack or by reference. Thus, as a GCC
6363 extension, we pass these types by reference. The HP compiler doesn't
6364 support these types, so hopefully there shouldn't be any compatibility
6365 issues. This may have to be revisited when HP releases a C99 compiler
6366 or updates the ABI. */
6368 static bool
6369 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6371 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6372 if (TARGET_64BIT)
6373 return size <= 0;
6374 else
6375 return size <= 0 || size > 8;
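/* For example, a 16-byte struct is passed by invisible reference in
   the 32-bit runtime but by value in the 64-bit runtime, while an
   empty struct is passed by reference in both.  */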
6378 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6380 static pad_direction
6381 pa_function_arg_padding (machine_mode mode, const_tree type)
6383 if (mode == BLKmode
6384 || (TARGET_64BIT
6385 && type
6386 && (AGGREGATE_TYPE_P (type)
6387 || TREE_CODE (type) == COMPLEX_TYPE
6388 || VECTOR_TYPE_P (type))))
6390 /* Return PAD_NONE if justification is not required. */
6391 if (type
6392 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6393 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6394 return PAD_NONE;
6396 /* The directions set here are ignored when a BLKmode argument larger
6397 than a word is placed in a register. Different code is used for
6398 the stack and registers. This makes it difficult to have a
6399 consistent data representation for both the stack and registers.
6400 For both runtimes, the justification and padding for arguments on
6401 the stack and in registers should be identical. */
6402 if (TARGET_64BIT)
6403 /* The 64-bit runtime specifies left justification for aggregates. */
6404 return PAD_UPWARD;
6405 else
6406 /* The 32-bit runtime architecture specifies right justification.
6407 When the argument is passed on the stack, the argument is padded
6408 with garbage on the left. The HP compiler pads with zeros. */
6409 return PAD_DOWNWARD;
6412 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6413 return PAD_DOWNWARD;
6414 else
6415 return PAD_NONE;
6419 /* Do what is necessary for `va_start'. We look at the current function
6420 to determine if stdargs or varargs is used and fill in an initial
6421 va_list. A pointer to this constructor is returned. */
6423 static rtx
6424 hppa_builtin_saveregs (void)
6426 rtx offset, dest;
6427 tree fntype = TREE_TYPE (current_function_decl);
6428 int argadj = ((!stdarg_p (fntype))
6429 ? UNITS_PER_WORD : 0);
6431 if (argadj)
6432 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6433 else
6434 offset = crtl->args.arg_offset_rtx;
6436 if (TARGET_64BIT)
6438 int i, off;
6440 /* Adjust for varargs/stdarg differences. */
6441 if (argadj)
6442 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6443 else
6444 offset = crtl->args.arg_offset_rtx;
6446 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6447 from the incoming arg pointer and growing to larger addresses. */
6448 for (i = 26, off = -64; i >= 19; i--, off += 8)
6449 emit_move_insn (gen_rtx_MEM (word_mode,
6450 plus_constant (Pmode,
6451 arg_pointer_rtx, off)),
6452 gen_rtx_REG (word_mode, i));
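      /* This places %r26 at -64, %r25 at -56, ..., %r19 at -8 relative
	 to the incoming arg pointer.  */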
6454 /* The incoming args pointer points just beyond the flushback area;
6455 normally this is not a serious concern. However, when we are doing
6456 varargs/stdargs we want to make the arg pointer point to the start
6457 of the incoming argument area. */
6458 emit_move_insn (virtual_incoming_args_rtx,
6459 plus_constant (Pmode, arg_pointer_rtx, -64));
6461 /* Now return a pointer to the first anonymous argument. */
6462 return copy_to_reg (expand_binop (Pmode, add_optab,
6463 virtual_incoming_args_rtx,
6464 offset, 0, 0, OPTAB_LIB_WIDEN));
6467 /* Store general registers on the stack. */
6468 dest = gen_rtx_MEM (BLKmode,
6469 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6470 -16));
6471 set_mem_alias_set (dest, get_varargs_alias_set ());
6472 set_mem_align (dest, BITS_PER_WORD);
6473 move_block_from_reg (23, dest, 4);
6475 /* move_block_from_reg will emit code to store the argument registers
6476 individually as scalar stores.
6478 However, other insns may later load from the same addresses for
6479 a structure load (passing a struct to a varargs routine).
6481 The alias code assumes that such aliasing can never happen, so we
6482 have to keep memory referencing insns from moving up beyond the
6483 last argument register store. So we emit a blockage insn here. */
6484 emit_insn (gen_blockage ());
6486 return copy_to_reg (expand_binop (Pmode, add_optab,
6487 crtl->args.internal_arg_pointer,
6488 offset, 0, 0, OPTAB_LIB_WIDEN));
6491 static void
6492 hppa_va_start (tree valist, rtx nextarg)
6494 nextarg = expand_builtin_saveregs ();
6495 std_expand_builtin_va_start (valist, nextarg);
6498 static tree
6499 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6500 gimple_seq *post_p)
6502 if (TARGET_64BIT)
6504 /* Args grow upward. We can use the generic routines. */
6505 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6507 else /* !TARGET_64BIT */
6509 tree ptr = build_pointer_type (type);
6510 tree valist_type;
6511 tree t, u;
6512 unsigned int size, ofs;
6513 bool indirect;
6515 indirect = pass_va_arg_by_reference (type);
6516 if (indirect)
6518 type = ptr;
6519 ptr = build_pointer_type (type);
6521 size = int_size_in_bytes (type);
6522 valist_type = TREE_TYPE (valist);
6524 /* Args grow down. Not handled by generic routines. */
6526 u = fold_convert (sizetype, size_in_bytes (type));
6527 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6528 t = fold_build_pointer_plus (valist, u);
6530 /* Align to 4 or 8 byte boundary depending on argument size. */
6532 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6533 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6534 t = fold_convert (valist_type, t);
6536 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6538 ofs = (8 - size) % 4;
6539 if (ofs != 0)
6540 t = fold_build_pointer_plus_hwi (t, ofs);
6542 t = fold_convert (ptr, t);
6543 t = build_va_arg_indirect_ref (t);
6545 if (indirect)
6546 t = build_va_arg_indirect_ref (t);
6548 return t;
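  /* Worked example for the 32-bit case: for a 1-byte char, the pointer
     is moved down by 1, masked to a 4-byte boundary, and then advanced
     by ofs = (8 - 1) % 4 = 3 so that it addresses the right-justified
     byte within the argument word.  */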
6552 /* True if MODE is valid for the target. By "valid", we mean able to
6553 be manipulated in non-trivial ways. In particular, this means all
6554 the arithmetic is supported. */
6556 static bool
6557 pa_scalar_mode_supported_p (scalar_mode mode)
6559 int precision = GET_MODE_PRECISION (mode);
6561 if (TARGET_64BIT && mode == TImode)
6562 return true;
6564 switch (GET_MODE_CLASS (mode))
6566 case MODE_PARTIAL_INT:
6567 case MODE_INT:
6568 if (precision == CHAR_TYPE_SIZE)
6569 return true;
6570 if (precision == SHORT_TYPE_SIZE)
6571 return true;
6572 if (precision == INT_TYPE_SIZE)
6573 return true;
6574 if (precision == LONG_TYPE_SIZE)
6575 return true;
6576 if (precision == LONG_LONG_TYPE_SIZE)
6577 return true;
6578 return false;
6580 case MODE_FLOAT:
6581 if (precision == FLOAT_TYPE_SIZE)
6582 return true;
6583 if (precision == DOUBLE_TYPE_SIZE)
6584 return true;
6585 if (precision == LONG_DOUBLE_TYPE_SIZE)
6586 return true;
6587 return false;
6589 case MODE_DECIMAL_FLOAT:
6590 return false;
6592 default:
6593 gcc_unreachable ();
6597 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6598 it branches into the delay slot. Otherwise, return FALSE. */
6600 static bool
6601 branch_to_delay_slot_p (rtx_insn *insn)
6603 rtx_insn *jump_insn;
6605 if (dbr_sequence_length ())
6606 return FALSE;
6608 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6609 while (insn)
6611 insn = next_active_insn (insn);
6612 if (jump_insn == insn)
6613 return TRUE;
6615 /* We can't rely on the length of asms. So, we return FALSE when
6616 the branch is followed by an asm. */
6617 if (!insn
6618 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6619 || asm_noperands (PATTERN (insn)) >= 0
6620 || get_attr_length (insn) > 0)
6621 break;
6624 return FALSE;
6627 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6629 This occurs when INSN has an unfilled delay slot and is followed
6630 by an asm. Disaster can occur if the asm is empty and the jump
6631 branches into the delay slot. So, we add a nop in the delay slot
6632 when this occurs. */
6634 static bool
6635 branch_needs_nop_p (rtx_insn *insn)
6637 rtx_insn *jump_insn;
6639 if (dbr_sequence_length ())
6640 return FALSE;
6642 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6643 while (insn)
6645 insn = next_active_insn (insn);
6646 if (!insn || jump_insn == insn)
6647 return TRUE;
6649 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6650 || asm_noperands (PATTERN (insn)) >= 0)
6651 && get_attr_length (insn) > 0)
6652 break;
6655 return FALSE;
6658 /* Return TRUE if INSN, a forward jump insn, can use nullification
6659 to skip the following instruction. This avoids an extra cycle due
6660 to a mis-predicted branch when we fall through. */
6662 static bool
6663 use_skip_p (rtx_insn *insn)
6665 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6667 while (insn)
6669 insn = next_active_insn (insn);
6671 /* We can't rely on the length of asms, so we can't skip asms. */
6672 if (!insn
6673 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6674 || asm_noperands (PATTERN (insn)) >= 0)
6675 break;
6676 if (get_attr_length (insn) == 4
6677 && jump_insn == next_active_insn (insn))
6678 return TRUE;
6679 if (get_attr_length (insn) > 0)
6680 break;
6683 return FALSE;
6686 /* This routine handles all the normal conditional branch sequences we
6687 might need to generate. It handles compare immediate vs compare
6688 register, nullification of delay slots, varying length branches,
6689 negated branches, and all combinations of the above. It returns the
6690 output appropriate to emit the branch corresponding to all given
6691 parameters. */
6693 const char *
6694 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6696 static char buf[100];
6697 bool useskip;
6698 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6699 int length = get_attr_length (insn);
6700 int xdelay;
6702 /* A conditional branch to the following instruction (e.g. the delay slot)
6703 is asking for a disaster. This can happen when not optimizing and
6704 when jump optimization fails.
6706 While it is usually safe to emit nothing, this can fail if the
6707 preceding instruction is a nullified branch with an empty delay
6708 slot and the same branch target as this branch. We could check
6709 for this but jump optimization should eliminate nop jumps. It
6710 is always safe to emit a nop. */
6711 if (branch_to_delay_slot_p (insn))
6712 return "nop";
6714 /* The doubleword form of the cmpib instruction doesn't have the LEU
6715 and GTU conditions while the cmpb instruction does. Since we accept
6716 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6717 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6718 operands[2] = gen_rtx_REG (DImode, 0);
6719 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6720 operands[1] = gen_rtx_REG (DImode, 0);
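  /* In effect, a DImode comparison against an immediate zero is turned
     into a comparison against the hard register %r0, so the register
     form (cmpb) is emitted rather than the immediate form (cmpib).  */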
6722 /* If this is a long branch with its delay slot unfilled, set `nullify'
6723 as it can nullify the delay slot and save a nop. */
6724 if (length == 8 && dbr_sequence_length () == 0)
6725 nullify = 1;
6727 /* If this is a short forward conditional branch which did not get
6728 its delay slot filled, the delay slot can still be nullified. */
6729 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6730 nullify = forward_branch_p (insn);
6732 /* A forward branch over a single nullified insn can be done with a
6733 comclr instruction. This avoids a single cycle penalty due to
6734 mis-predicted branch if we fall through (branch not taken). */
6735 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6737 switch (length)
6739 /* All short conditional branches except backwards with an unfilled
6740 delay slot. */
6741 case 4:
6742 if (useskip)
6743 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6744 else
6745 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6746 if (GET_MODE (operands[1]) == DImode)
6747 strcat (buf, "*");
6748 if (negated)
6749 strcat (buf, "%B3");
6750 else
6751 strcat (buf, "%S3");
6752 if (useskip)
6753 strcat (buf, " %2,%r1,%%r0");
6754 else if (nullify)
6756 if (branch_needs_nop_p (insn))
6757 strcat (buf, ",n %2,%r1,%0%#");
6758 else
6759 strcat (buf, ",n %2,%r1,%0");
6761 else
6762 strcat (buf, " %2,%r1,%0");
6763 break;
6765 /* All long conditionals. Note a short backward branch with an
6766 unfilled delay slot is treated just like a long backward branch
6767 with an unfilled delay slot. */
6768 case 8:
6769 /* Handle weird backwards branch with a filled delay slot
6770 which is nullified. */
6771 if (dbr_sequence_length () != 0
6772 && ! forward_branch_p (insn)
6773 && nullify)
6775 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6776 if (GET_MODE (operands[1]) == DImode)
6777 strcat (buf, "*");
6778 if (negated)
6779 strcat (buf, "%S3");
6780 else
6781 strcat (buf, "%B3");
6782 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6784 /* Handle short backwards branch with an unfilled delay slot.
6785 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6786 taken and untaken branches. */
6787 else if (dbr_sequence_length () == 0
6788 && ! forward_branch_p (insn)
6789 && INSN_ADDRESSES_SET_P ()
6790 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6791 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6793 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6794 if (GET_MODE (operands[1]) == DImode)
6795 strcat (buf, "*");
6796 if (negated)
6797 strcat (buf, "%B3 %2,%r1,%0%#");
6798 else
6799 strcat (buf, "%S3 %2,%r1,%0%#");
6801 else
6803 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6804 if (GET_MODE (operands[1]) == DImode)
6805 strcat (buf, "*");
6806 if (negated)
6807 strcat (buf, "%S3");
6808 else
6809 strcat (buf, "%B3");
6810 if (nullify)
6811 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6812 else
6813 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6815 break;
6817 default:
6818 /* The reversed conditional branch must branch over one additional
6819 instruction if the delay slot is filled and needs to be extracted
6820 by pa_output_lbranch. If the delay slot is empty or this is a
6821 nullified forward branch, the instruction after the reversed
6822 condition branch must be nullified. */
6823 if (dbr_sequence_length () == 0
6824 || (nullify && forward_branch_p (insn)))
6826 nullify = 1;
6827 xdelay = 0;
6828 operands[4] = GEN_INT (length);
6830 else
6832 xdelay = 1;
6833 operands[4] = GEN_INT (length + 4);
6836 /* Create a reversed conditional branch which branches around
6837 the following insns. */
6838 if (GET_MODE (operands[1]) != DImode)
6840 if (nullify)
6842 if (negated)
6843 strcpy (buf,
6844 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6845 else
6846 strcpy (buf,
6847 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6849 else
6851 if (negated)
6852 strcpy (buf,
6853 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6854 else
6855 strcpy (buf,
6856 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6859 else
6861 if (nullify)
6863 if (negated)
6864 strcpy (buf,
6865 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6866 else
6867 strcpy (buf,
6868 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6870 else
6872 if (negated)
6873 strcpy (buf,
6874 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6875 else
6876 strcpy (buf,
6877 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6881 output_asm_insn (buf, operands);
6882 return pa_output_lbranch (operands[0], insn, xdelay);
6884 return buf;
6887 /* Output a PIC pc-relative instruction sequence to load the address of
6888 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6889 or a code label. OPERANDS[1] specifies the register to use to load
6890 the program counter. OPERANDS[3] may be used for label generation.
6891 The sequence is always three instructions in length. The program
6892 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6893 Register %r1 is clobbered. */
6895 static void
6896 pa_output_pic_pcrel_sequence (rtx *operands)
6898 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6899 if (TARGET_PA_20)
6901 /* We can use mfia to determine the current program counter. */
6902 if (TARGET_SOM || !TARGET_GAS)
6904 operands[3] = gen_label_rtx ();
6905 targetm.asm_out.internal_label (asm_out_file, "L",
6906 CODE_LABEL_NUMBER (operands[3]));
6907 output_asm_insn ("mfia %1", operands);
6908 output_asm_insn ("addil L'%0-%l3,%1", operands);
6909 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6911 else
6913 output_asm_insn ("mfia %1", operands);
6914 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6915 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6918 else
6920 /* We need to use a branch to determine the current program counter. */
6921 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6922 if (TARGET_SOM || !TARGET_GAS)
6924 operands[3] = gen_label_rtx ();
6925 output_asm_insn ("addil L'%0-%l3,%1", operands);
6926 targetm.asm_out.internal_label (asm_out_file, "L",
6927 CODE_LABEL_NUMBER (operands[3]));
6928 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6930 else
6932 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6933 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
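/* Illustrative PA 2.0 GAS output for this sequence, with the symbol
   "sym" loaded into %r2 and the program counter read into %r1:

	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r2  */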
6938 /* This routine handles output of long unconditional branches that
6939 exceed the maximum range of a simple branch instruction. Since
6940 we don't have a register available for the branch, we save register
6941 %r1 in the frame marker, load the branch destination DEST into %r1,
6942 execute the branch, and restore %r1 in the delay slot of the branch.
6944 Since long branches may have an insn in the delay slot and the
6945 delay slot is used to restore %r1, we in general need to extract
6946 this insn and execute it before the branch. However, to facilitate
6947 use of this function by conditional branches, we also provide an
6948 option to not extract the delay insn so that it will be emitted
6949 after the long branch. So, if there is an insn in the delay slot,
6950 it is extracted if XDELAY is nonzero.
6952 The lengths of the various long-branch sequences are 20, 16 and 24
6953 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6955 const char *
6956 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6958 rtx xoperands[4];
6960 xoperands[0] = dest;
6962 /* First, free up the delay slot. */
6963 if (xdelay && dbr_sequence_length () != 0)
6965 /* We can't handle a jump in the delay slot. */
6966 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6968 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6969 optimize, 0, NULL);
6971 /* Now delete the delay insn. */
6972 SET_INSN_DELETED (NEXT_INSN (insn));
6975 /* Output an insn to save %r1. The runtime documentation doesn't
6977 specify whether the "Clean Up" slot in the caller's frame can
6977 be clobbered by the callee. It isn't copied by HP's builtin
6978 alloca, so this suggests that it can be clobbered if necessary.
6979 The "Static Link" location is copied by HP builtin alloca, so
6980 we avoid using it. Using the cleanup slot might be a problem
6981 if we have to interoperate with languages that pass cleanup
6982 information. However, it should be possible to handle these
6983 situations with GCC's asm feature.
6985 The "Current RP" slot is reserved for the called procedure, so
6986 we try to use it when we don't have a frame of our own. It's
6987 rather unlikely that we won't have a frame when we need to emit
6988 a very long branch.
6990 Really the way to go long term is a register scavenger; go to
6991 the target of the jump and find a register which we can use
6992 as a scratch to hold the value in %r1. Then, we wouldn't have
6993 to free up the delay slot or clobber a slot that may be needed
6994 for other purposes. */
6995 if (TARGET_64BIT)
6997 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6998 /* Use the return pointer slot in the frame marker. */
6999 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7000 else
7001 /* Use the slot at -40 in the frame marker since HP builtin
7002 alloca doesn't copy it. */
7003 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7005 else
7007 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7008 /* Use the return pointer slot in the frame marker. */
7009 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7010 else
7011 /* Use the "Clean Up" slot in the frame marker. In GCC,
7012 the only other use of this location is for copying a
7013 floating point double argument from a floating-point
7014 register to two general registers. The copy is done
7015 as an "atomic" operation when outputting a call, so it
7016 won't interfere with our using the location here. */
7017 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7020 if (TARGET_PORTABLE_RUNTIME)
7022 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7023 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7024 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7026 else if (flag_pic)
7028 xoperands[1] = gen_rtx_REG (Pmode, 1);
7029 xoperands[2] = xoperands[1];
7030 pa_output_pic_pcrel_sequence (xoperands);
7031 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7033 else
7034 /* Now output a very long branch to the original target. */
7035 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7037 /* Now restore the value of %r1 in the delay slot. */
7038 if (TARGET_64BIT)
7040 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7041 return "ldd -16(%%r30),%%r1";
7042 else
7043 return "ldd -40(%%r30),%%r1";
7045 else
7047 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7048 return "ldw -20(%%r30),%%r1";
7049 else
7050 return "ldw -12(%%r30),%%r1";
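/* Illustrative non-PIC 32-bit sequence (16 bytes) for a long branch to
   "dest" when the return pointer slot is free:

	stw %r1,-20(%r30)
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)
	ldw -20(%r30),%r1  */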
7054 /* This routine handles all the branch-on-bit conditional branch sequences we
7055 might need to generate. It handles nullification of delay slots,
7056 varying length branches, negated branches and all combinations of the
7057 above. It returns the appropriate output template to emit the branch. */
7059 const char *
7060 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7062 static char buf[100];
7063 bool useskip;
7064 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7065 int length = get_attr_length (insn);
7066 int xdelay;
7068 /* A conditional branch to the following instruction (e.g. the delay slot) is
7069 asking for a disaster. I do not think this can happen as this pattern
7070 is only used when optimizing; jump optimization should eliminate the
7071 jump. But be prepared just in case. */
7073 if (branch_to_delay_slot_p (insn))
7074 return "nop";
7076 /* If this is a long branch with its delay slot unfilled, set `nullify'
7077 as it can nullify the delay slot and save a nop. */
7078 if (length == 8 && dbr_sequence_length () == 0)
7079 nullify = 1;
7081 /* If this is a short forward conditional branch which did not get
7082 its delay slot filled, the delay slot can still be nullified. */
7083 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7084 nullify = forward_branch_p (insn);
7086 /* A forward branch over a single nullified insn can be done with an
7087 extrs instruction. This avoids a single cycle penalty due to
7088 mis-predicted branch if we fall through (branch not taken). */
7089 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7091 switch (length)
7094 /* All short conditional branches except backwards with an unfilled
7095 delay slot. */
7096 case 4:
7097 if (useskip)
7098 strcpy (buf, "{extrs,|extrw,s,}");
7099 else
7100 strcpy (buf, "bb,");
7101 if (useskip && GET_MODE (operands[0]) == DImode)
7102 strcpy (buf, "extrd,s,*");
7103 else if (GET_MODE (operands[0]) == DImode)
7104 strcpy (buf, "bb,*");
7105 if ((which == 0 && negated)
7106 || (which == 1 && ! negated))
7107 strcat (buf, ">=");
7108 else
7109 strcat (buf, "<");
7110 if (useskip)
7111 strcat (buf, " %0,%1,1,%%r0");
7112 else if (nullify && negated)
7114 if (branch_needs_nop_p (insn))
7115 strcat (buf, ",n %0,%1,%3%#");
7116 else
7117 strcat (buf, ",n %0,%1,%3");
7119 else if (nullify && ! negated)
7121 if (branch_needs_nop_p (insn))
7122 strcat (buf, ",n %0,%1,%2%#");
7123 else
7124 strcat (buf, ",n %0,%1,%2");
7126 else if (! nullify && negated)
7127 strcat (buf, " %0,%1,%3");
7128 else if (! nullify && ! negated)
7129 strcat (buf, " %0,%1,%2");
7130 break;
7132 /* All long conditionals. Note a short backward branch with an
7133 unfilled delay slot is treated just like a long backward branch
7134 with an unfilled delay slot. */
7135 case 8:
7136 /* Handle weird backwards branch with a filled delay slot
7137 which is nullified. */
7138 if (dbr_sequence_length () != 0
7139 && ! forward_branch_p (insn)
7140 && nullify)
7142 strcpy (buf, "bb,");
7143 if (GET_MODE (operands[0]) == DImode)
7144 strcat (buf, "*");
7145 if ((which == 0 && negated)
7146 || (which == 1 && ! negated))
7147 strcat (buf, "<");
7148 else
7149 strcat (buf, ">=");
7150 if (negated)
7151 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7152 else
7153 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7155 /* Handle short backwards branch with an unfilled delay slot.
7156 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7157 taken and untaken branches. */
7158 else if (dbr_sequence_length () == 0
7159 && ! forward_branch_p (insn)
7160 && INSN_ADDRESSES_SET_P ()
7161 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7162 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7164 strcpy (buf, "bb,");
7165 if (GET_MODE (operands[0]) == DImode)
7166 strcat (buf, "*");
7167 if ((which == 0 && negated)
7168 || (which == 1 && ! negated))
7169 strcat (buf, ">=");
7170 else
7171 strcat (buf, "<");
7172 if (negated)
7173 strcat (buf, " %0,%1,%3%#");
7174 else
7175 strcat (buf, " %0,%1,%2%#");
7177 else
7179 if (GET_MODE (operands[0]) == DImode)
7180 strcpy (buf, "extrd,s,*");
7181 else
7182 strcpy (buf, "{extrs,|extrw,s,}");
7183 if ((which == 0 && negated)
7184 || (which == 1 && ! negated))
7185 strcat (buf, "<");
7186 else
7187 strcat (buf, ">=");
7188 if (nullify && negated)
7189 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7190 else if (nullify && ! negated)
7191 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7192 else if (negated)
7193 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7194 else
7195 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7197 break;
7199 default:
7200 /* The reversed conditional branch must branch over one additional
7201 instruction if the delay slot is filled and needs to be extracted
7202 by pa_output_lbranch. If the delay slot is empty or this is a
7203 nullified forward branch, the instruction after the reversed
7204 condition branch must be nullified. */
7205 if (dbr_sequence_length () == 0
7206 || (nullify && forward_branch_p (insn)))
7208 nullify = 1;
7209 xdelay = 0;
7210 operands[4] = GEN_INT (length);
7212 else
7214 xdelay = 1;
7215 operands[4] = GEN_INT (length + 4);
7218 if (GET_MODE (operands[0]) == DImode)
7219 strcpy (buf, "bb,*");
7220 else
7221 strcpy (buf, "bb,");
7222 if ((which == 0 && negated)
7223 || (which == 1 && !negated))
7224 strcat (buf, "<");
7225 else
7226 strcat (buf, ">=");
7227 if (nullify)
7228 strcat (buf, ",n %0,%1,.+%4");
7229 else
7230 strcat (buf, " %0,%1,.+%4");
7231 output_asm_insn (buf, operands);
7232 return pa_output_lbranch (negated ? operands[3] : operands[2],
7233 insn, xdelay);
7235 return buf;
7238 /* This routine handles all the branch-on-variable-bit conditional branch
7239 sequences we might need to generate. It handles nullification of delay
7240 slots, varying length branches, negated branches and all combinations
7241 of the above. it returns the appropriate output template to emit the
7242 branch. */
7244 const char *
7245 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7246 int which)
7248 static char buf[100];
7249 bool useskip;
7250 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7251 int length = get_attr_length (insn);
7252 int xdelay;
7254 /* A conditional branch to the following instruction (e.g. the delay slot) is
7255 asking for a disaster. I do not think this can happen as this pattern
7256 is only used when optimizing; jump optimization should eliminate the
7257 jump. But be prepared just in case. */
7259 if (branch_to_delay_slot_p (insn))
7260 return "nop";
7262 /* If this is a long branch with its delay slot unfilled, set `nullify'
7263 as it can nullify the delay slot and save a nop. */
7264 if (length == 8 && dbr_sequence_length () == 0)
7265 nullify = 1;
7267 /* If this is a short forward conditional branch which did not get
7268 its delay slot filled, the delay slot can still be nullified. */
7269 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7270 nullify = forward_branch_p (insn);
7272 /* A forward branch over a single nullified insn can be done with an
7273 extrs instruction. This avoids a single cycle penalty due to
7274 mis-predicted branch if we fall through (branch not taken). */
7275 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7277 switch (length)
7280 /* All short conditional branches except backwards with an unfilled
7281 delay slot. */
7282 case 4:
7283 if (useskip)
7284 strcpy (buf, "{vextrs,|extrw,s,}");
7285 else
7286 strcpy (buf, "{bvb,|bb,}");
7287 if (useskip && GET_MODE (operands[0]) == DImode)
7288 strcpy (buf, "extrd,s,*");
7289 else if (GET_MODE (operands[0]) == DImode)
7290 strcpy (buf, "bb,*");
7291 if ((which == 0 && negated)
7292 || (which == 1 && ! negated))
7293 strcat (buf, ">=");
7294 else
7295 strcat (buf, "<");
7296 if (useskip)
7297 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7298 else if (nullify && negated)
7300 if (branch_needs_nop_p (insn))
7301 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7302 else
7303 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7305 else if (nullify && ! negated)
7307 if (branch_needs_nop_p (insn))
7308 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7309 else
7310 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7312 else if (! nullify && negated)
7313 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7314 else if (! nullify && ! negated)
7315 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7316 break;
7318 /* All long conditionals. Note a short backward branch with an
7319 unfilled delay slot is treated just like a long backward branch
7320 with an unfilled delay slot. */
7321 case 8:
7322 /* Handle weird backwards branch with a filled delay slot
7323 which is nullified. */
7324 if (dbr_sequence_length () != 0
7325 && ! forward_branch_p (insn)
7326 && nullify)
7328 strcpy (buf, "{bvb,|bb,}");
7329 if (GET_MODE (operands[0]) == DImode)
7330 strcat (buf, "*");
7331 if ((which == 0 && negated)
7332 || (which == 1 && ! negated))
7333 strcat (buf, "<");
7334 else
7335 strcat (buf, ">=");
7336 if (negated)
7337 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7338 else
7339 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7341 /* Handle short backwards branch with an unfilled delay slot.
7342 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7343 taken and untaken branches. */
7344 else if (dbr_sequence_length () == 0
7345 && ! forward_branch_p (insn)
7346 && INSN_ADDRESSES_SET_P ()
7347 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7348 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7350 strcpy (buf, "{bvb,|bb,}");
7351 if (GET_MODE (operands[0]) == DImode)
7352 strcat (buf, "*");
7353 if ((which == 0 && negated)
7354 || (which == 1 && ! negated))
7355 strcat (buf, ">=");
7356 else
7357 strcat (buf, "<");
7358 if (negated)
7359 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7360 else
7361 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7363 else
7365 strcpy (buf, "{vextrs,|extrw,s,}");
7366 if (GET_MODE (operands[0]) == DImode)
7367 strcpy (buf, "extrd,s,*");
7368 if ((which == 0 && negated)
7369 || (which == 1 && ! negated))
7370 strcat (buf, "<");
7371 else
7372 strcat (buf, ">=");
7373 if (nullify && negated)
7374 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7375 else if (nullify && ! negated)
7376 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7377 else if (negated)
7378 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7379 else
7380 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7382 break;
7384 default:
7385 /* The reversed conditional branch must branch over one additional
7386 instruction if the delay slot is filled and needs to be extracted
7387 by pa_output_lbranch. If the delay slot is empty or this is a
7388 nullified forward branch, the instruction after the reversed
7389 condition branch must be nullified. */
7390 if (dbr_sequence_length () == 0
7391 || (nullify && forward_branch_p (insn)))
7393 nullify = 1;
7394 xdelay = 0;
7395 operands[4] = GEN_INT (length);
7397 else
7399 xdelay = 1;
7400 operands[4] = GEN_INT (length + 4);
7403 if (GET_MODE (operands[0]) == DImode)
7404 strcpy (buf, "bb,*");
7405 else
7406 strcpy (buf, "{bvb,|bb,}");
7407 if ((which == 0 && negated)
7408 || (which == 1 && !negated))
7409 strcat (buf, "<");
7410 else
7411 strcat (buf, ">=");
7412 if (nullify)
7413 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7414 else
7415 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7416 output_asm_insn (buf, operands);
7417 return pa_output_lbranch (negated ? operands[3] : operands[2],
7418 insn, xdelay);
7420 return buf;
7423 /* Return the output template for emitting a dbra type insn.
7425 Note it may perform some output operations on its own before
7426 returning the final output string. */
7427 const char *
7428 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7430 int length = get_attr_length (insn);
7432 /* A conditional branch to the following instruction (e.g. the delay slot) is
7433 asking for a disaster. Be prepared! */
7435 if (branch_to_delay_slot_p (insn))
7437 if (which_alternative == 0)
7438 return "ldo %1(%0),%0";
7439 else if (which_alternative == 1)
7441 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7442 output_asm_insn ("ldw -16(%%r30),%4", operands);
7443 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7444 return "{fldws|fldw} -16(%%r30),%0";
7446 else
7448 output_asm_insn ("ldw %0,%4", operands);
7449 return "ldo %1(%4),%4\n\tstw %4,%0";
7453 if (which_alternative == 0)
7455 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7456 int xdelay;
7458 /* If this is a long branch with its delay slot unfilled, set `nullify'
7459 as it can nullify the delay slot and save a nop. */
7460 if (length == 8 && dbr_sequence_length () == 0)
7461 nullify = 1;
7463 /* If this is a short forward conditional branch which did not get
7464 its delay slot filled, the delay slot can still be nullified. */
7465 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7466 nullify = forward_branch_p (insn);
7468 switch (length)
7470 case 4:
7471 if (nullify)
7473 if (branch_needs_nop_p (insn))
7474 return "addib,%C2,n %1,%0,%3%#";
7475 else
7476 return "addib,%C2,n %1,%0,%3";
7478 else
7479 return "addib,%C2 %1,%0,%3";
7481 case 8:
7482 /* Handle weird backwards branch with a filled delay slot
7483 which is nullified. */
7484 if (dbr_sequence_length () != 0
7485 && ! forward_branch_p (insn)
7486 && nullify)
7487 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7488 /* Handle short backwards branch with an unfilled delay slot.
7489 Using an addb;nop rather than addi;bl saves 1 cycle for both
7490 taken and untaken branches. */
7491 else if (dbr_sequence_length () == 0
7492 && ! forward_branch_p (insn)
7493 && INSN_ADDRESSES_SET_P ()
7494 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7495 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7496 return "addib,%C2 %1,%0,%3%#";
7498 /* Handle normal cases. */
7499 if (nullify)
7500 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7501 else
7502 return "addi,%N2 %1,%0,%0\n\tb %3";
7504 default:
7505 /* The reversed conditional branch must branch over one additional
7506 instruction if the delay slot is filled and needs to be extracted
7507 by pa_output_lbranch. If the delay slot is empty or this is a
7508 nullified forward branch, the instruction after the reversed
7509 condition branch must be nullified. */
7510 if (dbr_sequence_length () == 0
7511 || (nullify && forward_branch_p (insn)))
7513 nullify = 1;
7514 xdelay = 0;
7515 operands[4] = GEN_INT (length);
7517 else
7519 xdelay = 1;
7520 operands[4] = GEN_INT (length + 4);
7523 if (nullify)
7524 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7525 else
7526 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7528 return pa_output_lbranch (operands[3], insn, xdelay);
7532 /* Deal with gross reload from FP register case. */
7533 else if (which_alternative == 1)
7535 /* Move loop counter from FP register to MEM then into a GR,
7536 increment the GR, store the GR into MEM, and finally reload
7537 the FP register from MEM from within the branch's delay slot. */
7538 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7539 operands);
7540 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7541 if (length == 24)
7542 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7543 else if (length == 28)
7544 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7545 else
7547 operands[5] = GEN_INT (length - 16);
7548 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7549 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7550 return pa_output_lbranch (operands[3], insn, 0);
7553 /* Deal with gross reload from memory case. */
7554 else
7556 /* Reload loop counter from memory, the store back to memory
7557 happens in the branch's delay slot. */
7558 output_asm_insn ("ldw %0,%4", operands);
7559 if (length == 12)
7560 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7561 else if (length == 16)
7562 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7563 else
7565 operands[5] = GEN_INT (length - 4);
7566 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7567 return pa_output_lbranch (operands[3], insn, 0);
7572 /* Return the output template for emitting a movb type insn.
7574 Note it may perform some output operations on its own before
7575 returning the final output string. */
7576 const char *
7577 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7578 int reverse_comparison)
7580 int length = get_attr_length (insn);
7582 /* A conditional branch to the following instruction (e.g. the delay slot) is
7583 asking for a disaster. Be prepared! */
7585 if (branch_to_delay_slot_p (insn))
7587 if (which_alternative == 0)
7588 return "copy %1,%0";
7589 else if (which_alternative == 1)
7591 output_asm_insn ("stw %1,-16(%%r30)", operands);
7592 return "{fldws|fldw} -16(%%r30),%0";
7594 else if (which_alternative == 2)
7595 return "stw %1,%0";
7596 else
7597 return "mtsar %r1";
7600 /* Support the second variant. */
7601 if (reverse_comparison)
7602 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7604 if (which_alternative == 0)
7606 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7607 int xdelay;
7609 /* If this is a long branch with its delay slot unfilled, set `nullify'
7610 as it can nullify the delay slot and save a nop. */
7611 if (length == 8 && dbr_sequence_length () == 0)
7612 nullify = 1;
7614 /* If this is a short forward conditional branch which did not get
7615 its delay slot filled, the delay slot can still be nullified. */
7616 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7617 nullify = forward_branch_p (insn);
7619 switch (length)
7621 case 4:
7622 if (nullify)
7624 if (branch_needs_nop_p (insn))
7625 return "movb,%C2,n %1,%0,%3%#";
7626 else
7627 return "movb,%C2,n %1,%0,%3";
7629 else
7630 return "movb,%C2 %1,%0,%3";
7632 case 8:
7633 /* Handle weird backwards branch with a filled delay slot
7634 which is nullified. */
7635 if (dbr_sequence_length () != 0
7636 && ! forward_branch_p (insn)
7637 && nullify)
7638 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7640 /* Handle short backwards branch with an unfilled delay slot.
7641 Using a movb;nop rather than or;bl saves 1 cycle for both
7642 taken and untaken branches. */
7643 else if (dbr_sequence_length () == 0
7644 && ! forward_branch_p (insn)
7645 && INSN_ADDRESSES_SET_P ()
7646 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7647 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7648 return "movb,%C2 %1,%0,%3%#";
7649 /* Handle normal cases. */
7650 if (nullify)
7651 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7652 else
7653 return "or,%N2 %1,%%r0,%0\n\tb %3";
7655 default:
7656 /* The reversed conditional branch must branch over one additional
7657 instruction if the delay slot is filled and needs to be extracted
7658 by pa_output_lbranch. If the delay slot is empty or this is a
7659 nullified forward branch, the instruction after the reversed
7660 conditional branch must be nullified. */
7661 if (dbr_sequence_length () == 0
7662 || (nullify && forward_branch_p (insn)))
7664 nullify = 1;
7665 xdelay = 0;
7666 operands[4] = GEN_INT (length);
7668 else
7670 xdelay = 1;
7671 operands[4] = GEN_INT (length + 4);
7674 if (nullify)
7675 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7676 else
7677 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7679 return pa_output_lbranch (operands[3], insn, xdelay);
7682 /* Deal with the gross reload for the FP destination register case. */
7683 else if (which_alternative == 1)
7685 /* Move source register to MEM, perform the branch test, then
7686 finally load the FP register from MEM from within the branch's
7687 delay slot. */
7688 output_asm_insn ("stw %1,-16(%%r30)", operands);
7689 if (length == 12)
7690 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7691 else if (length == 16)
7692 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7693 else
7695 operands[4] = GEN_INT (length - 4);
7696 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7697 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7698 return pa_output_lbranch (operands[3], insn, 0);
7701 /* Deal with the gross reload-from-memory case. */
7702 else if (which_alternative == 2)
7704 /* Reload the loop counter from memory; the store back to memory
7705 happens in the branch's delay slot. */
7706 if (length == 8)
7707 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7708 else if (length == 12)
7709 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7710 else
7712 operands[4] = GEN_INT (length);
7713 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7714 operands);
7715 return pa_output_lbranch (operands[3], insn, 0);
7718 /* Handle SAR as a destination. */
7719 else
7721 if (length == 8)
7722 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7723 else if (length == 12)
7724 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7725 else
7727 operands[4] = GEN_INT (length);
7728 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7729 operands);
7730 return pa_output_lbranch (operands[3], insn, 0);
7735 /* Copy any FP arguments in INSN into integer registers. */
7736 static void
7737 copy_fp_args (rtx_insn *insn)
7739 rtx link;
7740 rtx xoperands[2];
7742 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7744 int arg_mode, regno;
7745 rtx use = XEXP (link, 0);
7747 if (! (GET_CODE (use) == USE
7748 && GET_CODE (XEXP (use, 0)) == REG
7749 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7750 continue;
7752 arg_mode = GET_MODE (XEXP (use, 0));
7753 regno = REGNO (XEXP (use, 0));
7755 /* Is it a floating point register? */
7756 if (regno >= 32 && regno <= 39)
7758 /* Copy the FP register into an integer register via memory. */
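/* The index arithmetic below maps the FP argument register (hard
   registers 32 to 39) to the integer argument register(s) that hold
   the same argument word(s), counting down from %r26.  */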
7759 if (arg_mode == SFmode)
7761 xoperands[0] = XEXP (use, 0);
7762 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7763 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7764 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7766 else
7768 xoperands[0] = XEXP (use, 0);
7769 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7770 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7771 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7772 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7778 /* Compute length of the FP argument copy sequence for INSN. */
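/* Each SFmode argument takes a store and a load (8 bytes); each DFmode
   argument takes a store and two loads (12 bytes).  These match the
   sequences emitted by copy_fp_args above.  */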
7779 static int
7780 length_fp_args (rtx_insn *insn)
7782 int length = 0;
7783 rtx link;
7785 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7787 int arg_mode, regno;
7788 rtx use = XEXP (link, 0);
7790 if (! (GET_CODE (use) == USE
7791 && GET_CODE (XEXP (use, 0)) == REG
7792 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7793 continue;
7795 arg_mode = GET_MODE (XEXP (use, 0));
7796 regno = REGNO (XEXP (use, 0));
7798 /* Is it a floating point register? */
7799 if (regno >= 32 && regno <= 39)
7801 if (arg_mode == SFmode)
7802 length += 8;
7803 else
7804 length += 12;
7808 return length;
7811 /* Return the attribute length for the millicode call instruction INSN.
7812 The length must match the code generated by pa_output_millicode_call.
7813 We include the delay slot in the returned length as it is better to
7814 overestimate the length than to underestimate it. */
7816 int
7817 pa_attr_length_millicode_call (rtx_insn *insn)
7819 unsigned long distance = -1;
7820 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
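/* Estimate the worst-case distance to the target.  If the unsigned
   sum wraps around (distance < total), the distance is unknown and
   we assume the maximum.  */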
7822 if (INSN_ADDRESSES_SET_P ())
7824 distance = (total + insn_current_reference_address (insn));
7825 if (distance < total)
7826 distance = -1;
7829 if (TARGET_64BIT)
7831 if (!TARGET_LONG_CALLS && distance < 7600000)
7832 return 8;
7834 return 20;
7836 else if (TARGET_PORTABLE_RUNTIME)
7837 return 24;
7838 else
7840 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7841 return 8;
7843 if (!flag_pic)
7844 return 12;
7846 return 24;
7850 /* INSN is a function call.
7852 CALL_DEST is the routine we are calling. */
7854 const char *
7855 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7857 int attr_length = get_attr_length (insn);
7858 int seq_length = dbr_sequence_length ();
7859 rtx xoperands[4];
7861 xoperands[0] = call_dest;
7863 /* Handle the common case where we are sure that the branch will
7864 reach the beginning of the $CODE$ subspace. The within-reach
7865 form of the $$sh_func_adrs call has a length of 28. Because it
7866 has an attribute type of sh_func_adrs, it never has a nonzero
7867 sequence length (i.e., the delay slot is never filled). */
7868 if (!TARGET_LONG_CALLS
7869 && (attr_length == 8
7870 || (attr_length == 28
7871 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7873 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7874 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7876 else
7878 if (TARGET_64BIT)
7880 /* It might seem that one insn could be saved by accessing
7881 the millicode function using the linkage table. However,
7882 this doesn't work in shared libraries and other dynamically
7883 loaded objects. Using a pc-relative sequence also avoids
7884 problems related to the implicit use of the gp register. */
7885 xoperands[1] = gen_rtx_REG (Pmode, 1);
7886 xoperands[2] = xoperands[1];
7887 pa_output_pic_pcrel_sequence (xoperands);
7888 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7890 else if (TARGET_PORTABLE_RUNTIME)
7892 /* Pure portable runtime doesn't allow be/ble; we also don't
7893 have PIC support in the assembler/linker, so this sequence
7894 is needed. */
7896 /* Get the address of our target into %r1. */
7897 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7898 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7900 /* Get our return address into %r31. */
7901 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7902 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
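/* The bl leaves %r31 pointing at the bv below; the addi advances it
   by 8 so the millicode routine returns to the instruction after the
   bv's delay slot.  */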
7904 /* Jump to our target address in %r1. */
7905 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7907 else if (!flag_pic)
7909 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7910 if (TARGET_PA_20)
7911 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7912 else
7913 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7915 else
7917 xoperands[1] = gen_rtx_REG (Pmode, 31);
7918 xoperands[2] = gen_rtx_REG (Pmode, 1);
7919 pa_output_pic_pcrel_sequence (xoperands);
7921 /* Adjust return address. */
7922 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7924 /* Jump to our target address in %r1. */
7925 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7929 if (seq_length == 0)
7930 output_asm_insn ("nop", xoperands);
7932 return "";
7935 /* Return the attribute length of the call instruction INSN. The SIBCALL
7936 flag indicates whether INSN is a regular call or a sibling call. The
7937 length returned must be longer than the code actually generated by
7938 pa_output_call. Since branch shortening is done before delay branch
7939 sequencing, there is no way to determine whether or not the delay
7940 slot will be filled during branch shortening. Even when the delay
7941 slot is filled, we may have to add a nop if the delay slot contains
7942 a branch that can't reach its target. Thus, we always have to include
7943 the delay slot in the length estimate. This used to be done in
7944 pa_adjust_insn_length but we do it here now as some sequences always
7945 fill the delay slot and we can save four bytes in the estimate for
7946 these sequences. */
7948 int
7949 pa_attr_length_call (rtx_insn *insn, int sibcall)
7951 int local_call;
7952 rtx call, call_dest;
7953 tree call_decl;
7954 int length = 0;
7955 rtx pat = PATTERN (insn);
7956 unsigned long distance = -1;
7958 gcc_assert (CALL_P (insn));
7960 if (INSN_ADDRESSES_SET_P ())
7962 unsigned long total;
7964 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7965 distance = (total + insn_current_reference_address (insn));
7966 if (distance < total)
7967 distance = -1;
7970 gcc_assert (GET_CODE (pat) == PARALLEL);
7972 /* Get the call rtx. */
7973 call = XVECEXP (pat, 0, 0);
7974 if (GET_CODE (call) == SET)
7975 call = SET_SRC (call);
7977 gcc_assert (GET_CODE (call) == CALL);
7979 /* Determine if this is a local call. */
7980 call_dest = XEXP (XEXP (call, 0), 0);
7981 call_decl = SYMBOL_REF_DECL (call_dest);
7982 local_call = call_decl && targetm.binds_local_p (call_decl);
7984 /* pc-relative branch. */
7985 if (!TARGET_LONG_CALLS
7986 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7987 || distance < MAX_PCREL17F_OFFSET))
7988 length += 8;
7990 /* 64-bit plabel sequence. */
7991 else if (TARGET_64BIT && !local_call)
7992 length += 24;
7994 /* non-pic long absolute branch sequence. */
7995 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7996 length += 12;
7998 /* long pc-relative branch sequence. */
7999 else if (TARGET_LONG_PIC_SDIFF_CALL
8000 || (TARGET_GAS && !TARGET_SOM && local_call))
8002 length += 20;
8004 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8005 length += 8;
8008 /* 32-bit plabel sequence. */
8009 else
8011 length += 32;
8013 if (TARGET_SOM)
8014 length += length_fp_args (insn);
8016 if (flag_pic)
8017 length += 4;
8019 if (!TARGET_PA_20)
8021 if (!sibcall)
8022 length += 8;
8024 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8025 length += 8;
8029 return length;
8032 /* INSN is a function call.
8034 CALL_DEST is the routine we are calling. */
8036 const char *
8037 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8039 int seq_length = dbr_sequence_length ();
8040 tree call_decl = SYMBOL_REF_DECL (call_dest);
8041 int local_call = call_decl && targetm.binds_local_p (call_decl);
8042 rtx xoperands[4];
8044 xoperands[0] = call_dest;
8046 /* Handle the common case where we're sure that the branch will reach
8047 the beginning of the "$CODE$" subspace. This is the beginning of
8048 the current function if we are in a named section. */
8049 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8051 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8052 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8054 else
8056 if (TARGET_64BIT && !local_call)
8058 /* ??? As far as I can tell, the HP linker doesn't support the
8059 long pc-relative sequence described in the 64-bit runtime
8060 architecture. So, we use a slightly longer indirect call. */
8061 xoperands[0] = pa_get_deferred_plabel (call_dest);
8062 xoperands[1] = gen_label_rtx ();
8064 /* Put the load of %r27 into the delay slot. We don't need to
8065 do anything when generating fast indirect calls. */
8066 if (seq_length != 0)
8068 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8069 optimize, 0, NULL);
8071 /* Now delete the delay insn. */
8072 SET_INSN_DELETED (NEXT_INSN (insn));
8075 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8076 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8077 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8078 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8079 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8080 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8081 seq_length = 1;
8083 else
8085 int indirect_call = 0;
8087 /* Emit a long call. There are several different sequences
8088 of increasing length and complexity. In most cases,
8089 they don't allow an instruction in the delay slot. */
8090 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8091 && !TARGET_LONG_PIC_SDIFF_CALL
8092 && !(TARGET_GAS && !TARGET_SOM && local_call)
8093 && !TARGET_64BIT)
8094 indirect_call = 1;
8096 if (seq_length != 0
8097 && !sibcall
8098 && (!TARGET_PA_20
8099 || indirect_call
8100 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8102 /* A non-jump insn in the delay slot. By definition we can
8103 emit this insn before the call (and in fact before argument
8104 relocating). */
8105 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8106 NULL);
8108 /* Now delete the delay insn. */
8109 SET_INSN_DELETED (NEXT_INSN (insn));
8110 seq_length = 0;
8113 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8115 /* This is the best sequence for making long calls in
8116 non-pic code. Unfortunately, GNU ld doesn't provide
8117 the stub needed for external calls, and GAS's support
8118 for this with the SOM linker is buggy. It is safe
8119 to use this for local calls. */
8120 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8121 if (sibcall)
8122 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8123 else
8125 if (TARGET_PA_20)
8126 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8127 xoperands);
8128 else
8129 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8131 output_asm_insn ("copy %%r31,%%r2", xoperands);
8132 seq_length = 1;
8135 else
8137 /* The HP assembler and linker can handle relocations for
8138 the difference of two symbols. The HP assembler
8139 recognizes the sequence as a pc-relative call and
8140 the linker provides stubs when needed. */
8142 /* GAS currently can't generate the relocations that
8143 are needed for the SOM linker under HP-UX using this
8144 sequence. The GNU linker doesn't generate the stubs
8145 that are needed for external calls on TARGET_ELF32
8146 with this sequence. For now, we have to use a longer
8147 plabel sequence when using GAS for non local calls. */
8148 if (TARGET_LONG_PIC_SDIFF_CALL
8149 || (TARGET_GAS && !TARGET_SOM && local_call))
8151 xoperands[1] = gen_rtx_REG (Pmode, 1);
8152 xoperands[2] = xoperands[1];
8153 pa_output_pic_pcrel_sequence (xoperands);
8155 else
8157 /* Emit a long plabel-based call sequence. This is
8158 essentially an inline implementation of $$dyncall.
8159 We don't actually try to call $$dyncall as this is
8160 as difficult as calling the function itself. */
8161 xoperands[0] = pa_get_deferred_plabel (call_dest);
8162 xoperands[1] = gen_label_rtx ();
8164 /* Since the call is indirect, FP arguments in registers
8165 need to be copied to the general registers. Then, the
8166 argument relocation stub will copy them back. */
8167 if (TARGET_SOM)
8168 copy_fp_args (insn);
8170 if (flag_pic)
8172 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8173 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8174 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8176 else
8178 output_asm_insn ("addil LR'%0-$global$,%%r27",
8179 xoperands);
8180 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8181 xoperands);
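/* If %r22 holds a plabel (bit 30 set), clear the low-order flag bits
   and load the real target address and global pointer from the
   descriptor, as $$dyncall would; otherwise branch over the
   dereference.  */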
8184 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8185 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8186 /* Should this be an ordered load to ensure the target
8187 address is loaded before the global pointer? */
8188 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8189 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8191 if (!sibcall && !TARGET_PA_20)
8193 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8194 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8195 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8196 else
8197 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8201 if (TARGET_PA_20)
8203 if (sibcall)
8204 output_asm_insn ("bve (%%r1)", xoperands);
8205 else
8207 if (indirect_call)
8209 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8210 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8211 seq_length = 1;
8213 else
8214 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8217 else
8219 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8220 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8221 xoperands);
8223 if (sibcall)
8225 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8226 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8227 else
8228 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8230 else
8232 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8233 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8234 else
8235 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8237 if (indirect_call)
8238 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8239 else
8240 output_asm_insn ("copy %%r31,%%r2", xoperands);
8241 seq_length = 1;
8248 if (seq_length == 0)
8249 output_asm_insn ("nop", xoperands);
8251 return "";
8254 /* Return the attribute length of the indirect call instruction INSN.
8255 The length must match the code generated by pa_output_indirect_call.
8256 The returned length includes the delay slot. Currently, the delay
8257 slot of an indirect call sequence is not exposed and it is used by
8258 the sequence itself. */
8260 int
8261 pa_attr_length_indirect_call (rtx_insn *insn)
8263 unsigned long distance = -1;
8264 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8266 if (INSN_ADDRESSES_SET_P ())
8268 distance = (total + insn_current_reference_address (insn));
8269 if (distance < total)
8270 distance = -1;
8273 if (TARGET_64BIT)
8274 return 12;
8276 if (TARGET_FAST_INDIRECT_CALLS)
8277 return 8;
8279 if (TARGET_PORTABLE_RUNTIME)
8280 return 16;
8282 if (!TARGET_LONG_CALLS
8283 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8284 || distance < MAX_PCREL17F_OFFSET))
8285 return 8;
8287 /* Out of reach, can use ble. */
8288 if (!flag_pic)
8289 return 12;
8291 /* Inline versions of $$dyncall. */
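/* The 28 and 32 byte figures correspond to the 7 and 8 instruction
   inline sequences emitted by pa_output_indirect_call below.  */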
8292 if (!optimize_size)
8294 if (TARGET_NO_SPACE_REGS)
8295 return 28;
8297 if (TARGET_PA_20)
8298 return 32;
8301 /* Long PIC pc-relative call. */
8302 return 20;
8305 const char *
8306 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8308 rtx xoperands[4];
8309 int length;
8311 if (TARGET_64BIT)
8313 xoperands[0] = call_dest;
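/* In the 64-bit runtime, a function pointer addresses a descriptor;
   the word at offset 16 holds the entry point and the word at offset
   24 holds the callee's global pointer (%r27).  */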
8314 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8315 "bve,l (%%r2),%%r2\n\t"
8316 "ldd 24(%0),%%r27", xoperands);
8317 return "";
8320 /* First the special case for kernels, level 0 systems, etc. */
8321 if (TARGET_FAST_INDIRECT_CALLS)
8323 pa_output_arg_descriptor (insn);
8324 if (TARGET_PA_20)
8325 return "bve,l,n (%%r22),%%r2\n\tnop";
8326 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8329 if (TARGET_PORTABLE_RUNTIME)
8331 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8332 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8333 pa_output_arg_descriptor (insn);
8334 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8337 /* Now the normal case -- we can reach $$dyncall directly or
8338 we're sure that we can get there via a long-branch stub.
8340 No need to check target flags as the length uniquely identifies
8341 the remaining cases. */
8342 length = pa_attr_length_indirect_call (insn);
8343 if (length == 8)
8345 pa_output_arg_descriptor (insn);
8347 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8348 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8349 variant of the B,L instruction can't be used on the SOM target. */
8350 if (TARGET_PA_20 && !TARGET_SOM)
8351 return "b,l,n $$dyncall,%%r2\n\tnop";
8352 else
8353 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8356 /* Long millicode call, but we are not generating PIC or portable runtime
8357 code. */
8358 if (length == 12)
8360 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8361 pa_output_arg_descriptor (insn);
8362 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8365 /* The long PIC pc-relative call sequence is five instructions. So,
8366 let's use an inline version of $$dyncall when the calling sequence
8367 has a roughly similar number of instructions and we are not optimizing
8368 for size. We need two instructions to load the return pointer plus
8369 the $$dyncall implementation. */
8370 if (!optimize_size)
8372 if (TARGET_NO_SPACE_REGS)
8374 pa_output_arg_descriptor (insn);
8375 output_asm_insn ("bl .+8,%%r2\n\t"
8376 "ldo 20(%%r2),%%r2\n\t"
8377 "extru,<> %%r22,30,1,%%r0\n\t"
8378 "bv,n %%r0(%%r22)\n\t"
8379 "ldw -2(%%r22),%%r21\n\t"
8380 "bv %%r0(%%r21)\n\t"
8381 "ldw 2(%%r22),%%r19", xoperands);
8382 return "";
8384 if (TARGET_PA_20)
8386 pa_output_arg_descriptor (insn);
8387 output_asm_insn ("bl .+8,%%r2\n\t"
8388 "ldo 24(%%r2),%%r2\n\t"
8389 "stw %%r2,-24(%%sp)\n\t"
8390 "extru,<> %r22,30,1,%%r0\n\t"
8391 "bve,n (%%r22)\n\t"
8392 "ldw -2(%%r22),%%r21\n\t"
8393 "bve (%%r21)\n\t"
8394 "ldw 2(%%r22),%%r19", xoperands);
8395 return "";
8399 /* We need a long PIC call to $$dyncall. */
8400 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8401 xoperands[1] = gen_rtx_REG (Pmode, 2);
8402 xoperands[2] = gen_rtx_REG (Pmode, 1);
8403 pa_output_pic_pcrel_sequence (xoperands);
8404 pa_output_arg_descriptor (insn);
8405 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8408 /* In HPUX 8.0's shared library scheme, special relocations are needed
8409 for function labels if they might be passed to a function
8410 in a shared library (because shared libraries don't live in code
8411 space), and special magic is needed to construct their address. */
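/* The encoding is simply an '@' prefixed to the assembler name; for
   example, the label for foo becomes @foo.  */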
8413 void
8414 pa_encode_label (rtx sym)
8416 const char *str = XSTR (sym, 0);
8417 int len = strlen (str) + 1;
8418 char *newstr, *p;
8420 p = newstr = XALLOCAVEC (char, len + 1);
8421 *p++ = '@';
8422 strcpy (p, str);
8424 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8427 static void
8428 pa_encode_section_info (tree decl, rtx rtl, int first)
8430 int old_referenced = 0;
8432 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8433 old_referenced
8434 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8436 default_encode_section_info (decl, rtl, first);
8438 if (first && TEXT_SPACE_P (decl))
8440 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8441 if (TREE_CODE (decl) == FUNCTION_DECL)
8442 pa_encode_label (XEXP (rtl, 0));
8444 else if (old_referenced)
8445 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8448 /* This is sort of the inverse of pa_encode_section_info. */
8450 static const char *
8451 pa_strip_name_encoding (const char *str)
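/* Each comparison below yields 0 or 1, so this skips at most one '@'
   (added by pa_encode_label) and one '*' (GCC's marker for a verbatim
   assembler name).  */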
8453 str += (*str == '@');
8454 str += (*str == '*');
8455 return str;
8458 /* Returns 1 if OP is a function label involved in a simple addition
8459 with a constant. Used to keep certain patterns from matching
8460 during instruction combination. */
8461 int
8462 pa_is_function_label_plus_const (rtx op)
8464 /* Strip off any CONST. */
8465 if (GET_CODE (op) == CONST)
8466 op = XEXP (op, 0);
8468 return (GET_CODE (op) == PLUS
8469 && function_label_operand (XEXP (op, 0), VOIDmode)
8470 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8473 /* Output the assembler code for a thunk function. THUNK_DECL is the
8474 declaration for the thunk function itself, FUNCTION is the decl for
8475 the target function. DELTA is an immediate constant offset to be
8476 added to THIS. If VCALL_OFFSET is nonzero, the word at
8477 *(*this + vcall_offset) should be added to THIS. */
8479 static void
8480 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8481 HOST_WIDE_INT vcall_offset, tree function)
8483 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8484 static unsigned int current_thunk_number;
8485 int val_14 = VAL_14_BITS_P (delta);
8486 unsigned int old_last_address = last_address, nbytes = 0;
8487 char label[17];
8488 rtx xoperands[4];
8490 xoperands[0] = XEXP (DECL_RTL (function), 0);
8491 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8492 xoperands[2] = GEN_INT (delta);
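/* THIS arrives in %r26, the first argument register, so the sequences
   below add DELTA directly to %r26.  */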
8494 assemble_start_function (thunk_fndecl, fnname);
8495 final_start_function (emit_barrier (), file, 1);
8497 if (!vcall_offset)
8499 /* Output the thunk. We know that the function is in the same
8500 translation unit (i.e., the same space) as the thunk, and that
8501 thunks are output after their method. Thus, we don't need an
8502 external branch to reach the function. With SOM and GAS,
8503 functions and thunks are effectively in different sections.
8504 Thus, we can always use an IA-relative branch and the linker
8505 will add a long branch stub if necessary.
8507 However, we have to be careful when generating PIC code on the
8508 SOM port to ensure that the sequence does not transfer to an
8509 import stub for the target function as this could clobber the
8510 return value saved at SP-24. This would also apply to the
8511 32-bit linux port if the multi-space model is implemented. */
8512 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8513 && !(flag_pic && TREE_PUBLIC (function))
8514 && (TARGET_GAS || last_address < 262132))
8515 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8516 && ((targetm_common.have_named_sections
8517 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8518 /* The GNU 64-bit linker has rather poor stub management.
8519 So, we use a long branch from thunks that aren't in
8520 the same section as the target function. */
8521 && ((!TARGET_64BIT
8522 && (DECL_SECTION_NAME (thunk_fndecl)
8523 != DECL_SECTION_NAME (function)))
8524 || ((DECL_SECTION_NAME (thunk_fndecl)
8525 == DECL_SECTION_NAME (function))
8526 && last_address < 262132)))
8527 /* In this case, we need to be able to reach the start of
8528 the stub table even though the function is likely closer
8529 and can be jumped to directly. */
8530 || (targetm_common.have_named_sections
8531 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8532 && DECL_SECTION_NAME (function) == NULL
8533 && total_code_bytes < MAX_PCREL17F_OFFSET)
8534 /* Likewise. */
8535 || (!targetm_common.have_named_sections
8536 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8538 if (!val_14)
8539 output_asm_insn ("addil L'%2,%%r26", xoperands);
8541 output_asm_insn ("b %0", xoperands);
8543 if (val_14)
8545 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8546 nbytes += 8;
8548 else
8550 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8551 nbytes += 12;
8554 else if (TARGET_64BIT)
8556 rtx xop[4];
8558 /* We only have one call-clobbered scratch register, so we can't
8559 make use of the delay slot if delta doesn't fit in 14 bits. */
8560 if (!val_14)
8562 output_asm_insn ("addil L'%2,%%r26", xoperands);
8563 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8566 /* Load function address into %r1. */
8567 xop[0] = xoperands[0];
8568 xop[1] = gen_rtx_REG (Pmode, 1);
8569 xop[2] = xop[1];
8570 pa_output_pic_pcrel_sequence (xop);
8572 if (val_14)
8574 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8575 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8576 nbytes += 20;
8578 else
8580 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8581 nbytes += 24;
8584 else if (TARGET_PORTABLE_RUNTIME)
8586 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8587 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8589 if (!val_14)
8590 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8592 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8594 if (val_14)
8596 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8597 nbytes += 16;
8599 else
8601 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8602 nbytes += 20;
8605 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8607 /* The function is accessible from outside this module. The only
8608 way to avoid an import stub between the thunk and function is to
8609 call the function directly with an indirect sequence similar to
8610 that used by $$dyncall. This is possible because $$dyncall acts
8611 as the import stub in an indirect call. */
8612 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8613 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8614 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8615 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8616 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8617 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8618 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8619 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8620 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8622 if (!val_14)
8624 output_asm_insn ("addil L'%2,%%r26", xoperands);
8625 nbytes += 4;
8628 if (TARGET_PA_20)
8630 output_asm_insn ("bve (%%r22)", xoperands);
8631 nbytes += 36;
8633 else if (TARGET_NO_SPACE_REGS)
8635 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8636 nbytes += 36;
8638 else
8640 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8641 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8642 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8643 nbytes += 44;
8646 if (val_14)
8647 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8648 else
8649 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8651 else if (flag_pic)
8653 rtx xop[4];
8655 /* Load function address into %r22. */
8656 xop[0] = xoperands[0];
8657 xop[1] = gen_rtx_REG (Pmode, 1);
8658 xop[2] = gen_rtx_REG (Pmode, 22);
8659 pa_output_pic_pcrel_sequence (xop);
8661 if (!val_14)
8662 output_asm_insn ("addil L'%2,%%r26", xoperands);
8664 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8666 if (val_14)
8668 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8669 nbytes += 20;
8671 else
8673 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8674 nbytes += 24;
8677 else
8679 if (!val_14)
8680 output_asm_insn ("addil L'%2,%%r26", xoperands);
8682 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8683 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8685 if (val_14)
8687 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8688 nbytes += 12;
8690 else
8692 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8693 nbytes += 16;
8697 else
8699 rtx xop[4];
8701 /* Add DELTA to THIS. */
8702 if (val_14)
8704 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8705 nbytes += 4;
8707 else
8709 output_asm_insn ("addil L'%2,%%r26", xoperands);
8710 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8711 nbytes += 8;
8714 if (TARGET_64BIT)
8716 /* Load *(THIS + DELTA) to %r1. */
8717 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8719 val_14 = VAL_14_BITS_P (vcall_offset);
8720 xoperands[2] = GEN_INT (vcall_offset);
8722 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8723 if (val_14)
8725 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8726 nbytes += 8;
8728 else
8730 output_asm_insn ("addil L'%2,%%r1", xoperands);
8731 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8732 nbytes += 12;
8735 else
8737 /* Load *(THIS + DELTA) to %r1. */
8738 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8740 val_14 = VAL_14_BITS_P (vcall_offset);
8741 xoperands[2] = GEN_INT (vcall_offset);
8743 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8744 if (val_14)
8746 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8747 nbytes += 8;
8749 else
8751 output_asm_insn ("addil L'%2,%%r1", xoperands);
8752 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8753 nbytes += 12;
8757 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8758 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8759 && !(flag_pic && TREE_PUBLIC (function))
8760 && (TARGET_GAS || last_address < 262132))
8761 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8762 && ((targetm_common.have_named_sections
8763 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8764 /* The GNU 64-bit linker has rather poor stub management.
8765 So, we use a long branch from thunks that aren't in
8766 the same section as the target function. */
8767 && ((!TARGET_64BIT
8768 && (DECL_SECTION_NAME (thunk_fndecl)
8769 != DECL_SECTION_NAME (function)))
8770 || ((DECL_SECTION_NAME (thunk_fndecl)
8771 == DECL_SECTION_NAME (function))
8772 && last_address < 262132)))
8773 /* In this case, we need to be able to reach the start of
8774 the stub table even though the function is likely closer
8775 and can be jumped to directly. */
8776 || (targetm_common.have_named_sections
8777 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8778 && DECL_SECTION_NAME (function) == NULL
8779 && total_code_bytes < MAX_PCREL17F_OFFSET)
8780 /* Likewise. */
8781 || (!targetm_common.have_named_sections
8782 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8784 nbytes += 4;
8785 output_asm_insn ("b %0", xoperands);
8787 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8788 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8790 else if (TARGET_64BIT)
8792 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8793 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8795 /* Load function address into %r1. */
8796 nbytes += 16;
8797 xop[0] = xoperands[0];
8798 xop[1] = gen_rtx_REG (Pmode, 1);
8799 xop[2] = xop[1];
8800 pa_output_pic_pcrel_sequence (xop);
8802 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8804 else if (TARGET_PORTABLE_RUNTIME)
8806 /* Load function address into %r22. */
8807 nbytes += 12;
8808 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8809 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8811 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8813 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8814 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8816 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8818 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8819 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8821 /* The function is accessible from outside this module. The only
8822 way to avoid an import stub between the thunk and function is to
8823 call the function directly with an indirect sequence similar to
8824 that used by $$dyncall. This is possible because $$dyncall acts
8825 as the import stub in an indirect call. */
8826 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8827 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8828 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8829 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8830 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8831 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8832 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8833 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8834 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8836 if (TARGET_PA_20)
8838 output_asm_insn ("bve,n (%%r22)", xoperands);
8839 nbytes += 32;
8841 else if (TARGET_NO_SPACE_REGS)
8843 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8844 nbytes += 32;
8846 else
8848 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8849 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8850 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8851 nbytes += 40;
8854 else if (flag_pic)
8856 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8857 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8859 /* Load function address into %r1. */
8860 nbytes += 16;
8861 xop[0] = xoperands[0];
8862 xop[1] = gen_rtx_REG (Pmode, 1);
8863 xop[2] = xop[1];
8864 pa_output_pic_pcrel_sequence (xop);
8866 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8868 else
8870 /* Load function address into %r22. */
8871 nbytes += 8;
8872 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8873 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8875 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8876 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8880 final_end_function ();
8882 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8884 switch_to_section (data_section);
8885 output_asm_insn (".align 4", xoperands);
8886 ASM_OUTPUT_LABEL (file, label);
8887 output_asm_insn (".word P'%0", xoperands);
8890 current_thunk_number++;
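/* Round NBYTES up to the function alignment; the alignment is a power
   of two, so adding (boundary - 1) and masking suffices.  */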
8891 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8892 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8893 last_address += nbytes;
8894 if (old_last_address > last_address)
8895 last_address = UINT_MAX;
8896 update_total_code_bytes (nbytes);
8897 assemble_end_function (thunk_fndecl, fnname);
8900 /* Only direct calls to static functions are allowed to be sibling (tail)
8901 call optimized.
8903 This restriction is necessary because some linker generated stubs will
8904 store return pointers into rp' in some cases which might clobber a
8905 live value already in rp'.
8907 In a sibcall the current function and the target function share stack
8908 space. Thus if the path to the current function and the path to the
8909 target function save a value in rp', they save the value into the
8910 same stack slot, which has undesirable consequences.
8912 Because of the deferred binding nature of shared libraries any function
8913 with external scope could be in a different load module and thus require
8914 rp' to be saved when calling that function. So sibcall optimizations
8915 can only be safe for static functions.
8917 Note that GCC never needs return value relocations, so we don't have to
8918 worry about static calls with return value relocations (which require
8919 saving rp').
8921 It is safe to perform a sibcall optimization when the target function
8922 will never return. */
8923 static bool
8924 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8926 /* Sibcalls are not ok because the arg pointer register is not a fixed
8927 register. This prevents the sibcall optimization from occurring. In
8928 addition, there are problems with stub placement using GNU ld. This
8929 is because a normal sibcall branch uses a 17-bit relocation while
8930 a regular call branch uses a 22-bit relocation. As a result, more
8931 care needs to be taken in the placement of long-branch stubs. */
8932 if (TARGET_64BIT)
8933 return false;
8935 if (TARGET_PORTABLE_RUNTIME)
8936 return false;
8938 /* Sibcalls are only ok within a translation unit. */
8939 return decl && targetm.binds_local_p (decl);
8942 /* ??? Addition is not commutative on the PA due to the weird implicit
8943 space register selection rules for memory addresses. Therefore, we
8944 don't consider a + b == b + a, as this might be inside a MEM. */
8945 static bool
8946 pa_commutative_p (const_rtx x, int outer_code)
8948 return (COMMUTATIVE_P (x)
8949 && (TARGET_NO_SPACE_REGS
8950 || (outer_code != UNKNOWN && outer_code != MEM)
8951 || GET_CODE (x) != PLUS));
8954 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8955 use in fmpyadd instructions. */
8956 int
8957 pa_fmpyaddoperands (rtx *operands)
8959 machine_mode mode = GET_MODE (operands[0]);
8961 /* Must be a floating point mode. */
8962 if (mode != SFmode && mode != DFmode)
8963 return 0;
8965 /* All modes must be the same. */
8966 if (! (mode == GET_MODE (operands[1])
8967 && mode == GET_MODE (operands[2])
8968 && mode == GET_MODE (operands[3])
8969 && mode == GET_MODE (operands[4])
8970 && mode == GET_MODE (operands[5])))
8971 return 0;
8973 /* All operands must be registers. */
8974 if (! (GET_CODE (operands[1]) == REG
8975 && GET_CODE (operands[2]) == REG
8976 && GET_CODE (operands[3]) == REG
8977 && GET_CODE (operands[4]) == REG
8978 && GET_CODE (operands[5]) == REG))
8979 return 0;
8981 /* Only 2 real operands to the addition. One of the input operands must
8982 be the same as the output operand. */
8983 if (! rtx_equal_p (operands[3], operands[4])
8984 && ! rtx_equal_p (operands[3], operands[5]))
8985 return 0;
8987 /* Inout operand of add cannot conflict with any operands from multiply. */
8988 if (rtx_equal_p (operands[3], operands[0])
8989 || rtx_equal_p (operands[3], operands[1])
8990 || rtx_equal_p (operands[3], operands[2]))
8991 return 0;
8993 /* multiply cannot feed into addition operands. */
8994 if (rtx_equal_p (operands[4], operands[0])
8995 || rtx_equal_p (operands[5], operands[0]))
8996 return 0;
8998 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8999 if (mode == SFmode
9000 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9001 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9002 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9003 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9004 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9005 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9006 return 0;
9008 /* Passed. Operands are suitable for fmpyadd. */
9009 return 1;
9012 #if !defined(USE_COLLECT2)
9013 static void
9014 pa_asm_out_constructor (rtx symbol, int priority)
9016 if (!function_label_operand (symbol, VOIDmode))
9017 pa_encode_label (symbol);
9019 #ifdef CTORS_SECTION_ASM_OP
9020 default_ctor_section_asm_out_constructor (symbol, priority);
9021 #else
9022 # ifdef TARGET_ASM_NAMED_SECTION
9023 default_named_section_asm_out_constructor (symbol, priority);
9024 # else
9025 default_stabs_asm_out_constructor (symbol, priority);
9026 # endif
9027 #endif
9030 static void
9031 pa_asm_out_destructor (rtx symbol, int priority)
9033 if (!function_label_operand (symbol, VOIDmode))
9034 pa_encode_label (symbol);
9036 #ifdef DTORS_SECTION_ASM_OP
9037 default_dtor_section_asm_out_destructor (symbol, priority);
9038 #else
9039 # ifdef TARGET_ASM_NAMED_SECTION
9040 default_named_section_asm_out_destructor (symbol, priority);
9041 # else
9042 default_stabs_asm_out_destructor (symbol, priority);
9043 # endif
9044 #endif
9046 #endif
9048 /* This function places uninitialized global data in the bss section.
9049 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9050 function on the SOM port to prevent uninitialized global data from
9051 being placed in the data section. */
9053 void
9054 pa_asm_output_aligned_bss (FILE *stream,
9055 const char *name,
9056 unsigned HOST_WIDE_INT size,
9057 unsigned int align)
9059 switch_to_section (bss_section);
9061 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9062 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9063 #endif
9065 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9066 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9067 #endif
9069 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9070 ASM_OUTPUT_LABEL (stream, name);
9071 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9074 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9075 that doesn't allow the alignment of global common storage to be directly
9076 specified. The SOM linker aligns common storage based on the rounded
9077 value of the NUM_BYTES parameter in the .comm directive. It's not
9078 possible to use the .align directive as it doesn't affect the alignment
9079 of the label associated with a .comm directive. */
9081 void
9082 pa_asm_output_aligned_common (FILE *stream,
9083 const char *name,
9084 unsigned HOST_WIDE_INT size,
9085 unsigned int align)
9087 unsigned int max_common_align;
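/* Maximum alignment (in bits, like ALIGN) that the SOM linker's
   rounding of the .comm size can guarantee: 128 for 64-bit, and for
   32-bit either 256 (objects of 4096 bytes or more) or 64.  */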
9089 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9090 if (align > max_common_align)
9092 /* Alignment exceeds maximum alignment for global common data. */
9093 align = max_common_align;
9096 switch_to_section (bss_section);
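/* Emit MAX (size, align / BITS_PER_UNIT) as the .comm size so that
   the linker's rounding yields at least the requested alignment.  */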
9098 assemble_name (stream, name);
9099 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9100 MAX (size, align / BITS_PER_UNIT));
9103 /* We can't use .comm for local common storage as the SOM linker effectively
9104 treats the symbol as universal and uses the same storage for local symbols
9105 with the same name in different object files. The .block directive
9106 reserves an uninitialized block of storage. However, it's not common
9107 storage. Fortunately, GCC never requests common storage with the same
9108 name in any given translation unit. */
9110 void
9111 pa_asm_output_aligned_local (FILE *stream,
9112 const char *name,
9113 unsigned HOST_WIDE_INT size,
9114 unsigned int align)
9116 switch_to_section (bss_section);
9117 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9119 #ifdef LOCAL_ASM_OP
9120 fprintf (stream, "%s", LOCAL_ASM_OP);
9121 assemble_name (stream, name);
9122 fprintf (stream, "\n");
9123 #endif
9125 ASM_OUTPUT_LABEL (stream, name);
9126 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9129 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9130 use in fmpysub instructions. */
9131 int
9132 pa_fmpysuboperands (rtx *operands)
9134 machine_mode mode = GET_MODE (operands[0]);
9136 /* Must be a floating point mode. */
9137 if (mode != SFmode && mode != DFmode)
9138 return 0;
9140 /* All modes must be the same. */
9141 if (! (mode == GET_MODE (operands[1])
9142 && mode == GET_MODE (operands[2])
9143 && mode == GET_MODE (operands[3])
9144 && mode == GET_MODE (operands[4])
9145 && mode == GET_MODE (operands[5])))
9146 return 0;
9148 /* All operands must be registers. */
9149 if (! (GET_CODE (operands[1]) == REG
9150 && GET_CODE (operands[2]) == REG
9151 && GET_CODE (operands[3]) == REG
9152 && GET_CODE (operands[4]) == REG
9153 && GET_CODE (operands[5]) == REG))
9154 return 0;
9156 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9157 operation, so operands[4] must be the same as operands[3]. */
9158 if (! rtx_equal_p (operands[3], operands[4]))
9159 return 0;
9161 /* multiply cannot feed into subtraction. */
9162 if (rtx_equal_p (operands[5], operands[0]))
9163 return 0;
9165 /* Inout operand of sub cannot conflict with any operands from multiply. */
9166 if (rtx_equal_p (operands[3], operands[0])
9167 || rtx_equal_p (operands[3], operands[1])
9168 || rtx_equal_p (operands[3], operands[2]))
9169 return 0;
9171 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
9172 if (mode == SFmode
9173 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9174 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9175 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9176 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9177 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9178 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9179 return 0;
9181 /* Passed. Operands are suitable for fmpysub. */
9182 return 1;
9185 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9186 constants for a MULT embedded inside a memory address. */
9187 int
9188 pa_mem_shadd_constant_p (int val)
9190 if (val == 2 || val == 4 || val == 8)
9191 return 1;
9192 else
9193 return 0;
9196 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9197 constants for shadd instructions. */
9198 int
9199 pa_shadd_constant_p (int val)
9201 if (val == 1 || val == 2 || val == 3)
9202 return 1;
9203 else
9204 return 0;
9207 /* Return TRUE if INSN branches forward. */
9209 static bool
9210 forward_branch_p (rtx_insn *insn)
9212 rtx lab = JUMP_LABEL (insn);
9214 /* The INSN must have a jump label. */
9215 gcc_assert (lab != NULL_RTX);
9217 if (INSN_ADDRESSES_SET_P ())
9218 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
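/* Insn addresses are unavailable; scan forward from INSN and treat
   the branch as forward if we reach its label.  */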
9220 while (insn)
9222 if (insn == lab)
9223 return true;
9224 else
9225 insn = NEXT_INSN (insn);
9228 return false;
9231 /* Output an unconditional move and branch insn. */
9233 const char *
9234 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9236 int length = get_attr_length (insn);
9238 /* These are the cases in which we win. */
9239 if (length == 4)
9240 return "mov%I1b,tr %1,%0,%2";
9242 /* None of the following cases win, but they don't lose either. */
9243 if (length == 8)
9245 if (dbr_sequence_length () == 0)
9247 /* Nothing in the delay slot, fake it by putting the combined
9248 insn (the copy or add) in the delay slot of a bl. */
9249 if (GET_CODE (operands[1]) == CONST_INT)
9250 return "b %2\n\tldi %1,%0";
9251 else
9252 return "b %2\n\tcopy %1,%0";
9254 else
9256 /* Something in the delay slot, but we've got a long branch. */
9257 if (GET_CODE (operands[1]) == CONST_INT)
9258 return "ldi %1,%0\n\tb %2";
9259 else
9260 return "copy %1,%0\n\tb %2";
9264 if (GET_CODE (operands[1]) == CONST_INT)
9265 output_asm_insn ("ldi %1,%0", operands);
9266 else
9267 output_asm_insn ("copy %1,%0", operands);
9268 return pa_output_lbranch (operands[2], insn, 1);
9271 /* Output an unconditional add and branch insn. */
9273 const char *
9274 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9276 int length = get_attr_length (insn);
9278 /* To make life easy we want operand0 to be the shared input/output
9279 operand and operand1 to be the readonly operand. */
9280 if (operands[0] == operands[1])
9281 operands[1] = operands[2];
9283 /* These are the cases in which we win. */
9284 if (length == 4)
9285 return "add%I1b,tr %1,%0,%3";
9287 /* None of the following cases win, but they don't lose either. */
9288 if (length == 8)
9290 if (dbr_sequence_length () == 0)
9291 /* Nothing in the delay slot, fake it by putting the combined
9292 insn (the copy or add) in the delay slot of a bl. */
9293 return "b %3\n\tadd%I1 %1,%0,%0";
9294 else
9295 /* Something in the delay slot, but we've got a long branch. */
9296 return "add%I1 %1,%0,%0\n\tb %3";
9299 output_asm_insn ("add%I1 %1,%0,%0", operands);
9300 return pa_output_lbranch (operands[3], insn, 1);
9303 /* We use this hook to perform a PA specific optimization which is difficult
9304 to do in earlier passes. */
9306 static void
9307 pa_reorg (void)
9309 remove_useless_addtr_insns (1);
9311 if (pa_cpu < PROCESSOR_8000)
9312 pa_combine_instructions ();
9315 /* The PA has a number of odd instructions which can perform multiple
9316 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9317 it may be profitable to combine two instructions into one instruction
9318 with two outputs. It's not profitable on PA2.0 machines because the
9319 two outputs would take two slots in the reorder buffers.
9321 This routine finds instructions which can be combined and combines
9322 them. We only support some of the potential combinations, and we
9323 only try common ways to find suitable instructions.
9325 * addb can add two registers or a register and a small integer
9326 and jump to a nearby (+-8k) location. Normally the jump to the
9327 nearby location is conditional on the result of the add, but by
9328 using the "true" condition we can make the jump unconditional.
9329 Thus addb can perform two independent operations in one insn.
9331 * movb is similar to addb in that it can perform a reg->reg
9332 or small immediate->reg copy and jump to a nearby (+-8k) location.
9334 * fmpyadd and fmpysub can perform a FP multiply and either an
9335 FP add or FP sub if the operands of the multiply and add/sub are
9336 independent (there are other minor restrictions). Note both
9337 the fmpy and fadd/fsub can in theory move to better spots according
9338 to data dependencies, but for now we require the fmpy stay at a
9339 fixed location.
9341 * Many of the memory operations can perform pre & post updates
9342 of index registers. GCC's pre/post increment/decrement addressing
9343 is far too simple to take advantage of all the possibilities. This
9344 pass may not be suitable since those insns may not be independent.
9346 * comclr can compare two ints or an int and a register, nullify
9347 the following instruction and zero some other register. This
9348 is more difficult to use as it's harder to find an insn which
9349 will generate a comclr than finding something like an unconditional
9350 branch. (conditional moves & long branches create comclr insns).
9352 * Most arithmetic operations can conditionally skip the next
9353 instruction. They can be viewed as "perform this operation
9354 and conditionally jump to this nearby location" (where nearby
9355 is a few insns away). These are difficult to use due to the
9356 branch length restrictions. */
9358 static void
9359 pa_combine_instructions (void)
9361 rtx_insn *anchor;
9363 /* This can get expensive since the basic algorithm is
9364 O(n^2) (or worse). Only do it for -O2 or higher
9365 levels of optimization. */
9366 if (optimize < 2)
9367 return;
9369 /* Walk down the list of insns looking for "anchor" insns which
9370 may be combined with "floating" insns. As the name implies,
9371 "anchor" instructions don't move, while "floating" insns may
9372 move around. */
9373 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9374 rtx_insn *new_rtx = make_insn_raw (par);
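/* NEW_RTX is a scratch two-element PARALLEL; pa_can_combine_p plugs
   candidate patterns into it and runs recog to test whether the
   combination matches, without emitting anything.  */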
9376 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9378 enum attr_pa_combine_type anchor_attr;
9379 enum attr_pa_combine_type floater_attr;
9381 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9382 Also ignore any special USE insns. */
9383 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9384 || GET_CODE (PATTERN (anchor)) == USE
9385 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9386 continue;
9388 anchor_attr = get_attr_pa_combine_type (anchor);
9389 /* See if anchor is an insn suitable for combination. */
9390 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9391 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9392 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9393 && ! forward_branch_p (anchor)))
9395 rtx_insn *floater;
9397 for (floater = PREV_INSN (anchor);
9398 floater;
9399 floater = PREV_INSN (floater))
9401 if (NOTE_P (floater)
9402 || (NONJUMP_INSN_P (floater)
9403 && (GET_CODE (PATTERN (floater)) == USE
9404 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9405 continue;
9407 /* Anything except a regular INSN will stop our search. */
9408 if (! NONJUMP_INSN_P (floater))
9410 floater = NULL;
9411 break;
9414 /* See if FLOATER is suitable for combination with the
9415 anchor. */
9416 floater_attr = get_attr_pa_combine_type (floater);
9417 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9418 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9419 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9420 && floater_attr == PA_COMBINE_TYPE_FMPY))
9422 /* If ANCHOR and FLOATER can be combined, then we're
9423 done with this pass. */
9424 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9425 SET_DEST (PATTERN (floater)),
9426 XEXP (SET_SRC (PATTERN (floater)), 0),
9427 XEXP (SET_SRC (PATTERN (floater)), 1)))
9428 break;
9431 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9432 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9434 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9436 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9437 SET_DEST (PATTERN (floater)),
9438 XEXP (SET_SRC (PATTERN (floater)), 0),
9439 XEXP (SET_SRC (PATTERN (floater)), 1)))
9440 break;
9442 else
9444 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9445 SET_DEST (PATTERN (floater)),
9446 SET_SRC (PATTERN (floater)),
9447 SET_SRC (PATTERN (floater))))
9448 break;
9453 /* If we didn't find anything on the backwards scan try forwards. */
9454 if (!floater
9455 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9456 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9458 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9460 if (NOTE_P (floater)
9461 || (NONJUMP_INSN_P (floater)
9462 && (GET_CODE (PATTERN (floater)) == USE
9463 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9465 continue;
9467 /* Anything except a regular INSN will stop our search. */
9468 if (! NONJUMP_INSN_P (floater))
9470 floater = NULL;
9471 break;
9474 /* See if FLOATER is suitable for combination with the
9475 anchor. */
9476 floater_attr = get_attr_pa_combine_type (floater);
9477 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9478 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9479 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9480 && floater_attr == PA_COMBINE_TYPE_FMPY))
9482 /* If ANCHOR and FLOATER can be combined, then we're
9483 done with this pass. */
9484 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9485 SET_DEST (PATTERN (floater)),
9486 XEXP (SET_SRC (PATTERN (floater)),
9487 0),
9488 XEXP (SET_SRC (PATTERN (floater)),
9489 1)))
9490 break;
9495 /* FLOATER will be non-NULL if we found a suitable floating
9496 insn for combination with ANCHOR. */
9497 if (floater
9498 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9499 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9501 /* Emit the new instruction and delete the old anchor. */
9502 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9503 copy_rtx (PATTERN (floater)));
9504 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9505 emit_insn_before (temp, anchor);
9507 SET_INSN_DELETED (anchor);
9509 /* Emit a special USE insn for FLOATER, then delete
9510 the floating insn. */
9511 temp = copy_rtx (PATTERN (floater));
9512 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9513 delete_insn (floater);
9515 continue;
9517 else if (floater
9518 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9520 /* Emit the new_jump instruction and delete the old anchor. */
9521 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9522 copy_rtx (PATTERN (floater)));
9523 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9524 temp = emit_jump_insn_before (temp, anchor);
9526 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9527 SET_INSN_DELETED (anchor);
9529 /* Emit a special USE insn for FLOATER, then delete
9530 the floating insn. */
9531 temp = copy_rtx (PATTERN (floater));
9532 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9533 delete_insn (floater);
9534 continue;
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
                  int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
          !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider: one
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */

int
pa_insn_refs_are_delayed (rtx_insn *insn)
{
  return ((NONJUMP_INSN_P (insn)
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}

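/* Hypothetical illustration of the effect: treating references as delayed
   lets reorg fill the delay slot of a millicode call, e.g.

     ldi 10,%r25             ; argument setup
     bl $$mulI,%r31
     nop

   can become

     bl $$mulI,%r31
     ldi 10,%r25             ; now executes in the delay slot

   The millicode routine and registers shown are illustrative only.  */
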
/* Promote the return value, but not the arguments.  */

static machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

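/* For example (illustrative of the hook above): a function declared to
   return "short" has its return value widened to word_mode by
   promote_mode, while a "short" argument keeps HImode here because
   for_return is 0 for arguments.  */
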
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || VECTOR_TYPE_P (valtype))
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
        return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (valsize > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (SCALAR_FLOAT_TYPE_P (valtype)
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

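/* Illustrative example of the PA64 aggregate case above: a 16-byte
   struct return is described as

     (parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
                    (expr_list (reg:DI 29) (const_int 8))])

   i.e. the value is left justified in GR 28 and GR 29.  */
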
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
                  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over argument ARG.  */

static void
pa_function_arg_advance (cumulative_args_t cum_v,
                         const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

  cum->nargs_prototype--;
  cum->words += (arg_size
                 + ((cum->words & 01)
                    && arg.type != NULL_TREE
                    && arg_size > 1));
}

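/* Worked example of the advance above: if cum->words is 1 (odd) and the
   next argument is a DFmode value with arg_size 2 and a non-null type,
   the conditional contributes one pad word, so cum->words advances by 3
   and the argument itself occupies an even, double word aligned slot.  */
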
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (arg.end_marker_p ())
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);
  if (!arg_size)
    return NULL_RTX;

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || VECTOR_TYPE_P (type))))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || VECTOR_TYPE_P (type))))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}

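/* Illustrative example of the dual-location case above: an unprototyped
   SFmode argument in the first slot of a 64-bit call is described as

     (parallel:SF [(expr_list (reg:SF 32) (const_int 0))
                   (expr_list (reg:SF 26) (const_int 0))])

   where hard register 32 is fr4 and 26 is %r26, so the caller loads both
   the FP and the general register.  The register numbers are schematic,
   taken from fpr_reg_base/gpr_reg_base for cum->words == 0.  */
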
/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
                     ? (integer_zerop (TYPE_SIZE (type))
                        || !TREE_CONSTANT (TYPE_SIZE (type))
                        || int_size_in_bytes (type) <= UNITS_PER_WORD)
                     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

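/* For example: an SImode argument reports PARM_BOUNDARY, while a DFmode
   or 16-byte struct argument reports MAX_PARM_BOUNDARY and is therefore
   placed on a double word boundary.  A zero-sized or variable-sized type
   is deliberately treated as single word above.  */
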
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;
  int arg_size;

  if (!TARGET_64BIT)
    return 0;

  arg_size = pa_function_arg_size (arg.mode, arg.type);
  if (arg_size > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + arg_size <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}

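/* Worked example (64-bit runtime, UNITS_PER_WORD == 8): with
   cum->words == 7 and a two-word argument, 7 + 0 + 2 > 8 but 7 < 8, so
   the argument is split and (8 - 7 - 0) * 8 == 8 bytes are passed in the
   last register slot with the remainder on the stack.  */
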
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.cc will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const char *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
                             "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols.  This is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
     This puts constant data not needing relocation into the $TEXT$ space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !(reloc & pa_reloc_rw_mask ()))
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp)
           && !(reloc & pa_reloc_rw_mask ()))
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

/* Implement pa_elf_select_rtx_section.  If X is a function label operand
   and the function is in a COMDAT group, place the plabel reference in the
   .data.rel.ro.local section.  The linker ignores references to symbols in
   discarded sections from this section.  */

static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
                           unsigned HOST_WIDE_INT align)
{
  if (function_label_operand (x, VOIDmode))
    {
      tree decl = SYMBOL_REF_DECL (x);

      if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
        return get_named_section (NULL, ".data.rel.ro.local", 1);
    }

  return default_elf_select_rtx_section (mode, x, align);
}

/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
    return 3;

  /* HP linker does not support global relocs in readonly memory.  */
  return TARGET_SOM ? 2 : 0;
}

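/* Reading the mask (explanatory note): varasm computes RELOC as 1 for
   relocations to local data and 2 for relocations to global data.  A
   mask of 3 therefore forces anything needing relocation out of
   read-only sections, while the SOM value of 2 only evicts global
   relocations, matching the HP linker limitation noted above.  */
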
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}

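/* For example: a 12-byte struct is returned in memory on the 32-bit
   ports (12 > 8) but in registers on PA64 (12 <= 16); a variably sized
   or empty type yields a non-positive size and is likewise forced into
   memory.  */
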
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
pa_can_change_mode_class (machine_mode from, machine_mode to,
                          reg_class_t rclass)
{
  if (from == to)
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return false;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
     except for DImode to SImode on the 64-bit target.  It is handled by
     register renaming in pa_print_operand.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return TARGET_64BIT && from == DImode && to == SImode;

  /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return false;

  return true;
}

/* Implement TARGET_MODES_TIEABLE_P.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
   TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

static bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      if (TARGET_PA_20)
        {
          fputs ("\tmfia %r20\n", f);
          fputs ("\tldw 48(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 52(%r20),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          if (ASSEMBLER_DIALECT == 0)
            {
              fputs ("\tbl .+8,%r20\n", f);
              fputs ("\tdepi 0,31,2,%r20\n", f);
            }
          else
            {
              fputs ("\tb,l .+8,%r20\n", f);
              fputs ("\tdepwi 0,31,2,%r20\n", f);
            }
          fputs ("\tldw 40(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          if (ASSEMBLER_DIALECT == 0)
            fputs ("\tdepi 0,31,2,%r22\n", f);
          else
            fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 44(%r20),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r27\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\tldd 16(%r27),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 24(%r27),%r27\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 48.
   Move the static chain value to trampoline template at offset 52.
   Move the trampoline address to trampoline template at offset 56.
   Move r19 to trampoline template at offset 60.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
                                                            r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 56.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
  return addr;
}

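/* Note on the offset 58 above (explanatory observation, not from the ABI
   text): the plabel starts at offset 56, and the extra 2 appears to set
   the low-order "plabel" bit that indirect call code tests (compare the
   "bb,>=,n %r22,30" test in the trampoline template) to distinguish a
   pointer to a plabel from a plain code address.  */
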
static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
        return TFmode;
    }

  return VOIDmode;
}

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
                     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
                  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
           || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
          && REG_P (XEXP (x, 0))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
         so check it first.  */
      if (REG_P (XEXP (x, 1))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
                     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
        base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
               && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                          : REG_OK_FOR_BASE_P (XEXP (x, 0))))
        base = XEXP (x, 0), index = XEXP (x, 1);
      else
        return false;

      if (GET_CODE (index) == CONST_INT)
        {
          /* Short 5-bit displacements always okay.  */
          if (INT_5_BITS (index))
            return true;

          if (!base14_operand (index, mode))
            return false;

          /* Long 14-bit displacements always okay for these cases.  */
          if (INT14_OK_STRICT
              || mode == QImode
              || mode == HImode)
            return true;

          /* A secondary reload may be needed to adjust the displacement
             of floating-point accesses when STRICT is nonzero.  */
          if (strict)
            return false;

          /* We get significantly better code if we allow long displacements
             before reload for all accesses.  Instructions must satisfy their
             constraints after reload, so we must have an integer access.
             Return true for both cases.  */
          return true;
        }

      if (!TARGET_DISABLE_INDEXING
          /* Only accept the "canonical" INDEX+BASE operand order
             on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              ? REG_P (index)
              : (base == XEXP (x, 1) && REG_P (index)
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (base))
                     || REG_POINTER (base))
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (index))
                     || !REG_POINTER (index))))
          && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
                     : REG_OK_FOR_INDEX_P (index))
          && borx_reg_operand (base, Pmode)
          && borx_reg_operand (index, Pmode))
        return true;

      if (!TARGET_DISABLE_INDEXING
          && GET_CODE (index) == MULT
          /* Only accept base operands with the REG_POINTER flag prior to
             reload on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              || (base == XEXP (x, 1)
                  && (reload_completed
                      || (reload_in_progress && HARD_REGISTER_P (base))
                      || REG_POINTER (base))))
          && REG_P (XEXP (index, 0))
          && GET_MODE (XEXP (index, 0)) == Pmode
          && MODE_OK_FOR_SCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
                     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
          && GET_CODE (XEXP (index, 1)) == CONST_INT
          && INTVAL (XEXP (index, 1))
             == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
          && borx_reg_operand (base, Pmode))
        return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
        y = SUBREG_REG (y);

      if (REG_P (y)
          && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
                     : REG_OK_FOR_BASE_P (y)))
        {
          /* Needed for -fPIC */
          if (mode == Pmode
              && GET_CODE (XEXP (x, 1)) == UNSPEC)
            return true;

          if (!INT14_OK_STRICT
              && (strict || !(reload_in_progress || reload_completed))
              && mode != QImode
              && mode != HImode)
            return false;

          if (CONSTANT_P (XEXP (x, 1)))
            return true;
        }
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}

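/* Examples of addresses the function above accepts (schematic RTL):

     (reg:SI 1)                                     REG
     (plus:SI (reg:SI 1) (const_int 12))            REG+SMALLINT
     (plus:SI (reg:SI 2) (reg:SI 1))                INDEX+BASE, where the
                                                    base has REG_POINTER set
     (plus:SI (mult:SI (reg:SI 2) (const_int 4))
              (reg:SI 1))                           scaled index for SImode

   subject to the strictness, space register and mode checks spelled out
   in the code.  */
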
/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

        memory (X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1      Round up.
        else
          Y = (<large int> & ~mask)                 Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
                              int opnum, int type,
                              int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
          && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
                                      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~mask) + mask + 1;
      else
        newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
          && (GET_MODE_CLASS (mode) == MODE_FLOAT
              || (TARGET_64BIT && (mode) == DImode)))
        newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
        {
          temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
                               GEN_INT (newoffset));
          ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
          push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          return ad;
        }
    }

  return NULL_RTX;
}

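/* Worked example of the transformation above: for an SFmode access with
   !INT14_OK_STRICT, mask is 0x1f.  Given X + 0x1234, the low bits 0x14
   are >= 0x10, so Y = 0x1220 + 0x20 = 0x1240 and the address becomes
   (X + 0x1240) + (-0xc); the base offset fits a 14-bit displacement and
   the residual -12 fits a 5-bit displacement.  */
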
/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  if (!TARGET_SOM)
    fputs ("\t.align 4\n", asm_out_file);
  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
        (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t, const function_arg_info &)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  */

int
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);

  /* The 64-bit runtime does not restrict the size of stack frames,
     but the gcc calling conventions limit argument sizes to 1G.  Our
     prologue/epilogue code limits frame sizes to just under 32 bits.
     1G is also the maximum frame size that can be handled by the HPUX
     unwind descriptor.  Since very large TYPE_SIZE_UNIT values can
     occur for (parallel:BLK []), we need to ignore large arguments
     passed by value.  */
  if (size >= (1 << (HOST_BITS_PER_INT - 2)))
    size = 0;
  return (int) CEIL (size, UNITS_PER_WORD);
}

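/* Worked example of the computation above: a 10-byte BLKmode struct
   gives CEIL (10, 4) == 3 argument words on the 32-bit ports and
   CEIL (10, 8) == 2 on the 64-bit port; an oversized argument (at
   least 2**30 bytes with a 32-bit HOST int) is clamped to size 0 and
   so occupies no argument words.  */
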
#include "gt-pa.h"