Fix handling of large arguments passed by value.
[official-gcc.git] / gcc / config / pa / pa.cc
blobdb633b275e5ebf50ee5b6fc65283908fd9bb2421
1 /* Subroutines for insn-output.cc for HPPA.
2 Copyright (C) 1992-2023 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "varasm.h"
45 #include "calls.h"
46 #include "output.h"
47 #include "except.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "reload.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "cfgrtl.h"
54 #include "opts.h"
55 #include "builtins.h"
57 /* This file should be included last. */
58 #include "target-def.h"
60 /* Return nonzero if there is a bypass for the output of
61 OUT_INSN and the fp store IN_INSN. */
62 int
63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
65 machine_mode store_mode;
66 machine_mode other_mode;
67 rtx set;
69 if (recog_memoized (in_insn) < 0
70 || (get_attr_type (in_insn) != TYPE_FPSTORE
71 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
72 || recog_memoized (out_insn) < 0)
73 return 0;
75 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
77 set = single_set (out_insn);
78 if (!set)
79 return 0;
81 other_mode = GET_MODE (SET_SRC (set));
83 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
87 #ifndef DO_FRAME_NOTES
88 #ifdef INCOMING_RETURN_ADDR_RTX
89 #define DO_FRAME_NOTES 1
90 #else
91 #define DO_FRAME_NOTES 0
92 #endif
93 #endif
95 static void pa_option_override (void);
96 static void copy_reg_pointer (rtx, rtx);
97 static void fix_range (const char *);
98 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
99 reg_class_t);
100 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
101 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
102 static inline rtx force_mode (machine_mode, rtx);
103 static void pa_reorg (void);
104 static void pa_combine_instructions (void);
105 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
106 rtx, rtx);
107 static bool forward_branch_p (rtx_insn *);
108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
110 static int compute_cpymem_length (rtx_insn *);
111 static int compute_clrmem_length (rtx_insn *);
112 static bool pa_assemble_integer (rtx, unsigned int, int);
113 static void remove_useless_addtr_insns (int);
114 static void store_reg (int, HOST_WIDE_INT, int);
115 static void store_reg_modify (int, int, HOST_WIDE_INT);
116 static void load_reg (int, HOST_WIDE_INT, int);
117 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
118 static rtx pa_function_value (const_tree, const_tree, bool);
119 static rtx pa_libcall_value (machine_mode, const_rtx);
120 static bool pa_function_value_regno_p (const unsigned int);
121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
123 static void update_total_code_bytes (unsigned int);
124 static void pa_output_function_epilogue (FILE *);
125 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
126 static int pa_issue_rate (void);
127 static int pa_reloc_rw_mask (void);
128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
129 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
130 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
131 ATTRIBUTE_UNUSED;
132 static void pa_encode_section_info (tree, rtx, int);
133 static const char *pa_strip_name_encoding (const char *);
134 static bool pa_function_ok_for_sibcall (tree, tree);
135 static void pa_globalize_label (FILE *, const char *)
136 ATTRIBUTE_UNUSED;
137 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
138 HOST_WIDE_INT, tree);
139 #if !defined(USE_COLLECT2)
140 static void pa_asm_out_constructor (rtx, int);
141 static void pa_asm_out_destructor (rtx, int);
142 #endif
143 static void pa_init_builtins (void);
144 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
145 static rtx hppa_builtin_saveregs (void);
146 static void hppa_va_start (tree, rtx);
147 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
148 static bool pa_scalar_mode_supported_p (scalar_mode);
149 static bool pa_commutative_p (const_rtx x, int outer_code);
150 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
151 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
152 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
162 static void output_deferred_plabels (void);
163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
164 static void pa_file_end (void);
165 static void pa_init_libfuncs (void);
166 static rtx pa_struct_value_rtx (tree, int);
167 static bool pa_pass_by_reference (cumulative_args_t,
168 const function_arg_info &);
169 static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
170 static void pa_function_arg_advance (cumulative_args_t,
171 const function_arg_info &);
172 static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
173 static pad_direction pa_function_arg_padding (machine_mode, const_tree);
174 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
175 static struct machine_function * pa_init_machine_status (void);
176 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
177 machine_mode,
178 secondary_reload_info *);
179 static bool pa_secondary_memory_needed (machine_mode,
180 reg_class_t, reg_class_t);
181 static void pa_extra_live_on_entry (bitmap);
182 static machine_mode pa_promote_function_mode (const_tree,
183 machine_mode, int *,
184 const_tree, int);
186 static void pa_asm_trampoline_template (FILE *);
187 static void pa_trampoline_init (rtx, tree, rtx);
188 static rtx pa_trampoline_adjust_address (rtx);
189 static rtx pa_delegitimize_address (rtx);
190 static bool pa_print_operand_punct_valid_p (unsigned char);
191 static rtx pa_internal_arg_pointer (void);
192 static bool pa_can_eliminate (const int, const int);
193 static void pa_conditional_register_usage (void);
194 static machine_mode pa_c_mode_for_suffix (char);
195 static section *pa_function_section (tree, enum node_frequency, bool, bool);
196 static bool pa_cannot_force_const_mem (machine_mode, rtx);
197 static bool pa_legitimate_constant_p (machine_mode, rtx);
198 static unsigned int pa_section_type_flags (tree, const char *, int);
199 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
200 static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
201 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
202 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
203 static bool pa_modes_tieable_p (machine_mode, machine_mode);
204 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
205 static HOST_WIDE_INT pa_starting_frame_offset (void);
206 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
208 /* The following extra sections are only used for SOM. */
209 static GTY(()) section *som_readonly_data_section;
210 static GTY(()) section *som_one_only_readonly_data_section;
211 static GTY(()) section *som_one_only_data_section;
212 static GTY(()) section *som_tm_clone_table_section;
214 /* Counts for the number of callee-saved general and floating point
215 registers which were saved by the current function's prologue. */
216 static int gr_saved, fr_saved;
218 /* Boolean indicating whether the return pointer was saved by the
219 current function's prologue. */
220 static bool rp_saved;
222 static rtx find_addr_reg (rtx);
224 /* Keep track of the number of bytes we have output in the CODE subspace
225 during this compilation so we'll know when to emit inline long-calls. */
226 unsigned long total_code_bytes;
228 /* The last address of the previous function plus the number of bytes in
229 associated thunks that have been output. This is used to determine if
230 a thunk can use an IA-relative branch to reach its target function. */
231 static unsigned int last_address;
233 /* Variables to handle plabels that we discover are necessary at assembly
234 output time. They are output after the current function. */
235 struct GTY(()) deferred_plabel
237 rtx internal_label;
238 rtx symbol;
240 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
241 deferred_plabels;
242 static size_t n_deferred_plabels = 0;
244 /* Initialize the GCC target structure. */
246 #undef TARGET_OPTION_OVERRIDE
247 #define TARGET_OPTION_OVERRIDE pa_option_override
249 #undef TARGET_ASM_ALIGNED_HI_OP
250 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
251 #undef TARGET_ASM_ALIGNED_SI_OP
252 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
253 #undef TARGET_ASM_ALIGNED_DI_OP
254 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
255 #undef TARGET_ASM_UNALIGNED_HI_OP
256 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
257 #undef TARGET_ASM_UNALIGNED_SI_OP
258 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
259 #undef TARGET_ASM_UNALIGNED_DI_OP
260 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
261 #undef TARGET_ASM_INTEGER
262 #define TARGET_ASM_INTEGER pa_assemble_integer
264 #undef TARGET_ASM_FUNCTION_EPILOGUE
265 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
267 #undef TARGET_FUNCTION_VALUE
268 #define TARGET_FUNCTION_VALUE pa_function_value
269 #undef TARGET_LIBCALL_VALUE
270 #define TARGET_LIBCALL_VALUE pa_libcall_value
271 #undef TARGET_FUNCTION_VALUE_REGNO_P
272 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
274 #undef TARGET_LEGITIMIZE_ADDRESS
275 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
277 #undef TARGET_SCHED_ADJUST_COST
278 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
279 #undef TARGET_SCHED_ISSUE_RATE
280 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
282 #undef TARGET_ENCODE_SECTION_INFO
283 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
284 #undef TARGET_STRIP_NAME_ENCODING
285 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
287 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
288 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
290 #undef TARGET_COMMUTATIVE_P
291 #define TARGET_COMMUTATIVE_P pa_commutative_p
293 #undef TARGET_ASM_OUTPUT_MI_THUNK
294 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
295 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
296 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
298 #undef TARGET_ASM_FILE_END
299 #define TARGET_ASM_FILE_END pa_file_end
301 #undef TARGET_ASM_RELOC_RW_MASK
302 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
304 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
305 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
307 #if !defined(USE_COLLECT2)
308 #undef TARGET_ASM_CONSTRUCTOR
309 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
310 #undef TARGET_ASM_DESTRUCTOR
311 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
312 #endif
314 #undef TARGET_INIT_BUILTINS
315 #define TARGET_INIT_BUILTINS pa_init_builtins
317 #undef TARGET_EXPAND_BUILTIN
318 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
320 #undef TARGET_REGISTER_MOVE_COST
321 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
322 #undef TARGET_RTX_COSTS
323 #define TARGET_RTX_COSTS hppa_rtx_costs
324 #undef TARGET_ADDRESS_COST
325 #define TARGET_ADDRESS_COST hppa_address_cost
327 #undef TARGET_MACHINE_DEPENDENT_REORG
328 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
330 #undef TARGET_INIT_LIBFUNCS
331 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
333 #undef TARGET_PROMOTE_FUNCTION_MODE
334 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
335 #undef TARGET_PROMOTE_PROTOTYPES
336 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
338 #undef TARGET_STRUCT_VALUE_RTX
339 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
340 #undef TARGET_RETURN_IN_MEMORY
341 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
342 #undef TARGET_MUST_PASS_IN_STACK
343 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_PASS_BY_REFERENCE
345 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
346 #undef TARGET_CALLEE_COPIES
347 #define TARGET_CALLEE_COPIES pa_callee_copies
348 #undef TARGET_ARG_PARTIAL_BYTES
349 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
350 #undef TARGET_FUNCTION_ARG
351 #define TARGET_FUNCTION_ARG pa_function_arg
352 #undef TARGET_FUNCTION_ARG_ADVANCE
353 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
354 #undef TARGET_FUNCTION_ARG_PADDING
355 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
356 #undef TARGET_FUNCTION_ARG_BOUNDARY
357 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
359 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
360 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
361 #undef TARGET_EXPAND_BUILTIN_VA_START
362 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
366 #undef TARGET_SCALAR_MODE_SUPPORTED_P
367 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
369 #undef TARGET_CANNOT_FORCE_CONST_MEM
370 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
372 #undef TARGET_SECONDARY_RELOAD
373 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
374 #undef TARGET_SECONDARY_MEMORY_NEEDED
375 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
377 #undef TARGET_EXTRA_LIVE_ON_ENTRY
378 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
380 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
381 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
382 #undef TARGET_TRAMPOLINE_INIT
383 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
384 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
385 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
386 #undef TARGET_DELEGITIMIZE_ADDRESS
387 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
388 #undef TARGET_INTERNAL_ARG_POINTER
389 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
390 #undef TARGET_CAN_ELIMINATE
391 #define TARGET_CAN_ELIMINATE pa_can_eliminate
392 #undef TARGET_CONDITIONAL_REGISTER_USAGE
393 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
394 #undef TARGET_C_MODE_FOR_SUFFIX
395 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
396 #undef TARGET_ASM_FUNCTION_SECTION
397 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
399 #undef TARGET_LEGITIMATE_CONSTANT_P
400 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
403 #undef TARGET_LEGITIMATE_ADDRESS_P
404 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
406 #undef TARGET_LRA_P
407 #define TARGET_LRA_P hook_bool_void_false
409 #undef TARGET_HARD_REGNO_NREGS
410 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
411 #undef TARGET_HARD_REGNO_MODE_OK
412 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
413 #undef TARGET_MODES_TIEABLE_P
414 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
416 #undef TARGET_CAN_CHANGE_MODE_CLASS
417 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
419 #undef TARGET_CONSTANT_ALIGNMENT
420 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
422 #undef TARGET_STARTING_FRAME_OFFSET
423 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
425 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
426 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
428 struct gcc_target targetm = TARGET_INITIALIZER;
430 /* Parse the -mfixed-range= option string. */
432 static void
433 fix_range (const char *const_str)
435 int i, first, last;
436 char *str, *dash, *comma;
438 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
439 REG2 are either register names or register numbers. The effect
440 of this option is to mark the registers in the range from REG1 to
441 REG2 as ``fixed'' so they won't be used by the compiler. This is
442 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
444 i = strlen (const_str);
445 str = (char *) alloca (i + 1);
446 memcpy (str, const_str, i + 1);
448 while (1)
450 dash = strchr (str, '-');
451 if (!dash)
453 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
454 return;
456 *dash = '\0';
458 comma = strchr (dash + 1, ',');
459 if (comma)
460 *comma = '\0';
462 first = decode_reg_name (str);
463 if (first < 0)
465 warning (0, "unknown register name: %s", str);
466 return;
469 last = decode_reg_name (dash + 1);
470 if (last < 0)
472 warning (0, "unknown register name: %s", dash + 1);
473 return;
476 *dash = '-';
478 if (first > last)
480 warning (0, "%s-%s is an empty range", str, dash + 1);
481 return;
484 for (i = first; i <= last; ++i)
485 fixed_regs[i] = call_used_regs[i] = 1;
487 if (!comma)
488 break;
490 *comma = ',';
491 str = comma + 1;
494 /* Check if all floating point registers have been fixed. */
495 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
496 if (!fixed_regs[i])
497 break;
499 if (i > FP_REG_LAST)
500 target_flags |= MASK_SOFT_FLOAT;
503 /* Implement the TARGET_OPTION_OVERRIDE hook. */
505 static void
506 pa_option_override (void)
508 unsigned int i;
509 cl_deferred_option *opt;
510 vec<cl_deferred_option> *v
511 = (vec<cl_deferred_option> *) pa_deferred_options;
513 if (v)
514 FOR_EACH_VEC_ELT (*v, i, opt)
516 switch (opt->opt_index)
518 case OPT_mfixed_range_:
519 fix_range (opt->arg);
520 break;
522 default:
523 gcc_unreachable ();
527 if (flag_pic && TARGET_PORTABLE_RUNTIME)
529 warning (0, "PIC code generation is not supported in the portable runtime model");
532 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
534 warning (0, "PIC code generation is not compatible with fast indirect calls");
537 if (! TARGET_GAS && write_symbols != NO_DEBUG)
539 warning (0, "%<-g%> is only supported when using GAS on this processor");
540 warning (0, "%<-g%> option disabled");
541 write_symbols = NO_DEBUG;
544 if (TARGET_64BIT && TARGET_HPUX)
546 /* DWARF5 is not supported by gdb. Don't emit DWARF5 unless
547 specifically selected. */
548 if (!OPTION_SET_P (dwarf_strict))
549 dwarf_strict = 1;
550 if (!OPTION_SET_P (dwarf_version))
551 dwarf_version = 4;
554 /* We only support the "big PIC" model now. And we always generate PIC
555 code when in 64bit mode. */
556 if (flag_pic == 1 || TARGET_64BIT)
557 flag_pic = 2;
559 /* Disable -freorder-blocks-and-partition as we don't support hot and
560 cold partitioning. */
561 if (flag_reorder_blocks_and_partition)
563 inform (input_location,
564 "%<-freorder-blocks-and-partition%> does not work "
565 "on this architecture");
566 flag_reorder_blocks_and_partition = 0;
567 flag_reorder_blocks = 1;
570 /* Disable -fstack-protector to suppress warning. */
571 flag_stack_protect = 0;
573 /* We can't guarantee that .dword is available for 32-bit targets. */
574 if (UNITS_PER_WORD == 4)
575 targetm.asm_out.aligned_op.di = NULL;
577 /* The unaligned ops are only available when using GAS. */
578 if (!TARGET_GAS)
580 targetm.asm_out.unaligned_op.hi = NULL;
581 targetm.asm_out.unaligned_op.si = NULL;
582 targetm.asm_out.unaligned_op.di = NULL;
585 init_machine_status = pa_init_machine_status;
588 enum pa_builtins
590 PA_BUILTIN_COPYSIGNQ,
591 PA_BUILTIN_FABSQ,
592 PA_BUILTIN_INFQ,
593 PA_BUILTIN_HUGE_VALQ,
594 PA_BUILTIN_max
597 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
599 static void
600 pa_init_builtins (void)
602 #ifdef DONT_HAVE_FPUTC_UNLOCKED
604 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
605 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
606 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
608 #endif
609 #if TARGET_HPUX_11
611 tree decl;
613 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
614 set_user_assembler_name (decl, "_Isfinite");
615 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
616 set_user_assembler_name (decl, "_Isfinitef");
618 #endif
620 if (HPUX_LONG_DOUBLE_LIBRARY)
622 tree decl, ftype;
624 /* Under HPUX, the __float128 type is a synonym for "long double". */
625 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
626 "__float128");
628 /* TFmode support builtins. */
629 ftype = build_function_type_list (long_double_type_node,
630 long_double_type_node,
631 NULL_TREE);
632 decl = add_builtin_function ("__builtin_fabsq", ftype,
633 PA_BUILTIN_FABSQ, BUILT_IN_MD,
634 "_U_Qfabs", NULL_TREE);
635 TREE_READONLY (decl) = 1;
636 pa_builtins[PA_BUILTIN_FABSQ] = decl;
638 ftype = build_function_type_list (long_double_type_node,
639 long_double_type_node,
640 long_double_type_node,
641 NULL_TREE);
642 decl = add_builtin_function ("__builtin_copysignq", ftype,
643 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
644 "_U_Qfcopysign", NULL_TREE);
645 TREE_READONLY (decl) = 1;
646 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
648 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
649 decl = add_builtin_function ("__builtin_infq", ftype,
650 PA_BUILTIN_INFQ, BUILT_IN_MD,
651 NULL, NULL_TREE);
652 pa_builtins[PA_BUILTIN_INFQ] = decl;
654 decl = add_builtin_function ("__builtin_huge_valq", ftype,
655 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
656 NULL, NULL_TREE);
657 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
661 static rtx
662 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
663 machine_mode mode ATTRIBUTE_UNUSED,
664 int ignore ATTRIBUTE_UNUSED)
666 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
667 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
669 switch (fcode)
671 case PA_BUILTIN_FABSQ:
672 case PA_BUILTIN_COPYSIGNQ:
673 return expand_call (exp, target, ignore);
675 case PA_BUILTIN_INFQ:
676 case PA_BUILTIN_HUGE_VALQ:
678 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
679 REAL_VALUE_TYPE inf;
680 rtx tmp;
682 real_inf (&inf);
683 tmp = const_double_from_real_value (inf, target_mode);
685 tmp = validize_mem (force_const_mem (target_mode, tmp));
687 if (target == 0)
688 target = gen_reg_rtx (target_mode);
690 emit_move_insn (target, tmp);
691 return target;
694 default:
695 gcc_unreachable ();
698 return NULL_RTX;
701 /* Function to init struct machine_function.
702 This will be called, via a pointer variable,
703 from push_function_context. */
705 static struct machine_function *
706 pa_init_machine_status (void)
708 return ggc_cleared_alloc<machine_function> ();
711 /* If FROM is a probable pointer register, mark TO as a probable
712 pointer register with the same pointer alignment as FROM. */
714 static void
715 copy_reg_pointer (rtx to, rtx from)
717 if (REG_POINTER (from))
718 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
721 /* Return 1 if X contains a symbolic expression. We know these
722 expressions will have one of a few well defined forms, so
723 we need only check those forms. */
725 pa_symbolic_expression_p (rtx x)
728 /* Strip off any HIGH. */
729 if (GET_CODE (x) == HIGH)
730 x = XEXP (x, 0);
732 return symbolic_operand (x, VOIDmode);
735 /* Accept any constant that can be moved in one instruction into a
736 general register. */
738 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
740 /* OK if ldo, ldil, or zdepi, can be used. */
741 return (VAL_14_BITS_P (ival)
742 || pa_ldil_cint_p (ival)
743 || pa_zdepi_cint_p (ival));
746 /* True iff ldil can be used to load this CONST_INT. The least
747 significant 11 bits of the value must be zero and the value must
748 not change sign when extended from 32 to 64 bits. */
750 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
752 unsigned HOST_WIDE_INT x;
754 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
755 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
758 /* True iff zdepi can be used to generate this CONST_INT.
759 zdepi first sign extends a 5-bit signed number to a given field
760 length, then places this field anywhere in a zero. */
762 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
764 unsigned HOST_WIDE_INT lsb_mask, t;
766 /* This might not be obvious, but it's at least fast.
767 This function is critical; we don't have the time loops would take. */
768 lsb_mask = x & -x;
769 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
770 /* Return true iff t is a power of two. */
771 return ((t & (t - 1)) == 0);
774 /* True iff depi or extru can be used to compute (reg & mask).
775 Accept bit pattern like these:
776 0....01....1
777 1....10....0
778 1..10..01..1 */
780 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
782 mask = ~mask;
783 mask += mask & -mask;
784 return (mask & (mask - 1)) == 0;
787 /* True iff depi can be used to compute (reg | MASK). */
789 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
791 mask += mask & -mask;
792 return (mask & (mask - 1)) == 0;
795 /* Legitimize PIC addresses. If the address is already
796 position-independent, we return ORIG. Newly generated
797 position-independent addresses go to REG. If we need more
798 than one register, we lose. */
800 static rtx
801 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
803 rtx pic_ref = orig;
805 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
807 /* Labels need special handling. */
808 if (pic_label_operand (orig, mode))
810 rtx_insn *insn;
812 /* We do not want to go through the movXX expanders here since that
813 would create recursion.
815 Nor do we really want to call a generator for a named pattern
816 since that requires multiple patterns if we want to support
817 multiple word sizes.
819 So instead we just emit the raw set, which avoids the movXX
820 expanders completely. */
821 mark_reg_pointer (reg, BITS_PER_UNIT);
822 insn = emit_insn (gen_rtx_SET (reg, orig));
824 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
825 add_reg_note (insn, REG_EQUAL, orig);
827 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
828 and update LABEL_NUSES because this is not done automatically. */
829 if (reload_in_progress || reload_completed)
831 /* Extract LABEL_REF. */
832 if (GET_CODE (orig) == CONST)
833 orig = XEXP (XEXP (orig, 0), 0);
834 /* Extract CODE_LABEL. */
835 orig = XEXP (orig, 0);
836 add_reg_note (insn, REG_LABEL_OPERAND, orig);
837 /* Make sure we have label and not a note. */
838 if (LABEL_P (orig))
839 LABEL_NUSES (orig)++;
841 crtl->uses_pic_offset_table = 1;
842 return reg;
844 if (GET_CODE (orig) == SYMBOL_REF)
846 rtx_insn *insn;
847 rtx tmp_reg;
849 gcc_assert (reg);
851 /* Before reload, allocate a temporary register for the intermediate
852 result. This allows the sequence to be deleted when the final
853 result is unused and the insns are trivially dead. */
854 tmp_reg = ((reload_in_progress || reload_completed)
855 ? reg : gen_reg_rtx (Pmode));
857 if (function_label_operand (orig, VOIDmode))
859 /* Force function label into memory in word mode. */
860 orig = XEXP (force_const_mem (word_mode, orig), 0);
861 /* Load plabel address from DLT. */
862 emit_move_insn (tmp_reg,
863 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
864 gen_rtx_HIGH (word_mode, orig)));
865 pic_ref
866 = gen_const_mem (Pmode,
867 gen_rtx_LO_SUM (Pmode, tmp_reg,
868 gen_rtx_UNSPEC (Pmode,
869 gen_rtvec (1, orig),
870 UNSPEC_DLTIND14R)));
871 emit_move_insn (reg, pic_ref);
872 /* Now load address of function descriptor. */
873 pic_ref = gen_rtx_MEM (Pmode, reg);
875 else
877 /* Load symbol reference from DLT. */
878 emit_move_insn (tmp_reg,
879 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
880 gen_rtx_HIGH (word_mode, orig)));
881 pic_ref
882 = gen_const_mem (Pmode,
883 gen_rtx_LO_SUM (Pmode, tmp_reg,
884 gen_rtx_UNSPEC (Pmode,
885 gen_rtvec (1, orig),
886 UNSPEC_DLTIND14R)));
889 crtl->uses_pic_offset_table = 1;
890 mark_reg_pointer (reg, BITS_PER_UNIT);
891 insn = emit_move_insn (reg, pic_ref);
893 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
894 set_unique_reg_note (insn, REG_EQUAL, orig);
896 return reg;
898 else if (GET_CODE (orig) == CONST)
900 rtx base;
902 if (GET_CODE (XEXP (orig, 0)) == PLUS
903 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
904 return orig;
906 gcc_assert (reg);
907 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
909 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
910 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
911 base == reg ? 0 : reg);
913 if (GET_CODE (orig) == CONST_INT)
915 if (INT_14_BITS (orig))
916 return plus_constant (Pmode, base, INTVAL (orig));
917 orig = force_reg (Pmode, orig);
919 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
920 /* Likewise, should we set special REG_NOTEs here? */
923 return pic_ref;
926 static GTY(()) rtx gen_tls_tga;
928 static rtx
929 gen_tls_get_addr (void)
931 if (!gen_tls_tga)
932 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
933 return gen_tls_tga;
936 static rtx
937 hppa_tls_call (rtx arg)
939 rtx ret;
941 ret = gen_reg_rtx (Pmode);
942 emit_library_call_value (gen_tls_get_addr (), ret,
943 LCT_CONST, Pmode, arg, Pmode);
945 return ret;
948 static rtx
949 legitimize_tls_address (rtx addr)
951 rtx ret, tmp, t1, t2, tp;
952 rtx_insn *insn;
954 /* Currently, we can't handle anything but a SYMBOL_REF. */
955 if (GET_CODE (addr) != SYMBOL_REF)
956 return addr;
958 switch (SYMBOL_REF_TLS_MODEL (addr))
960 case TLS_MODEL_GLOBAL_DYNAMIC:
961 tmp = gen_reg_rtx (Pmode);
962 if (flag_pic)
963 emit_insn (gen_tgd_load_pic (tmp, addr));
964 else
965 emit_insn (gen_tgd_load (tmp, addr));
966 ret = hppa_tls_call (tmp);
967 break;
969 case TLS_MODEL_LOCAL_DYNAMIC:
970 ret = gen_reg_rtx (Pmode);
971 tmp = gen_reg_rtx (Pmode);
972 start_sequence ();
973 if (flag_pic)
974 emit_insn (gen_tld_load_pic (tmp, addr));
975 else
976 emit_insn (gen_tld_load (tmp, addr));
977 t1 = hppa_tls_call (tmp);
978 insn = get_insns ();
979 end_sequence ();
980 t2 = gen_reg_rtx (Pmode);
981 emit_libcall_block (insn, t2, t1,
982 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
983 UNSPEC_TLSLDBASE));
984 emit_insn (gen_tld_offset_load (ret, addr, t2));
985 break;
987 case TLS_MODEL_INITIAL_EXEC:
988 tp = gen_reg_rtx (Pmode);
989 tmp = gen_reg_rtx (Pmode);
990 ret = gen_reg_rtx (Pmode);
991 emit_insn (gen_tp_load (tp));
992 if (flag_pic)
993 emit_insn (gen_tie_load_pic (tmp, addr));
994 else
995 emit_insn (gen_tie_load (tmp, addr));
996 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
997 break;
999 case TLS_MODEL_LOCAL_EXEC:
1000 tp = gen_reg_rtx (Pmode);
1001 ret = gen_reg_rtx (Pmode);
1002 emit_insn (gen_tp_load (tp));
1003 emit_insn (gen_tle_load (ret, addr, tp));
1004 break;
1006 default:
1007 gcc_unreachable ();
1010 return ret;
1013 /* Helper for hppa_legitimize_address. Given X, return true if it
1014 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1016 This respectively represent canonical shift-add rtxs or scaled
1017 memory addresses. */
1018 static bool
1019 mem_shadd_or_shadd_rtx_p (rtx x)
1021 return ((GET_CODE (x) == ASHIFT
1022 || GET_CODE (x) == MULT)
1023 && GET_CODE (XEXP (x, 1)) == CONST_INT
1024 && ((GET_CODE (x) == ASHIFT
1025 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1026 || (GET_CODE (x) == MULT
1027 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1030 /* Try machine-dependent ways of modifying an illegitimate address
1031 to be legitimate. If we find one, return the new, valid address.
1032 This macro is used in only one place: `memory_address' in explow.cc.
1034 OLDX is the address as it was before break_out_memory_refs was called.
1035 In some cases it is useful to look at this to decide what needs to be done.
1037 It is always safe for this macro to do nothing. It exists to recognize
1038 opportunities to optimize the output.
1040 For the PA, transform:
1042 memory(X + <large int>)
1044 into:
1046 if (<large int> & mask) >= 16
1047 Y = (<large int> & ~mask) + mask + 1 Round up.
1048 else
1049 Y = (<large int> & ~mask) Round down.
1050 Z = X + Y
1051 memory (Z + (<large int> - Y));
1053 This is for CSE to find several similar references, and only use one Z.
1055 X can either be a SYMBOL_REF or REG, but because combine cannot
1056 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1057 D will not fit in 14 bits.
1059 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1060 0x1f as the mask.
1062 MODE_INT references allow displacements which fit in 14 bits, so use
1063 0x3fff as the mask.
1065 This relies on the fact that most mode MODE_FLOAT references will use FP
1066 registers and most mode MODE_INT references will use integer registers.
1067 (In the rare case of an FP register used in an integer MODE, we depend
1068 on secondary reloads to clean things up.)
1071 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1072 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1073 addressing modes to be used).
1075 Note that the addresses passed into hppa_legitimize_address always
1076 come from a MEM, so we only have to match the MULT form on incoming
1077 addresses. But to be future proof we also match the ASHIFT form.
1079 However, this routine always places those shift-add sequences into
1080 registers, so we have to generate the ASHIFT form as our output.
1082 Put X and Z into registers. Then put the entire expression into
1083 a register. */
1086 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1087 machine_mode mode)
1089 rtx orig = x;
1091 /* We need to canonicalize the order of operands in unscaled indexed
1092 addresses since the code that checks if an address is valid doesn't
1093 always try both orders. */
1094 if (!TARGET_NO_SPACE_REGS
1095 && GET_CODE (x) == PLUS
1096 && GET_MODE (x) == Pmode
1097 && REG_P (XEXP (x, 0))
1098 && REG_P (XEXP (x, 1))
1099 && REG_POINTER (XEXP (x, 0))
1100 && !REG_POINTER (XEXP (x, 1)))
1101 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1103 if (tls_referenced_p (x))
1104 return legitimize_tls_address (x);
1105 else if (flag_pic)
1106 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1108 /* Strip off CONST. */
1109 if (GET_CODE (x) == CONST)
1110 x = XEXP (x, 0);
1112 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1113 That should always be safe. */
1114 if (GET_CODE (x) == PLUS
1115 && GET_CODE (XEXP (x, 0)) == REG
1116 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1118 rtx reg = force_reg (Pmode, XEXP (x, 1));
1119 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1122 /* Note we must reject symbols which represent function addresses
1123 since the assembler/linker can't handle arithmetic on plabels. */
1124 if (GET_CODE (x) == PLUS
1125 && GET_CODE (XEXP (x, 1)) == CONST_INT
1126 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1127 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1128 || GET_CODE (XEXP (x, 0)) == REG))
1130 rtx int_part, ptr_reg;
1131 int newoffset;
1132 int offset = INTVAL (XEXP (x, 1));
1133 int mask;
1135 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1136 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
1138 /* Choose which way to round the offset. Round up if we
1139 are >= halfway to the next boundary. */
1140 if ((offset & mask) >= ((mask + 1) / 2))
1141 newoffset = (offset & ~ mask) + mask + 1;
1142 else
1143 newoffset = (offset & ~ mask);
1145 /* If the newoffset will not fit in 14 bits (ldo), then
1146 handling this would take 4 or 5 instructions (2 to load
1147 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1148 add the new offset and the SYMBOL_REF.) Combine cannot
1149 handle 4->2 or 5->2 combinations, so do not create
1150 them. */
1151 if (! VAL_14_BITS_P (newoffset)
1152 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1154 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
1155 rtx tmp_reg
1156 = force_reg (Pmode,
1157 gen_rtx_HIGH (Pmode, const_part));
1158 ptr_reg
1159 = force_reg (Pmode,
1160 gen_rtx_LO_SUM (Pmode,
1161 tmp_reg, const_part));
1163 else
1165 if (! VAL_14_BITS_P (newoffset))
1166 int_part = force_reg (Pmode, GEN_INT (newoffset));
1167 else
1168 int_part = GEN_INT (newoffset);
1170 ptr_reg = force_reg (Pmode,
1171 gen_rtx_PLUS (Pmode,
1172 force_reg (Pmode, XEXP (x, 0)),
1173 int_part));
1175 return plus_constant (Pmode, ptr_reg, offset - newoffset);
1178 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1180 if (GET_CODE (x) == PLUS
1181 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1182 && (OBJECT_P (XEXP (x, 1))
1183 || GET_CODE (XEXP (x, 1)) == SUBREG)
1184 && GET_CODE (XEXP (x, 1)) != CONST)
1186 /* If we were given a MULT, we must fix the constant
1187 as we're going to create the ASHIFT form. */
1188 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1189 if (GET_CODE (XEXP (x, 0)) == MULT)
1190 shift_val = exact_log2 (shift_val);
1192 rtx reg1, reg2;
1193 reg1 = XEXP (x, 1);
1194 if (GET_CODE (reg1) != REG)
1195 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1197 reg2 = XEXP (XEXP (x, 0), 0);
1198 if (GET_CODE (reg2) != REG)
1199 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1201 return force_reg (Pmode,
1202 gen_rtx_PLUS (Pmode,
1203 gen_rtx_ASHIFT (Pmode, reg2,
1204 GEN_INT (shift_val)),
1205 reg1));
1208 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1210 Only do so for floating point modes since this is more speculative
1211 and we lose if it's an integer store. */
1212 if (GET_CODE (x) == PLUS
1213 && GET_CODE (XEXP (x, 0)) == PLUS
1214 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
1215 && (mode == SFmode || mode == DFmode))
1217 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1219 /* If we were given a MULT, we must fix the constant
1220 as we're going to create the ASHIFT form. */
1221 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1222 shift_val = exact_log2 (shift_val);
1224 /* Try and figure out what to use as a base register. */
1225 rtx reg1, reg2, base, idx;
1227 reg1 = XEXP (XEXP (x, 0), 1);
1228 reg2 = XEXP (x, 1);
1229 base = NULL_RTX;
1230 idx = NULL_RTX;
1232 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1233 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1234 it's a base register below. */
1235 if (GET_CODE (reg1) != REG)
1236 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1238 if (GET_CODE (reg2) != REG)
1239 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1241 /* Figure out what the base and index are. */
1243 if (GET_CODE (reg1) == REG
1244 && REG_POINTER (reg1))
1246 base = reg1;
1247 idx = gen_rtx_PLUS (Pmode,
1248 gen_rtx_ASHIFT (Pmode,
1249 XEXP (XEXP (XEXP (x, 0), 0), 0),
1250 GEN_INT (shift_val)),
1251 XEXP (x, 1));
1253 else if (GET_CODE (reg2) == REG
1254 && REG_POINTER (reg2))
1256 base = reg2;
1257 idx = XEXP (x, 0);
1260 if (base == 0)
1261 return orig;
1263 /* If the index adds a large constant, try to scale the
1264 constant so that it can be loaded with only one insn. */
1265 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1266 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1267 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1268 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1270 /* Divide the CONST_INT by the scale factor, then add it to A. */
1271 int val = INTVAL (XEXP (idx, 1));
1272 val /= (1 << shift_val);
1274 reg1 = XEXP (XEXP (idx, 0), 0);
1275 if (GET_CODE (reg1) != REG)
1276 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1278 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1280 /* We can now generate a simple scaled indexed address. */
1281 return
1282 force_reg
1283 (Pmode, gen_rtx_PLUS (Pmode,
1284 gen_rtx_ASHIFT (Pmode, reg1,
1285 GEN_INT (shift_val)),
1286 base));
1289 /* If B + C is still a valid base register, then add them. */
1290 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1291 && INTVAL (XEXP (idx, 1)) <= 4096
1292 && INTVAL (XEXP (idx, 1)) >= -4096)
1294 rtx reg1, reg2;
1296 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1298 reg2 = XEXP (XEXP (idx, 0), 0);
1299 if (GET_CODE (reg2) != CONST_INT)
1300 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1302 return force_reg (Pmode,
1303 gen_rtx_PLUS (Pmode,
1304 gen_rtx_ASHIFT (Pmode, reg2,
1305 GEN_INT (shift_val)),
1306 reg1));
1309 /* Get the index into a register, then add the base + index and
1310 return a register holding the result. */
1312 /* First get A into a register. */
1313 reg1 = XEXP (XEXP (idx, 0), 0);
1314 if (GET_CODE (reg1) != REG)
1315 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1317 /* And get B into a register. */
1318 reg2 = XEXP (idx, 1);
1319 if (GET_CODE (reg2) != REG)
1320 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1322 reg1 = force_reg (Pmode,
1323 gen_rtx_PLUS (Pmode,
1324 gen_rtx_ASHIFT (Pmode, reg1,
1325 GEN_INT (shift_val)),
1326 reg2));
1328 /* Add the result to our base register and return. */
1329 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1333 /* Uh-oh. We might have an address for x[n-100000]. This needs
1334 special handling to avoid creating an indexed memory address
1335 with x-100000 as the base.
1337 If the constant part is small enough, then it's still safe because
1338 there is a guard page at the beginning and end of the data segment.
1340 Scaled references are common enough that we want to try and rearrange the
1341 terms so that we can use indexing for these addresses too. Only
1342 do the optimization for floatint point modes. */
1344 if (GET_CODE (x) == PLUS
1345 && pa_symbolic_expression_p (XEXP (x, 1)))
1347 /* Ugly. We modify things here so that the address offset specified
1348 by the index expression is computed first, then added to x to form
1349 the entire address. */
1351 rtx regx1, regx2, regy1, regy2, y;
1353 /* Strip off any CONST. */
1354 y = XEXP (x, 1);
1355 if (GET_CODE (y) == CONST)
1356 y = XEXP (y, 0);
1358 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1360 /* See if this looks like
1361 (plus (mult (reg) (mem_shadd_const))
1362 (const (plus (symbol_ref) (const_int))))
1364 Where const_int is small. In that case the const
1365 expression is a valid pointer for indexing.
1367 If const_int is big, but can be divided evenly by shadd_const
1368 and added to (reg). This allows more scaled indexed addresses. */
1369 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1370 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1371 && GET_CODE (XEXP (y, 1)) == CONST_INT
1372 && INTVAL (XEXP (y, 1)) >= -4096
1373 && INTVAL (XEXP (y, 1)) <= 4095)
1375 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1377 /* If we were given a MULT, we must fix the constant
1378 as we're going to create the ASHIFT form. */
1379 if (GET_CODE (XEXP (x, 0)) == MULT)
1380 shift_val = exact_log2 (shift_val);
1382 rtx reg1, reg2;
1384 reg1 = XEXP (x, 1);
1385 if (GET_CODE (reg1) != REG)
1386 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1388 reg2 = XEXP (XEXP (x, 0), 0);
1389 if (GET_CODE (reg2) != REG)
1390 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1392 return
1393 force_reg (Pmode,
1394 gen_rtx_PLUS (Pmode,
1395 gen_rtx_ASHIFT (Pmode,
1396 reg2,
1397 GEN_INT (shift_val)),
1398 reg1));
1400 else if ((mode == DFmode || mode == SFmode)
1401 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1402 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1403 && GET_CODE (XEXP (y, 1)) == CONST_INT
1404 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
1406 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1408 /* If we were given a MULT, we must fix the constant
1409 as we're going to create the ASHIFT form. */
1410 if (GET_CODE (XEXP (x, 0)) == MULT)
1411 shift_val = exact_log2 (shift_val);
1413 regx1
1414 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1415 / INTVAL (XEXP (XEXP (x, 0), 1))));
1416 regx2 = XEXP (XEXP (x, 0), 0);
1417 if (GET_CODE (regx2) != REG)
1418 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1419 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1420 regx2, regx1));
1421 return
1422 force_reg (Pmode,
1423 gen_rtx_PLUS (Pmode,
1424 gen_rtx_ASHIFT (Pmode, regx2,
1425 GEN_INT (shift_val)),
1426 force_reg (Pmode, XEXP (y, 0))));
1428 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1429 && INTVAL (XEXP (y, 1)) >= -4096
1430 && INTVAL (XEXP (y, 1)) <= 4095)
1432 /* This is safe because of the guard page at the
1433 beginning and end of the data space. Just
1434 return the original address. */
1435 return orig;
1437 else
1439 /* Doesn't look like one we can optimize. */
1440 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1441 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1442 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1443 regx1 = force_reg (Pmode,
1444 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1445 regx1, regy2));
1446 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1451 return orig;
1454 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1456 Compute extra cost of moving data between one register class
1457 and another.
1459 Make moves from SAR so expensive they should never happen. We used to
1460 have 0xffff here, but that generates overflow in rare cases.
1462 Copies involving a FP register and a non-FP register are relatively
1463 expensive because they must go through memory.
1465 Other copies are reasonably cheap. */
1467 static int
1468 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1469 reg_class_t from, reg_class_t to)
1471 if (from == SHIFT_REGS)
1472 return 0x100;
1473 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1474 return 18;
1475 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1476 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1477 return 16;
1478 else
1479 return 2;
1482 /* For the HPPA, REG and REG+CONST is cost 0
1483 and addresses involving symbolic constants are cost 2.
1485 PIC addresses are very expensive.
1487 It is no coincidence that this has the same structure
1488 as pa_legitimate_address_p. */
1490 static int
1491 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1492 addr_space_t as ATTRIBUTE_UNUSED,
1493 bool speed ATTRIBUTE_UNUSED)
1495 switch (GET_CODE (X))
1497 case REG:
1498 case PLUS:
1499 case LO_SUM:
1500 return 1;
1501 case HIGH:
1502 return 2;
1503 default:
1504 return 4;
1508 /* Return true if X represents a (possibly non-canonical) shNadd pattern.
1509 The machine mode of X is known to be SImode or DImode. */
1511 static bool
1512 hppa_rtx_costs_shadd_p (rtx x)
1514 if (GET_CODE (x) != PLUS
1515 || !REG_P (XEXP (x, 1)))
1516 return false;
1517 rtx op0 = XEXP (x, 0);
1518 if (GET_CODE (op0) == ASHIFT
1519 && CONST_INT_P (XEXP (op0, 1))
1520 && REG_P (XEXP (op0, 0)))
1522 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1523 return x == 1 || x == 2 || x == 3;
1525 if (GET_CODE (op0) == MULT
1526 && CONST_INT_P (XEXP (op0, 1))
1527 && REG_P (XEXP (op0, 0)))
1529 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1530 return x == 2 || x == 4 || x == 8;
1532 return false;
1535 /* Compute a (partial) cost for rtx X. Return true if the complete
1536 cost has been computed, and false if subexpressions should be
1537 scanned. In either case, *TOTAL contains the cost result. */
1539 static bool
1540 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1541 int opno ATTRIBUTE_UNUSED,
1542 int *total, bool speed)
1544 int code = GET_CODE (x);
1546 switch (code)
1548 case CONST_INT:
1549 if (outer_code == SET)
1550 *total = COSTS_N_INSNS (1);
1551 else if (INTVAL (x) == 0)
1552 *total = 0;
1553 else if (INT_14_BITS (x))
1554 *total = 1;
1555 else
1556 *total = 2;
1557 return true;
1559 case HIGH:
1560 *total = 2;
1561 return true;
1563 case CONST:
1564 case LABEL_REF:
1565 case SYMBOL_REF:
1566 *total = 4;
1567 return true;
1569 case CONST_DOUBLE:
1570 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1571 && outer_code != SET)
1572 *total = 0;
1573 else
1574 *total = 8;
1575 return true;
1577 case MULT:
1578 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1580 *total = COSTS_N_INSNS (3);
1582 else if (mode == DImode)
1584 if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1585 *total = COSTS_N_INSNS (25);
1586 else
1587 *total = COSTS_N_INSNS (80);
1589 else
1591 if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1592 *total = COSTS_N_INSNS (8);
1593 else
1594 *total = COSTS_N_INSNS (20);
1596 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1598 case DIV:
1599 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1601 *total = COSTS_N_INSNS (14);
1602 return false;
1604 /* FALLTHRU */
1606 case UDIV:
1607 case MOD:
1608 case UMOD:
1609 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1610 if (mode == DImode)
1611 *total = COSTS_N_INSNS (240);
1612 else
1613 *total = COSTS_N_INSNS (60);
1614 return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1616 case PLUS: /* this includes shNadd insns */
1617 case MINUS:
1618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1619 *total = COSTS_N_INSNS (3);
1620 else if (mode == DImode)
1622 if (TARGET_64BIT)
1624 *total = COSTS_N_INSNS (1);
1625 /* Handle shladd,l instructions. */
1626 if (hppa_rtx_costs_shadd_p (x))
1627 return true;
1629 else
1630 *total = COSTS_N_INSNS (2);
1632 else
1634 *total = COSTS_N_INSNS (1);
1635 /* Handle shNadd instructions. */
1636 if (hppa_rtx_costs_shadd_p (x))
1637 return true;
1639 return REG_P (XEXP (x, 0))
1640 && (REG_P (XEXP (x, 1))
1641 || CONST_INT_P (XEXP (x, 1)));
1643 case ASHIFT:
1644 if (mode == DImode)
1646 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1648 if (TARGET_64BIT)
1649 *total = COSTS_N_INSNS (1);
1650 else
1651 *total = COSTS_N_INSNS (2);
1652 return true;
1654 else if (TARGET_64BIT)
1655 *total = COSTS_N_INSNS (3);
1656 else if (speed)
1657 *total = COSTS_N_INSNS (13);
1658 else
1659 *total = COSTS_N_INSNS (18);
1661 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1663 if (TARGET_64BIT)
1664 *total = COSTS_N_INSNS (2);
1665 else
1666 *total = COSTS_N_INSNS (1);
1667 return true;
1669 else if (TARGET_64BIT)
1670 *total = COSTS_N_INSNS (4);
1671 else
1672 *total = COSTS_N_INSNS (2);
1673 return REG_P (XEXP (x, 0))
1674 && (REG_P (XEXP (x, 1))
1675 || CONST_INT_P (XEXP (x, 1)));
1677 case ASHIFTRT:
1678 if (mode == DImode)
1680 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1682 if (TARGET_64BIT)
1683 *total = COSTS_N_INSNS (1);
1684 else
1685 *total = COSTS_N_INSNS (2);
1686 return true;
1688 else if (TARGET_64BIT)
1689 *total = COSTS_N_INSNS (3);
1690 else if (speed)
1691 *total = COSTS_N_INSNS (14);
1692 else
1693 *total = COSTS_N_INSNS (19);
1695 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1697 if (TARGET_64BIT)
1698 *total = COSTS_N_INSNS (2);
1699 else
1700 *total = COSTS_N_INSNS (1);
1701 return true;
1703 else if (TARGET_64BIT)
1704 *total = COSTS_N_INSNS (4);
1705 else
1706 *total = COSTS_N_INSNS (2);
1707 return REG_P (XEXP (x, 0))
1708 && (REG_P (XEXP (x, 1))
1709 || CONST_INT_P (XEXP (x, 1)));
1711 case LSHIFTRT:
1712 if (mode == DImode)
1714 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1716 if (TARGET_64BIT)
1717 *total = COSTS_N_INSNS (1);
1718 else
1719 *total = COSTS_N_INSNS (2);
1720 return true;
1722 else if (TARGET_64BIT)
1723 *total = COSTS_N_INSNS (2);
1724 else if (speed)
1725 *total = COSTS_N_INSNS (12);
1726 else
1727 *total = COSTS_N_INSNS (15);
1729 else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1731 *total = COSTS_N_INSNS (1);
1732 return true;
1734 else if (TARGET_64BIT)
1735 *total = COSTS_N_INSNS (3);
1736 else
1737 *total = COSTS_N_INSNS (2);
1738 return REG_P (XEXP (x, 0))
1739 && (REG_P (XEXP (x, 1))
1740 || CONST_INT_P (XEXP (x, 1)));
1742 default:
1743 return false;
1747 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1748 new rtx with the correct mode. */
1749 static inline rtx
1750 force_mode (machine_mode mode, rtx orig)
1752 if (mode == GET_MODE (orig))
1753 return orig;
1755 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1757 return gen_rtx_REG (mode, REGNO (orig));
1760 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1762 static bool
1763 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1765 return tls_referenced_p (x);
1768 /* Emit insns to move operands[1] into operands[0].
1770 Return 1 if we have written out everything that needs to be done to
1771 do the move. Otherwise, return 0 and the caller will emit the move
1772 normally.
1774 Note SCRATCH_REG may not be in the proper mode depending on how it
1775 will be used. This routine is responsible for creating a new copy
1776 of SCRATCH_REG in the proper mode. */
1779 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1781 rtx operand0 = operands[0];
1782 rtx operand1 = operands[1];
1783 rtx tem;
1785 /* We can only handle indexed addresses in the destination operand
1786 of floating point stores. Thus, we need to break out indexed
1787 addresses from the destination operand. */
1788 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1790 gcc_assert (can_create_pseudo_p ());
1792 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1793 operand0 = replace_equiv_address (operand0, tem);
1796 /* On targets with non-equivalent space registers, break out unscaled
1797 indexed addresses from the source operand before the final CSE.
1798 We have to do this because the REG_POINTER flag is not correctly
1799 carried through various optimization passes and CSE may substitute
1800 a pseudo without the pointer set for one with the pointer set. As
1801 a result, we loose various opportunities to create insns with
1802 unscaled indexed addresses. */
1803 if (!TARGET_NO_SPACE_REGS
1804 && !cse_not_expected
1805 && GET_CODE (operand1) == MEM
1806 && GET_CODE (XEXP (operand1, 0)) == PLUS
1807 && REG_P (XEXP (XEXP (operand1, 0), 0))
1808 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1809 operand1
1810 = replace_equiv_address (operand1,
1811 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1813 if (scratch_reg
1814 && reload_in_progress && GET_CODE (operand0) == REG
1815 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1816 operand0 = reg_equiv_mem (REGNO (operand0));
1817 else if (scratch_reg
1818 && reload_in_progress && GET_CODE (operand0) == SUBREG
1819 && GET_CODE (SUBREG_REG (operand0)) == REG
1820 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1822 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1823 the code which tracks sets/uses for delete_output_reload. */
1824 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1825 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1826 SUBREG_BYTE (operand0));
1827 operand0 = alter_subreg (&temp, true);
1830 if (scratch_reg
1831 && reload_in_progress && GET_CODE (operand1) == REG
1832 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1833 operand1 = reg_equiv_mem (REGNO (operand1));
1834 else if (scratch_reg
1835 && reload_in_progress && GET_CODE (operand1) == SUBREG
1836 && GET_CODE (SUBREG_REG (operand1)) == REG
1837 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1839 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1840 the code which tracks sets/uses for delete_output_reload. */
1841 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1842 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1843 SUBREG_BYTE (operand1));
1844 operand1 = alter_subreg (&temp, true);
1847 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1848 && ((tem = find_replacement (&XEXP (operand0, 0)))
1849 != XEXP (operand0, 0)))
1850 operand0 = replace_equiv_address (operand0, tem);
1852 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1853 && ((tem = find_replacement (&XEXP (operand1, 0)))
1854 != XEXP (operand1, 0)))
1855 operand1 = replace_equiv_address (operand1, tem);
1857 /* Handle secondary reloads for loads/stores of FP registers from
1858 REG+D addresses where D does not fit in 5 or 14 bits, including
1859 (subreg (mem (addr))) cases, and reloads for other unsupported
1860 memory operands. */
1861 if (scratch_reg
1862 && FP_REG_P (operand0)
1863 && (MEM_P (operand1)
1864 || (GET_CODE (operand1) == SUBREG
1865 && MEM_P (XEXP (operand1, 0)))))
1867 rtx op1 = operand1;
1869 if (GET_CODE (op1) == SUBREG)
1870 op1 = XEXP (op1, 0);
1872 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1874 if (!(TARGET_PA_20
1875 && !TARGET_ELF32
1876 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1877 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1879 /* SCRATCH_REG will hold an address and maybe the actual data.
1880 We want it in WORD_MODE regardless of what mode it was
1881 originally given to us. */
1882 scratch_reg = force_mode (word_mode, scratch_reg);
1884 /* D might not fit in 14 bits either; for such cases load D
1885 into scratch reg. */
1886 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1888 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1889 emit_move_insn (scratch_reg,
1890 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1891 Pmode,
1892 XEXP (XEXP (op1, 0), 0),
1893 scratch_reg));
1895 else
1896 emit_move_insn (scratch_reg, XEXP (op1, 0));
1897 op1 = replace_equiv_address (op1, scratch_reg);
1900 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1901 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1902 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1904 /* Load memory address into SCRATCH_REG. */
1905 scratch_reg = force_mode (word_mode, scratch_reg);
1906 emit_move_insn (scratch_reg, XEXP (op1, 0));
1907 op1 = replace_equiv_address (op1, scratch_reg);
1909 emit_insn (gen_rtx_SET (operand0, op1));
1910 return 1;
1912 else if (scratch_reg
1913 && FP_REG_P (operand1)
1914 && (MEM_P (operand0)
1915 || (GET_CODE (operand0) == SUBREG
1916 && MEM_P (XEXP (operand0, 0)))))
1918 rtx op0 = operand0;
1920 if (GET_CODE (op0) == SUBREG)
1921 op0 = XEXP (op0, 0);
1923 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1925 if (!(TARGET_PA_20
1926 && !TARGET_ELF32
1927 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1928 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1930 /* SCRATCH_REG will hold an address and maybe the actual data.
1931 We want it in WORD_MODE regardless of what mode it was
1932 originally given to us. */
1933 scratch_reg = force_mode (word_mode, scratch_reg);
1935 /* D might not fit in 14 bits either; for such cases load D
1936 into scratch reg. */
1937 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1939 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1940 emit_move_insn (scratch_reg,
1941 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1942 Pmode,
1943 XEXP (XEXP (op0, 0), 0),
1944 scratch_reg));
1946 else
1947 emit_move_insn (scratch_reg, XEXP (op0, 0));
1948 op0 = replace_equiv_address (op0, scratch_reg);
1951 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1952 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1953 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1955 /* Load memory address into SCRATCH_REG. */
1956 scratch_reg = force_mode (word_mode, scratch_reg);
1957 emit_move_insn (scratch_reg, XEXP (op0, 0));
1958 op0 = replace_equiv_address (op0, scratch_reg);
1960 emit_insn (gen_rtx_SET (op0, operand1));
1961 return 1;
1963 /* Handle secondary reloads for loads of FP registers from constant
1964 expressions by forcing the constant into memory. For the most part,
1965 this is only necessary for SImode and DImode.
1967 Use scratch_reg to hold the address of the memory location. */
1968 else if (scratch_reg
1969 && CONSTANT_P (operand1)
1970 && FP_REG_P (operand0))
1972 rtx const_mem, xoperands[2];
1974 if (operand1 == CONST0_RTX (mode))
1976 emit_insn (gen_rtx_SET (operand0, operand1));
1977 return 1;
1980 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1981 it in WORD_MODE regardless of what mode it was originally given
1982 to us. */
1983 scratch_reg = force_mode (word_mode, scratch_reg);
1985 /* Force the constant into memory and put the address of the
1986 memory location into scratch_reg. */
1987 const_mem = force_const_mem (mode, operand1);
1988 xoperands[0] = scratch_reg;
1989 xoperands[1] = XEXP (const_mem, 0);
1990 pa_emit_move_sequence (xoperands, Pmode, 0);
1992 /* Now load the destination register. */
1993 emit_insn (gen_rtx_SET (operand0,
1994 replace_equiv_address (const_mem, scratch_reg)));
1995 return 1;
1997 /* Handle secondary reloads for SAR. These occur when trying to load
1998 the SAR from memory or a constant. */
1999 else if (scratch_reg
2000 && GET_CODE (operand0) == REG
2001 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2002 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2003 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2005 /* D might not fit in 14 bits either; for such cases load D into
2006 scratch reg. */
2007 if (GET_CODE (operand1) == MEM
2008 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2010 /* We are reloading the address into the scratch register, so we
2011 want to make sure the scratch register is a full register. */
2012 scratch_reg = force_mode (word_mode, scratch_reg);
2014 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2015 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2016 0)),
2017 Pmode,
2018 XEXP (XEXP (operand1, 0),
2020 scratch_reg));
2022 /* Now we are going to load the scratch register from memory,
2023 we want to load it in the same width as the original MEM,
2024 which must be the same as the width of the ultimate destination,
2025 OPERAND0. */
2026 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2028 emit_move_insn (scratch_reg,
2029 replace_equiv_address (operand1, scratch_reg));
2031 else
2033 /* We want to load the scratch register using the same mode as
2034 the ultimate destination. */
2035 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2037 emit_move_insn (scratch_reg, operand1);
2040 /* And emit the insn to set the ultimate destination. We know that
2041 the scratch register has the same mode as the destination at this
2042 point. */
2043 emit_move_insn (operand0, scratch_reg);
2044 return 1;
2047 /* Handle the most common case: storing into a register. */
2048 if (register_operand (operand0, mode))
2050 /* Legitimize TLS symbol references. This happens for references
2051 that aren't a legitimate constant. */
2052 if (PA_SYMBOL_REF_TLS_P (operand1))
2053 operand1 = legitimize_tls_address (operand1);
2055 if (register_operand (operand1, mode)
2056 || (GET_CODE (operand1) == CONST_INT
2057 && pa_cint_ok_for_move (UINTVAL (operand1)))
2058 || (operand1 == CONST0_RTX (mode))
2059 || (GET_CODE (operand1) == HIGH
2060 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2061 /* Only `general_operands' can come here, so MEM is ok. */
2062 || GET_CODE (operand1) == MEM)
2064 /* Various sets are created during RTL generation which don't
2065 have the REG_POINTER flag correctly set. After the CSE pass,
2066 instruction recognition can fail if we don't consistently
2067 set this flag when performing register copies. This should
2068 also improve the opportunities for creating insns that use
2069 unscaled indexing. */
2070 if (REG_P (operand0) && REG_P (operand1))
2072 if (REG_POINTER (operand1)
2073 && !REG_POINTER (operand0)
2074 && !HARD_REGISTER_P (operand0))
2075 copy_reg_pointer (operand0, operand1);
2078 /* When MEMs are broken out, the REG_POINTER flag doesn't
2079 get set. In some cases, we can set the REG_POINTER flag
2080 from the declaration for the MEM. */
2081 if (REG_P (operand0)
2082 && GET_CODE (operand1) == MEM
2083 && !REG_POINTER (operand0))
2085 tree decl = MEM_EXPR (operand1);
2087 /* Set the register pointer flag and register alignment
2088 if the declaration for this memory reference is a
2089 pointer type. */
2090 if (decl)
2092 tree type;
2094 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2095 tree operand 1. */
2096 if (TREE_CODE (decl) == COMPONENT_REF)
2097 decl = TREE_OPERAND (decl, 1);
2099 type = TREE_TYPE (decl);
2100 type = strip_array_types (type);
2102 if (POINTER_TYPE_P (type))
2103 mark_reg_pointer (operand0, BITS_PER_UNIT);
2107 emit_insn (gen_rtx_SET (operand0, operand1));
2108 return 1;
2111 else if (GET_CODE (operand0) == MEM)
2113 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2114 && !(reload_in_progress || reload_completed))
2116 rtx temp = gen_reg_rtx (DFmode);
2118 emit_insn (gen_rtx_SET (temp, operand1));
2119 emit_insn (gen_rtx_SET (operand0, temp));
2120 return 1;
2122 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2124 /* Run this case quickly. */
2125 emit_insn (gen_rtx_SET (operand0, operand1));
2126 return 1;
2128 if (! (reload_in_progress || reload_completed))
2130 operands[0] = validize_mem (operand0);
2131 operands[1] = operand1 = force_reg (mode, operand1);
2135 /* Simplify the source if we need to.
2136 Note we do have to handle function labels here, even though we do
2137 not consider them legitimate constants. Loop optimizations can
2138 call the emit_move_xxx with one as a source. */
2139 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2140 || (GET_CODE (operand1) == HIGH
2141 && symbolic_operand (XEXP (operand1, 0), mode))
2142 || function_label_operand (operand1, VOIDmode)
2143 || tls_referenced_p (operand1))
2145 int ishighonly = 0;
2147 if (GET_CODE (operand1) == HIGH)
2149 ishighonly = 1;
2150 operand1 = XEXP (operand1, 0);
2152 if (symbolic_operand (operand1, mode))
2154 /* Argh. The assembler and linker can't handle arithmetic
2155 involving plabels.
2157 So we force the plabel into memory, load operand0 from
2158 the memory location, then add in the constant part. */
2159 if ((GET_CODE (operand1) == CONST
2160 && GET_CODE (XEXP (operand1, 0)) == PLUS
2161 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2162 VOIDmode))
2163 || function_label_operand (operand1, VOIDmode))
2165 rtx temp, const_part;
2167 /* Figure out what (if any) scratch register to use. */
2168 if (reload_in_progress || reload_completed)
2170 scratch_reg = scratch_reg ? scratch_reg : operand0;
2171 /* SCRATCH_REG will hold an address and maybe the actual
2172 data. We want it in WORD_MODE regardless of what mode it
2173 was originally given to us. */
2174 scratch_reg = force_mode (word_mode, scratch_reg);
2176 else if (flag_pic)
2177 scratch_reg = gen_reg_rtx (Pmode);
2179 if (GET_CODE (operand1) == CONST)
2181 /* Save away the constant part of the expression. */
2182 const_part = XEXP (XEXP (operand1, 0), 1);
2183 gcc_assert (GET_CODE (const_part) == CONST_INT);
2185 /* Force the function label into memory. */
2186 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2188 else
2190 /* No constant part. */
2191 const_part = NULL_RTX;
2193 /* Force the function label into memory. */
2194 temp = force_const_mem (mode, operand1);
2198 /* Get the address of the memory location. PIC-ify it if
2199 necessary. */
2200 temp = XEXP (temp, 0);
2201 if (flag_pic)
2202 temp = legitimize_pic_address (temp, mode, scratch_reg);
2204 /* Put the address of the memory location into our destination
2205 register. */
2206 operands[1] = temp;
2207 pa_emit_move_sequence (operands, mode, scratch_reg);
2209 /* Now load from the memory location into our destination
2210 register. */
2211 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2212 pa_emit_move_sequence (operands, mode, scratch_reg);
2214 /* And add back in the constant part. */
2215 if (const_part != NULL_RTX)
2216 expand_inc (operand0, const_part);
2218 return 1;
2221 if (flag_pic)
2223 rtx_insn *insn;
2224 rtx temp;
2226 if (reload_in_progress || reload_completed)
2228 temp = scratch_reg ? scratch_reg : operand0;
2229 /* TEMP will hold an address and maybe the actual
2230 data. We want it in WORD_MODE regardless of what mode it
2231 was originally given to us. */
2232 temp = force_mode (word_mode, temp);
2234 else
2235 temp = gen_reg_rtx (Pmode);
2237 /* Force (const (plus (symbol) (const_int))) to memory
2238 if the const_int will not fit in 14 bits. Although
2239 this requires a relocation, the instruction sequence
2240 needed to load the value is shorter. */
2241 if (GET_CODE (operand1) == CONST
2242 && GET_CODE (XEXP (operand1, 0)) == PLUS
2243 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2244 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2246 rtx x, m = force_const_mem (mode, operand1);
2248 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2249 x = replace_equiv_address (m, x);
2250 insn = emit_move_insn (operand0, x);
2252 else
2254 operands[1] = legitimize_pic_address (operand1, mode, temp);
2255 if (REG_P (operand0) && REG_P (operands[1]))
2256 copy_reg_pointer (operand0, operands[1]);
2257 insn = emit_move_insn (operand0, operands[1]);
2260 /* Put a REG_EQUAL note on this insn. */
2261 set_unique_reg_note (insn, REG_EQUAL, operand1);
2263 /* On the HPPA, references to data space are supposed to use dp,
2264 register 27, but showing it in the RTL inhibits various cse
2265 and loop optimizations. */
2266 else
2268 rtx temp, set;
2270 if (reload_in_progress || reload_completed)
2272 temp = scratch_reg ? scratch_reg : operand0;
2273 /* TEMP will hold an address and maybe the actual
2274 data. We want it in WORD_MODE regardless of what mode it
2275 was originally given to us. */
2276 temp = force_mode (word_mode, temp);
2278 else
2279 temp = gen_reg_rtx (mode);
2281 /* Loading a SYMBOL_REF into a register makes that register
2282 safe to be used as the base in an indexed address.
2284 Don't mark hard registers though. That loses. */
2285 if (GET_CODE (operand0) == REG
2286 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2287 mark_reg_pointer (operand0, BITS_PER_UNIT);
2288 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2289 mark_reg_pointer (temp, BITS_PER_UNIT);
2291 if (ishighonly)
2292 set = gen_rtx_SET (operand0, temp);
2293 else
2294 set = gen_rtx_SET (operand0,
2295 gen_rtx_LO_SUM (mode, temp, operand1));
2297 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2298 emit_insn (set);
2301 return 1;
2303 else if (tls_referenced_p (operand1))
2305 rtx tmp = operand1;
2306 rtx addend = NULL;
2308 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2310 addend = XEXP (XEXP (tmp, 0), 1);
2311 tmp = XEXP (XEXP (tmp, 0), 0);
2314 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2315 tmp = legitimize_tls_address (tmp);
2316 if (addend)
2318 tmp = gen_rtx_PLUS (mode, tmp, addend);
2319 tmp = force_operand (tmp, operands[0]);
2321 operands[1] = tmp;
2323 else if (GET_CODE (operand1) != CONST_INT
2324 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2326 rtx temp;
2327 rtx_insn *insn;
2328 rtx op1 = operand1;
2329 HOST_WIDE_INT value = 0;
2330 HOST_WIDE_INT insv = 0;
2331 int insert = 0;
2333 if (GET_CODE (operand1) == CONST_INT)
2334 value = INTVAL (operand1);
2336 if (TARGET_64BIT
2337 && GET_CODE (operand1) == CONST_INT
2338 && HOST_BITS_PER_WIDE_INT > 32
2339 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2341 HOST_WIDE_INT nval;
2343 /* Extract the low order 32 bits of the value and sign extend.
2344 If the new value is the same as the original value, we can
2345 can use the original value as-is. If the new value is
2346 different, we use it and insert the most-significant 32-bits
2347 of the original value into the final result. */
2348 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2349 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2350 if (value != nval)
2352 #if HOST_BITS_PER_WIDE_INT > 32
2353 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2354 #endif
2355 insert = 1;
2356 value = nval;
2357 operand1 = GEN_INT (nval);
2361 if (reload_in_progress || reload_completed)
2362 temp = scratch_reg ? scratch_reg : operand0;
2363 else
2364 temp = gen_reg_rtx (mode);
2366 /* We don't directly split DImode constants on 32-bit targets
2367 because PLUS uses an 11-bit immediate and the insn sequence
2368 generated is not as efficient as the one using HIGH/LO_SUM. */
2369 if (GET_CODE (operand1) == CONST_INT
2370 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2371 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2372 && !insert)
2374 /* Directly break constant into high and low parts. This
2375 provides better optimization opportunities because various
2376 passes recognize constants split with PLUS but not LO_SUM.
2377 We use a 14-bit signed low part except when the addition
2378 of 0x4000 to the high part might change the sign of the
2379 high part. */
2380 HOST_WIDE_INT low = value & 0x3fff;
2381 HOST_WIDE_INT high = value & ~ 0x3fff;
2383 if (low >= 0x2000)
2385 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2386 high += 0x2000;
2387 else
2388 high += 0x4000;
2391 low = value - high;
2393 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2394 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2396 else
2398 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2399 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2402 insn = emit_move_insn (operands[0], operands[1]);
2404 /* Now insert the most significant 32 bits of the value
2405 into the register. When we don't have a second register
2406 available, it could take up to nine instructions to load
2407 a 64-bit integer constant. Prior to reload, we force
2408 constants that would take more than three instructions
2409 to load to the constant pool. During and after reload,
2410 we have to handle all possible values. */
2411 if (insert)
2413 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2414 register and the value to be inserted is outside the
2415 range that can be loaded with three depdi instructions. */
2416 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2418 operand1 = GEN_INT (insv);
2420 emit_insn (gen_rtx_SET (temp,
2421 gen_rtx_HIGH (mode, operand1)));
2422 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2423 if (mode == DImode)
2424 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2425 const0_rtx, temp));
2426 else
2427 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2428 const0_rtx, temp));
2430 else
2432 int len = 5, pos = 27;
2434 /* Insert the bits using the depdi instruction. */
2435 while (pos >= 0)
2437 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2438 HOST_WIDE_INT sign = v5 < 0;
2440 /* Left extend the insertion. */
2441 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2442 while (pos > 0 && (insv & 1) == sign)
2444 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2445 len += 1;
2446 pos -= 1;
2449 if (mode == DImode)
2450 insn = emit_insn (gen_insvdi (operand0,
2451 GEN_INT (len),
2452 GEN_INT (pos),
2453 GEN_INT (v5)));
2454 else
2455 insn = emit_insn (gen_insvsi (operand0,
2456 GEN_INT (len),
2457 GEN_INT (pos),
2458 GEN_INT (v5)));
2460 len = pos > 0 && pos < 5 ? pos : 5;
2461 pos -= len;
2466 set_unique_reg_note (insn, REG_EQUAL, op1);
2468 return 1;
2471 /* Now have insn-emit do whatever it normally does. */
2472 return 0;
2475 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2476 it will need a link/runtime reloc). */
2479 pa_reloc_needed (tree exp)
2481 int reloc = 0;
2483 switch (TREE_CODE (exp))
2485 case ADDR_EXPR:
2486 return 1;
2488 case POINTER_PLUS_EXPR:
2489 case PLUS_EXPR:
2490 case MINUS_EXPR:
2491 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2492 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2493 break;
2495 CASE_CONVERT:
2496 case NON_LVALUE_EXPR:
2497 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2498 break;
2500 case CONSTRUCTOR:
2502 tree value;
2503 unsigned HOST_WIDE_INT ix;
2505 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2506 if (value)
2507 reloc |= pa_reloc_needed (value);
2509 break;
2511 case ERROR_MARK:
2512 break;
2514 default:
2515 break;
2517 return reloc;
2521 /* Return the best assembler insn template
2522 for moving operands[1] into operands[0] as a fullword. */
2523 const char *
2524 pa_singlemove_string (rtx *operands)
2526 HOST_WIDE_INT intval;
2528 if (GET_CODE (operands[0]) == MEM)
2529 return "stw %r1,%0";
2530 if (GET_CODE (operands[1]) == MEM)
2531 return "ldw %1,%0";
2532 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2534 long i;
2536 gcc_assert (GET_MODE (operands[1]) == SFmode);
2538 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2539 bit pattern. */
2540 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2542 operands[1] = GEN_INT (i);
2543 /* Fall through to CONST_INT case. */
2545 if (GET_CODE (operands[1]) == CONST_INT)
2547 intval = INTVAL (operands[1]);
2549 if (VAL_14_BITS_P (intval))
2550 return "ldi %1,%0";
2551 else if ((intval & 0x7ff) == 0)
2552 return "ldil L'%1,%0";
2553 else if (pa_zdepi_cint_p (intval))
2554 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2555 else
2556 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2558 return "copy %1,%0";
2562 /* Compute position (in OP[1]) and width (in OP[2])
2563 useful for copying IMM to a register using the zdepi
2564 instructions. Store the immediate value to insert in OP[0]. */
2565 static void
2566 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2568 int lsb, len;
2570 /* Find the least significant set bit in IMM. */
2571 for (lsb = 0; lsb < 32; lsb++)
2573 if ((imm & 1) != 0)
2574 break;
2575 imm >>= 1;
2578 /* Choose variants based on *sign* of the 5-bit field. */
2579 if ((imm & 0x10) == 0)
2580 len = (lsb <= 28) ? 4 : 32 - lsb;
2581 else
2583 /* Find the width of the bitstring in IMM. */
2584 for (len = 5; len < 32 - lsb; len++)
2586 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2587 break;
2590 /* Sign extend IMM as a 5-bit value. */
2591 imm = (imm & 0xf) - 0x10;
2594 op[0] = imm;
2595 op[1] = 31 - lsb;
2596 op[2] = len;
2599 /* Compute position (in OP[1]) and width (in OP[2])
2600 useful for copying IMM to a register using the depdi,z
2601 instructions. Store the immediate value to insert in OP[0]. */
2603 static void
2604 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2606 int lsb, len, maxlen;
2608 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2610 /* Find the least significant set bit in IMM. */
2611 for (lsb = 0; lsb < maxlen; lsb++)
2613 if ((imm & 1) != 0)
2614 break;
2615 imm >>= 1;
2618 /* Choose variants based on *sign* of the 5-bit field. */
2619 if ((imm & 0x10) == 0)
2620 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2621 else
2623 /* Find the width of the bitstring in IMM. */
2624 for (len = 5; len < maxlen - lsb; len++)
2626 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2627 break;
2630 /* Extend length if host is narrow and IMM is negative. */
2631 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2632 len += 32;
2634 /* Sign extend IMM as a 5-bit value. */
2635 imm = (imm & 0xf) - 0x10;
2638 op[0] = imm;
2639 op[1] = 63 - lsb;
2640 op[2] = len;
2643 /* Output assembler code to perform a doubleword move insn
2644 with operands OPERANDS. */
2646 const char *
2647 pa_output_move_double (rtx *operands)
2649 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2650 rtx latehalf[2];
2651 rtx addreg0 = 0, addreg1 = 0;
2652 int highonly = 0;
2654 /* First classify both operands. */
2656 if (REG_P (operands[0]))
2657 optype0 = REGOP;
2658 else if (offsettable_memref_p (operands[0]))
2659 optype0 = OFFSOP;
2660 else if (GET_CODE (operands[0]) == MEM)
2661 optype0 = MEMOP;
2662 else
2663 optype0 = RNDOP;
2665 if (REG_P (operands[1]))
2666 optype1 = REGOP;
2667 else if (CONSTANT_P (operands[1]))
2668 optype1 = CNSTOP;
2669 else if (offsettable_memref_p (operands[1]))
2670 optype1 = OFFSOP;
2671 else if (GET_CODE (operands[1]) == MEM)
2672 optype1 = MEMOP;
2673 else
2674 optype1 = RNDOP;
2676 /* Check for the cases that the operand constraints are not
2677 supposed to allow to happen. */
2678 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2680 /* Handle copies between general and floating registers. */
2682 if (optype0 == REGOP && optype1 == REGOP
2683 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2685 if (FP_REG_P (operands[0]))
2687 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2688 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2689 return "{fldds|fldd} -16(%%sp),%0";
2691 else
2693 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2694 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2695 return "{ldws|ldw} -12(%%sp),%R0";
2699 /* Handle auto decrementing and incrementing loads and stores
2700 specifically, since the structure of the function doesn't work
2701 for them without major modification. Do it better when we learn
2702 this port about the general inc/dec addressing of PA.
2703 (This was written by tege. Chide him if it doesn't work.) */
2705 if (optype0 == MEMOP)
2707 /* We have to output the address syntax ourselves, since print_operand
2708 doesn't deal with the addresses we want to use. Fix this later. */
2710 rtx addr = XEXP (operands[0], 0);
2711 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2713 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2715 operands[0] = XEXP (addr, 0);
2716 gcc_assert (GET_CODE (operands[1]) == REG
2717 && GET_CODE (operands[0]) == REG);
2719 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2721 /* No overlap between high target register and address
2722 register. (We do this in a non-obvious way to
2723 save a register file writeback) */
2724 if (GET_CODE (addr) == POST_INC)
2725 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2726 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2728 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2730 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2732 operands[0] = XEXP (addr, 0);
2733 gcc_assert (GET_CODE (operands[1]) == REG
2734 && GET_CODE (operands[0]) == REG);
2736 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2737 /* No overlap between high target register and address
2738 register. (We do this in a non-obvious way to save a
2739 register file writeback) */
2740 if (GET_CODE (addr) == PRE_INC)
2741 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2742 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2745 if (optype1 == MEMOP)
2747 /* We have to output the address syntax ourselves, since print_operand
2748 doesn't deal with the addresses we want to use. Fix this later. */
2750 rtx addr = XEXP (operands[1], 0);
2751 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2753 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2755 operands[1] = XEXP (addr, 0);
2756 gcc_assert (GET_CODE (operands[0]) == REG
2757 && GET_CODE (operands[1]) == REG);
2759 if (!reg_overlap_mentioned_p (high_reg, addr))
2761 /* No overlap between high target register and address
2762 register. (We do this in a non-obvious way to
2763 save a register file writeback) */
2764 if (GET_CODE (addr) == POST_INC)
2765 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2766 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2768 else
2770 /* This is an undefined situation. We should load into the
2771 address register *and* update that register. Probably
2772 we don't need to handle this at all. */
2773 if (GET_CODE (addr) == POST_INC)
2774 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2775 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2778 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2780 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2782 operands[1] = XEXP (addr, 0);
2783 gcc_assert (GET_CODE (operands[0]) == REG
2784 && GET_CODE (operands[1]) == REG);
2786 if (!reg_overlap_mentioned_p (high_reg, addr))
2788 /* No overlap between high target register and address
2789 register. (We do this in a non-obvious way to
2790 save a register file writeback) */
2791 if (GET_CODE (addr) == PRE_INC)
2792 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2793 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2795 else
2797 /* This is an undefined situation. We should load into the
2798 address register *and* update that register. Probably
2799 we don't need to handle this at all. */
2800 if (GET_CODE (addr) == PRE_INC)
2801 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2802 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2805 else if (GET_CODE (addr) == PLUS
2806 && GET_CODE (XEXP (addr, 0)) == MULT)
2808 rtx xoperands[4];
2810 /* Load address into left half of destination register. */
2811 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2812 xoperands[1] = XEXP (addr, 1);
2813 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2814 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2815 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2816 xoperands);
2817 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2819 else if (GET_CODE (addr) == PLUS
2820 && REG_P (XEXP (addr, 0))
2821 && REG_P (XEXP (addr, 1)))
2823 rtx xoperands[3];
2825 /* Load address into left half of destination register. */
2826 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2827 xoperands[1] = XEXP (addr, 0);
2828 xoperands[2] = XEXP (addr, 1);
2829 output_asm_insn ("{addl|add,l} %1,%2,%0",
2830 xoperands);
2831 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2835 /* If an operand is an unoffsettable memory ref, find a register
2836 we can increment temporarily to make it refer to the second word. */
2838 if (optype0 == MEMOP)
2839 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2841 if (optype1 == MEMOP)
2842 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2844 /* Ok, we can do one word at a time.
2845 Normally we do the low-numbered word first.
2847 In either case, set up in LATEHALF the operands to use
2848 for the high-numbered word and in some cases alter the
2849 operands in OPERANDS to be suitable for the low-numbered word. */
2851 if (optype0 == REGOP)
2852 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2853 else if (optype0 == OFFSOP)
2854 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2855 else
2856 latehalf[0] = operands[0];
2858 if (optype1 == REGOP)
2859 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2860 else if (optype1 == OFFSOP)
2861 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2862 else if (optype1 == CNSTOP)
2864 if (GET_CODE (operands[1]) == HIGH)
2866 operands[1] = XEXP (operands[1], 0);
2867 highonly = 1;
2869 split_double (operands[1], &operands[1], &latehalf[1]);
2871 else
2872 latehalf[1] = operands[1];
2874 /* If the first move would clobber the source of the second one,
2875 do them in the other order.
2877 This can happen in two cases:
2879 mem -> register where the first half of the destination register
2880 is the same register used in the memory's address. Reload
2881 can create such insns.
2883 mem in this case will be either register indirect or register
2884 indirect plus a valid offset.
2886 register -> register move where REGNO(dst) == REGNO(src + 1)
2887 someone (Tim/Tege?) claimed this can happen for parameter loads.
2889 Handle mem -> register case first. */
2890 if (optype0 == REGOP
2891 && (optype1 == MEMOP || optype1 == OFFSOP)
2892 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2894 /* Do the late half first. */
2895 if (addreg1)
2896 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2897 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2899 /* Then clobber. */
2900 if (addreg1)
2901 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2902 return pa_singlemove_string (operands);
2905 /* Now handle register -> register case. */
2906 if (optype0 == REGOP && optype1 == REGOP
2907 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2909 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2910 return pa_singlemove_string (operands);
2913 /* Normal case: do the two words, low-numbered first. */
2915 output_asm_insn (pa_singlemove_string (operands), operands);
2917 /* Make any unoffsettable addresses point at high-numbered word. */
2918 if (addreg0)
2919 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2920 if (addreg1)
2921 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2923 /* Do high-numbered word. */
2924 if (highonly)
2925 output_asm_insn ("ldil L'%1,%0", latehalf);
2926 else
2927 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2929 /* Undo the adds we just did. */
2930 if (addreg0)
2931 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2932 if (addreg1)
2933 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2935 return "";
2938 const char *
2939 pa_output_fp_move_double (rtx *operands)
2941 if (FP_REG_P (operands[0]))
2943 if (FP_REG_P (operands[1])
2944 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2945 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2946 else
2947 output_asm_insn ("fldd%F1 %1,%0", operands);
2949 else if (FP_REG_P (operands[1]))
2951 output_asm_insn ("fstd%F0 %1,%0", operands);
2953 else
2955 rtx xoperands[2];
2957 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2959 /* This is a pain. You have to be prepared to deal with an
2960 arbitrary address here including pre/post increment/decrement.
2962 so avoid this in the MD. */
2963 gcc_assert (GET_CODE (operands[0]) == REG);
2965 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2966 xoperands[0] = operands[0];
2967 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2969 return "";
2972 /* Return a REG that occurs in ADDR with coefficient 1.
2973 ADDR can be effectively incremented by incrementing REG. */
2975 static rtx
2976 find_addr_reg (rtx addr)
2978 while (GET_CODE (addr) == PLUS)
2980 if (GET_CODE (XEXP (addr, 0)) == REG)
2981 addr = XEXP (addr, 0);
2982 else if (GET_CODE (XEXP (addr, 1)) == REG)
2983 addr = XEXP (addr, 1);
2984 else if (CONSTANT_P (XEXP (addr, 0)))
2985 addr = XEXP (addr, 1);
2986 else if (CONSTANT_P (XEXP (addr, 1)))
2987 addr = XEXP (addr, 0);
2988 else
2989 gcc_unreachable ();
2991 gcc_assert (GET_CODE (addr) == REG);
2992 return addr;
2995 /* Emit code to perform a block move.
2997 OPERANDS[0] is the destination pointer as a REG, clobbered.
2998 OPERANDS[1] is the source pointer as a REG, clobbered.
2999 OPERANDS[2] is a register for temporary storage.
3000 OPERANDS[3] is a register for temporary storage.
3001 OPERANDS[4] is the size as a CONST_INT
3002 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3003 OPERANDS[6] is another temporary register. */
3005 const char *
3006 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3008 int align = INTVAL (operands[5]);
3009 unsigned long n_bytes = INTVAL (operands[4]);
3011 /* We can't move more than a word at a time because the PA
3012 has no longer integer move insns. (Could use fp mem ops?) */
3013 if (align > (TARGET_64BIT ? 8 : 4))
3014 align = (TARGET_64BIT ? 8 : 4);
3016 /* Note that we know each loop below will execute at least twice
3017 (else we would have open-coded the copy). */
3018 switch (align)
3020 case 8:
3021 /* Pre-adjust the loop counter. */
3022 operands[4] = GEN_INT (n_bytes - 16);
3023 output_asm_insn ("ldi %4,%2", operands);
3025 /* Copying loop. */
3026 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3027 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3028 output_asm_insn ("std,ma %3,8(%0)", operands);
3029 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3030 output_asm_insn ("std,ma %6,8(%0)", operands);
3032 /* Handle the residual. There could be up to 7 bytes of
3033 residual to copy! */
3034 if (n_bytes % 16 != 0)
3036 operands[4] = GEN_INT (n_bytes % 8);
3037 if (n_bytes % 16 >= 8)
3038 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3039 if (n_bytes % 8 != 0)
3040 output_asm_insn ("ldd 0(%1),%6", operands);
3041 if (n_bytes % 16 >= 8)
3042 output_asm_insn ("std,ma %3,8(%0)", operands);
3043 if (n_bytes % 8 != 0)
3044 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3046 return "";
3048 case 4:
3049 /* Pre-adjust the loop counter. */
3050 operands[4] = GEN_INT (n_bytes - 8);
3051 output_asm_insn ("ldi %4,%2", operands);
3053 /* Copying loop. */
3054 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3055 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3056 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3057 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3058 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3060 /* Handle the residual. There could be up to 7 bytes of
3061 residual to copy! */
3062 if (n_bytes % 8 != 0)
3064 operands[4] = GEN_INT (n_bytes % 4);
3065 if (n_bytes % 8 >= 4)
3066 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3067 if (n_bytes % 4 != 0)
3068 output_asm_insn ("ldw 0(%1),%6", operands);
3069 if (n_bytes % 8 >= 4)
3070 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3071 if (n_bytes % 4 != 0)
3072 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3074 return "";
3076 case 2:
3077 /* Pre-adjust the loop counter. */
3078 operands[4] = GEN_INT (n_bytes - 4);
3079 output_asm_insn ("ldi %4,%2", operands);
3081 /* Copying loop. */
3082 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3083 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3084 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3085 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3086 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3088 /* Handle the residual. */
3089 if (n_bytes % 4 != 0)
3091 if (n_bytes % 4 >= 2)
3092 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3093 if (n_bytes % 2 != 0)
3094 output_asm_insn ("ldb 0(%1),%6", operands);
3095 if (n_bytes % 4 >= 2)
3096 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3097 if (n_bytes % 2 != 0)
3098 output_asm_insn ("stb %6,0(%0)", operands);
3100 return "";
3102 case 1:
3103 /* Pre-adjust the loop counter. */
3104 operands[4] = GEN_INT (n_bytes - 2);
3105 output_asm_insn ("ldi %4,%2", operands);
3107 /* Copying loop. */
3108 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3109 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3110 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3111 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3112 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3114 /* Handle the residual. */
3115 if (n_bytes % 2 != 0)
3117 output_asm_insn ("ldb 0(%1),%3", operands);
3118 output_asm_insn ("stb %3,0(%0)", operands);
3120 return "";
3122 default:
3123 gcc_unreachable ();
3127 /* Count the number of insns necessary to handle this block move.
3129 Basic structure is the same as emit_block_move, except that we
3130 count insns rather than emit them. */
3132 static int
3133 compute_cpymem_length (rtx_insn *insn)
3135 rtx pat = PATTERN (insn);
3136 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3137 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3138 unsigned int n_insns = 0;
3140 /* We can't move more than four bytes at a time because the PA
3141 has no longer integer move insns. (Could use fp mem ops?) */
3142 if (align > (TARGET_64BIT ? 8 : 4))
3143 align = (TARGET_64BIT ? 8 : 4);
3145 /* The basic copying loop. */
3146 n_insns = 6;
3148 /* Residuals. */
3149 if (n_bytes % (2 * align) != 0)
3151 if ((n_bytes % (2 * align)) >= align)
3152 n_insns += 2;
3154 if ((n_bytes % align) != 0)
3155 n_insns += 2;
3158 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3159 return n_insns * 4;
3162 /* Emit code to perform a block clear.
3164 OPERANDS[0] is the destination pointer as a REG, clobbered.
3165 OPERANDS[1] is a register for temporary storage.
3166 OPERANDS[2] is the size as a CONST_INT
3167 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3169 const char *
3170 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3172 int align = INTVAL (operands[3]);
3173 unsigned long n_bytes = INTVAL (operands[2]);
3175 /* We can't clear more than a word at a time because the PA
3176 has no longer integer move insns. */
3177 if (align > (TARGET_64BIT ? 8 : 4))
3178 align = (TARGET_64BIT ? 8 : 4);
3180 /* Note that we know each loop below will execute at least twice
3181 (else we would have open-coded the copy). */
3182 switch (align)
3184 case 8:
3185 /* Pre-adjust the loop counter. */
3186 operands[2] = GEN_INT (n_bytes - 16);
3187 output_asm_insn ("ldi %2,%1", operands);
3189 /* Loop. */
3190 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3191 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3192 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3194 /* Handle the residual. There could be up to 7 bytes of
3195 residual to copy! */
3196 if (n_bytes % 16 != 0)
3198 operands[2] = GEN_INT (n_bytes % 8);
3199 if (n_bytes % 16 >= 8)
3200 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3201 if (n_bytes % 8 != 0)
3202 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3204 return "";
3206 case 4:
3207 /* Pre-adjust the loop counter. */
3208 operands[2] = GEN_INT (n_bytes - 8);
3209 output_asm_insn ("ldi %2,%1", operands);
3211 /* Loop. */
3212 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3213 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3214 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3216 /* Handle the residual. There could be up to 7 bytes of
3217 residual to copy! */
3218 if (n_bytes % 8 != 0)
3220 operands[2] = GEN_INT (n_bytes % 4);
3221 if (n_bytes % 8 >= 4)
3222 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3223 if (n_bytes % 4 != 0)
3224 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3226 return "";
3228 case 2:
3229 /* Pre-adjust the loop counter. */
3230 operands[2] = GEN_INT (n_bytes - 4);
3231 output_asm_insn ("ldi %2,%1", operands);
3233 /* Loop. */
3234 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3235 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3236 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3238 /* Handle the residual. */
3239 if (n_bytes % 4 != 0)
3241 if (n_bytes % 4 >= 2)
3242 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3243 if (n_bytes % 2 != 0)
3244 output_asm_insn ("stb %%r0,0(%0)", operands);
3246 return "";
3248 case 1:
3249 /* Pre-adjust the loop counter. */
3250 operands[2] = GEN_INT (n_bytes - 2);
3251 output_asm_insn ("ldi %2,%1", operands);
3253 /* Loop. */
3254 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3255 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3256 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3258 /* Handle the residual. */
3259 if (n_bytes % 2 != 0)
3260 output_asm_insn ("stb %%r0,0(%0)", operands);
3262 return "";
3264 default:
3265 gcc_unreachable ();
3269 /* Count the number of insns necessary to handle this block move.
3271 Basic structure is the same as emit_block_move, except that we
3272 count insns rather than emit them. */
3274 static int
3275 compute_clrmem_length (rtx_insn *insn)
3277 rtx pat = PATTERN (insn);
3278 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3279 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3280 unsigned int n_insns = 0;
3282 /* We can't clear more than a word at a time because the PA
3283 has no longer integer move insns. */
3284 if (align > (TARGET_64BIT ? 8 : 4))
3285 align = (TARGET_64BIT ? 8 : 4);
3287 /* The basic loop. */
3288 n_insns = 4;
3290 /* Residuals. */
3291 if (n_bytes % (2 * align) != 0)
3293 if ((n_bytes % (2 * align)) >= align)
3294 n_insns++;
3296 if ((n_bytes % align) != 0)
3297 n_insns++;
3300 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3301 return n_insns * 4;
3305 const char *
3306 pa_output_and (rtx *operands)
3308 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3310 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3311 int ls0, ls1, ms0, p, len;
3313 for (ls0 = 0; ls0 < 32; ls0++)
3314 if ((mask & (1 << ls0)) == 0)
3315 break;
3317 for (ls1 = ls0; ls1 < 32; ls1++)
3318 if ((mask & (1 << ls1)) != 0)
3319 break;
3321 for (ms0 = ls1; ms0 < 32; ms0++)
3322 if ((mask & (1 << ms0)) == 0)
3323 break;
3325 gcc_assert (ms0 == 32);
3327 if (ls1 == 32)
3329 len = ls0;
3331 gcc_assert (len);
3333 operands[2] = GEN_INT (len);
3334 return "{extru|extrw,u} %1,31,%2,%0";
3336 else
3338 /* We could use this `depi' for the case above as well, but `depi'
3339 requires one more register file access than an `extru'. */
3341 p = 31 - ls0;
3342 len = ls1 - ls0;
3344 operands[2] = GEN_INT (p);
3345 operands[3] = GEN_INT (len);
3346 return "{depi|depwi} 0,%2,%3,%0";
3349 else
3350 return "and %1,%2,%0";
3353 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3354 storing the result in operands[0]. */
3355 const char *
3356 pa_output_64bit_and (rtx *operands)
3358 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3360 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3361 int ls0, ls1, ms0, p, len;
3363 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3364 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3365 break;
3367 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3368 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3369 break;
3371 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3372 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3373 break;
3375 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3377 if (ls1 == HOST_BITS_PER_WIDE_INT)
3379 len = ls0;
3381 gcc_assert (len);
3383 operands[2] = GEN_INT (len);
3384 return "extrd,u %1,63,%2,%0";
3386 else
3388 /* We could use this `depi' for the case above as well, but `depi'
3389 requires one more register file access than an `extru'. */
3391 p = 63 - ls0;
3392 len = ls1 - ls0;
3394 operands[2] = GEN_INT (p);
3395 operands[3] = GEN_INT (len);
3396 return "depdi 0,%2,%3,%0";
3399 else
3400 return "and %1,%2,%0";
3403 const char *
3404 pa_output_ior (rtx *operands)
3406 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3407 int bs0, bs1, p, len;
3409 if (INTVAL (operands[2]) == 0)
3410 return "copy %1,%0";
3412 for (bs0 = 0; bs0 < 32; bs0++)
3413 if ((mask & (1 << bs0)) != 0)
3414 break;
3416 for (bs1 = bs0; bs1 < 32; bs1++)
3417 if ((mask & (1 << bs1)) == 0)
3418 break;
3420 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3422 p = 31 - bs0;
3423 len = bs1 - bs0;
3425 operands[2] = GEN_INT (p);
3426 operands[3] = GEN_INT (len);
3427 return "{depi|depwi} -1,%2,%3,%0";
3430 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3431 storing the result in operands[0]. */
3432 const char *
3433 pa_output_64bit_ior (rtx *operands)
3435 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3436 int bs0, bs1, p, len;
3438 if (INTVAL (operands[2]) == 0)
3439 return "copy %1,%0";
3441 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3442 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3443 break;
3445 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3446 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3447 break;
3449 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3450 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3452 p = 63 - bs0;
3453 len = bs1 - bs0;
3455 operands[2] = GEN_INT (p);
3456 operands[3] = GEN_INT (len);
3457 return "depdi -1,%2,%3,%0";
3460 /* Target hook for assembling integer objects. This code handles
3461 aligned SI and DI integers specially since function references
3462 must be preceded by P%. */
3464 static bool
3465 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3467 bool result;
3468 tree decl = NULL;
3470 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3471 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3472 calling output_addr_const. Otherwise, it may call assemble_external
3473 in the midst of outputing the assembler code for the SYMBOL_REF.
3474 We restore the SYMBOL_REF_DECL after the output is done. */
3475 if (GET_CODE (x) == SYMBOL_REF)
3477 decl = SYMBOL_REF_DECL (x);
3478 if (decl)
3480 assemble_external (decl);
3481 SET_SYMBOL_REF_DECL (x, NULL);
3485 if (size == UNITS_PER_WORD
3486 && aligned_p
3487 && function_label_operand (x, VOIDmode))
3489 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3491 /* We don't want an OPD when generating fast indirect calls. */
3492 if (!TARGET_FAST_INDIRECT_CALLS)
3493 fputs ("P%", asm_out_file);
3495 output_addr_const (asm_out_file, x);
3496 fputc ('\n', asm_out_file);
3497 result = true;
3499 else
3500 result = default_assemble_integer (x, size, aligned_p);
3502 if (decl)
3503 SET_SYMBOL_REF_DECL (x, decl);
3505 return result;
3508 /* Output an ascii string. */
3509 void
3510 pa_output_ascii (FILE *file, const char *p, int size)
3512 int i;
3513 int chars_output;
3514 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3516 /* The HP assembler can only take strings of 256 characters at one
3517 time. This is a limitation on input line length, *not* the
3518 length of the string. Sigh. Even worse, it seems that the
3519 restriction is in number of input characters (see \xnn &
3520 \whatever). So we have to do this very carefully. */
3522 fputs ("\t.STRING \"", file);
3524 chars_output = 0;
3525 for (i = 0; i < size; i += 4)
3527 int co = 0;
3528 int io = 0;
3529 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3531 unsigned int c = (unsigned char) p[i + io];
3533 if (c == '\"' || c == '\\')
3534 partial_output[co++] = '\\';
3535 if (c >= ' ' && c < 0177)
3536 partial_output[co++] = c;
3537 else
3539 unsigned int hexd;
3540 partial_output[co++] = '\\';
3541 partial_output[co++] = 'x';
3542 hexd = c / 16 - 0 + '0';
3543 if (hexd > '9')
3544 hexd -= '9' - 'a' + 1;
3545 partial_output[co++] = hexd;
3546 hexd = c % 16 - 0 + '0';
3547 if (hexd > '9')
3548 hexd -= '9' - 'a' + 1;
3549 partial_output[co++] = hexd;
3552 if (chars_output + co > 243)
3554 fputs ("\"\n\t.STRING \"", file);
3555 chars_output = 0;
3557 fwrite (partial_output, 1, (size_t) co, file);
3558 chars_output += co;
3559 co = 0;
3561 fputs ("\"\n", file);
3564 /* Try to rewrite floating point comparisons & branches to avoid
3565 useless add,tr insns.
3567 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3568 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3569 first attempt to remove useless add,tr insns. It is zero
3570 for the second pass as reorg sometimes leaves bogus REG_DEAD
3571 notes lying around.
3573 When CHECK_NOTES is zero we can only eliminate add,tr insns
3574 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3575 instructions. */
3576 static void
3577 remove_useless_addtr_insns (int check_notes)
3579 rtx_insn *insn;
3580 static int pass = 0;
3582 /* This is fairly cheap, so always run it when optimizing. */
3583 if (optimize > 0)
3585 int fcmp_count = 0;
3586 int fbranch_count = 0;
3588 /* Walk all the insns in this function looking for fcmp & fbranch
3589 instructions. Keep track of how many of each we find. */
3590 for (insn = get_insns (); insn; insn = next_insn (insn))
3592 rtx tmp;
3594 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3595 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3596 continue;
3598 tmp = PATTERN (insn);
3600 /* It must be a set. */
3601 if (GET_CODE (tmp) != SET)
3602 continue;
3604 /* If the destination is CCFP, then we've found an fcmp insn. */
3605 tmp = SET_DEST (tmp);
3606 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3608 fcmp_count++;
3609 continue;
3612 tmp = PATTERN (insn);
3613 /* If this is an fbranch instruction, bump the fbranch counter. */
3614 if (GET_CODE (tmp) == SET
3615 && SET_DEST (tmp) == pc_rtx
3616 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3617 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3618 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3619 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3621 fbranch_count++;
3622 continue;
3627 /* Find all floating point compare + branch insns. If possible,
3628 reverse the comparison & the branch to avoid add,tr insns. */
3629 for (insn = get_insns (); insn; insn = next_insn (insn))
3631 rtx tmp;
3632 rtx_insn *next;
3634 /* Ignore anything that isn't an INSN. */
3635 if (! NONJUMP_INSN_P (insn))
3636 continue;
3638 tmp = PATTERN (insn);
3640 /* It must be a set. */
3641 if (GET_CODE (tmp) != SET)
3642 continue;
3644 /* The destination must be CCFP, which is register zero. */
3645 tmp = SET_DEST (tmp);
3646 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3647 continue;
3649 /* INSN should be a set of CCFP.
3651 See if the result of this insn is used in a reversed FP
3652 conditional branch. If so, reverse our condition and
3653 the branch. Doing so avoids useless add,tr insns. */
3654 next = next_insn (insn);
3655 while (next)
3657 /* Jumps, calls and labels stop our search. */
3658 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3659 break;
3661 /* As does another fcmp insn. */
3662 if (NONJUMP_INSN_P (next)
3663 && GET_CODE (PATTERN (next)) == SET
3664 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3665 && REGNO (SET_DEST (PATTERN (next))) == 0)
3666 break;
3668 next = next_insn (next);
3671 /* Is NEXT_INSN a branch? */
3672 if (next && JUMP_P (next))
3674 rtx pattern = PATTERN (next);
3676 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3677 and CCFP dies, then reverse our conditional and the branch
3678 to avoid the add,tr. */
3679 if (GET_CODE (pattern) == SET
3680 && SET_DEST (pattern) == pc_rtx
3681 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3682 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3683 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3684 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3685 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3686 && (fcmp_count == fbranch_count
3687 || (check_notes
3688 && find_regno_note (next, REG_DEAD, 0))))
3690 /* Reverse the branch. */
3691 tmp = XEXP (SET_SRC (pattern), 1);
3692 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3693 XEXP (SET_SRC (pattern), 2) = tmp;
3694 INSN_CODE (next) = -1;
3696 /* Reverse our condition. */
3697 tmp = PATTERN (insn);
3698 PUT_CODE (XEXP (tmp, 1),
3699 (reverse_condition_maybe_unordered
3700 (GET_CODE (XEXP (tmp, 1)))));
3706 pass = !pass;
3710 /* You may have trouble believing this, but this is the 32 bit HP-PA
3711 stack layout. Wow.
3713 Offset Contents
3715 Variable arguments (optional; any number may be allocated)
3717 SP-(4*(N+9)) arg word N
3719 SP-56 arg word 5
3720 SP-52 arg word 4
3722 Fixed arguments (must be allocated; may remain unused)
3724 SP-48 arg word 3
3725 SP-44 arg word 2
3726 SP-40 arg word 1
3727 SP-36 arg word 0
3729 Frame Marker
3731 SP-32 External Data Pointer (DP)
3732 SP-28 External sr4
3733 SP-24 External/stub RP (RP')
3734 SP-20 Current RP
3735 SP-16 Static Link
3736 SP-12 Clean up
3737 SP-8 Calling Stub RP (RP'')
3738 SP-4 Previous SP
3740 Top of Frame
3742 SP-0 Stack Pointer (points to next available address)
3746 /* This function saves registers as follows. Registers marked with ' are
3747 this function's registers (as opposed to the previous function's).
3748 If a frame_pointer isn't needed, r4 is saved as a general register;
3749 the space for the frame pointer is still allocated, though, to keep
3750 things simple.
3753 Top of Frame
3755 SP (FP') Previous FP
3756 SP + 4 Alignment filler (sigh)
3757 SP + 8 Space for locals reserved here.
3761 SP + n All call saved register used.
3765 SP + o All call saved fp registers used.
3769 SP + p (SP') points to next available address.
3773 /* Global variables set by output_function_prologue(). */
3774 /* Size of frame. Need to know this to emit return insns from
3775 leaf procedures. */
3776 static HOST_WIDE_INT actual_fsize, local_fsize;
3777 static int save_fregs;
3779 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3780 Handle case where DISP > 8k by using the add_high_const patterns.
3782 Note in DISP > 8k case, we will leave the high part of the address
3783 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3785 static void
3786 store_reg (int reg, HOST_WIDE_INT disp, int base)
3788 rtx dest, src, basereg;
3789 rtx_insn *insn;
3791 src = gen_rtx_REG (word_mode, reg);
3792 basereg = gen_rtx_REG (Pmode, base);
3793 if (VAL_14_BITS_P (disp))
3795 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3796 insn = emit_move_insn (dest, src);
3798 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3800 rtx delta = GEN_INT (disp);
3801 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3803 emit_move_insn (tmpreg, delta);
3804 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3805 if (DO_FRAME_NOTES)
3807 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3808 gen_rtx_SET (tmpreg,
3809 gen_rtx_PLUS (Pmode, basereg, delta)));
3810 RTX_FRAME_RELATED_P (insn) = 1;
3812 dest = gen_rtx_MEM (word_mode, tmpreg);
3813 insn = emit_move_insn (dest, src);
3815 else
3817 rtx delta = GEN_INT (disp);
3818 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3819 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3821 emit_move_insn (tmpreg, high);
3822 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3823 insn = emit_move_insn (dest, src);
3824 if (DO_FRAME_NOTES)
3825 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3826 gen_rtx_SET (gen_rtx_MEM (word_mode,
3827 gen_rtx_PLUS (word_mode,
3828 basereg,
3829 delta)),
3830 src));
3833 if (DO_FRAME_NOTES)
3834 RTX_FRAME_RELATED_P (insn) = 1;
3837 /* Emit RTL to store REG at the memory location specified by BASE and then
3838 add MOD to BASE. MOD must be <= 8k. */
3840 static void
3841 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3843 rtx basereg, srcreg, delta;
3844 rtx_insn *insn;
3846 gcc_assert (VAL_14_BITS_P (mod));
3848 basereg = gen_rtx_REG (Pmode, base);
3849 srcreg = gen_rtx_REG (word_mode, reg);
3850 delta = GEN_INT (mod);
3852 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3853 if (DO_FRAME_NOTES)
3855 RTX_FRAME_RELATED_P (insn) = 1;
3857 /* RTX_FRAME_RELATED_P must be set on each frame related set
3858 in a parallel with more than one element. */
3859 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3860 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3864 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3865 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3866 whether to add a frame note or not.
3868 In the DISP > 8k case, we leave the high part of the address in %r1.
3869 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3871 static void
3872 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3874 rtx_insn *insn;
3876 if (VAL_14_BITS_P (disp))
3878 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3879 plus_constant (Pmode,
3880 gen_rtx_REG (Pmode, base), disp));
3882 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3884 rtx basereg = gen_rtx_REG (Pmode, base);
3885 rtx delta = GEN_INT (disp);
3886 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3888 emit_move_insn (tmpreg, delta);
3889 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3890 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3891 if (DO_FRAME_NOTES)
3892 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3893 gen_rtx_SET (tmpreg,
3894 gen_rtx_PLUS (Pmode, basereg, delta)));
3896 else
3898 rtx basereg = gen_rtx_REG (Pmode, base);
3899 rtx delta = GEN_INT (disp);
3900 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3902 emit_move_insn (tmpreg,
3903 gen_rtx_PLUS (Pmode, basereg,
3904 gen_rtx_HIGH (Pmode, delta)));
3905 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3906 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3909 if (DO_FRAME_NOTES && note)
3910 RTX_FRAME_RELATED_P (insn) = 1;
3913 HOST_WIDE_INT
3914 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3916 int freg_saved = 0;
3917 int i, j;
3919 /* The code in pa_expand_prologue and pa_expand_epilogue must
3920 be consistent with the rounding and size calculation done here.
3921 Change them at the same time. */
3923 /* We do our own stack alignment. First, round the size of the
3924 stack locals up to a word boundary. */
3925 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3927 /* Space for previous frame pointer + filler. If any frame is
3928 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3929 waste some space here for the sake of HP compatibility. The
3930 first slot is only used when the frame pointer is needed. */
3931 if (size || frame_pointer_needed)
3932 size += pa_starting_frame_offset ();
3934 /* If the current function calls __builtin_eh_return, then we need
3935 to allocate stack space for registers that will hold data for
3936 the exception handler. */
3937 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3939 unsigned int i;
3941 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3942 continue;
3943 size += i * UNITS_PER_WORD;
3946 /* Account for space used by the callee general register saves. */
3947 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3948 if (df_regs_ever_live_p (i))
3949 size += UNITS_PER_WORD;
3951 /* Account for space used by the callee floating point register saves. */
3952 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3953 if (df_regs_ever_live_p (i)
3954 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3956 freg_saved = 1;
3958 /* We always save both halves of the FP register, so always
3959 increment the frame size by 8 bytes. */
3960 size += 8;
3963 /* If any of the floating registers are saved, account for the
3964 alignment needed for the floating point register save block. */
3965 if (freg_saved)
3967 size = (size + 7) & ~7;
3968 if (fregs_live)
3969 *fregs_live = 1;
3972 /* The various ABIs include space for the outgoing parameters in the
3973 size of the current function's stack frame. We don't need to align
3974 for the outgoing arguments as their alignment is set by the final
3975 rounding for the frame as a whole. */
3976 size += crtl->outgoing_args_size;
3978 /* Allocate space for the fixed frame marker. This space must be
3979 allocated for any function that makes calls or allocates
3980 stack space. */
3981 if (!crtl->is_leaf || size)
3982 size += TARGET_64BIT ? 48 : 32;
3984 /* Finally, round to the preferred stack boundary. */
3985 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3986 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3989 /* Output function label, and associated .PROC and .CALLINFO statements. */
3991 void
3992 pa_output_function_label (FILE *file)
3994 /* The function's label and associated .PROC must never be
3995 separated and must be output *after* any profiling declarations
3996 to avoid changing spaces/subspaces within a procedure. */
3997 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3998 fputs ("\t.PROC\n", file);
4000 /* pa_expand_prologue does the dirty work now. We just need
4001 to output the assembler directives which denote the start
4002 of a function. */
4003 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4004 if (crtl->is_leaf)
4005 fputs (",NO_CALLS", file);
4006 else
4007 fputs (",CALLS", file);
4008 if (rp_saved)
4009 fputs (",SAVE_RP", file);
4011 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4012 at the beginning of the frame and that it is used as the frame
4013 pointer for the frame. We do this because our current frame
4014 layout doesn't conform to that specified in the HP runtime
4015 documentation and we need a way to indicate to programs such as
4016 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4017 isn't used by HP compilers but is supported by the assembler.
4018 However, SAVE_SP is supposed to indicate that the previous stack
4019 pointer has been saved in the frame marker. */
4020 if (frame_pointer_needed)
4021 fputs (",SAVE_SP", file);
4023 /* Pass on information about the number of callee register saves
4024 performed in the prologue.
4026 The compiler is supposed to pass the highest register number
4027 saved, the assembler then has to adjust that number before
4028 entering it into the unwind descriptor (to account for any
4029 caller saved registers with lower register numbers than the
4030 first callee saved register). */
4031 if (gr_saved)
4032 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4034 if (fr_saved)
4035 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4037 fputs ("\n\t.ENTRY\n", file);
4040 /* Output function prologue. */
4042 static void
4043 pa_output_function_prologue (FILE *file)
4045 pa_output_function_label (file);
4046 remove_useless_addtr_insns (0);
4049 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4051 static void
4052 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4054 remove_useless_addtr_insns (0);
4057 void
4058 pa_expand_prologue (void)
4060 int merge_sp_adjust_with_store = 0;
4061 HOST_WIDE_INT size = get_frame_size ();
4062 HOST_WIDE_INT offset;
4063 int i;
4064 rtx tmpreg;
4065 rtx_insn *insn;
4067 gr_saved = 0;
4068 fr_saved = 0;
4069 save_fregs = 0;
4071 /* Compute total size for frame pointer, filler, locals and rounding to
4072 the next word boundary. Similar code appears in pa_compute_frame_size
4073 and must be changed in tandem with this code. */
4074 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4075 if (local_fsize || frame_pointer_needed)
4076 local_fsize += pa_starting_frame_offset ();
4078 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4079 if (flag_stack_usage_info)
4080 current_function_static_stack_size = actual_fsize;
4082 /* Compute a few things we will use often. */
4083 tmpreg = gen_rtx_REG (word_mode, 1);
4085 /* Save RP first. The calling conventions manual states RP will
4086 always be stored into the caller's frame at sp - 20 or sp - 16
4087 depending on which ABI is in use. */
4088 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4090 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4091 rp_saved = true;
4093 else
4094 rp_saved = false;
4096 /* Allocate the local frame and set up the frame pointer if needed. */
4097 if (actual_fsize != 0)
4099 if (frame_pointer_needed)
4101 /* Copy the old frame pointer temporarily into %r1. Set up the
4102 new stack pointer, then store away the saved old frame pointer
4103 into the stack at sp and at the same time update the stack
4104 pointer by actual_fsize bytes. Two versions, first
4105 handles small (<8k) frames. The second handles large (>=8k)
4106 frames. */
4107 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4108 if (DO_FRAME_NOTES)
4109 RTX_FRAME_RELATED_P (insn) = 1;
4111 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4112 if (DO_FRAME_NOTES)
4113 RTX_FRAME_RELATED_P (insn) = 1;
4115 if (VAL_14_BITS_P (actual_fsize))
4116 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4117 else
4119 /* It is incorrect to store the saved frame pointer at *sp,
4120 then increment sp (writes beyond the current stack boundary).
4122 So instead use stwm to store at *sp and post-increment the
4123 stack pointer as an atomic operation. Then increment sp to
4124 finish allocating the new frame. */
4125 HOST_WIDE_INT adjust1 = 8192 - 64;
4126 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4128 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4129 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4130 adjust2, 1);
4133 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4134 we need to store the previous stack pointer (frame pointer)
4135 into the frame marker on targets that use the HP unwind
4136 library. This allows the HP unwind library to be used to
4137 unwind GCC frames. However, we are not fully compatible
4138 with the HP library because our frame layout differs from
4139 that specified in the HP runtime specification.
4141 We don't want a frame note on this instruction as the frame
4142 marker moves during dynamic stack allocation.
4144 This instruction also serves as a blockage to prevent
4145 register spills from being scheduled before the stack
4146 pointer is raised. This is necessary as we store
4147 registers using the frame pointer as a base register,
4148 and the frame pointer is set before sp is raised. */
4149 if (TARGET_HPUX_UNWIND_LIBRARY)
4151 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4152 GEN_INT (TARGET_64BIT ? -8 : -4));
4154 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4155 hard_frame_pointer_rtx);
4157 else
4158 emit_insn (gen_blockage ());
4160 /* no frame pointer needed. */
4161 else
4163 /* In some cases we can perform the first callee register save
4164 and allocating the stack frame at the same time. If so, just
4165 make a note of it and defer allocating the frame until saving
4166 the callee registers. */
4167 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4168 merge_sp_adjust_with_store = 1;
4169 /* Cannot optimize. Adjust the stack frame by actual_fsize
4170 bytes. */
4171 else
4172 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4173 actual_fsize, 1);
4177 /* Normal register save.
4179 Do not save the frame pointer in the frame_pointer_needed case. It
4180 was done earlier. */
4181 if (frame_pointer_needed)
4183 offset = local_fsize;
4185 /* Saving the EH return data registers in the frame is the simplest
4186 way to get the frame unwind information emitted. We put them
4187 just before the general registers. */
4188 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4190 unsigned int i, regno;
4192 for (i = 0; ; ++i)
4194 regno = EH_RETURN_DATA_REGNO (i);
4195 if (regno == INVALID_REGNUM)
4196 break;
4198 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4199 offset += UNITS_PER_WORD;
4203 for (i = 18; i >= 4; i--)
4204 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4206 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4207 offset += UNITS_PER_WORD;
4208 gr_saved++;
4210 /* Account for %r3 which is saved in a special place. */
4211 gr_saved++;
4213 /* No frame pointer needed. */
4214 else
4216 offset = local_fsize - actual_fsize;
4218 /* Saving the EH return data registers in the frame is the simplest
4219 way to get the frame unwind information emitted. */
4220 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4222 unsigned int i, regno;
4224 for (i = 0; ; ++i)
4226 regno = EH_RETURN_DATA_REGNO (i);
4227 if (regno == INVALID_REGNUM)
4228 break;
4230 /* If merge_sp_adjust_with_store is nonzero, then we can
4231 optimize the first save. */
4232 if (merge_sp_adjust_with_store)
4234 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4235 merge_sp_adjust_with_store = 0;
4237 else
4238 store_reg (regno, offset, STACK_POINTER_REGNUM);
4239 offset += UNITS_PER_WORD;
4243 for (i = 18; i >= 3; i--)
4244 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4246 /* If merge_sp_adjust_with_store is nonzero, then we can
4247 optimize the first GR save. */
4248 if (merge_sp_adjust_with_store)
4250 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4251 merge_sp_adjust_with_store = 0;
4253 else
4254 store_reg (i, offset, STACK_POINTER_REGNUM);
4255 offset += UNITS_PER_WORD;
4256 gr_saved++;
4259 /* If we wanted to merge the SP adjustment with a GR save, but we never
4260 did any GR saves, then just emit the adjustment here. */
4261 if (merge_sp_adjust_with_store)
4262 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4263 actual_fsize, 1);
4266 /* The hppa calling conventions say that %r19, the pic offset
4267 register, is saved at sp - 32 (in this function's frame)
4268 when generating PIC code. FIXME: What is the correct thing
4269 to do for functions which make no calls and allocate no
4270 frame? Do we need to allocate a frame, or can we just omit
4271 the save? For now we'll just omit the save.
4273 We don't want a note on this insn as the frame marker can
4274 move if there is a dynamic stack allocation. */
4275 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4277 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4279 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4283 /* Align pointer properly (doubleword boundary). */
4284 offset = (offset + 7) & ~7;
4286 /* Floating point register store. */
4287 if (save_fregs)
4289 rtx base;
4291 /* First get the frame or stack pointer to the start of the FP register
4292 save area. */
4293 if (frame_pointer_needed)
4295 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4296 base = hard_frame_pointer_rtx;
4298 else
4300 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4301 base = stack_pointer_rtx;
4304 /* Now actually save the FP registers. */
4305 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4307 if (df_regs_ever_live_p (i)
4308 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4310 rtx addr, reg;
4311 rtx_insn *insn;
4312 addr = gen_rtx_MEM (DFmode,
4313 gen_rtx_POST_INC (word_mode, tmpreg));
4314 reg = gen_rtx_REG (DFmode, i);
4315 insn = emit_move_insn (addr, reg);
4316 if (DO_FRAME_NOTES)
4318 RTX_FRAME_RELATED_P (insn) = 1;
4319 if (TARGET_64BIT)
4321 rtx mem = gen_rtx_MEM (DFmode,
4322 plus_constant (Pmode, base,
4323 offset));
4324 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4325 gen_rtx_SET (mem, reg));
4327 else
4329 rtx meml = gen_rtx_MEM (SFmode,
4330 plus_constant (Pmode, base,
4331 offset));
4332 rtx memr = gen_rtx_MEM (SFmode,
4333 plus_constant (Pmode, base,
4334 offset + 4));
4335 rtx regl = gen_rtx_REG (SFmode, i);
4336 rtx regr = gen_rtx_REG (SFmode, i + 1);
4337 rtx setl = gen_rtx_SET (meml, regl);
4338 rtx setr = gen_rtx_SET (memr, regr);
4339 rtvec vec;
4341 RTX_FRAME_RELATED_P (setl) = 1;
4342 RTX_FRAME_RELATED_P (setr) = 1;
4343 vec = gen_rtvec (2, setl, setr);
4344 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4345 gen_rtx_SEQUENCE (VOIDmode, vec));
4348 offset += GET_MODE_SIZE (DFmode);
4349 fr_saved++;
4355 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4356 Handle case where DISP > 8k by using the add_high_const patterns. */
4358 static void
4359 load_reg (int reg, HOST_WIDE_INT disp, int base)
4361 rtx dest = gen_rtx_REG (word_mode, reg);
4362 rtx basereg = gen_rtx_REG (Pmode, base);
4363 rtx src;
4365 if (VAL_14_BITS_P (disp))
4366 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4367 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4369 rtx delta = GEN_INT (disp);
4370 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4372 emit_move_insn (tmpreg, delta);
4373 if (TARGET_DISABLE_INDEXING)
4375 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4376 src = gen_rtx_MEM (word_mode, tmpreg);
4378 else
4379 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4381 else
4383 rtx delta = GEN_INT (disp);
4384 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4385 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4387 emit_move_insn (tmpreg, high);
4388 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4391 emit_move_insn (dest, src);
4394 /* Update the total code bytes output to the text section. */
4396 static void
4397 update_total_code_bytes (unsigned int nbytes)
4399 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4400 && !IN_NAMED_SECTION_P (cfun->decl))
4402 unsigned int old_total = total_code_bytes;
4404 total_code_bytes += nbytes;
4406 /* Be prepared to handle overflows. */
4407 if (old_total > total_code_bytes)
4408 total_code_bytes = UINT_MAX;
4412 /* This function generates the assembly code for function exit.
4413 Args are as for output_function_prologue ().
4415 The function epilogue should not depend on the current stack
4416 pointer! It should use the frame pointer only. This is mandatory
4417 because of alloca; we also take advantage of it to omit stack
4418 adjustments before returning. */
4420 static void
4421 pa_output_function_epilogue (FILE *file)
4423 rtx_insn *insn = get_last_insn ();
4424 bool extra_nop;
4426 /* pa_expand_epilogue does the dirty work now. We just need
4427 to output the assembler directives which denote the end
4428 of a function.
4430 To make debuggers happy, emit a nop if the epilogue was completely
4431 eliminated due to a volatile call as the last insn in the
4432 current function. That way the return address (in %r2) will
4433 always point to a valid instruction in the current function. */
4435 /* Get the last real insn. */
4436 if (NOTE_P (insn))
4437 insn = prev_real_insn (insn);
4439 /* If it is a sequence, then look inside. */
4440 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4441 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4443 /* If insn is a CALL_INSN, then it must be a call to a volatile
4444 function (otherwise there would be epilogue insns). */
4445 if (insn && CALL_P (insn))
4447 fputs ("\tnop\n", file);
4448 extra_nop = true;
4450 else
4451 extra_nop = false;
4453 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4455 if (TARGET_SOM && TARGET_GAS)
4457 /* We are done with this subspace except possibly for some additional
4458 debug information. Forget that we are in this subspace to ensure
4459 that the next function is output in its own subspace. */
4460 in_section = NULL;
4461 cfun->machine->in_nsubspa = 2;
4464 /* Thunks do their own insn accounting. */
4465 if (cfun->is_thunk)
4466 return;
4468 if (INSN_ADDRESSES_SET_P ())
4470 last_address = extra_nop ? 4 : 0;
4471 insn = get_last_nonnote_insn ();
4472 if (insn)
4474 last_address += INSN_ADDRESSES (INSN_UID (insn));
4475 if (INSN_P (insn))
4476 last_address += insn_default_length (insn);
4478 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4479 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4481 else
4482 last_address = UINT_MAX;
4484 /* Finally, update the total number of code bytes output so far. */
4485 update_total_code_bytes (last_address);
4488 void
4489 pa_expand_epilogue (void)
4491 rtx tmpreg;
4492 HOST_WIDE_INT offset;
4493 HOST_WIDE_INT ret_off = 0;
4494 int i;
4495 int merge_sp_adjust_with_load = 0;
4497 /* We will use this often. */
4498 tmpreg = gen_rtx_REG (word_mode, 1);
4500 /* Try to restore RP early to avoid load/use interlocks when
4501 RP gets used in the return (bv) instruction. This appears to still
4502 be necessary even when we schedule the prologue and epilogue. */
4503 if (rp_saved)
4505 ret_off = TARGET_64BIT ? -16 : -20;
4506 if (frame_pointer_needed)
4508 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4509 ret_off = 0;
4511 else
4513 /* No frame pointer, and stack is smaller than 8k. */
4514 if (VAL_14_BITS_P (ret_off - actual_fsize))
4516 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4517 ret_off = 0;
4522 /* General register restores. */
4523 if (frame_pointer_needed)
4525 offset = local_fsize;
4527 /* If the current function calls __builtin_eh_return, then we need
4528 to restore the saved EH data registers. */
4529 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4531 unsigned int i, regno;
4533 for (i = 0; ; ++i)
4535 regno = EH_RETURN_DATA_REGNO (i);
4536 if (regno == INVALID_REGNUM)
4537 break;
4539 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4540 offset += UNITS_PER_WORD;
4544 for (i = 18; i >= 4; i--)
4545 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4547 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4548 offset += UNITS_PER_WORD;
4551 else
4553 offset = local_fsize - actual_fsize;
4555 /* If the current function calls __builtin_eh_return, then we need
4556 to restore the saved EH data registers. */
4557 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4559 unsigned int i, regno;
4561 for (i = 0; ; ++i)
4563 regno = EH_RETURN_DATA_REGNO (i);
4564 if (regno == INVALID_REGNUM)
4565 break;
4567 /* Only for the first load.
4568 merge_sp_adjust_with_load holds the register load
4569 with which we will merge the sp adjustment. */
4570 if (merge_sp_adjust_with_load == 0
4571 && local_fsize == 0
4572 && VAL_14_BITS_P (-actual_fsize))
4573 merge_sp_adjust_with_load = regno;
4574 else
4575 load_reg (regno, offset, STACK_POINTER_REGNUM);
4576 offset += UNITS_PER_WORD;
4580 for (i = 18; i >= 3; i--)
4582 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4584 /* Only for the first load.
4585 merge_sp_adjust_with_load holds the register load
4586 with which we will merge the sp adjustment. */
4587 if (merge_sp_adjust_with_load == 0
4588 && local_fsize == 0
4589 && VAL_14_BITS_P (-actual_fsize))
4590 merge_sp_adjust_with_load = i;
4591 else
4592 load_reg (i, offset, STACK_POINTER_REGNUM);
4593 offset += UNITS_PER_WORD;
4598 /* Align pointer properly (doubleword boundary). */
4599 offset = (offset + 7) & ~7;
4601 /* FP register restores. */
4602 if (save_fregs)
4604 /* Adjust the register to index off of. */
4605 if (frame_pointer_needed)
4606 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4607 else
4608 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4610 /* Actually do the restores now. */
4611 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4612 if (df_regs_ever_live_p (i)
4613 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4615 rtx src = gen_rtx_MEM (DFmode,
4616 gen_rtx_POST_INC (word_mode, tmpreg));
4617 rtx dest = gen_rtx_REG (DFmode, i);
4618 emit_move_insn (dest, src);
4622 /* Emit a blockage insn here to keep these insns from being moved to
4623 an earlier spot in the epilogue, or into the main instruction stream.
4625 This is necessary as we must not cut the stack back before all the
4626 restores are finished. */
4627 emit_insn (gen_blockage ());
4629 /* Reset stack pointer (and possibly frame pointer). The stack
4630 pointer is initially set to fp + 64 to avoid a race condition. */
4631 if (frame_pointer_needed)
4633 rtx delta = GEN_INT (-64);
4635 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4636 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4637 stack_pointer_rtx, delta));
4639 /* If we were deferring a callee register restore, do it now. */
4640 else if (merge_sp_adjust_with_load)
4642 rtx delta = GEN_INT (-actual_fsize);
4643 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4645 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4647 else if (actual_fsize != 0)
4648 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4649 - actual_fsize, 0);
4651 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4652 frame greater than 8k), do so now. */
4653 if (ret_off != 0)
4654 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4656 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4658 rtx sa = EH_RETURN_STACKADJ_RTX;
4660 emit_insn (gen_blockage ());
4661 emit_insn (TARGET_64BIT
4662 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4663 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4667 bool
4668 pa_can_use_return_insn (void)
4670 if (!reload_completed)
4671 return false;
4673 if (frame_pointer_needed)
4674 return false;
4676 if (df_regs_ever_live_p (2))
4677 return false;
4679 if (crtl->profile)
4680 return false;
4682 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4686 hppa_pic_save_rtx (void)
4688 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4691 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4692 #define NO_DEFERRED_PROFILE_COUNTERS 0
4693 #endif
4696 /* Vector of funcdef numbers. */
4697 static vec<int> funcdef_nos;
4699 /* Output deferred profile counters. */
4700 static void
4701 output_deferred_profile_counters (void)
4703 unsigned int i;
4704 int align, n;
4706 if (funcdef_nos.is_empty ())
4707 return;
4709 switch_to_section (data_section);
4710 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4711 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4713 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4715 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4716 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4719 funcdef_nos.release ();
4722 void
4723 hppa_profile_hook (int label_no)
4725 rtx_code_label *label_rtx = gen_label_rtx ();
4726 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4727 rtx arg_bytes, begin_label_rtx, mcount, sym;
4728 rtx_insn *call_insn;
4729 char begin_label_name[16];
4730 bool use_mcount_pcrel_call;
4732 /* Set up call destination. */
4733 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4734 pa_encode_label (sym);
4735 mcount = gen_rtx_MEM (Pmode, sym);
4737 /* If we can reach _mcount with a pc-relative call, we can optimize
4738 loading the address of the current function. This requires linker
4739 long branch stub support. */
4740 if (!TARGET_PORTABLE_RUNTIME
4741 && !TARGET_LONG_CALLS
4742 && (TARGET_SOM || flag_function_sections))
4743 use_mcount_pcrel_call = TRUE;
4744 else
4745 use_mcount_pcrel_call = FALSE;
4747 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4748 label_no);
4749 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4751 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4753 if (!use_mcount_pcrel_call)
4755 /* The address of the function is loaded into %r25 with an instruction-
4756 relative sequence that avoids the use of relocations. We use SImode
4757 for the address of the function in both 32 and 64-bit code to avoid
4758 having to provide DImode versions of the lcla2 pattern. */
4759 if (TARGET_PA_20)
4760 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4761 else
4762 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4765 if (!NO_DEFERRED_PROFILE_COUNTERS)
4767 rtx count_label_rtx, addr, r24;
4768 char count_label_name[16];
4770 funcdef_nos.safe_push (label_no);
4771 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4772 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4773 ggc_strdup (count_label_name));
4775 addr = force_reg (Pmode, count_label_rtx);
4776 r24 = gen_rtx_REG (Pmode, 24);
4777 emit_move_insn (r24, addr);
4779 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4780 if (use_mcount_pcrel_call)
4781 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4782 begin_label_rtx));
4783 else
4784 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4786 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4788 else
4790 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4791 if (use_mcount_pcrel_call)
4792 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4793 begin_label_rtx));
4794 else
4795 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4798 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4799 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4801 /* Indicate the _mcount call cannot throw, nor will it execute a
4802 non-local goto. */
4803 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4805 /* Allocate space for fixed arguments. */
4806 if (reg_parm_stack_space > crtl->outgoing_args_size)
4807 crtl->outgoing_args_size = reg_parm_stack_space;
4810 /* Fetch the return address for the frame COUNT steps up from
4811 the current frame, after the prologue. FRAMEADDR is the
4812 frame pointer of the COUNT frame.
4814 We want to ignore any export stub remnants here. To handle this,
4815 we examine the code at the return address, and if it is an export
4816 stub, we return a memory rtx for the stub return address stored
4817 at frame-24.
4819 The value returned is used in two different ways:
4821 1. To find a function's caller.
4823 2. To change the return address for a function.
4825 This function handles most instances of case 1; however, it will
4826 fail if there are two levels of stubs to execute on the return
4827 path. The only way I believe that can happen is if the return value
4828 needs a parameter relocation, which never happens for C code.
4830 This function handles most instances of case 2; however, it will
4831 fail if we did not originally have stub code on the return path
4832 but will need stub code on the new return path. This can happen if
4833 the caller & callee are both in the main program, but the new
4834 return location is in a shared library. */
4837 pa_return_addr_rtx (int count, rtx frameaddr)
4839 rtx label;
4840 rtx rp;
4841 rtx saved_rp;
4842 rtx ins;
4844 /* The instruction stream at the return address of a PA1.X export stub is:
4846 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4847 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4848 0x00011820 | stub+16: mtsp r1,sr0
4849 0xe0400002 | stub+20: be,n 0(sr0,rp)
4851 0xe0400002 must be specified as -532676606 so that it won't be
4852 rejected as an invalid immediate operand on 64-bit hosts.
4854 The instruction stream at the return address of a PA2.0 export stub is:
4856 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4857 0xe840d002 | stub+12: bve,n (rp)
4860 HOST_WIDE_INT insns[4];
4861 int i, len;
4863 if (count != 0)
4864 return NULL_RTX;
4866 rp = get_hard_reg_initial_val (Pmode, 2);
4868 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4869 return rp;
4871 /* If there is no export stub then just use the value saved from
4872 the return pointer register. */
4874 saved_rp = gen_reg_rtx (Pmode);
4875 emit_move_insn (saved_rp, rp);
4877 /* Get pointer to the instruction stream. We have to mask out the
4878 privilege level from the two low order bits of the return address
4879 pointer here so that ins will point to the start of the first
4880 instruction that would have been executed if we returned. */
4881 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4882 label = gen_label_rtx ();
4884 if (TARGET_PA_20)
4886 insns[0] = 0x4bc23fd1;
4887 insns[1] = -398405630;
4888 len = 2;
4890 else
4892 insns[0] = 0x4bc23fd1;
4893 insns[1] = 0x004010a1;
4894 insns[2] = 0x00011820;
4895 insns[3] = -532676606;
4896 len = 4;
4899 /* Check the instruction stream at the normal return address for the
4900 export stub. If it is an export stub, than our return address is
4901 really in -24[frameaddr]. */
4903 for (i = 0; i < len; i++)
4905 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4906 rtx op1 = GEN_INT (insns[i]);
4907 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4910 /* Here we know that our return address points to an export
4911 stub. We don't want to return the address of the export stub,
4912 but rather the return address of the export stub. That return
4913 address is stored at -24[frameaddr]. */
4915 emit_move_insn (saved_rp,
4916 gen_rtx_MEM (Pmode,
4917 memory_address (Pmode,
4918 plus_constant (Pmode, frameaddr,
4919 -24))));
4921 emit_label (label);
4923 return saved_rp;
4926 void
4927 pa_emit_bcond_fp (rtx operands[])
4929 enum rtx_code code = GET_CODE (operands[0]);
4930 rtx operand0 = operands[1];
4931 rtx operand1 = operands[2];
4932 rtx label = operands[3];
4934 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4935 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4937 emit_jump_insn (gen_rtx_SET (pc_rtx,
4938 gen_rtx_IF_THEN_ELSE (VOIDmode,
4939 gen_rtx_fmt_ee (NE,
4940 VOIDmode,
4941 gen_rtx_REG (CCFPmode, 0),
4942 const0_rtx),
4943 gen_rtx_LABEL_REF (VOIDmode, label),
4944 pc_rtx)));
4948 /* Adjust the cost of a scheduling dependency. Return the new cost of
4949 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4951 static int
4952 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4953 unsigned int)
4955 enum attr_type attr_type;
4957 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4958 true dependencies as they are described with bypasses now. */
4959 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4960 return cost;
4962 if (! recog_memoized (insn))
4963 return 0;
4965 attr_type = get_attr_type (insn);
4967 switch (dep_type)
4969 case REG_DEP_ANTI:
4970 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4971 cycles later. */
4973 if (attr_type == TYPE_FPLOAD)
4975 rtx pat = PATTERN (insn);
4976 rtx dep_pat = PATTERN (dep_insn);
4977 if (GET_CODE (pat) == PARALLEL)
4979 /* This happens for the fldXs,mb patterns. */
4980 pat = XVECEXP (pat, 0, 0);
4982 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4983 /* If this happens, we have to extend this to schedule
4984 optimally. Return 0 for now. */
4985 return 0;
4987 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4989 if (! recog_memoized (dep_insn))
4990 return 0;
4991 switch (get_attr_type (dep_insn))
4993 case TYPE_FPALU:
4994 case TYPE_FPMULSGL:
4995 case TYPE_FPMULDBL:
4996 case TYPE_FPDIVSGL:
4997 case TYPE_FPDIVDBL:
4998 case TYPE_FPSQRTSGL:
4999 case TYPE_FPSQRTDBL:
5000 /* A fpload can't be issued until one cycle before a
5001 preceding arithmetic operation has finished if
5002 the target of the fpload is any of the sources
5003 (or destination) of the arithmetic operation. */
5004 return insn_default_latency (dep_insn) - 1;
5006 default:
5007 return 0;
5011 else if (attr_type == TYPE_FPALU)
5013 rtx pat = PATTERN (insn);
5014 rtx dep_pat = PATTERN (dep_insn);
5015 if (GET_CODE (pat) == PARALLEL)
5017 /* This happens for the fldXs,mb patterns. */
5018 pat = XVECEXP (pat, 0, 0);
5020 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5021 /* If this happens, we have to extend this to schedule
5022 optimally. Return 0 for now. */
5023 return 0;
5025 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5027 if (! recog_memoized (dep_insn))
5028 return 0;
5029 switch (get_attr_type (dep_insn))
5031 case TYPE_FPDIVSGL:
5032 case TYPE_FPDIVDBL:
5033 case TYPE_FPSQRTSGL:
5034 case TYPE_FPSQRTDBL:
5035 /* An ALU flop can't be issued until two cycles before a
5036 preceding divide or sqrt operation has finished if
5037 the target of the ALU flop is any of the sources
5038 (or destination) of the divide or sqrt operation. */
5039 return insn_default_latency (dep_insn) - 2;
5041 default:
5042 return 0;
5047 /* For other anti dependencies, the cost is 0. */
5048 return 0;
5050 case REG_DEP_OUTPUT:
5051 /* Output dependency; DEP_INSN writes a register that INSN writes some
5052 cycles later. */
5053 if (attr_type == TYPE_FPLOAD)
5055 rtx pat = PATTERN (insn);
5056 rtx dep_pat = PATTERN (dep_insn);
5057 if (GET_CODE (pat) == PARALLEL)
5059 /* This happens for the fldXs,mb patterns. */
5060 pat = XVECEXP (pat, 0, 0);
5062 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5063 /* If this happens, we have to extend this to schedule
5064 optimally. Return 0 for now. */
5065 return 0;
5067 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5069 if (! recog_memoized (dep_insn))
5070 return 0;
5071 switch (get_attr_type (dep_insn))
5073 case TYPE_FPALU:
5074 case TYPE_FPMULSGL:
5075 case TYPE_FPMULDBL:
5076 case TYPE_FPDIVSGL:
5077 case TYPE_FPDIVDBL:
5078 case TYPE_FPSQRTSGL:
5079 case TYPE_FPSQRTDBL:
5080 /* A fpload can't be issued until one cycle before a
5081 preceding arithmetic operation has finished if
5082 the target of the fpload is the destination of the
5083 arithmetic operation.
5085 Exception: For PA7100LC, PA7200 and PA7300, the cost
5086 is 3 cycles, unless they bundle together. We also
5087 pay the penalty if the second insn is a fpload. */
5088 return insn_default_latency (dep_insn) - 1;
5090 default:
5091 return 0;
5095 else if (attr_type == TYPE_FPALU)
5097 rtx pat = PATTERN (insn);
5098 rtx dep_pat = PATTERN (dep_insn);
5099 if (GET_CODE (pat) == PARALLEL)
5101 /* This happens for the fldXs,mb patterns. */
5102 pat = XVECEXP (pat, 0, 0);
5104 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5105 /* If this happens, we have to extend this to schedule
5106 optimally. Return 0 for now. */
5107 return 0;
5109 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5111 if (! recog_memoized (dep_insn))
5112 return 0;
5113 switch (get_attr_type (dep_insn))
5115 case TYPE_FPDIVSGL:
5116 case TYPE_FPDIVDBL:
5117 case TYPE_FPSQRTSGL:
5118 case TYPE_FPSQRTDBL:
5119 /* An ALU flop can't be issued until two cycles before a
5120 preceding divide or sqrt operation has finished if
5121 the target of the ALU flop is also the target of
5122 the divide or sqrt operation. */
5123 return insn_default_latency (dep_insn) - 2;
5125 default:
5126 return 0;
5131 /* For other output dependencies, the cost is 0. */
5132 return 0;
5134 default:
5135 gcc_unreachable ();
5139 /* The 700 can only issue a single insn at a time.
5140 The 7XXX processors can issue two insns at a time.
5141 The 8000 can issue 4 insns at a time. */
5142 static int
5143 pa_issue_rate (void)
5145 switch (pa_cpu)
5147 case PROCESSOR_700: return 1;
5148 case PROCESSOR_7100: return 2;
5149 case PROCESSOR_7100LC: return 2;
5150 case PROCESSOR_7200: return 2;
5151 case PROCESSOR_7300: return 2;
5152 case PROCESSOR_8000: return 4;
5154 default:
5155 gcc_unreachable ();
5161 /* Return any length plus adjustment needed by INSN which already has
5162 its length computed as LENGTH. Return LENGTH if no adjustment is
5163 necessary.
5165 Also compute the length of an inline block move here as it is too
5166 complicated to express as a length attribute in pa.md. */
5168 pa_adjust_insn_length (rtx_insn *insn, int length)
5170 rtx pat = PATTERN (insn);
5172 /* If length is negative or undefined, provide initial length. */
5173 if ((unsigned int) length >= INT_MAX)
5175 if (GET_CODE (pat) == SEQUENCE)
5176 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5178 switch (get_attr_type (insn))
5180 case TYPE_MILLI:
5181 length = pa_attr_length_millicode_call (insn);
5182 break;
5183 case TYPE_CALL:
5184 length = pa_attr_length_call (insn, 0);
5185 break;
5186 case TYPE_SIBCALL:
5187 length = pa_attr_length_call (insn, 1);
5188 break;
5189 case TYPE_DYNCALL:
5190 length = pa_attr_length_indirect_call (insn);
5191 break;
5192 case TYPE_SH_FUNC_ADRS:
5193 length = pa_attr_length_millicode_call (insn) + 20;
5194 break;
5195 default:
5196 gcc_unreachable ();
5200 /* Block move pattern. */
5201 if (NONJUMP_INSN_P (insn)
5202 && GET_CODE (pat) == PARALLEL
5203 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5204 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5205 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5206 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5207 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5208 length += compute_cpymem_length (insn) - 4;
5209 /* Block clear pattern. */
5210 else if (NONJUMP_INSN_P (insn)
5211 && GET_CODE (pat) == PARALLEL
5212 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5213 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5214 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5215 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5216 length += compute_clrmem_length (insn) - 4;
5217 /* Conditional branch with an unfilled delay slot. */
5218 else if (JUMP_P (insn) && ! simplejump_p (insn))
5220 /* Adjust a short backwards conditional with an unfilled delay slot. */
5221 if (GET_CODE (pat) == SET
5222 && length == 4
5223 && JUMP_LABEL (insn) != NULL_RTX
5224 && ! forward_branch_p (insn))
5225 length += 4;
5226 else if (GET_CODE (pat) == PARALLEL
5227 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5228 && length == 4)
5229 length += 4;
5230 /* Adjust dbra insn with short backwards conditional branch with
5231 unfilled delay slot -- only for case where counter is in a
5232 general register register. */
5233 else if (GET_CODE (pat) == PARALLEL
5234 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5235 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5236 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5237 && length == 4
5238 && ! forward_branch_p (insn))
5239 length += 4;
5241 return length;
5244 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5246 static bool
5247 pa_print_operand_punct_valid_p (unsigned char code)
5249 if (code == '@'
5250 || code == '#'
5251 || code == '*'
5252 || code == '^')
5253 return true;
5255 return false;
5258 /* Print operand X (an rtx) in assembler syntax to file FILE.
5259 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5260 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5262 void
5263 pa_print_operand (FILE *file, rtx x, int code)
5265 switch (code)
5267 case '#':
5268 /* Output a 'nop' if there's nothing for the delay slot. */
5269 if (dbr_sequence_length () == 0)
5270 fputs ("\n\tnop", file);
5271 return;
5272 case '*':
5273 /* Output a nullification completer if there's nothing for the */
5274 /* delay slot or nullification is requested. */
5275 if (dbr_sequence_length () == 0 ||
5276 (final_sequence &&
5277 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5278 fputs (",n", file);
5279 return;
5280 case 'R':
5281 /* Print out the second register name of a register pair.
5282 I.e., R (6) => 7. */
5283 fputs (reg_names[REGNO (x) + 1], file);
5284 return;
5285 case 'r':
5286 /* A register or zero. */
5287 if (x == const0_rtx
5288 || (x == CONST0_RTX (DFmode))
5289 || (x == CONST0_RTX (SFmode)))
5291 fputs ("%r0", file);
5292 return;
5294 else
5295 break;
5296 case 'f':
5297 /* A register or zero (floating point). */
5298 if (x == const0_rtx
5299 || (x == CONST0_RTX (DFmode))
5300 || (x == CONST0_RTX (SFmode)))
5302 fputs ("%fr0", file);
5303 return;
5305 else
5306 break;
5307 case 'A':
5309 rtx xoperands[2];
5311 xoperands[0] = XEXP (XEXP (x, 0), 0);
5312 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5313 pa_output_global_address (file, xoperands[1], 0);
5314 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5315 return;
5318 case 'C': /* Plain (C)ondition */
5319 case 'X':
5320 switch (GET_CODE (x))
5322 case EQ:
5323 fputs ("=", file); break;
5324 case NE:
5325 fputs ("<>", file); break;
5326 case GT:
5327 fputs (">", file); break;
5328 case GE:
5329 fputs (">=", file); break;
5330 case GEU:
5331 fputs (">>=", file); break;
5332 case GTU:
5333 fputs (">>", file); break;
5334 case LT:
5335 fputs ("<", file); break;
5336 case LE:
5337 fputs ("<=", file); break;
5338 case LEU:
5339 fputs ("<<=", file); break;
5340 case LTU:
5341 fputs ("<<", file); break;
5342 default:
5343 gcc_unreachable ();
5345 return;
5346 case 'N': /* Condition, (N)egated */
5347 switch (GET_CODE (x))
5349 case EQ:
5350 fputs ("<>", file); break;
5351 case NE:
5352 fputs ("=", file); break;
5353 case GT:
5354 fputs ("<=", file); break;
5355 case GE:
5356 fputs ("<", file); break;
5357 case GEU:
5358 fputs ("<<", file); break;
5359 case GTU:
5360 fputs ("<<=", file); break;
5361 case LT:
5362 fputs (">=", file); break;
5363 case LE:
5364 fputs (">", file); break;
5365 case LEU:
5366 fputs (">>", file); break;
5367 case LTU:
5368 fputs (">>=", file); break;
5369 default:
5370 gcc_unreachable ();
5372 return;
5373 /* For floating point comparisons. Note that the output
5374 predicates are the complement of the desired mode. The
5375 conditions for GT, GE, LT, LE and LTGT cause an invalid
5376 operation exception if the result is unordered and this
5377 exception is enabled in the floating-point status register. */
5378 case 'Y':
5379 switch (GET_CODE (x))
5381 case EQ:
5382 fputs ("!=", file); break;
5383 case NE:
5384 fputs ("=", file); break;
5385 case GT:
5386 fputs ("!>", file); break;
5387 case GE:
5388 fputs ("!>=", file); break;
5389 case LT:
5390 fputs ("!<", file); break;
5391 case LE:
5392 fputs ("!<=", file); break;
5393 case LTGT:
5394 fputs ("!<>", file); break;
5395 case UNLE:
5396 fputs ("!?<=", file); break;
5397 case UNLT:
5398 fputs ("!?<", file); break;
5399 case UNGE:
5400 fputs ("!?>=", file); break;
5401 case UNGT:
5402 fputs ("!?>", file); break;
5403 case UNEQ:
5404 fputs ("!?=", file); break;
5405 case UNORDERED:
5406 fputs ("!?", file); break;
5407 case ORDERED:
5408 fputs ("?", file); break;
5409 default:
5410 gcc_unreachable ();
5412 return;
5413 case 'S': /* Condition, operands are (S)wapped. */
5414 switch (GET_CODE (x))
5416 case EQ:
5417 fputs ("=", file); break;
5418 case NE:
5419 fputs ("<>", file); break;
5420 case GT:
5421 fputs ("<", file); break;
5422 case GE:
5423 fputs ("<=", file); break;
5424 case GEU:
5425 fputs ("<<=", file); break;
5426 case GTU:
5427 fputs ("<<", file); break;
5428 case LT:
5429 fputs (">", file); break;
5430 case LE:
5431 fputs (">=", file); break;
5432 case LEU:
5433 fputs (">>=", file); break;
5434 case LTU:
5435 fputs (">>", file); break;
5436 default:
5437 gcc_unreachable ();
5439 return;
5440 case 'B': /* Condition, (B)oth swapped and negate. */
5441 switch (GET_CODE (x))
5443 case EQ:
5444 fputs ("<>", file); break;
5445 case NE:
5446 fputs ("=", file); break;
5447 case GT:
5448 fputs (">=", file); break;
5449 case GE:
5450 fputs (">", file); break;
5451 case GEU:
5452 fputs (">>", file); break;
5453 case GTU:
5454 fputs (">>=", file); break;
5455 case LT:
5456 fputs ("<=", file); break;
5457 case LE:
5458 fputs ("<", file); break;
5459 case LEU:
5460 fputs ("<<", file); break;
5461 case LTU:
5462 fputs ("<<=", file); break;
5463 default:
5464 gcc_unreachable ();
5466 return;
5467 case 'k':
5468 gcc_assert (GET_CODE (x) == CONST_INT);
5469 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5470 return;
5471 case 'Q':
5472 gcc_assert (GET_CODE (x) == CONST_INT);
5473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5474 return;
5475 case 'L':
5476 gcc_assert (GET_CODE (x) == CONST_INT);
5477 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5478 return;
5479 case 'o':
5480 gcc_assert (GET_CODE (x) == CONST_INT
5481 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5483 return;
5484 case 'O':
5485 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5486 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5487 return;
5488 case 'p':
5489 gcc_assert (GET_CODE (x) == CONST_INT);
5490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5491 return;
5492 case 'P':
5493 gcc_assert (GET_CODE (x) == CONST_INT);
5494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5495 return;
5496 case 'I':
5497 if (GET_CODE (x) == CONST_INT)
5498 fputs ("i", file);
5499 return;
5500 case 'M':
5501 case 'F':
5502 switch (GET_CODE (XEXP (x, 0)))
5504 case PRE_DEC:
5505 case PRE_INC:
5506 if (ASSEMBLER_DIALECT == 0)
5507 fputs ("s,mb", file);
5508 else
5509 fputs (",mb", file);
5510 break;
5511 case POST_DEC:
5512 case POST_INC:
5513 if (ASSEMBLER_DIALECT == 0)
5514 fputs ("s,ma", file);
5515 else
5516 fputs (",ma", file);
5517 break;
5518 case PLUS:
5519 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5520 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5522 if (ASSEMBLER_DIALECT == 0)
5523 fputs ("x", file);
5525 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5526 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5528 if (ASSEMBLER_DIALECT == 0)
5529 fputs ("x,s", file);
5530 else
5531 fputs (",s", file);
5533 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5534 fputs ("s", file);
5535 break;
5536 default:
5537 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5538 fputs ("s", file);
5539 break;
5541 return;
5542 case 'G':
5543 pa_output_global_address (file, x, 0);
5544 return;
5545 case 'H':
5546 pa_output_global_address (file, x, 1);
5547 return;
5548 case 0: /* Don't do anything special */
5549 break;
5550 case 'Z':
5552 unsigned op[3];
5553 compute_zdepwi_operands (INTVAL (x), op);
5554 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5555 return;
5557 case 'z':
5559 unsigned op[3];
5560 compute_zdepdi_operands (INTVAL (x), op);
5561 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5562 return;
5564 case 'c':
5565 /* We can get here from a .vtable_inherit due to our
5566 CONSTANT_ADDRESS_P rejecting perfectly good constant
5567 addresses. */
5568 break;
5569 default:
5570 gcc_unreachable ();
5572 if (GET_CODE (x) == REG)
5574 fputs (reg_names [REGNO (x)], file);
5575 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5577 fputs ("R", file);
5578 return;
5580 if (FP_REG_P (x)
5581 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5582 && (REGNO (x) & 1) == 0)
5583 fputs ("L", file);
5585 else if (GET_CODE (x) == MEM)
5587 int size = GET_MODE_SIZE (GET_MODE (x));
5588 rtx base = NULL_RTX;
5589 switch (GET_CODE (XEXP (x, 0)))
5591 case PRE_DEC:
5592 case POST_DEC:
5593 base = XEXP (XEXP (x, 0), 0);
5594 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5595 break;
5596 case PRE_INC:
5597 case POST_INC:
5598 base = XEXP (XEXP (x, 0), 0);
5599 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5600 break;
5601 case PLUS:
5602 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5603 fprintf (file, "%s(%s)",
5604 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5605 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5606 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5607 fprintf (file, "%s(%s)",
5608 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5609 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5610 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5611 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5613 /* Because the REG_POINTER flag can get lost during reload,
5614 pa_legitimate_address_p canonicalizes the order of the
5615 index and base registers in the combined move patterns. */
5616 rtx base = XEXP (XEXP (x, 0), 1);
5617 rtx index = XEXP (XEXP (x, 0), 0);
5619 fprintf (file, "%s(%s)",
5620 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5622 else
5623 output_address (GET_MODE (x), XEXP (x, 0));
5624 break;
5625 default:
5626 output_address (GET_MODE (x), XEXP (x, 0));
5627 break;
5630 else
5631 output_addr_const (file, x);
5634 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5636 void
5637 pa_output_global_address (FILE *file, rtx x, int round_constant)
5640 /* Imagine (high (const (plus ...))). */
5641 if (GET_CODE (x) == HIGH)
5642 x = XEXP (x, 0);
5644 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5645 output_addr_const (file, x);
5646 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5648 output_addr_const (file, x);
5649 fputs ("-$global$", file);
5651 else if (GET_CODE (x) == CONST)
5653 const char *sep = "";
5654 int offset = 0; /* assembler wants -$global$ at end */
5655 rtx base = NULL_RTX;
5657 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5659 case LABEL_REF:
5660 case SYMBOL_REF:
5661 base = XEXP (XEXP (x, 0), 0);
5662 output_addr_const (file, base);
5663 break;
5664 case CONST_INT:
5665 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5666 break;
5667 default:
5668 gcc_unreachable ();
5671 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5673 case LABEL_REF:
5674 case SYMBOL_REF:
5675 base = XEXP (XEXP (x, 0), 1);
5676 output_addr_const (file, base);
5677 break;
5678 case CONST_INT:
5679 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5680 break;
5681 default:
5682 gcc_unreachable ();
5685 /* How bogus. The compiler is apparently responsible for
5686 rounding the constant if it uses an LR field selector.
5688 The linker and/or assembler seem a better place since
5689 they have to do this kind of thing already.
5691 If we fail to do this, HP's optimizing linker may eliminate
5692 an addil, but not update the ldw/stw/ldo instruction that
5693 uses the result of the addil. */
5694 if (round_constant)
5695 offset = ((offset + 0x1000) & ~0x1fff);
5697 switch (GET_CODE (XEXP (x, 0)))
5699 case PLUS:
5700 if (offset < 0)
5702 offset = -offset;
5703 sep = "-";
5705 else
5706 sep = "+";
5707 break;
5709 case MINUS:
5710 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5711 sep = "-";
5712 break;
5714 default:
5715 gcc_unreachable ();
5718 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5719 fputs ("-$global$", file);
5720 if (offset)
5721 fprintf (file, "%s%d", sep, offset);
5723 else
5724 output_addr_const (file, x);
5727 /* Output boilerplate text to appear at the beginning of the file.
5728 There are several possible versions. */
5729 #define aputs(x) fputs(x, asm_out_file)
5730 static inline void
5731 pa_file_start_level (void)
5733 if (TARGET_64BIT)
5734 aputs ("\t.LEVEL 2.0w\n");
5735 else if (TARGET_PA_20)
5736 aputs ("\t.LEVEL 2.0\n");
5737 else if (TARGET_PA_11)
5738 aputs ("\t.LEVEL 1.1\n");
5739 else
5740 aputs ("\t.LEVEL 1.0\n");
5743 static inline void
5744 pa_file_start_space (int sortspace)
5746 aputs ("\t.SPACE $PRIVATE$");
5747 if (sortspace)
5748 aputs (",SORT=16");
5749 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5750 if (flag_tm)
5751 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5752 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5753 "\n\t.SPACE $TEXT$");
5754 if (sortspace)
5755 aputs (",SORT=8");
5756 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5757 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5760 static inline void
5761 pa_file_start_file (int want_version)
5763 if (write_symbols != NO_DEBUG)
5765 output_file_directive (asm_out_file, main_input_filename);
5766 if (want_version)
5767 aputs ("\t.version\t\"01.01\"\n");
5771 static inline void
5772 pa_file_start_mcount (const char *aswhat)
5774 if (profile_flag)
5775 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5778 static void
5779 pa_elf_file_start (void)
5781 pa_file_start_level ();
5782 pa_file_start_mcount ("ENTRY");
5783 pa_file_start_file (0);
5786 static void
5787 pa_som_file_start (void)
5789 pa_file_start_level ();
5790 pa_file_start_space (0);
5791 aputs ("\t.IMPORT $global$,DATA\n"
5792 "\t.IMPORT $$dyncall,MILLICODE\n");
5793 pa_file_start_mcount ("CODE");
5794 pa_file_start_file (0);
5797 static void
5798 pa_linux_file_start (void)
5800 pa_file_start_file (1);
5801 pa_file_start_level ();
5802 pa_file_start_mcount ("CODE");
5805 static void
5806 pa_hpux64_gas_file_start (void)
5808 pa_file_start_level ();
5809 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5810 if (profile_flag)
5811 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5812 #endif
5813 pa_file_start_file (1);
5816 static void
5817 pa_hpux64_hpas_file_start (void)
5819 pa_file_start_level ();
5820 pa_file_start_space (1);
5821 pa_file_start_mcount ("CODE");
5822 pa_file_start_file (0);
5824 #undef aputs
5826 /* Search the deferred plabel list for SYMBOL and return its internal
5827 label. If an entry for SYMBOL is not found, a new entry is created. */
5830 pa_get_deferred_plabel (rtx symbol)
5832 const char *fname = XSTR (symbol, 0);
5833 size_t i;
5835 /* See if we have already put this function on the list of deferred
5836 plabels. This list is generally small, so a liner search is not
5837 too ugly. If it proves too slow replace it with something faster. */
5838 for (i = 0; i < n_deferred_plabels; i++)
5839 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5840 break;
5842 /* If the deferred plabel list is empty, or this entry was not found
5843 on the list, create a new entry on the list. */
5844 if (deferred_plabels == NULL || i == n_deferred_plabels)
5846 tree id;
5848 if (deferred_plabels == 0)
5849 deferred_plabels = ggc_alloc<deferred_plabel> ();
5850 else
5851 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5852 deferred_plabels,
5853 n_deferred_plabels + 1);
5855 i = n_deferred_plabels++;
5856 deferred_plabels[i].internal_label = gen_label_rtx ();
5857 deferred_plabels[i].symbol = symbol;
5859 /* Gross. We have just implicitly taken the address of this
5860 function. Mark it in the same manner as assemble_name. */
5861 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5862 if (id)
5863 mark_referenced (id);
5866 return deferred_plabels[i].internal_label;
5869 static void
5870 output_deferred_plabels (void)
5872 size_t i;
5874 /* If we have some deferred plabels, then we need to switch into the
5875 data or readonly data section, and align it to a 4 byte boundary
5876 before outputting the deferred plabels. */
5877 if (n_deferred_plabels)
5879 switch_to_section (flag_pic ? data_section : readonly_data_section);
5880 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5883 /* Now output the deferred plabels. */
5884 for (i = 0; i < n_deferred_plabels; i++)
5886 targetm.asm_out.internal_label (asm_out_file, "L",
5887 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5888 assemble_integer (deferred_plabels[i].symbol,
5889 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5893 /* Initialize optabs to point to emulation routines. */
5895 static void
5896 pa_init_libfuncs (void)
5898 if (HPUX_LONG_DOUBLE_LIBRARY)
5900 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5901 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5902 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5903 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5904 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5905 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5906 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5907 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5908 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5910 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5911 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5912 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5913 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5914 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5915 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5916 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5918 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5919 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5920 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5921 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5923 set_conv_libfunc (sfix_optab, SImode, TFmode,
5924 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5925 : "_U_Qfcnvfxt_quad_to_sgl");
5926 set_conv_libfunc (sfix_optab, DImode, TFmode,
5927 "_U_Qfcnvfxt_quad_to_dbl");
5928 set_conv_libfunc (ufix_optab, SImode, TFmode,
5929 "_U_Qfcnvfxt_quad_to_usgl");
5930 set_conv_libfunc (ufix_optab, DImode, TFmode,
5931 "_U_Qfcnvfxt_quad_to_udbl");
5933 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5934 "_U_Qfcnvxf_sgl_to_quad");
5935 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5936 "_U_Qfcnvxf_dbl_to_quad");
5937 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5938 "_U_Qfcnvxf_usgl_to_quad");
5939 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5940 "_U_Qfcnvxf_udbl_to_quad");
5943 if (TARGET_SYNC_LIBCALLS)
5944 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
5947 /* HP's millicode routines mean something special to the assembler.
5948 Keep track of which ones we have used. */
5950 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5951 static void import_milli (enum millicodes);
5952 static char imported[(int) end1000];
5953 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5954 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5955 #define MILLI_START 10
5957 static void
5958 import_milli (enum millicodes code)
5960 char str[sizeof (import_string)];
5962 if (!imported[(int) code])
5964 imported[(int) code] = 1;
5965 strcpy (str, import_string);
5966 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5967 output_asm_insn (str, 0);
5971 /* The register constraints have put the operands and return value in
5972 the proper registers. */
5974 const char *
5975 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5977 import_milli (mulI);
5978 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5981 /* Emit the rtl for doing a division by a constant. */
5983 /* Do magic division millicodes exist for this value? */
5984 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5986 /* We'll use an array to keep track of the magic millicodes and
5987 whether or not we've used them already. [n][0] is signed, [n][1] is
5988 unsigned. */
5990 static int div_milli[16][2];
5993 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5995 if (GET_CODE (operands[2]) == CONST_INT
5996 && INTVAL (operands[2]) > 0
5997 && INTVAL (operands[2]) < 16
5998 && pa_magic_milli[INTVAL (operands[2])])
6000 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6002 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6003 emit
6004 (gen_rtx_PARALLEL
6005 (VOIDmode,
6006 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6007 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6008 SImode,
6009 gen_rtx_REG (SImode, 26),
6010 operands[2])),
6011 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6012 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6013 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6014 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6015 gen_rtx_CLOBBER (VOIDmode, ret))));
6016 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6017 return 1;
6019 return 0;
6022 const char *
6023 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6025 int divisor;
6027 /* If the divisor is a constant, try to use one of the special
6028 opcodes .*/
6029 if (GET_CODE (operands[0]) == CONST_INT)
6031 static char buf[100];
6032 divisor = INTVAL (operands[0]);
6033 if (!div_milli[divisor][unsignedp])
6035 div_milli[divisor][unsignedp] = 1;
6036 if (unsignedp)
6037 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6038 else
6039 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6041 if (unsignedp)
6043 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6044 INTVAL (operands[0]));
6045 return pa_output_millicode_call (insn,
6046 gen_rtx_SYMBOL_REF (SImode, buf));
6048 else
6050 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6051 INTVAL (operands[0]));
6052 return pa_output_millicode_call (insn,
6053 gen_rtx_SYMBOL_REF (SImode, buf));
6056 /* Divisor isn't a special constant. */
6057 else
6059 if (unsignedp)
6061 import_milli (divU);
6062 return pa_output_millicode_call (insn,
6063 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6065 else
6067 import_milli (divI);
6068 return pa_output_millicode_call (insn,
6069 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6074 /* Output a $$rem millicode to do mod. */
6076 const char *
6077 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6079 if (unsignedp)
6081 import_milli (remU);
6082 return pa_output_millicode_call (insn,
6083 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6085 else
6087 import_milli (remI);
6088 return pa_output_millicode_call (insn,
6089 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6093 void
6094 pa_output_arg_descriptor (rtx_insn *call_insn)
6096 const char *arg_regs[4];
6097 machine_mode arg_mode;
6098 rtx link;
6099 int i, output_flag = 0;
6100 int regno;
6102 /* We neither need nor want argument location descriptors for the
6103 64bit runtime environment or the ELF32 environment. */
6104 if (TARGET_64BIT || TARGET_ELF32)
6105 return;
6107 for (i = 0; i < 4; i++)
6108 arg_regs[i] = 0;
6110 /* Specify explicitly that no argument relocations should take place
6111 if using the portable runtime calling conventions. */
6112 if (TARGET_PORTABLE_RUNTIME)
6114 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6115 asm_out_file);
6116 return;
6119 gcc_assert (CALL_P (call_insn));
6120 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6121 link; link = XEXP (link, 1))
6123 rtx use = XEXP (link, 0);
6125 if (! (GET_CODE (use) == USE
6126 && GET_CODE (XEXP (use, 0)) == REG
6127 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6128 continue;
6130 arg_mode = GET_MODE (XEXP (use, 0));
6131 regno = REGNO (XEXP (use, 0));
6132 if (regno >= 23 && regno <= 26)
6134 arg_regs[26 - regno] = "GR";
6135 if (arg_mode == DImode)
6136 arg_regs[25 - regno] = "GR";
6138 else if (regno >= 32 && regno <= 39)
6140 if (arg_mode == SFmode)
6141 arg_regs[(regno - 32) / 2] = "FR";
6142 else
6144 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6145 arg_regs[(regno - 34) / 2] = "FR";
6146 arg_regs[(regno - 34) / 2 + 1] = "FU";
6147 #else
6148 arg_regs[(regno - 34) / 2] = "FU";
6149 arg_regs[(regno - 34) / 2 + 1] = "FR";
6150 #endif
6154 fputs ("\t.CALL ", asm_out_file);
6155 for (i = 0; i < 4; i++)
6157 if (arg_regs[i])
6159 if (output_flag++)
6160 fputc (',', asm_out_file);
6161 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6164 fputc ('\n', asm_out_file);
6167 /* Inform reload about cases where moving X with a mode MODE to or from
6168 a register in RCLASS requires an extra scratch or immediate register.
6169 Return the class needed for the immediate register. */
6171 static reg_class_t
6172 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6173 machine_mode mode, secondary_reload_info *sri)
6175 int regno;
6176 enum reg_class rclass = (enum reg_class) rclass_i;
6178 /* Handle the easy stuff first. */
6179 if (rclass == R1_REGS)
6180 return NO_REGS;
6182 if (REG_P (x))
6184 regno = REGNO (x);
6185 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6186 return NO_REGS;
6188 else
6189 regno = -1;
6191 /* If we have something like (mem (mem (...)), we can safely assume the
6192 inner MEM will end up in a general register after reloading, so there's
6193 no need for a secondary reload. */
6194 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6195 return NO_REGS;
6197 /* Trying to load a constant into a FP register during PIC code
6198 generation requires %r1 as a scratch register. For float modes,
6199 the only legitimate constant is CONST0_RTX. However, there are
6200 a few patterns that accept constant double operands. */
6201 if (flag_pic
6202 && FP_REG_CLASS_P (rclass)
6203 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6205 switch (mode)
6207 case E_SImode:
6208 sri->icode = CODE_FOR_reload_insi_r1;
6209 break;
6211 case E_DImode:
6212 sri->icode = CODE_FOR_reload_indi_r1;
6213 break;
6215 case E_SFmode:
6216 sri->icode = CODE_FOR_reload_insf_r1;
6217 break;
6219 case E_DFmode:
6220 sri->icode = CODE_FOR_reload_indf_r1;
6221 break;
6223 default:
6224 gcc_unreachable ();
6226 return NO_REGS;
6229 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6230 register when we're generating PIC code or when the operand isn't
6231 readonly. */
6232 if (pa_symbolic_expression_p (x))
6234 if (GET_CODE (x) == HIGH)
6235 x = XEXP (x, 0);
6237 if (flag_pic || !read_only_operand (x, VOIDmode))
6239 switch (mode)
6241 case E_SImode:
6242 sri->icode = CODE_FOR_reload_insi_r1;
6243 break;
6245 case E_DImode:
6246 sri->icode = CODE_FOR_reload_indi_r1;
6247 break;
6249 default:
6250 gcc_unreachable ();
6252 return NO_REGS;
6256 /* Profiling showed the PA port spends about 1.3% of its compilation
6257 time in true_regnum from calls inside pa_secondary_reload_class. */
6258 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6259 regno = true_regnum (x);
6261 /* Handle reloads for floating point loads and stores. */
6262 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6263 && FP_REG_CLASS_P (rclass))
6265 if (MEM_P (x))
6267 x = XEXP (x, 0);
6269 /* We don't need a secondary reload for indexed memory addresses.
6271 When INT14_OK_STRICT is true, it might appear that we could
6272 directly allow register indirect memory addresses. However,
6273 this doesn't work because we don't support SUBREGs in
6274 floating-point register copies and reload doesn't tell us
6275 when it's going to use a SUBREG. */
6276 if (IS_INDEX_ADDR_P (x))
6277 return NO_REGS;
6280 /* Request a secondary reload with a general scratch register
6281 for everything else. ??? Could symbolic operands be handled
6282 directly when generating non-pic PA 2.0 code? */
6283 sri->icode = (in_p
6284 ? direct_optab_handler (reload_in_optab, mode)
6285 : direct_optab_handler (reload_out_optab, mode));
6286 return NO_REGS;
6289 /* A SAR<->FP register copy requires an intermediate general register
6290 and secondary memory. We need a secondary reload with a general
6291 scratch register for spills. */
6292 if (rclass == SHIFT_REGS)
6294 /* Handle spill. */
6295 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6297 sri->icode = (in_p
6298 ? direct_optab_handler (reload_in_optab, mode)
6299 : direct_optab_handler (reload_out_optab, mode));
6300 return NO_REGS;
6303 /* Handle FP copy. */
6304 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6305 return GENERAL_REGS;
6308 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6309 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6310 && FP_REG_CLASS_P (rclass))
6311 return GENERAL_REGS;
6313 return NO_REGS;
6316 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6318 static bool
6319 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6320 reg_class_t class1 ATTRIBUTE_UNUSED,
6321 reg_class_t class2 ATTRIBUTE_UNUSED)
6323 #ifdef PA_SECONDARY_MEMORY_NEEDED
6324 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6325 #else
6326 return false;
6327 #endif
6330 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6331 is only marked as live on entry by df-scan when it is a fixed
6332 register. It isn't a fixed register in the 64-bit runtime,
6333 so we need to mark it here. */
6335 static void
6336 pa_extra_live_on_entry (bitmap regs)
6338 if (TARGET_64BIT)
6339 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6342 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6343 to prevent it from being deleted. */
6346 pa_eh_return_handler_rtx (void)
6348 rtx tmp;
6350 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6351 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6352 tmp = gen_rtx_MEM (word_mode, tmp);
6353 tmp->volatil = 1;
6354 return tmp;
6357 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6358 by invisible reference. As a GCC extension, we also pass anything
6359 with a zero or variable size by reference.
6361 The 64-bit runtime does not describe passing any types by invisible
6362 reference. The internals of GCC can't currently handle passing
6363 empty structures, and zero or variable length arrays when they are
6364 not passed entirely on the stack or by reference. Thus, as a GCC
6365 extension, we pass these types by reference. The HP compiler doesn't
6366 support these types, so hopefully there shouldn't be any compatibility
6367 issues. This may have to be revisited when HP releases a C99 compiler
6368 or updates the ABI. */
6370 static bool
6371 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6373 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6374 if (TARGET_64BIT)
6375 return size <= 0;
6376 else
6377 return size <= 0 || size > 8;
6380 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6382 static pad_direction
6383 pa_function_arg_padding (machine_mode mode, const_tree type)
6385 if (mode == BLKmode
6386 || (TARGET_64BIT
6387 && type
6388 && (AGGREGATE_TYPE_P (type)
6389 || TREE_CODE (type) == COMPLEX_TYPE
6390 || TREE_CODE (type) == VECTOR_TYPE)))
6392 /* Return PAD_NONE if justification is not required. */
6393 if (type
6394 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6395 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6396 return PAD_NONE;
6398 /* The directions set here are ignored when a BLKmode argument larger
6399 than a word is placed in a register. Different code is used for
6400 the stack and registers. This makes it difficult to have a
6401 consistent data representation for both the stack and registers.
6402 For both runtimes, the justification and padding for arguments on
6403 the stack and in registers should be identical. */
6404 if (TARGET_64BIT)
6405 /* The 64-bit runtime specifies left justification for aggregates. */
6406 return PAD_UPWARD;
6407 else
6408 /* The 32-bit runtime architecture specifies right justification.
6409 When the argument is passed on the stack, the argument is padded
6410 with garbage on the left. The HP compiler pads with zeros. */
6411 return PAD_DOWNWARD;
6414 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6415 return PAD_DOWNWARD;
6416 else
6417 return PAD_NONE;
6421 /* Do what is necessary for `va_start'. We look at the current function
6422 to determine if stdargs or varargs is used and fill in an initial
6423 va_list. A pointer to this constructor is returned. */
6425 static rtx
6426 hppa_builtin_saveregs (void)
6428 rtx offset, dest;
6429 tree fntype = TREE_TYPE (current_function_decl);
6430 int argadj = ((!stdarg_p (fntype))
6431 ? UNITS_PER_WORD : 0);
6433 if (argadj)
6434 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6435 else
6436 offset = crtl->args.arg_offset_rtx;
6438 if (TARGET_64BIT)
6440 int i, off;
6442 /* Adjust for varargs/stdarg differences. */
6443 if (argadj)
6444 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6445 else
6446 offset = crtl->args.arg_offset_rtx;
6448 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6449 from the incoming arg pointer and growing to larger addresses. */
6450 for (i = 26, off = -64; i >= 19; i--, off += 8)
6451 emit_move_insn (gen_rtx_MEM (word_mode,
6452 plus_constant (Pmode,
6453 arg_pointer_rtx, off)),
6454 gen_rtx_REG (word_mode, i));
6456 /* The incoming args pointer points just beyond the flushback area;
6457 normally this is not a serious concern. However, when we are doing
6458 varargs/stdargs we want to make the arg pointer point to the start
6459 of the incoming argument area. */
6460 emit_move_insn (virtual_incoming_args_rtx,
6461 plus_constant (Pmode, arg_pointer_rtx, -64));
6463 /* Now return a pointer to the first anonymous argument. */
6464 return copy_to_reg (expand_binop (Pmode, add_optab,
6465 virtual_incoming_args_rtx,
6466 offset, 0, 0, OPTAB_LIB_WIDEN));
6469 /* Store general registers on the stack. */
6470 dest = gen_rtx_MEM (BLKmode,
6471 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6472 -16));
6473 set_mem_alias_set (dest, get_varargs_alias_set ());
6474 set_mem_align (dest, BITS_PER_WORD);
6475 move_block_from_reg (23, dest, 4);
6477 /* move_block_from_reg will emit code to store the argument registers
6478 individually as scalar stores.
6480 However, other insns may later load from the same addresses for
6481 a structure load (passing a struct to a varargs routine).
6483 The alias code assumes that such aliasing can never happen, so we
6484 have to keep memory referencing insns from moving up beyond the
6485 last argument register store. So we emit a blockage insn here. */
6486 emit_insn (gen_blockage ());
6488 return copy_to_reg (expand_binop (Pmode, add_optab,
6489 crtl->args.internal_arg_pointer,
6490 offset, 0, 0, OPTAB_LIB_WIDEN));
6493 static void
6494 hppa_va_start (tree valist, rtx nextarg)
6496 nextarg = expand_builtin_saveregs ();
6497 std_expand_builtin_va_start (valist, nextarg);
6500 static tree
6501 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6502 gimple_seq *post_p)
6504 if (TARGET_64BIT)
6506 /* Args grow upward. We can use the generic routines. */
6507 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6509 else /* !TARGET_64BIT */
6511 tree ptr = build_pointer_type (type);
6512 tree valist_type;
6513 tree t, u;
6514 unsigned int size, ofs;
6515 bool indirect;
6517 indirect = pass_va_arg_by_reference (type);
6518 if (indirect)
6520 type = ptr;
6521 ptr = build_pointer_type (type);
6523 size = int_size_in_bytes (type);
6524 valist_type = TREE_TYPE (valist);
6526 /* Args grow down. Not handled by generic routines. */
6528 u = fold_convert (sizetype, size_in_bytes (type));
6529 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6530 t = fold_build_pointer_plus (valist, u);
6532 /* Align to 4 or 8 byte boundary depending on argument size. */
6534 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6535 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6536 t = fold_convert (valist_type, t);
6538 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6540 ofs = (8 - size) % 4;
6541 if (ofs != 0)
6542 t = fold_build_pointer_plus_hwi (t, ofs);
6544 t = fold_convert (ptr, t);
6545 t = build_va_arg_indirect_ref (t);
6547 if (indirect)
6548 t = build_va_arg_indirect_ref (t);
6550 return t;
6554 /* True if MODE is valid for the target. By "valid", we mean able to
6555 be manipulated in non-trivial ways. In particular, this means all
6556 the arithmetic is supported. */
6558 static bool
6559 pa_scalar_mode_supported_p (scalar_mode mode)
6561 int precision = GET_MODE_PRECISION (mode);
6563 if (TARGET_64BIT && mode == TImode)
6564 return true;
6566 switch (GET_MODE_CLASS (mode))
6568 case MODE_PARTIAL_INT:
6569 case MODE_INT:
6570 if (precision == CHAR_TYPE_SIZE)
6571 return true;
6572 if (precision == SHORT_TYPE_SIZE)
6573 return true;
6574 if (precision == INT_TYPE_SIZE)
6575 return true;
6576 if (precision == LONG_TYPE_SIZE)
6577 return true;
6578 if (precision == LONG_LONG_TYPE_SIZE)
6579 return true;
6580 return false;
6582 case MODE_FLOAT:
6583 if (precision == FLOAT_TYPE_SIZE)
6584 return true;
6585 if (precision == DOUBLE_TYPE_SIZE)
6586 return true;
6587 if (precision == LONG_DOUBLE_TYPE_SIZE)
6588 return true;
6589 return false;
6591 case MODE_DECIMAL_FLOAT:
6592 return false;
6594 default:
6595 gcc_unreachable ();
6599 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6600 it branches into the delay slot. Otherwise, return FALSE. */
6602 static bool
6603 branch_to_delay_slot_p (rtx_insn *insn)
6605 rtx_insn *jump_insn;
6607 if (dbr_sequence_length ())
6608 return FALSE;
6610 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6611 while (insn)
6613 insn = next_active_insn (insn);
6614 if (jump_insn == insn)
6615 return TRUE;
6617 /* We can't rely on the length of asms. So, we return FALSE when
6618 the branch is followed by an asm. */
6619 if (!insn
6620 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6621 || asm_noperands (PATTERN (insn)) >= 0
6622 || get_attr_length (insn) > 0)
6623 break;
6626 return FALSE;
6629 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6631 This occurs when INSN has an unfilled delay slot and is followed
6632 by an asm. Disaster can occur if the asm is empty and the jump
6633 branches into the delay slot. So, we add a nop in the delay slot
6634 when this occurs. */
6636 static bool
6637 branch_needs_nop_p (rtx_insn *insn)
6639 rtx_insn *jump_insn;
6641 if (dbr_sequence_length ())
6642 return FALSE;
6644 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6645 while (insn)
6647 insn = next_active_insn (insn);
6648 if (!insn || jump_insn == insn)
6649 return TRUE;
6651 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6652 || asm_noperands (PATTERN (insn)) >= 0)
6653 && get_attr_length (insn) > 0)
6654 break;
6657 return FALSE;
6660 /* Return TRUE if INSN, a forward jump insn, can use nullification
6661 to skip the following instruction. This avoids an extra cycle due
6662 to a mis-predicted branch when we fall through. */
6664 static bool
6665 use_skip_p (rtx_insn *insn)
6667 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6669 while (insn)
6671 insn = next_active_insn (insn);
6673 /* We can't rely on the length of asms, so we can't skip asms. */
6674 if (!insn
6675 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6676 || asm_noperands (PATTERN (insn)) >= 0)
6677 break;
6678 if (get_attr_length (insn) == 4
6679 && jump_insn == next_active_insn (insn))
6680 return TRUE;
6681 if (get_attr_length (insn) > 0)
6682 break;
6685 return FALSE;
6688 /* This routine handles all the normal conditional branch sequences we
6689 might need to generate. It handles compare immediate vs compare
6690 register, nullification of delay slots, varying length branches,
6691 negated branches, and all combinations of the above. It returns the
6692 output appropriate to emit the branch corresponding to all given
6693 parameters. */
6695 const char *
6696 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6698 static char buf[100];
6699 bool useskip;
6700 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6701 int length = get_attr_length (insn);
6702 int xdelay;
6704 /* A conditional branch to the following instruction (e.g. the delay slot)
6705 is asking for a disaster. This can happen when not optimizing and
6706 when jump optimization fails.
6708 While it is usually safe to emit nothing, this can fail if the
6709 preceding instruction is a nullified branch with an empty delay
6710 slot and the same branch target as this branch. We could check
6711 for this but jump optimization should eliminate nop jumps. It
6712 is always safe to emit a nop. */
6713 if (branch_to_delay_slot_p (insn))
6714 return "nop";
6716 /* The doubleword form of the cmpib instruction doesn't have the LEU
6717 and GTU conditions while the cmpb instruction does. Since we accept
6718 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6719 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6720 operands[2] = gen_rtx_REG (DImode, 0);
6721 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6722 operands[1] = gen_rtx_REG (DImode, 0);
6724 /* If this is a long branch with its delay slot unfilled, set `nullify'
6725 as it can nullify the delay slot and save a nop. */
6726 if (length == 8 && dbr_sequence_length () == 0)
6727 nullify = 1;
6729 /* If this is a short forward conditional branch which did not get
6730 its delay slot filled, the delay slot can still be nullified. */
6731 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6732 nullify = forward_branch_p (insn);
6734 /* A forward branch over a single nullified insn can be done with a
6735 comclr instruction. This avoids a single cycle penalty due to
6736 mis-predicted branch if we fall through (branch not taken). */
6737 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6739 switch (length)
6741 /* All short conditional branches except backwards with an unfilled
6742 delay slot. */
6743 case 4:
6744 if (useskip)
6745 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6746 else
6747 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6748 if (GET_MODE (operands[1]) == DImode)
6749 strcat (buf, "*");
6750 if (negated)
6751 strcat (buf, "%B3");
6752 else
6753 strcat (buf, "%S3");
6754 if (useskip)
6755 strcat (buf, " %2,%r1,%%r0");
6756 else if (nullify)
6758 if (branch_needs_nop_p (insn))
6759 strcat (buf, ",n %2,%r1,%0%#");
6760 else
6761 strcat (buf, ",n %2,%r1,%0");
6763 else
6764 strcat (buf, " %2,%r1,%0");
6765 break;
6767 /* All long conditionals. Note a short backward branch with an
6768 unfilled delay slot is treated just like a long backward branch
6769 with an unfilled delay slot. */
6770 case 8:
6771 /* Handle weird backwards branch with a filled delay slot
6772 which is nullified. */
6773 if (dbr_sequence_length () != 0
6774 && ! forward_branch_p (insn)
6775 && nullify)
6777 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6778 if (GET_MODE (operands[1]) == DImode)
6779 strcat (buf, "*");
6780 if (negated)
6781 strcat (buf, "%S3");
6782 else
6783 strcat (buf, "%B3");
6784 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6786 /* Handle short backwards branch with an unfilled delay slot.
6787 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6788 taken and untaken branches. */
6789 else if (dbr_sequence_length () == 0
6790 && ! forward_branch_p (insn)
6791 && INSN_ADDRESSES_SET_P ()
6792 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6793 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6795 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6796 if (GET_MODE (operands[1]) == DImode)
6797 strcat (buf, "*");
6798 if (negated)
6799 strcat (buf, "%B3 %2,%r1,%0%#");
6800 else
6801 strcat (buf, "%S3 %2,%r1,%0%#");
6803 else
6805 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6806 if (GET_MODE (operands[1]) == DImode)
6807 strcat (buf, "*");
6808 if (negated)
6809 strcat (buf, "%S3");
6810 else
6811 strcat (buf, "%B3");
6812 if (nullify)
6813 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6814 else
6815 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6817 break;
6819 default:
6820 /* The reversed conditional branch must branch over one additional
6821 instruction if the delay slot is filled and needs to be extracted
6822 by pa_output_lbranch. If the delay slot is empty or this is a
6823 nullified forward branch, the instruction after the reversed
6824 condition branch must be nullified. */
6825 if (dbr_sequence_length () == 0
6826 || (nullify && forward_branch_p (insn)))
6828 nullify = 1;
6829 xdelay = 0;
6830 operands[4] = GEN_INT (length);
6832 else
6834 xdelay = 1;
6835 operands[4] = GEN_INT (length + 4);
6838 /* Create a reversed conditional branch which branches around
6839 the following insns. */
6840 if (GET_MODE (operands[1]) != DImode)
6842 if (nullify)
6844 if (negated)
6845 strcpy (buf,
6846 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6847 else
6848 strcpy (buf,
6849 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6851 else
6853 if (negated)
6854 strcpy (buf,
6855 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6856 else
6857 strcpy (buf,
6858 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6861 else
6863 if (nullify)
6865 if (negated)
6866 strcpy (buf,
6867 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6868 else
6869 strcpy (buf,
6870 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6872 else
6874 if (negated)
6875 strcpy (buf,
6876 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6877 else
6878 strcpy (buf,
6879 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6883 output_asm_insn (buf, operands);
6884 return pa_output_lbranch (operands[0], insn, xdelay);
6886 return buf;
6889 /* Output a PIC pc-relative instruction sequence to load the address of
6890 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6891 or a code label. OPERANDS[1] specifies the register to use to load
6892 the program counter. OPERANDS[3] may be used for label generation
6893 The sequence is always three instructions in length. The program
6894 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6895 Register %r1 is clobbered. */
6897 static void
6898 pa_output_pic_pcrel_sequence (rtx *operands)
6900 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6901 if (TARGET_PA_20)
6903 /* We can use mfia to determine the current program counter. */
6904 if (TARGET_SOM || !TARGET_GAS)
6906 operands[3] = gen_label_rtx ();
6907 targetm.asm_out.internal_label (asm_out_file, "L",
6908 CODE_LABEL_NUMBER (operands[3]));
6909 output_asm_insn ("mfia %1", operands);
6910 output_asm_insn ("addil L'%0-%l3,%1", operands);
6911 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6913 else
6915 output_asm_insn ("mfia %1", operands);
6916 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6917 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6920 else
6922 /* We need to use a branch to determine the current program counter. */
6923 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6924 if (TARGET_SOM || !TARGET_GAS)
6926 operands[3] = gen_label_rtx ();
6927 output_asm_insn ("addil L'%0-%l3,%1", operands);
6928 targetm.asm_out.internal_label (asm_out_file, "L",
6929 CODE_LABEL_NUMBER (operands[3]));
6930 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6932 else
6934 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6935 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6940 /* This routine handles output of long unconditional branches that
6941 exceed the maximum range of a simple branch instruction. Since
6942 we don't have a register available for the branch, we save register
6943 %r1 in the frame marker, load the branch destination DEST into %r1,
6944 execute the branch, and restore %r1 in the delay slot of the branch.
6946 Since long branches may have an insn in the delay slot and the
6947 delay slot is used to restore %r1, we in general need to extract
6948 this insn and execute it before the branch. However, to facilitate
6949 use of this function by conditional branches, we also provide an
6950 option to not extract the delay insn so that it will be emitted
6951 after the long branch. So, if there is an insn in the delay slot,
6952 it is extracted if XDELAY is nonzero.
6954 The lengths of the various long-branch sequences are 20, 16 and 24
6955 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6957 const char *
6958 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6960 rtx xoperands[4];
6962 xoperands[0] = dest;
6964 /* First, free up the delay slot. */
6965 if (xdelay && dbr_sequence_length () != 0)
6967 /* We can't handle a jump in the delay slot. */
6968 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6970 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6971 optimize, 0, NULL);
6973 /* Now delete the delay insn. */
6974 SET_INSN_DELETED (NEXT_INSN (insn));
6977 /* Output an insn to save %r1. The runtime documentation doesn't
6978 specify whether the "Clean Up" slot in the callers frame can
6979 be clobbered by the callee. It isn't copied by HP's builtin
6980 alloca, so this suggests that it can be clobbered if necessary.
6981 The "Static Link" location is copied by HP builtin alloca, so
6982 we avoid using it. Using the cleanup slot might be a problem
6983 if we have to interoperate with languages that pass cleanup
6984 information. However, it should be possible to handle these
6985 situations with GCC's asm feature.
6987 The "Current RP" slot is reserved for the called procedure, so
6988 we try to use it when we don't have a frame of our own. It's
6989 rather unlikely that we won't have a frame when we need to emit
6990 a very long branch.
6992 Really the way to go long term is a register scavenger; goto
6993 the target of the jump and find a register which we can use
6994 as a scratch to hold the value in %r1. Then, we wouldn't have
6995 to free up the delay slot or clobber a slot that may be needed
6996 for other purposes. */
6997 if (TARGET_64BIT)
6999 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7000 /* Use the return pointer slot in the frame marker. */
7001 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7002 else
7003 /* Use the slot at -40 in the frame marker since HP builtin
7004 alloca doesn't copy it. */
7005 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7007 else
7009 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7010 /* Use the return pointer slot in the frame marker. */
7011 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7012 else
7013 /* Use the "Clean Up" slot in the frame marker. In GCC,
7014 the only other use of this location is for copying a
7015 floating point double argument from a floating-point
7016 register to two general registers. The copy is done
7017 as an "atomic" operation when outputting a call, so it
7018 won't interfere with our using the location here. */
7019 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7022 if (TARGET_PORTABLE_RUNTIME)
7024 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7025 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7026 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7028 else if (flag_pic)
7030 xoperands[1] = gen_rtx_REG (Pmode, 1);
7031 xoperands[2] = xoperands[1];
7032 pa_output_pic_pcrel_sequence (xoperands);
7033 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7035 else
7036 /* Now output a very long branch to the original target. */
7037 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7039 /* Now restore the value of %r1 in the delay slot. */
7040 if (TARGET_64BIT)
7042 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7043 return "ldd -16(%%r30),%%r1";
7044 else
7045 return "ldd -40(%%r30),%%r1";
7047 else
7049 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7050 return "ldw -20(%%r30),%%r1";
7051 else
7052 return "ldw -12(%%r30),%%r1";
7056 /* This routine handles all the branch-on-bit conditional branch sequences we
7057 might need to generate. It handles nullification of delay slots,
7058 varying length branches, negated branches and all combinations of the
7059 above. it returns the appropriate output template to emit the branch. */
7061 const char *
7062 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7064 static char buf[100];
7065 bool useskip;
7066 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7067 int length = get_attr_length (insn);
7068 int xdelay;
7070 /* A conditional branch to the following instruction (e.g. the delay slot) is
7071 asking for a disaster. I do not think this can happen as this pattern
7072 is only used when optimizing; jump optimization should eliminate the
7073 jump. But be prepared just in case. */
7075 if (branch_to_delay_slot_p (insn))
7076 return "nop";
7078 /* If this is a long branch with its delay slot unfilled, set `nullify'
7079 as it can nullify the delay slot and save a nop. */
7080 if (length == 8 && dbr_sequence_length () == 0)
7081 nullify = 1;
7083 /* If this is a short forward conditional branch which did not get
7084 its delay slot filled, the delay slot can still be nullified. */
7085 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7086 nullify = forward_branch_p (insn);
7088 /* A forward branch over a single nullified insn can be done with a
7089 extrs instruction. This avoids a single cycle penalty due to
7090 mis-predicted branch if we fall through (branch not taken). */
7091 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7093 switch (length)
7096 /* All short conditional branches except backwards with an unfilled
7097 delay slot. */
7098 case 4:
7099 if (useskip)
7100 strcpy (buf, "{extrs,|extrw,s,}");
7101 else
7102 strcpy (buf, "bb,");
7103 if (useskip && GET_MODE (operands[0]) == DImode)
7104 strcpy (buf, "extrd,s,*");
7105 else if (GET_MODE (operands[0]) == DImode)
7106 strcpy (buf, "bb,*");
7107 if ((which == 0 && negated)
7108 || (which == 1 && ! negated))
7109 strcat (buf, ">=");
7110 else
7111 strcat (buf, "<");
7112 if (useskip)
7113 strcat (buf, " %0,%1,1,%%r0");
7114 else if (nullify && negated)
7116 if (branch_needs_nop_p (insn))
7117 strcat (buf, ",n %0,%1,%3%#");
7118 else
7119 strcat (buf, ",n %0,%1,%3");
7121 else if (nullify && ! negated)
7123 if (branch_needs_nop_p (insn))
7124 strcat (buf, ",n %0,%1,%2%#");
7125 else
7126 strcat (buf, ",n %0,%1,%2");
7128 else if (! nullify && negated)
7129 strcat (buf, " %0,%1,%3");
7130 else if (! nullify && ! negated)
7131 strcat (buf, " %0,%1,%2");
7132 break;
7134 /* All long conditionals. Note a short backward branch with an
7135 unfilled delay slot is treated just like a long backward branch
7136 with an unfilled delay slot. */
7137 case 8:
7138 /* Handle weird backwards branch with a filled delay slot
7139 which is nullified. */
7140 if (dbr_sequence_length () != 0
7141 && ! forward_branch_p (insn)
7142 && nullify)
7144 strcpy (buf, "bb,");
7145 if (GET_MODE (operands[0]) == DImode)
7146 strcat (buf, "*");
7147 if ((which == 0 && negated)
7148 || (which == 1 && ! negated))
7149 strcat (buf, "<");
7150 else
7151 strcat (buf, ">=");
7152 if (negated)
7153 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7154 else
7155 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7157 /* Handle short backwards branch with an unfilled delay slot.
7158 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7159 taken and untaken branches. */
7160 else if (dbr_sequence_length () == 0
7161 && ! forward_branch_p (insn)
7162 && INSN_ADDRESSES_SET_P ()
7163 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7164 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7166 strcpy (buf, "bb,");
7167 if (GET_MODE (operands[0]) == DImode)
7168 strcat (buf, "*");
7169 if ((which == 0 && negated)
7170 || (which == 1 && ! negated))
7171 strcat (buf, ">=");
7172 else
7173 strcat (buf, "<");
7174 if (negated)
7175 strcat (buf, " %0,%1,%3%#");
7176 else
7177 strcat (buf, " %0,%1,%2%#");
7179 else
7181 if (GET_MODE (operands[0]) == DImode)
7182 strcpy (buf, "extrd,s,*");
7183 else
7184 strcpy (buf, "{extrs,|extrw,s,}");
7185 if ((which == 0 && negated)
7186 || (which == 1 && ! negated))
7187 strcat (buf, "<");
7188 else
7189 strcat (buf, ">=");
7190 if (nullify && negated)
7191 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7192 else if (nullify && ! negated)
7193 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7194 else if (negated)
7195 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7196 else
7197 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7199 break;
7201 default:
7202 /* The reversed conditional branch must branch over one additional
7203 instruction if the delay slot is filled and needs to be extracted
7204 by pa_output_lbranch. If the delay slot is empty or this is a
7205 nullified forward branch, the instruction after the reversed
7206 condition branch must be nullified. */
7207 if (dbr_sequence_length () == 0
7208 || (nullify && forward_branch_p (insn)))
7210 nullify = 1;
7211 xdelay = 0;
7212 operands[4] = GEN_INT (length);
7214 else
7216 xdelay = 1;
7217 operands[4] = GEN_INT (length + 4);
7220 if (GET_MODE (operands[0]) == DImode)
7221 strcpy (buf, "bb,*");
7222 else
7223 strcpy (buf, "bb,");
7224 if ((which == 0 && negated)
7225 || (which == 1 && !negated))
7226 strcat (buf, "<");
7227 else
7228 strcat (buf, ">=");
7229 if (nullify)
7230 strcat (buf, ",n %0,%1,.+%4");
7231 else
7232 strcat (buf, " %0,%1,.+%4");
7233 output_asm_insn (buf, operands);
7234 return pa_output_lbranch (negated ? operands[3] : operands[2],
7235 insn, xdelay);
7237 return buf;
7240 /* This routine handles all the branch-on-variable-bit conditional branch
7241 sequences we might need to generate. It handles nullification of delay
7242 slots, varying length branches, negated branches and all combinations
7243 of the above. it returns the appropriate output template to emit the
7244 branch. */
7246 const char *
7247 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7248 int which)
7250 static char buf[100];
7251 bool useskip;
7252 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7253 int length = get_attr_length (insn);
7254 int xdelay;
7256 /* A conditional branch to the following instruction (e.g. the delay slot) is
7257 asking for a disaster. I do not think this can happen as this pattern
7258 is only used when optimizing; jump optimization should eliminate the
7259 jump. But be prepared just in case. */
7261 if (branch_to_delay_slot_p (insn))
7262 return "nop";
7264 /* If this is a long branch with its delay slot unfilled, set `nullify'
7265 as it can nullify the delay slot and save a nop. */
7266 if (length == 8 && dbr_sequence_length () == 0)
7267 nullify = 1;
7269 /* If this is a short forward conditional branch which did not get
7270 its delay slot filled, the delay slot can still be nullified. */
7271 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7272 nullify = forward_branch_p (insn);
7274 /* A forward branch over a single nullified insn can be done with a
7275 extrs instruction. This avoids a single cycle penalty due to
7276 mis-predicted branch if we fall through (branch not taken). */
7277 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7279 switch (length)
7282 /* All short conditional branches except backwards with an unfilled
7283 delay slot. */
7284 case 4:
7285 if (useskip)
7286 strcpy (buf, "{vextrs,|extrw,s,}");
7287 else
7288 strcpy (buf, "{bvb,|bb,}");
7289 if (useskip && GET_MODE (operands[0]) == DImode)
7290 strcpy (buf, "extrd,s,*");
7291 else if (GET_MODE (operands[0]) == DImode)
7292 strcpy (buf, "bb,*");
7293 if ((which == 0 && negated)
7294 || (which == 1 && ! negated))
7295 strcat (buf, ">=");
7296 else
7297 strcat (buf, "<");
7298 if (useskip)
7299 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7300 else if (nullify && negated)
7302 if (branch_needs_nop_p (insn))
7303 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7304 else
7305 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7307 else if (nullify && ! negated)
7309 if (branch_needs_nop_p (insn))
7310 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7311 else
7312 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7314 else if (! nullify && negated)
7315 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7316 else if (! nullify && ! negated)
7317 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7318 break;
7320 /* All long conditionals. Note a short backward branch with an
7321 unfilled delay slot is treated just like a long backward branch
7322 with an unfilled delay slot. */
7323 case 8:
7324 /* Handle weird backwards branch with a filled delay slot
7325 which is nullified. */
7326 if (dbr_sequence_length () != 0
7327 && ! forward_branch_p (insn)
7328 && nullify)
7330 strcpy (buf, "{bvb,|bb,}");
7331 if (GET_MODE (operands[0]) == DImode)
7332 strcat (buf, "*");
7333 if ((which == 0 && negated)
7334 || (which == 1 && ! negated))
7335 strcat (buf, "<");
7336 else
7337 strcat (buf, ">=");
7338 if (negated)
7339 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7340 else
7341 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7343 /* Handle short backwards branch with an unfilled delay slot.
7344 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7345 taken and untaken branches. */
7346 else if (dbr_sequence_length () == 0
7347 && ! forward_branch_p (insn)
7348 && INSN_ADDRESSES_SET_P ()
7349 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7350 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7352 strcpy (buf, "{bvb,|bb,}");
7353 if (GET_MODE (operands[0]) == DImode)
7354 strcat (buf, "*");
7355 if ((which == 0 && negated)
7356 || (which == 1 && ! negated))
7357 strcat (buf, ">=");
7358 else
7359 strcat (buf, "<");
7360 if (negated)
7361 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7362 else
7363 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7365 else
7367 strcpy (buf, "{vextrs,|extrw,s,}");
7368 if (GET_MODE (operands[0]) == DImode)
7369 strcpy (buf, "extrd,s,*");
7370 if ((which == 0 && negated)
7371 || (which == 1 && ! negated))
7372 strcat (buf, "<");
7373 else
7374 strcat (buf, ">=");
7375 if (nullify && negated)
7376 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7377 else if (nullify && ! negated)
7378 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7379 else if (negated)
7380 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7381 else
7382 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7384 break;
7386 default:
7387 /* The reversed conditional branch must branch over one additional
7388 instruction if the delay slot is filled and needs to be extracted
7389 by pa_output_lbranch. If the delay slot is empty or this is a
7390 nullified forward branch, the instruction after the reversed
7391 condition branch must be nullified. */
7392 if (dbr_sequence_length () == 0
7393 || (nullify && forward_branch_p (insn)))
7395 nullify = 1;
7396 xdelay = 0;
7397 operands[4] = GEN_INT (length);
7399 else
7401 xdelay = 1;
7402 operands[4] = GEN_INT (length + 4);
7405 if (GET_MODE (operands[0]) == DImode)
7406 strcpy (buf, "bb,*");
7407 else
7408 strcpy (buf, "{bvb,|bb,}");
7409 if ((which == 0 && negated)
7410 || (which == 1 && !negated))
7411 strcat (buf, "<");
7412 else
7413 strcat (buf, ">=");
7414 if (nullify)
7415 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7416 else
7417 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7418 output_asm_insn (buf, operands);
7419 return pa_output_lbranch (negated ? operands[3] : operands[2],
7420 insn, xdelay);
7422 return buf;
7425 /* Return the output template for emitting a dbra type insn.
7427 Note it may perform some output operations on its own before
7428 returning the final output string. */
7429 const char *
7430 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7432 int length = get_attr_length (insn);
7434 /* A conditional branch to the following instruction (e.g. the delay slot) is
7435 asking for a disaster. Be prepared! */
7437 if (branch_to_delay_slot_p (insn))
7439 if (which_alternative == 0)
7440 return "ldo %1(%0),%0";
7441 else if (which_alternative == 1)
7443 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7444 output_asm_insn ("ldw -16(%%r30),%4", operands);
7445 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7446 return "{fldws|fldw} -16(%%r30),%0";
7448 else
7450 output_asm_insn ("ldw %0,%4", operands);
7451 return "ldo %1(%4),%4\n\tstw %4,%0";
7455 if (which_alternative == 0)
7457 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7458 int xdelay;
7460 /* If this is a long branch with its delay slot unfilled, set `nullify'
7461 as it can nullify the delay slot and save a nop. */
7462 if (length == 8 && dbr_sequence_length () == 0)
7463 nullify = 1;
7465 /* If this is a short forward conditional branch which did not get
7466 its delay slot filled, the delay slot can still be nullified. */
7467 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7468 nullify = forward_branch_p (insn);
7470 switch (length)
7472 case 4:
7473 if (nullify)
7475 if (branch_needs_nop_p (insn))
7476 return "addib,%C2,n %1,%0,%3%#";
7477 else
7478 return "addib,%C2,n %1,%0,%3";
7480 else
7481 return "addib,%C2 %1,%0,%3";
7483 case 8:
7484 /* Handle weird backwards branch with a fulled delay slot
7485 which is nullified. */
7486 if (dbr_sequence_length () != 0
7487 && ! forward_branch_p (insn)
7488 && nullify)
7489 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7490 /* Handle short backwards branch with an unfilled delay slot.
7491 Using a addb;nop rather than addi;bl saves 1 cycle for both
7492 taken and untaken branches. */
7493 else if (dbr_sequence_length () == 0
7494 && ! forward_branch_p (insn)
7495 && INSN_ADDRESSES_SET_P ()
7496 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7497 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7498 return "addib,%C2 %1,%0,%3%#";
7500 /* Handle normal cases. */
7501 if (nullify)
7502 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7503 else
7504 return "addi,%N2 %1,%0,%0\n\tb %3";
7506 default:
7507 /* The reversed conditional branch must branch over one additional
7508 instruction if the delay slot is filled and needs to be extracted
7509 by pa_output_lbranch. If the delay slot is empty or this is a
7510 nullified forward branch, the instruction after the reversed
7511 condition branch must be nullified. */
7512 if (dbr_sequence_length () == 0
7513 || (nullify && forward_branch_p (insn)))
7515 nullify = 1;
7516 xdelay = 0;
7517 operands[4] = GEN_INT (length);
7519 else
7521 xdelay = 1;
7522 operands[4] = GEN_INT (length + 4);
7525 if (nullify)
7526 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7527 else
7528 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7530 return pa_output_lbranch (operands[3], insn, xdelay);
7534 /* Deal with gross reload from FP register case. */
7535 else if (which_alternative == 1)
7537 /* Move loop counter from FP register to MEM then into a GR,
7538 increment the GR, store the GR into MEM, and finally reload
7539 the FP register from MEM from within the branch's delay slot. */
7540 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7541 operands);
7542 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7543 if (length == 24)
7544 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7545 else if (length == 28)
7546 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7547 else
7549 operands[5] = GEN_INT (length - 16);
7550 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7551 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7552 return pa_output_lbranch (operands[3], insn, 0);
7555 /* Deal with gross reload from memory case. */
7556 else
7558 /* Reload loop counter from memory, the store back to memory
7559 happens in the branch's delay slot. */
7560 output_asm_insn ("ldw %0,%4", operands);
7561 if (length == 12)
7562 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7563 else if (length == 16)
7564 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7565 else
7567 operands[5] = GEN_INT (length - 4);
7568 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7569 return pa_output_lbranch (operands[3], insn, 0);
7574 /* Return the output template for emitting a movb type insn.
7576 Note it may perform some output operations on its own before
7577 returning the final output string. */
7578 const char *
7579 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7580 int reverse_comparison)
7582 int length = get_attr_length (insn);
7584 /* A conditional branch to the following instruction (e.g. the delay slot) is
7585 asking for a disaster. Be prepared! */
7587 if (branch_to_delay_slot_p (insn))
7589 if (which_alternative == 0)
7590 return "copy %1,%0";
7591 else if (which_alternative == 1)
7593 output_asm_insn ("stw %1,-16(%%r30)", operands);
7594 return "{fldws|fldw} -16(%%r30),%0";
7596 else if (which_alternative == 2)
7597 return "stw %1,%0";
7598 else
7599 return "mtsar %r1";
7602 /* Support the second variant. */
7603 if (reverse_comparison)
7604 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7606 if (which_alternative == 0)
7608 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7609 int xdelay;
7611 /* If this is a long branch with its delay slot unfilled, set `nullify'
7612 as it can nullify the delay slot and save a nop. */
7613 if (length == 8 && dbr_sequence_length () == 0)
7614 nullify = 1;
7616 /* If this is a short forward conditional branch which did not get
7617 its delay slot filled, the delay slot can still be nullified. */
7618 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7619 nullify = forward_branch_p (insn);
7621 switch (length)
7623 case 4:
7624 if (nullify)
7626 if (branch_needs_nop_p (insn))
7627 return "movb,%C2,n %1,%0,%3%#";
7628 else
7629 return "movb,%C2,n %1,%0,%3";
7631 else
7632 return "movb,%C2 %1,%0,%3";
7634 case 8:
7635 /* Handle weird backwards branch with a filled delay slot
7636 which is nullified. */
7637 if (dbr_sequence_length () != 0
7638 && ! forward_branch_p (insn)
7639 && nullify)
7640 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7642 /* Handle short backwards branch with an unfilled delay slot.
7643 Using a movb;nop rather than or;bl saves 1 cycle for both
7644 taken and untaken branches. */
7645 else if (dbr_sequence_length () == 0
7646 && ! forward_branch_p (insn)
7647 && INSN_ADDRESSES_SET_P ()
7648 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7649 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7650 return "movb,%C2 %1,%0,%3%#";
7651 /* Handle normal cases. */
7652 if (nullify)
7653 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7654 else
7655 return "or,%N2 %1,%%r0,%0\n\tb %3";
7657 default:
7658 /* The reversed conditional branch must branch over one additional
7659 instruction if the delay slot is filled and needs to be extracted
7660 by pa_output_lbranch. If the delay slot is empty or this is a
7661 nullified forward branch, the instruction after the reversed
7662 condition branch must be nullified. */
7663 if (dbr_sequence_length () == 0
7664 || (nullify && forward_branch_p (insn)))
7666 nullify = 1;
7667 xdelay = 0;
7668 operands[4] = GEN_INT (length);
7670 else
7672 xdelay = 1;
7673 operands[4] = GEN_INT (length + 4);
7676 if (nullify)
7677 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7678 else
7679 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7681 return pa_output_lbranch (operands[3], insn, xdelay);
7684 /* Deal with gross reload for FP destination register case. */
7685 else if (which_alternative == 1)
7687 /* Move source register to MEM, perform the branch test, then
7688 finally load the FP register from MEM from within the branch's
7689 delay slot. */
7690 output_asm_insn ("stw %1,-16(%%r30)", operands);
7691 if (length == 12)
7692 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7693 else if (length == 16)
7694 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7695 else
7697 operands[4] = GEN_INT (length - 4);
7698 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7699 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7700 return pa_output_lbranch (operands[3], insn, 0);
7703 /* Deal with gross reload from memory case. */
7704 else if (which_alternative == 2)
7706 /* Reload loop counter from memory, the store back to memory
7707 happens in the branch's delay slot. */
7708 if (length == 8)
7709 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7710 else if (length == 12)
7711 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7712 else
7714 operands[4] = GEN_INT (length);
7715 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7716 operands);
7717 return pa_output_lbranch (operands[3], insn, 0);
7720 /* Handle SAR as a destination. */
7721 else
7723 if (length == 8)
7724 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7725 else if (length == 12)
7726 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7727 else
7729 operands[4] = GEN_INT (length);
7730 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7731 operands);
7732 return pa_output_lbranch (operands[3], insn, 0);
7737 /* Copy any FP arguments in INSN into integer registers. */
7738 static void
7739 copy_fp_args (rtx_insn *insn)
7741 rtx link;
7742 rtx xoperands[2];
7744 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7746 int arg_mode, regno;
7747 rtx use = XEXP (link, 0);
7749 if (! (GET_CODE (use) == USE
7750 && GET_CODE (XEXP (use, 0)) == REG
7751 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7752 continue;
7754 arg_mode = GET_MODE (XEXP (use, 0));
7755 regno = REGNO (XEXP (use, 0));
7757 /* Is it a floating point register? */
7758 if (regno >= 32 && regno <= 39)
7760 /* Copy the FP register into an integer register via memory. */
7761 if (arg_mode == SFmode)
7763 xoperands[0] = XEXP (use, 0);
7764 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7765 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7766 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7768 else
7770 xoperands[0] = XEXP (use, 0);
7771 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7772 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7773 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7774 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7780 /* Compute length of the FP argument copy sequence for INSN. */
7781 static int
7782 length_fp_args (rtx_insn *insn)
7784 int length = 0;
7785 rtx link;
7787 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7789 int arg_mode, regno;
7790 rtx use = XEXP (link, 0);
7792 if (! (GET_CODE (use) == USE
7793 && GET_CODE (XEXP (use, 0)) == REG
7794 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7795 continue;
7797 arg_mode = GET_MODE (XEXP (use, 0));
7798 regno = REGNO (XEXP (use, 0));
7800 /* Is it a floating point register? */
7801 if (regno >= 32 && regno <= 39)
7803 if (arg_mode == SFmode)
7804 length += 8;
7805 else
7806 length += 12;
7810 return length;
7813 /* Return the attribute length for the millicode call instruction INSN.
7814 The length must match the code generated by pa_output_millicode_call.
7815 We include the delay slot in the returned length as it is better to
7816 over estimate the length than to under estimate it. */
7819 pa_attr_length_millicode_call (rtx_insn *insn)
7821 unsigned long distance = -1;
7822 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7824 if (INSN_ADDRESSES_SET_P ())
7826 distance = (total + insn_current_reference_address (insn));
7827 if (distance < total)
7828 distance = -1;
7831 if (TARGET_64BIT)
7833 if (!TARGET_LONG_CALLS && distance < 7600000)
7834 return 8;
7836 return 20;
7838 else if (TARGET_PORTABLE_RUNTIME)
7839 return 24;
7840 else
7842 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7843 return 8;
7845 if (!flag_pic)
7846 return 12;
7848 return 24;
7852 /* INSN is a function call.
7854 CALL_DEST is the routine we are calling. */
7856 const char *
7857 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7859 int attr_length = get_attr_length (insn);
7860 int seq_length = dbr_sequence_length ();
7861 rtx xoperands[4];
7863 xoperands[0] = call_dest;
7865 /* Handle the common case where we are sure that the branch will
7866 reach the beginning of the $CODE$ subspace. The within reach
7867 form of the $$sh_func_adrs call has a length of 28. Because it
7868 has an attribute type of sh_func_adrs, it never has a nonzero
7869 sequence length (i.e., the delay slot is never filled). */
7870 if (!TARGET_LONG_CALLS
7871 && (attr_length == 8
7872 || (attr_length == 28
7873 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7875 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7876 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7878 else
7880 if (TARGET_64BIT)
7882 /* It might seem that one insn could be saved by accessing
7883 the millicode function using the linkage table. However,
7884 this doesn't work in shared libraries and other dynamically
7885 loaded objects. Using a pc-relative sequence also avoids
7886 problems related to the implicit use of the gp register. */
7887 xoperands[1] = gen_rtx_REG (Pmode, 1);
7888 xoperands[2] = xoperands[1];
7889 pa_output_pic_pcrel_sequence (xoperands);
7890 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7892 else if (TARGET_PORTABLE_RUNTIME)
7894 /* Pure portable runtime doesn't allow be/ble; we also don't
7895 have PIC support in the assembler/linker, so this sequence
7896 is needed. */
7898 /* Get the address of our target into %r1. */
7899 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7900 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7902 /* Get our return address into %r31. */
7903 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7904 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7906 /* Jump to our target address in %r1. */
7907 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7909 else if (!flag_pic)
7911 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7912 if (TARGET_PA_20)
7913 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7914 else
7915 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7917 else
7919 xoperands[1] = gen_rtx_REG (Pmode, 31);
7920 xoperands[2] = gen_rtx_REG (Pmode, 1);
7921 pa_output_pic_pcrel_sequence (xoperands);
7923 /* Adjust return address. */
7924 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7926 /* Jump to our target address in %r1. */
7927 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7931 if (seq_length == 0)
7932 output_asm_insn ("nop", xoperands);
7934 return "";
7937 /* Return the attribute length of the call instruction INSN. The SIBCALL
7938 flag indicates whether INSN is a regular call or a sibling call. The
7939 length returned must be longer than the code actually generated by
7940 pa_output_call. Since branch shortening is done before delay branch
7941 sequencing, there is no way to determine whether or not the delay
7942 slot will be filled during branch shortening. Even when the delay
7943 slot is filled, we may have to add a nop if the delay slot contains
7944 a branch that can't reach its target. Thus, we always have to include
7945 the delay slot in the length estimate. This used to be done in
7946 pa_adjust_insn_length but we do it here now as some sequences always
7947 fill the delay slot and we can save four bytes in the estimate for
7948 these sequences. */
7951 pa_attr_length_call (rtx_insn *insn, int sibcall)
7953 int local_call;
7954 rtx call, call_dest;
7955 tree call_decl;
7956 int length = 0;
7957 rtx pat = PATTERN (insn);
7958 unsigned long distance = -1;
7960 gcc_assert (CALL_P (insn));
7962 if (INSN_ADDRESSES_SET_P ())
7964 unsigned long total;
7966 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7967 distance = (total + insn_current_reference_address (insn));
7968 if (distance < total)
7969 distance = -1;
7972 gcc_assert (GET_CODE (pat) == PARALLEL);
7974 /* Get the call rtx. */
7975 call = XVECEXP (pat, 0, 0);
7976 if (GET_CODE (call) == SET)
7977 call = SET_SRC (call);
7979 gcc_assert (GET_CODE (call) == CALL);
7981 /* Determine if this is a local call. */
7982 call_dest = XEXP (XEXP (call, 0), 0);
7983 call_decl = SYMBOL_REF_DECL (call_dest);
7984 local_call = call_decl && targetm.binds_local_p (call_decl);
7986 /* pc-relative branch. */
7987 if (!TARGET_LONG_CALLS
7988 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7989 || distance < MAX_PCREL17F_OFFSET))
7990 length += 8;
7992 /* 64-bit plabel sequence. */
7993 else if (TARGET_64BIT && !local_call)
7994 length += 24;
7996 /* non-pic long absolute branch sequence. */
7997 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7998 length += 12;
8000 /* long pc-relative branch sequence. */
8001 else if (TARGET_LONG_PIC_SDIFF_CALL
8002 || (TARGET_GAS && !TARGET_SOM && local_call))
8004 length += 20;
8006 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8007 length += 8;
8010 /* 32-bit plabel sequence. */
8011 else
8013 length += 32;
8015 if (TARGET_SOM)
8016 length += length_fp_args (insn);
8018 if (flag_pic)
8019 length += 4;
8021 if (!TARGET_PA_20)
8023 if (!sibcall)
8024 length += 8;
8026 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8027 length += 8;
8031 return length;
8034 /* INSN is a function call.
8036 CALL_DEST is the routine we are calling. */
8038 const char *
8039 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8041 int seq_length = dbr_sequence_length ();
8042 tree call_decl = SYMBOL_REF_DECL (call_dest);
8043 int local_call = call_decl && targetm.binds_local_p (call_decl);
8044 rtx xoperands[4];
8046 xoperands[0] = call_dest;
8048 /* Handle the common case where we're sure that the branch will reach
8049 the beginning of the "$CODE$" subspace. This is the beginning of
8050 the current function if we are in a named section. */
8051 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8053 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8054 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8056 else
8058 if (TARGET_64BIT && !local_call)
8060 /* ??? As far as I can tell, the HP linker doesn't support the
8061 long pc-relative sequence described in the 64-bit runtime
8062 architecture. So, we use a slightly longer indirect call. */
8063 xoperands[0] = pa_get_deferred_plabel (call_dest);
8064 xoperands[1] = gen_label_rtx ();
8066 /* Put the load of %r27 into the delay slot. We don't need to
8067 do anything when generating fast indirect calls. */
8068 if (seq_length != 0)
8070 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8071 optimize, 0, NULL);
8073 /* Now delete the delay insn. */
8074 SET_INSN_DELETED (NEXT_INSN (insn));
8077 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8078 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8079 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8080 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8081 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8082 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8083 seq_length = 1;
8085 else
8087 int indirect_call = 0;
8089 /* Emit a long call. There are several different sequences
8090 of increasing length and complexity. In most cases,
8091 they don't allow an instruction in the delay slot. */
8092 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8093 && !TARGET_LONG_PIC_SDIFF_CALL
8094 && !(TARGET_GAS && !TARGET_SOM && local_call)
8095 && !TARGET_64BIT)
8096 indirect_call = 1;
8098 if (seq_length != 0
8099 && !sibcall
8100 && (!TARGET_PA_20
8101 || indirect_call
8102 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8104 /* A non-jump insn in the delay slot. By definition we can
8105 emit this insn before the call (and in fact before argument
8106 relocating. */
8107 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8108 NULL);
8110 /* Now delete the delay insn. */
8111 SET_INSN_DELETED (NEXT_INSN (insn));
8112 seq_length = 0;
8115 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8117 /* This is the best sequence for making long calls in
8118 non-pic code. Unfortunately, GNU ld doesn't provide
8119 the stub needed for external calls, and GAS's support
8120 for this with the SOM linker is buggy. It is safe
8121 to use this for local calls. */
8122 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8123 if (sibcall)
8124 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8125 else
8127 if (TARGET_PA_20)
8128 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8129 xoperands);
8130 else
8131 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8133 output_asm_insn ("copy %%r31,%%r2", xoperands);
8134 seq_length = 1;
8137 else
8139 /* The HP assembler and linker can handle relocations for
8140 the difference of two symbols. The HP assembler
8141 recognizes the sequence as a pc-relative call and
8142 the linker provides stubs when needed. */
8144 /* GAS currently can't generate the relocations that
8145 are needed for the SOM linker under HP-UX using this
8146 sequence. The GNU linker doesn't generate the stubs
8147 that are needed for external calls on TARGET_ELF32
8148 with this sequence. For now, we have to use a longer
8149 plabel sequence when using GAS for non local calls. */
8150 if (TARGET_LONG_PIC_SDIFF_CALL
8151 || (TARGET_GAS && !TARGET_SOM && local_call))
8153 xoperands[1] = gen_rtx_REG (Pmode, 1);
8154 xoperands[2] = xoperands[1];
8155 pa_output_pic_pcrel_sequence (xoperands);
8157 else
8159 /* Emit a long plabel-based call sequence. This is
8160 essentially an inline implementation of $$dyncall.
8161 We don't actually try to call $$dyncall as this is
8162 as difficult as calling the function itself. */
8163 xoperands[0] = pa_get_deferred_plabel (call_dest);
8164 xoperands[1] = gen_label_rtx ();
8166 /* Since the call is indirect, FP arguments in registers
8167 need to be copied to the general registers. Then, the
8168 argument relocation stub will copy them back. */
8169 if (TARGET_SOM)
8170 copy_fp_args (insn);
8172 if (flag_pic)
8174 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8175 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8176 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8178 else
8180 output_asm_insn ("addil LR'%0-$global$,%%r27",
8181 xoperands);
8182 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8183 xoperands);
8186 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8187 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8188 /* Should this be an ordered load to ensure the target
8189 address is loaded before the global pointer? */
8190 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8191 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8193 if (!sibcall && !TARGET_PA_20)
8195 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8196 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8197 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8198 else
8199 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8203 if (TARGET_PA_20)
8205 if (sibcall)
8206 output_asm_insn ("bve (%%r1)", xoperands);
8207 else
8209 if (indirect_call)
8211 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8212 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8213 seq_length = 1;
8215 else
8216 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8219 else
8221 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8222 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8223 xoperands);
8225 if (sibcall)
8227 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8228 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8229 else
8230 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8232 else
8234 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8235 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8236 else
8237 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8239 if (indirect_call)
8240 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8241 else
8242 output_asm_insn ("copy %%r31,%%r2", xoperands);
8243 seq_length = 1;
8250 if (seq_length == 0)
8251 output_asm_insn ("nop", xoperands);
8253 return "";
8256 /* Return the attribute length of the indirect call instruction INSN.
8257 The length must match the code generated by output_indirect call.
8258 The returned length includes the delay slot. Currently, the delay
8259 slot of an indirect call sequence is not exposed and it is used by
8260 the sequence itself. */
8263 pa_attr_length_indirect_call (rtx_insn *insn)
8265 unsigned long distance = -1;
8266 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8268 if (INSN_ADDRESSES_SET_P ())
8270 distance = (total + insn_current_reference_address (insn));
8271 if (distance < total)
8272 distance = -1;
8275 if (TARGET_64BIT)
8276 return 12;
8278 if (TARGET_FAST_INDIRECT_CALLS)
8279 return 8;
8281 if (TARGET_PORTABLE_RUNTIME)
8282 return 16;
8284 if (!TARGET_LONG_CALLS
8285 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8286 || distance < MAX_PCREL17F_OFFSET))
8287 return 8;
8289 /* Out of reach, can use ble. */
8290 if (!flag_pic)
8291 return 12;
8293 /* Inline versions of $$dyncall. */
8294 if (!optimize_size)
8296 if (TARGET_NO_SPACE_REGS)
8297 return 28;
8299 if (TARGET_PA_20)
8300 return 32;
8303 /* Long PIC pc-relative call. */
8304 return 20;
8307 const char *
8308 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8310 rtx xoperands[4];
8311 int length;
8313 if (TARGET_64BIT)
8315 xoperands[0] = call_dest;
8316 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8317 "bve,l (%%r2),%%r2\n\t"
8318 "ldd 24(%0),%%r27", xoperands);
8319 return "";
8322 /* First the special case for kernels, level 0 systems, etc. */
8323 if (TARGET_FAST_INDIRECT_CALLS)
8325 pa_output_arg_descriptor (insn);
8326 if (TARGET_PA_20)
8327 return "bve,l,n (%%r22),%%r2\n\tnop";
8328 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8331 if (TARGET_PORTABLE_RUNTIME)
8333 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8334 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8335 pa_output_arg_descriptor (insn);
8336 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8339 /* Now the normal case -- we can reach $$dyncall directly or
8340 we're sure that we can get there via a long-branch stub.
8342 No need to check target flags as the length uniquely identifies
8343 the remaining cases. */
8344 length = pa_attr_length_indirect_call (insn);
8345 if (length == 8)
8347 pa_output_arg_descriptor (insn);
8349 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8350 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8351 variant of the B,L instruction can't be used on the SOM target. */
8352 if (TARGET_PA_20 && !TARGET_SOM)
8353 return "b,l,n $$dyncall,%%r2\n\tnop";
8354 else
8355 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8358 /* Long millicode call, but we are not generating PIC or portable runtime
8359 code. */
8360 if (length == 12)
8362 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8363 pa_output_arg_descriptor (insn);
8364 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8367 /* The long PIC pc-relative call sequence is five instructions. So,
8368 let's use an inline version of $$dyncall when the calling sequence
8369 has a roughly similar number of instructions and we are not optimizing
8370 for size. We need two instructions to load the return pointer plus
8371 the $$dyncall implementation. */
8372 if (!optimize_size)
8374 if (TARGET_NO_SPACE_REGS)
8376 pa_output_arg_descriptor (insn);
8377 output_asm_insn ("bl .+8,%%r2\n\t"
8378 "ldo 20(%%r2),%%r2\n\t"
8379 "extru,<> %%r22,30,1,%%r0\n\t"
8380 "bv,n %%r0(%%r22)\n\t"
8381 "ldw -2(%%r22),%%r21\n\t"
8382 "bv %%r0(%%r21)\n\t"
8383 "ldw 2(%%r22),%%r19", xoperands);
8384 return "";
8386 if (TARGET_PA_20)
8388 pa_output_arg_descriptor (insn);
8389 output_asm_insn ("bl .+8,%%r2\n\t"
8390 "ldo 24(%%r2),%%r2\n\t"
8391 "stw %%r2,-24(%%sp)\n\t"
8392 "extru,<> %r22,30,1,%%r0\n\t"
8393 "bve,n (%%r22)\n\t"
8394 "ldw -2(%%r22),%%r21\n\t"
8395 "bve (%%r21)\n\t"
8396 "ldw 2(%%r22),%%r19", xoperands);
8397 return "";
8401 /* We need a long PIC call to $$dyncall. */
8402 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8403 xoperands[1] = gen_rtx_REG (Pmode, 2);
8404 xoperands[2] = gen_rtx_REG (Pmode, 1);
8405 pa_output_pic_pcrel_sequence (xoperands);
8406 pa_output_arg_descriptor (insn);
8407 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8410 /* In HPUX 8.0's shared library scheme, special relocations are needed
8411 for function labels if they might be passed to a function
8412 in a shared library (because shared libraries don't live in code
8413 space), and special magic is needed to construct their address. */
8415 void
8416 pa_encode_label (rtx sym)
8418 const char *str = XSTR (sym, 0);
8419 int len = strlen (str) + 1;
8420 char *newstr, *p;
8422 p = newstr = XALLOCAVEC (char, len + 1);
8423 *p++ = '@';
8424 strcpy (p, str);
8426 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8429 static void
8430 pa_encode_section_info (tree decl, rtx rtl, int first)
8432 int old_referenced = 0;
8434 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8435 old_referenced
8436 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8438 default_encode_section_info (decl, rtl, first);
8440 if (first && TEXT_SPACE_P (decl))
8442 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8443 if (TREE_CODE (decl) == FUNCTION_DECL)
8444 pa_encode_label (XEXP (rtl, 0));
8446 else if (old_referenced)
8447 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8450 /* This is sort of inverse to pa_encode_section_info. */
8452 static const char *
8453 pa_strip_name_encoding (const char *str)
8455 str += (*str == '@');
8456 str += (*str == '*');
8457 return str;
8460 /* Returns 1 if OP is a function label involved in a simple addition
8461 with a constant. Used to keep certain patterns from matching
8462 during instruction combination. */
8464 pa_is_function_label_plus_const (rtx op)
8466 /* Strip off any CONST. */
8467 if (GET_CODE (op) == CONST)
8468 op = XEXP (op, 0);
8470 return (GET_CODE (op) == PLUS
8471 && function_label_operand (XEXP (op, 0), VOIDmode)
8472 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8475 /* Output the assembler code for a thunk function. THUNK_DECL is the
8476 declaration for the thunk function itself, FUNCTION is the decl for
8477 the target function. DELTA is an immediate constant offset to be
8478 added to THIS. If VCALL_OFFSET is nonzero, the word at
8479 *(*this + vcall_offset) should be added to THIS. */
8481 static void
8482 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8483 HOST_WIDE_INT vcall_offset, tree function)
8485 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8486 static unsigned int current_thunk_number;
8487 int val_14 = VAL_14_BITS_P (delta);
8488 unsigned int old_last_address = last_address, nbytes = 0;
8489 char label[17];
8490 rtx xoperands[4];
8492 xoperands[0] = XEXP (DECL_RTL (function), 0);
8493 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8494 xoperands[2] = GEN_INT (delta);
8496 assemble_start_function (thunk_fndecl, fnname);
8497 final_start_function (emit_barrier (), file, 1);
8499 if (!vcall_offset)
8501 /* Output the thunk. We know that the function is in the same
8502 translation unit (i.e., the same space) as the thunk, and that
8503 thunks are output after their method. Thus, we don't need an
8504 external branch to reach the function. With SOM and GAS,
8505 functions and thunks are effectively in different sections.
8506 Thus, we can always use a IA-relative branch and the linker
8507 will add a long branch stub if necessary.
8509 However, we have to be careful when generating PIC code on the
8510 SOM port to ensure that the sequence does not transfer to an
8511 import stub for the target function as this could clobber the
8512 return value saved at SP-24. This would also apply to the
8513 32-bit linux port if the multi-space model is implemented. */
8514 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8515 && !(flag_pic && TREE_PUBLIC (function))
8516 && (TARGET_GAS || last_address < 262132))
8517 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8518 && ((targetm_common.have_named_sections
8519 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8520 /* The GNU 64-bit linker has rather poor stub management.
8521 So, we use a long branch from thunks that aren't in
8522 the same section as the target function. */
8523 && ((!TARGET_64BIT
8524 && (DECL_SECTION_NAME (thunk_fndecl)
8525 != DECL_SECTION_NAME (function)))
8526 || ((DECL_SECTION_NAME (thunk_fndecl)
8527 == DECL_SECTION_NAME (function))
8528 && last_address < 262132)))
8529 /* In this case, we need to be able to reach the start of
8530 the stub table even though the function is likely closer
8531 and can be jumped to directly. */
8532 || (targetm_common.have_named_sections
8533 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8534 && DECL_SECTION_NAME (function) == NULL
8535 && total_code_bytes < MAX_PCREL17F_OFFSET)
8536 /* Likewise. */
8537 || (!targetm_common.have_named_sections
8538 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8540 if (!val_14)
8541 output_asm_insn ("addil L'%2,%%r26", xoperands);
8543 output_asm_insn ("b %0", xoperands);
8545 if (val_14)
8547 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8548 nbytes += 8;
8550 else
8552 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8553 nbytes += 12;
8556 else if (TARGET_64BIT)
8558 rtx xop[4];
8560 /* We only have one call-clobbered scratch register, so we can't
8561 make use of the delay slot if delta doesn't fit in 14 bits. */
8562 if (!val_14)
8564 output_asm_insn ("addil L'%2,%%r26", xoperands);
8565 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8568 /* Load function address into %r1. */
8569 xop[0] = xoperands[0];
8570 xop[1] = gen_rtx_REG (Pmode, 1);
8571 xop[2] = xop[1];
8572 pa_output_pic_pcrel_sequence (xop);
8574 if (val_14)
8576 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8577 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8578 nbytes += 20;
8580 else
8582 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8583 nbytes += 24;
8586 else if (TARGET_PORTABLE_RUNTIME)
8588 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8589 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8591 if (!val_14)
8592 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8594 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8596 if (val_14)
8598 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8599 nbytes += 16;
8601 else
8603 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8604 nbytes += 20;
8607 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8609 /* The function is accessible from outside this module. The only
8610 way to avoid an import stub between the thunk and function is to
8611 call the function directly with an indirect sequence similar to
8612 that used by $$dyncall. This is possible because $$dyncall acts
8613 as the import stub in an indirect call. */
8614 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8615 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8616 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8617 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8618 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8619 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8620 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8621 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8622 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8624 if (!val_14)
8626 output_asm_insn ("addil L'%2,%%r26", xoperands);
8627 nbytes += 4;
8630 if (TARGET_PA_20)
8632 output_asm_insn ("bve (%%r22)", xoperands);
8633 nbytes += 36;
8635 else if (TARGET_NO_SPACE_REGS)
8637 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8638 nbytes += 36;
8640 else
8642 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8643 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8644 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8645 nbytes += 44;
8648 if (val_14)
8649 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8650 else
8651 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8653 else if (flag_pic)
8655 rtx xop[4];
8657 /* Load function address into %r22. */
8658 xop[0] = xoperands[0];
8659 xop[1] = gen_rtx_REG (Pmode, 1);
8660 xop[2] = gen_rtx_REG (Pmode, 22);
8661 pa_output_pic_pcrel_sequence (xop);
8663 if (!val_14)
8664 output_asm_insn ("addil L'%2,%%r26", xoperands);
8666 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8668 if (val_14)
8670 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8671 nbytes += 20;
8673 else
8675 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8676 nbytes += 24;
8679 else
8681 if (!val_14)
8682 output_asm_insn ("addil L'%2,%%r26", xoperands);
8684 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8685 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8687 if (val_14)
8689 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8690 nbytes += 12;
8692 else
8694 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8695 nbytes += 16;
8699 else
8701 rtx xop[4];
8703 /* Add DELTA to THIS. */
8704 if (val_14)
8706 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8707 nbytes += 4;
8709 else
8711 output_asm_insn ("addil L'%2,%%r26", xoperands);
8712 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8713 nbytes += 8;
8716 if (TARGET_64BIT)
8718 /* Load *(THIS + DELTA) to %r1. */
8719 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8721 val_14 = VAL_14_BITS_P (vcall_offset);
8722 xoperands[2] = GEN_INT (vcall_offset);
8724 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8725 if (val_14)
8727 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8728 nbytes += 8;
8730 else
8732 output_asm_insn ("addil L'%2,%%r1", xoperands);
8733 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8734 nbytes += 12;
8737 else
8739 /* Load *(THIS + DELTA) to %r1. */
8740 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8742 val_14 = VAL_14_BITS_P (vcall_offset);
8743 xoperands[2] = GEN_INT (vcall_offset);
8745 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8746 if (val_14)
8748 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8749 nbytes += 8;
8751 else
8753 output_asm_insn ("addil L'%2,%%r1", xoperands);
8754 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8755 nbytes += 12;
8759 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8760 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8761 && !(flag_pic && TREE_PUBLIC (function))
8762 && (TARGET_GAS || last_address < 262132))
8763 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8764 && ((targetm_common.have_named_sections
8765 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8766 /* The GNU 64-bit linker has rather poor stub management.
8767 So, we use a long branch from thunks that aren't in
8768 the same section as the target function. */
8769 && ((!TARGET_64BIT
8770 && (DECL_SECTION_NAME (thunk_fndecl)
8771 != DECL_SECTION_NAME (function)))
8772 || ((DECL_SECTION_NAME (thunk_fndecl)
8773 == DECL_SECTION_NAME (function))
8774 && last_address < 262132)))
8775 /* In this case, we need to be able to reach the start of
8776 the stub table even though the function is likely closer
8777 and can be jumped to directly. */
8778 || (targetm_common.have_named_sections
8779 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8780 && DECL_SECTION_NAME (function) == NULL
8781 && total_code_bytes < MAX_PCREL17F_OFFSET)
8782 /* Likewise. */
8783 || (!targetm_common.have_named_sections
8784 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8786 nbytes += 4;
8787 output_asm_insn ("b %0", xoperands);
8789 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8790 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8792 else if (TARGET_64BIT)
8794 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8795 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8797 /* Load function address into %r1. */
8798 nbytes += 16;
8799 xop[0] = xoperands[0];
8800 xop[1] = gen_rtx_REG (Pmode, 1);
8801 xop[2] = xop[1];
8802 pa_output_pic_pcrel_sequence (xop);
8804 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8806 else if (TARGET_PORTABLE_RUNTIME)
8808 /* Load function address into %r22. */
8809 nbytes += 12;
8810 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8811 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8813 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8815 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8816 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8818 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8820 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8821 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8823 /* The function is accessible from outside this module. The only
8824 way to avoid an import stub between the thunk and function is to
8825 call the function directly with an indirect sequence similar to
8826 that used by $$dyncall. This is possible because $$dyncall acts
8827 as the import stub in an indirect call. */
8828 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8829 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8830 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8831 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8832 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8833 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8834 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8835 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8836 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8838 if (TARGET_PA_20)
8840 output_asm_insn ("bve,n (%%r22)", xoperands);
8841 nbytes += 32;
8843 else if (TARGET_NO_SPACE_REGS)
8845 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8846 nbytes += 32;
8848 else
8850 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8851 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8852 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8853 nbytes += 40;
8856 else if (flag_pic)
8858 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8859 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8861 /* Load function address into %r1. */
8862 nbytes += 16;
8863 xop[0] = xoperands[0];
8864 xop[1] = gen_rtx_REG (Pmode, 1);
8865 xop[2] = xop[1];
8866 pa_output_pic_pcrel_sequence (xop);
8868 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8870 else
8872 /* Load function address into %r22. */
8873 nbytes += 8;
8874 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8875 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8877 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8878 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8882 final_end_function ();
8884 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8886 switch_to_section (data_section);
8887 output_asm_insn (".align 4", xoperands);
8888 ASM_OUTPUT_LABEL (file, label);
8889 output_asm_insn (".word P'%0", xoperands);
8892 current_thunk_number++;
8893 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8894 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8895 last_address += nbytes;
8896 if (old_last_address > last_address)
8897 last_address = UINT_MAX;
8898 update_total_code_bytes (nbytes);
8899 assemble_end_function (thunk_fndecl, fnname);
8902 /* Only direct calls to static functions are allowed to be sibling (tail)
8903 call optimized.
8905 This restriction is necessary because some linker generated stubs will
8906 store return pointers into rp' in some cases which might clobber a
8907 live value already in rp'.
8909 In a sibcall the current function and the target function share stack
8910 space. Thus if the path to the current function and the path to the
8911 target function save a value in rp', they save the value into the
8912 same stack slot, which has undesirable consequences.
8914 Because of the deferred binding nature of shared libraries any function
8915 with external scope could be in a different load module and thus require
8916 rp' to be saved when calling that function. So sibcall optimizations
8917 can only be safe for static function.
8919 Note that GCC never needs return value relocations, so we don't have to
8920 worry about static calls with return value relocations (which require
8921 saving rp').
8923 It is safe to perform a sibcall optimization when the target function
8924 will never return. */
8925 static bool
8926 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8928 /* Sibcalls are not ok because the arg pointer register is not a fixed
8929 register. This prevents the sibcall optimization from occurring. In
8930 addition, there are problems with stub placement using GNU ld. This
8931 is because a normal sibcall branch uses a 17-bit relocation while
8932 a regular call branch uses a 22-bit relocation. As a result, more
8933 care needs to be taken in the placement of long-branch stubs. */
8934 if (TARGET_64BIT)
8935 return false;
8937 if (TARGET_PORTABLE_RUNTIME)
8938 return false;
8940 /* Sibcalls are only ok within a translation unit. */
8941 return decl && targetm.binds_local_p (decl);
8944 /* ??? Addition is not commutative on the PA due to the weird implicit
8945 space register selection rules for memory addresses. Therefore, we
8946 don't consider a + b == b + a, as this might be inside a MEM. */
8947 static bool
8948 pa_commutative_p (const_rtx x, int outer_code)
8950 return (COMMUTATIVE_P (x)
8951 && (TARGET_NO_SPACE_REGS
8952 || (outer_code != UNKNOWN && outer_code != MEM)
8953 || GET_CODE (x) != PLUS));
8956 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8957 use in fmpyadd instructions. */
8959 pa_fmpyaddoperands (rtx *operands)
8961 machine_mode mode = GET_MODE (operands[0]);
8963 /* Must be a floating point mode. */
8964 if (mode != SFmode && mode != DFmode)
8965 return 0;
8967 /* All modes must be the same. */
8968 if (! (mode == GET_MODE (operands[1])
8969 && mode == GET_MODE (operands[2])
8970 && mode == GET_MODE (operands[3])
8971 && mode == GET_MODE (operands[4])
8972 && mode == GET_MODE (operands[5])))
8973 return 0;
8975 /* All operands must be registers. */
8976 if (! (GET_CODE (operands[1]) == REG
8977 && GET_CODE (operands[2]) == REG
8978 && GET_CODE (operands[3]) == REG
8979 && GET_CODE (operands[4]) == REG
8980 && GET_CODE (operands[5]) == REG))
8981 return 0;
8983 /* Only 2 real operands to the addition. One of the input operands must
8984 be the same as the output operand. */
8985 if (! rtx_equal_p (operands[3], operands[4])
8986 && ! rtx_equal_p (operands[3], operands[5]))
8987 return 0;
8989 /* Inout operand of add cannot conflict with any operands from multiply. */
8990 if (rtx_equal_p (operands[3], operands[0])
8991 || rtx_equal_p (operands[3], operands[1])
8992 || rtx_equal_p (operands[3], operands[2]))
8993 return 0;
8995 /* multiply cannot feed into addition operands. */
8996 if (rtx_equal_p (operands[4], operands[0])
8997 || rtx_equal_p (operands[5], operands[0]))
8998 return 0;
9000 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9001 if (mode == SFmode
9002 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9003 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9004 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9005 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9006 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9007 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9008 return 0;
9010 /* Passed. Operands are suitable for fmpyadd. */
9011 return 1;
9014 #if !defined(USE_COLLECT2)
9015 static void
9016 pa_asm_out_constructor (rtx symbol, int priority)
9018 if (!function_label_operand (symbol, VOIDmode))
9019 pa_encode_label (symbol);
9021 #ifdef CTORS_SECTION_ASM_OP
9022 default_ctor_section_asm_out_constructor (symbol, priority);
9023 #else
9024 # ifdef TARGET_ASM_NAMED_SECTION
9025 default_named_section_asm_out_constructor (symbol, priority);
9026 # else
9027 default_stabs_asm_out_constructor (symbol, priority);
9028 # endif
9029 #endif
9032 static void
9033 pa_asm_out_destructor (rtx symbol, int priority)
9035 if (!function_label_operand (symbol, VOIDmode))
9036 pa_encode_label (symbol);
9038 #ifdef DTORS_SECTION_ASM_OP
9039 default_dtor_section_asm_out_destructor (symbol, priority);
9040 #else
9041 # ifdef TARGET_ASM_NAMED_SECTION
9042 default_named_section_asm_out_destructor (symbol, priority);
9043 # else
9044 default_stabs_asm_out_destructor (symbol, priority);
9045 # endif
9046 #endif
9048 #endif
9050 /* This function places uninitialized global data in the bss section.
9051 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9052 function on the SOM port to prevent uninitialized global data from
9053 being placed in the data section. */
9055 void
9056 pa_asm_output_aligned_bss (FILE *stream,
9057 const char *name,
9058 unsigned HOST_WIDE_INT size,
9059 unsigned int align)
9061 switch_to_section (bss_section);
9063 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9064 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9065 #endif
9067 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9068 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9069 #endif
9071 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9072 ASM_OUTPUT_LABEL (stream, name);
9073 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9076 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9077 that doesn't allow the alignment of global common storage to be directly
9078 specified. The SOM linker aligns common storage based on the rounded
9079 value of the NUM_BYTES parameter in the .comm directive. It's not
9080 possible to use the .align directive as it doesn't affect the alignment
9081 of the label associated with a .comm directive. */
9083 void
9084 pa_asm_output_aligned_common (FILE *stream,
9085 const char *name,
9086 unsigned HOST_WIDE_INT size,
9087 unsigned int align)
9089 unsigned int max_common_align;
9091 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9092 if (align > max_common_align)
9094 /* Alignment exceeds maximum alignment for global common data. */
9095 align = max_common_align;
9098 switch_to_section (bss_section);
9100 assemble_name (stream, name);
9101 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9102 MAX (size, align / BITS_PER_UNIT));
9105 /* We can't use .comm for local common storage as the SOM linker effectively
9106 treats the symbol as universal and uses the same storage for local symbols
9107 with the same name in different object files. The .block directive
9108 reserves an uninitialized block of storage. However, it's not common
9109 storage. Fortunately, GCC never requests common storage with the same
9110 name in any given translation unit. */
9112 void
9113 pa_asm_output_aligned_local (FILE *stream,
9114 const char *name,
9115 unsigned HOST_WIDE_INT size,
9116 unsigned int align)
9118 switch_to_section (bss_section);
9119 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9121 #ifdef LOCAL_ASM_OP
9122 fprintf (stream, "%s", LOCAL_ASM_OP);
9123 assemble_name (stream, name);
9124 fprintf (stream, "\n");
9125 #endif
9127 ASM_OUTPUT_LABEL (stream, name);
9128 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9131 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9132 use in fmpysub instructions. */
9134 pa_fmpysuboperands (rtx *operands)
9136 machine_mode mode = GET_MODE (operands[0]);
9138 /* Must be a floating point mode. */
9139 if (mode != SFmode && mode != DFmode)
9140 return 0;
9142 /* All modes must be the same. */
9143 if (! (mode == GET_MODE (operands[1])
9144 && mode == GET_MODE (operands[2])
9145 && mode == GET_MODE (operands[3])
9146 && mode == GET_MODE (operands[4])
9147 && mode == GET_MODE (operands[5])))
9148 return 0;
9150 /* All operands must be registers. */
9151 if (! (GET_CODE (operands[1]) == REG
9152 && GET_CODE (operands[2]) == REG
9153 && GET_CODE (operands[3]) == REG
9154 && GET_CODE (operands[4]) == REG
9155 && GET_CODE (operands[5]) == REG))
9156 return 0;
9158 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9159 operation, so operands[4] must be the same as operand[3]. */
9160 if (! rtx_equal_p (operands[3], operands[4]))
9161 return 0;
9163 /* multiply cannot feed into subtraction. */
9164 if (rtx_equal_p (operands[5], operands[0]))
9165 return 0;
9167 /* Inout operand of sub cannot conflict with any operands from multiply. */
9168 if (rtx_equal_p (operands[3], operands[0])
9169 || rtx_equal_p (operands[3], operands[1])
9170 || rtx_equal_p (operands[3], operands[2]))
9171 return 0;
9173 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9174 if (mode == SFmode
9175 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9176 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9177 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9178 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9179 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9180 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9181 return 0;
9183 /* Passed. Operands are suitable for fmpysub. */
9184 return 1;
9187 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9188 constants for a MULT embedded inside a memory address. */
9190 pa_mem_shadd_constant_p (int val)
9192 if (val == 2 || val == 4 || val == 8)
9193 return 1;
9194 else
9195 return 0;
9198 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9199 constants for shadd instructions. */
9201 pa_shadd_constant_p (int val)
9203 if (val == 1 || val == 2 || val == 3)
9204 return 1;
9205 else
9206 return 0;
9209 /* Return TRUE if INSN branches forward. */
9211 static bool
9212 forward_branch_p (rtx_insn *insn)
9214 rtx lab = JUMP_LABEL (insn);
9216 /* The INSN must have a jump label. */
9217 gcc_assert (lab != NULL_RTX);
9219 if (INSN_ADDRESSES_SET_P ())
9220 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9222 while (insn)
9224 if (insn == lab)
9225 return true;
9226 else
9227 insn = NEXT_INSN (insn);
9230 return false;
9233 /* Output an unconditional move and branch insn. */
9235 const char *
9236 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9238 int length = get_attr_length (insn);
9240 /* These are the cases in which we win. */
9241 if (length == 4)
9242 return "mov%I1b,tr %1,%0,%2";
9244 /* None of the following cases win, but they don't lose either. */
9245 if (length == 8)
9247 if (dbr_sequence_length () == 0)
9249 /* Nothing in the delay slot, fake it by putting the combined
9250 insn (the copy or add) in the delay slot of a bl. */
9251 if (GET_CODE (operands[1]) == CONST_INT)
9252 return "b %2\n\tldi %1,%0";
9253 else
9254 return "b %2\n\tcopy %1,%0";
9256 else
9258 /* Something in the delay slot, but we've got a long branch. */
9259 if (GET_CODE (operands[1]) == CONST_INT)
9260 return "ldi %1,%0\n\tb %2";
9261 else
9262 return "copy %1,%0\n\tb %2";
9266 if (GET_CODE (operands[1]) == CONST_INT)
9267 output_asm_insn ("ldi %1,%0", operands);
9268 else
9269 output_asm_insn ("copy %1,%0", operands);
9270 return pa_output_lbranch (operands[2], insn, 1);
9273 /* Output an unconditional add and branch insn. */
9275 const char *
9276 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9278 int length = get_attr_length (insn);
9280 /* To make life easy we want operand0 to be the shared input/output
9281 operand and operand1 to be the readonly operand. */
9282 if (operands[0] == operands[1])
9283 operands[1] = operands[2];
9285 /* These are the cases in which we win. */
9286 if (length == 4)
9287 return "add%I1b,tr %1,%0,%3";
9289 /* None of the following cases win, but they don't lose either. */
9290 if (length == 8)
9292 if (dbr_sequence_length () == 0)
9293 /* Nothing in the delay slot, fake it by putting the combined
9294 insn (the copy or add) in the delay slot of a bl. */
9295 return "b %3\n\tadd%I1 %1,%0,%0";
9296 else
9297 /* Something in the delay slot, but we've got a long branch. */
9298 return "add%I1 %1,%0,%0\n\tb %3";
9301 output_asm_insn ("add%I1 %1,%0,%0", operands);
9302 return pa_output_lbranch (operands[3], insn, 1);
9305 /* We use this hook to perform a PA specific optimization which is difficult
9306 to do in earlier passes. */
9308 static void
9309 pa_reorg (void)
9311 remove_useless_addtr_insns (1);
9313 if (pa_cpu < PROCESSOR_8000)
9314 pa_combine_instructions ();
9317 /* The PA has a number of odd instructions which can perform multiple
9318 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9319 it may be profitable to combine two instructions into one instruction
9320 with two outputs. It's not profitable PA2.0 machines because the
9321 two outputs would take two slots in the reorder buffers.
9323 This routine finds instructions which can be combined and combines
9324 them. We only support some of the potential combinations, and we
9325 only try common ways to find suitable instructions.
9327 * addb can add two registers or a register and a small integer
9328 and jump to a nearby (+-8k) location. Normally the jump to the
9329 nearby location is conditional on the result of the add, but by
9330 using the "true" condition we can make the jump unconditional.
9331 Thus addb can perform two independent operations in one insn.
9333 * movb is similar to addb in that it can perform a reg->reg
9334 or small immediate->reg copy and jump to a nearby (+-8k location).
9336 * fmpyadd and fmpysub can perform a FP multiply and either an
9337 FP add or FP sub if the operands of the multiply and add/sub are
9338 independent (there are other minor restrictions). Note both
9339 the fmpy and fadd/fsub can in theory move to better spots according
9340 to data dependencies, but for now we require the fmpy stay at a
9341 fixed location.
9343 * Many of the memory operations can perform pre & post updates
9344 of index registers. GCC's pre/post increment/decrement addressing
9345 is far too simple to take advantage of all the possibilities. This
9346 pass may not be suitable since those insns may not be independent.
9348 * comclr can compare two ints or an int and a register, nullify
9349 the following instruction and zero some other register. This
9350 is more difficult to use as it's harder to find an insn which
9351 will generate a comclr than finding something like an unconditional
9352 branch. (conditional moves & long branches create comclr insns).
9354 * Most arithmetic operations can conditionally skip the next
9355 instruction. They can be viewed as "perform this operation
9356 and conditionally jump to this nearby location" (where nearby
9357 is an insns away). These are difficult to use due to the
9358 branch length restrictions. */
9360 static void
9361 pa_combine_instructions (void)
9363 rtx_insn *anchor;
9365 /* This can get expensive since the basic algorithm is on the
9366 order of O(n^2) (or worse). Only do it for -O2 or higher
9367 levels of optimization. */
9368 if (optimize < 2)
9369 return;
9371 /* Walk down the list of insns looking for "anchor" insns which
9372 may be combined with "floating" insns. As the name implies,
9373 "anchor" instructions don't move, while "floating" insns may
9374 move around. */
9375 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9376 rtx_insn *new_rtx = make_insn_raw (par);
9378 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9380 enum attr_pa_combine_type anchor_attr;
9381 enum attr_pa_combine_type floater_attr;
9383 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9384 Also ignore any special USE insns. */
9385 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9386 || GET_CODE (PATTERN (anchor)) == USE
9387 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9388 continue;
9390 anchor_attr = get_attr_pa_combine_type (anchor);
9391 /* See if anchor is an insn suitable for combination. */
9392 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9393 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9394 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9395 && ! forward_branch_p (anchor)))
9397 rtx_insn *floater;
9399 for (floater = PREV_INSN (anchor);
9400 floater;
9401 floater = PREV_INSN (floater))
9403 if (NOTE_P (floater)
9404 || (NONJUMP_INSN_P (floater)
9405 && (GET_CODE (PATTERN (floater)) == USE
9406 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9407 continue;
9409 /* Anything except a regular INSN will stop our search. */
9410 if (! NONJUMP_INSN_P (floater))
9412 floater = NULL;
9413 break;
9416 /* See if FLOATER is suitable for combination with the
9417 anchor. */
9418 floater_attr = get_attr_pa_combine_type (floater);
9419 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9420 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9421 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9422 && floater_attr == PA_COMBINE_TYPE_FMPY))
9424 /* If ANCHOR and FLOATER can be combined, then we're
9425 done with this pass. */
9426 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9427 SET_DEST (PATTERN (floater)),
9428 XEXP (SET_SRC (PATTERN (floater)), 0),
9429 XEXP (SET_SRC (PATTERN (floater)), 1)))
9430 break;
9433 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9434 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9436 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9438 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9439 SET_DEST (PATTERN (floater)),
9440 XEXP (SET_SRC (PATTERN (floater)), 0),
9441 XEXP (SET_SRC (PATTERN (floater)), 1)))
9442 break;
9444 else
9446 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9447 SET_DEST (PATTERN (floater)),
9448 SET_SRC (PATTERN (floater)),
9449 SET_SRC (PATTERN (floater))))
9450 break;
9455 /* If we didn't find anything on the backwards scan try forwards. */
9456 if (!floater
9457 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9458 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9460 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9462 if (NOTE_P (floater)
9463 || (NONJUMP_INSN_P (floater)
9464 && (GET_CODE (PATTERN (floater)) == USE
9465 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9467 continue;
9469 /* Anything except a regular INSN will stop our search. */
9470 if (! NONJUMP_INSN_P (floater))
9472 floater = NULL;
9473 break;
9476 /* See if FLOATER is suitable for combination with the
9477 anchor. */
9478 floater_attr = get_attr_pa_combine_type (floater);
9479 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9480 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9481 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9482 && floater_attr == PA_COMBINE_TYPE_FMPY))
9484 /* If ANCHOR and FLOATER can be combined, then we're
9485 done with this pass. */
9486 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9487 SET_DEST (PATTERN (floater)),
9488 XEXP (SET_SRC (PATTERN (floater)),
9490 XEXP (SET_SRC (PATTERN (floater)),
9491 1)))
9492 break;
9497 /* FLOATER will be nonzero if we found a suitable floating
9498 insn for combination with ANCHOR. */
9499 if (floater
9500 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9501 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9503 /* Emit the new instruction and delete the old anchor. */
9504 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9505 copy_rtx (PATTERN (floater)));
9506 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9507 emit_insn_before (temp, anchor);
9509 SET_INSN_DELETED (anchor);
9511 /* Emit a special USE insn for FLOATER, then delete
9512 the floating insn. */
9513 temp = copy_rtx (PATTERN (floater));
9514 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9515 delete_insn (floater);
9517 continue;
9519 else if (floater
9520 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9522 /* Emit the new_jump instruction and delete the old anchor. */
9523 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9524 copy_rtx (PATTERN (floater)));
9525 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9526 temp = emit_jump_insn_before (temp, anchor);
9528 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9529 SET_INSN_DELETED (anchor);
9531 /* Emit a special USE insn for FLOATER, then delete
9532 the floating insn. */
9533 temp = copy_rtx (PATTERN (floater));
9534 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9535 delete_insn (floater);
9536 continue;
9542 static int
9543 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9544 int reversed, rtx dest,
9545 rtx src1, rtx src2)
9547 int insn_code_number;
9548 rtx_insn *start, *end;
9550 /* Create a PARALLEL with the patterns of ANCHOR and
9551 FLOATER, try to recognize it, then test constraints
9552 for the resulting pattern.
9554 If the pattern doesn't match or the constraints
9555 aren't met keep searching for a suitable floater
9556 insn. */
9557 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9558 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9559 INSN_CODE (new_rtx) = -1;
9560 insn_code_number = recog_memoized (new_rtx);
9561 basic_block bb = BLOCK_FOR_INSN (anchor);
9562 if (insn_code_number < 0
9563 || (extract_insn (new_rtx),
9564 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9565 return 0;
9567 if (reversed)
9569 start = anchor;
9570 end = floater;
9572 else
9574 start = floater;
9575 end = anchor;
9578 /* There's up to three operands to consider. One
9579 output and two inputs.
9581 The output must not be used between FLOATER & ANCHOR
9582 exclusive. The inputs must not be set between
9583 FLOATER and ANCHOR exclusive. */
9585 if (reg_used_between_p (dest, start, end))
9586 return 0;
9588 if (reg_set_between_p (src1, start, end))
9589 return 0;
9591 if (reg_set_between_p (src2, start, end))
9592 return 0;
9594 /* If we get here, then everything is good. */
9595 return 1;
9598 /* Return nonzero if references for INSN are delayed.
9600 Millicode insns are actually function calls with some special
9601 constraints on arguments and register usage.
9603 Millicode calls always expect their arguments in the integer argument
9604 registers, and always return their result in %r29 (ret1). They
9605 are expected to clobber their arguments, %r1, %r29, and the return
9606 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9608 This function tells reorg that the references to arguments and
9609 millicode calls do not appear to happen until after the millicode call.
9610 This allows reorg to put insns which set the argument registers into the
9611 delay slot of the millicode call -- thus they act more like traditional
9612 CALL_INSNs.
9614 Note we cannot consider side effects of the insn to be delayed because
9615 the branch and link insn will clobber the return pointer. If we happened
9616 to use the return pointer in the delay slot of the call, then we lose.
9618 get_attr_type will try to recognize the given insn, so make sure to
9619 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9620 in particular. */
9622 pa_insn_refs_are_delayed (rtx_insn *insn)
9624 return ((NONJUMP_INSN_P (insn)
9625 && GET_CODE (PATTERN (insn)) != SEQUENCE
9626 && GET_CODE (PATTERN (insn)) != USE
9627 && GET_CODE (PATTERN (insn)) != CLOBBER
9628 && get_attr_type (insn) == TYPE_MILLI));
9631 /* Promote the return value, but not the arguments. */
9633 static machine_mode
9634 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9635 machine_mode mode,
9636 int *punsignedp ATTRIBUTE_UNUSED,
9637 const_tree fntype ATTRIBUTE_UNUSED,
9638 int for_return)
9640 if (for_return == 0)
9641 return mode;
9642 return promote_mode (type, mode, punsignedp);
9645 /* On the HP-PA the value is found in register(s) 28(-29), unless
9646 the mode is SF or DF. Then the value is returned in fr4 (32).
9648 This must perform the same promotions as PROMOTE_MODE, else promoting
9649 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9651 Small structures must be returned in a PARALLEL on PA64 in order
9652 to match the HP Compiler ABI. */
9654 static rtx
9655 pa_function_value (const_tree valtype,
9656 const_tree func ATTRIBUTE_UNUSED,
9657 bool outgoing ATTRIBUTE_UNUSED)
9659 machine_mode valmode;
9661 if (AGGREGATE_TYPE_P (valtype)
9662 || TREE_CODE (valtype) == COMPLEX_TYPE
9663 || TREE_CODE (valtype) == VECTOR_TYPE)
9665 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9667 /* Handle aggregates that fit exactly in a word or double word. */
9668 if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9669 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9671 if (TARGET_64BIT)
9673 /* Aggregates with a size less than or equal to 128 bits are
9674 returned in GR 28(-29). They are left justified. The pad
9675 bits are undefined. Larger aggregates are returned in
9676 memory. */
9677 rtx loc[2];
9678 int i, offset = 0;
9679 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9681 for (i = 0; i < ub; i++)
9683 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9684 gen_rtx_REG (DImode, 28 + i),
9685 GEN_INT (offset));
9686 offset += 8;
9689 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9691 else if (valsize > UNITS_PER_WORD)
9693 /* Aggregates 5 to 8 bytes in size are returned in general
9694 registers r28-r29 in the same manner as other non
9695 floating-point objects. The data is right-justified and
9696 zero-extended to 64 bits. This is opposite to the normal
9697 justification used on big endian targets and requires
9698 special treatment. */
9699 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9700 gen_rtx_REG (DImode, 28), const0_rtx);
9701 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9705 if ((INTEGRAL_TYPE_P (valtype)
9706 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9707 || POINTER_TYPE_P (valtype))
9708 valmode = word_mode;
9709 else
9710 valmode = TYPE_MODE (valtype);
9712 if (TREE_CODE (valtype) == REAL_TYPE
9713 && !AGGREGATE_TYPE_P (valtype)
9714 && TYPE_MODE (valtype) != TFmode
9715 && !TARGET_SOFT_FLOAT)
9716 return gen_rtx_REG (valmode, 32);
9718 return gen_rtx_REG (valmode, 28);
9721 /* Implement the TARGET_LIBCALL_VALUE hook. */
9723 static rtx
9724 pa_libcall_value (machine_mode mode,
9725 const_rtx fun ATTRIBUTE_UNUSED)
9727 if (! TARGET_SOFT_FLOAT
9728 && (mode == SFmode || mode == DFmode))
9729 return gen_rtx_REG (mode, 32);
9730 else
9731 return gen_rtx_REG (mode, 28);
9734 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9736 static bool
9737 pa_function_value_regno_p (const unsigned int regno)
9739 if (regno == 28
9740 || (! TARGET_SOFT_FLOAT && regno == 32))
9741 return true;
9743 return false;
9746 /* Update the data in CUM to advance over argument ARG. */
9748 static void
9749 pa_function_arg_advance (cumulative_args_t cum_v,
9750 const function_arg_info &arg)
9752 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9753 int arg_size = pa_function_arg_size (arg.mode, arg.type);
9755 cum->nargs_prototype--;
9756 cum->words += (arg_size
9757 + ((cum->words & 01)
9758 && arg.type != NULL_TREE
9759 && arg_size > 1));
9762 /* Return the location of a parameter that is passed in a register or NULL
9763 if the parameter has any component that is passed in memory.
9765 This is new code and will be pushed to into the net sources after
9766 further testing.
9768 ??? We might want to restructure this so that it looks more like other
9769 ports. */
9770 static rtx
9771 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9773 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9774 tree type = arg.type;
9775 machine_mode mode = arg.mode;
9776 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9777 int alignment = 0;
9778 int arg_size;
9779 int fpr_reg_base;
9780 int gpr_reg_base;
9781 rtx retval;
9783 if (arg.end_marker_p ())
9784 return NULL_RTX;
9786 arg_size = pa_function_arg_size (mode, type);
9787 if (!arg_size)
9788 return NULL_RTX;
9790 /* If this arg would be passed partially or totally on the stack, then
9791 this routine should return zero. pa_arg_partial_bytes will
9792 handle arguments which are split between regs and stack slots if
9793 the ABI mandates split arguments. */
9794 if (!TARGET_64BIT)
9796 /* The 32-bit ABI does not split arguments. */
9797 if (cum->words + arg_size > max_arg_words)
9798 return NULL_RTX;
9800 else
9802 if (arg_size > 1)
9803 alignment = cum->words & 1;
9804 if (cum->words + alignment >= max_arg_words)
9805 return NULL_RTX;
9808 /* The 32bit ABIs and the 64bit ABIs are rather different,
9809 particularly in their handling of FP registers. We might
9810 be able to cleverly share code between them, but I'm not
9811 going to bother in the hope that splitting them up results
9812 in code that is more easily understood. */
9814 if (TARGET_64BIT)
9816 /* Advance the base registers to their current locations.
9818 Remember, gprs grow towards smaller register numbers while
9819 fprs grow to higher register numbers. Also remember that
9820 although FP regs are 32-bit addressable, we pretend that
9821 the registers are 64-bits wide. */
9822 gpr_reg_base = 26 - cum->words;
9823 fpr_reg_base = 32 + cum->words;
9825 /* Arguments wider than one word and small aggregates need special
9826 treatment. */
9827 if (arg_size > 1
9828 || mode == BLKmode
9829 || (type && (AGGREGATE_TYPE_P (type)
9830 || TREE_CODE (type) == COMPLEX_TYPE
9831 || TREE_CODE (type) == VECTOR_TYPE)))
9833 /* Double-extended precision (80-bit), quad-precision (128-bit)
9834 and aggregates including complex numbers are aligned on
9835 128-bit boundaries. The first eight 64-bit argument slots
9836 are associated one-to-one, with general registers r26
9837 through r19, and also with floating-point registers fr4
9838 through fr11. Arguments larger than one word are always
9839 passed in general registers.
9841 Using a PARALLEL with a word mode register results in left
9842 justified data on a big-endian target. */
9844 rtx loc[8];
9845 int i, offset = 0, ub = arg_size;
9847 /* Align the base register. */
9848 gpr_reg_base -= alignment;
9850 ub = MIN (ub, max_arg_words - cum->words - alignment);
9851 for (i = 0; i < ub; i++)
9853 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9854 gen_rtx_REG (DImode, gpr_reg_base),
9855 GEN_INT (offset));
9856 gpr_reg_base -= 1;
9857 offset += 8;
9860 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9863 else
9865 /* If the argument is larger than a word, then we know precisely
9866 which registers we must use. */
9867 if (arg_size > 1)
9869 if (cum->words)
9871 gpr_reg_base = 23;
9872 fpr_reg_base = 38;
9874 else
9876 gpr_reg_base = 25;
9877 fpr_reg_base = 34;
9880 /* Structures 5 to 8 bytes in size are passed in the general
9881 registers in the same manner as other non floating-point
9882 objects. The data is right-justified and zero-extended
9883 to 64 bits. This is opposite to the normal justification
9884 used on big endian targets and requires special treatment.
9885 We now define BLOCK_REG_PADDING to pad these objects.
9886 Aggregates, complex and vector types are passed in the same
9887 manner as structures. */
9888 if (mode == BLKmode
9889 || (type && (AGGREGATE_TYPE_P (type)
9890 || TREE_CODE (type) == COMPLEX_TYPE
9891 || TREE_CODE (type) == VECTOR_TYPE)))
9893 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9894 gen_rtx_REG (DImode, gpr_reg_base),
9895 const0_rtx);
9896 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9899 else
9901 /* We have a single word (32 bits). A simple computation
9902 will get us the register #s we need. */
9903 gpr_reg_base = 26 - cum->words;
9904 fpr_reg_base = 32 + 2 * cum->words;
9908 /* Determine if the argument needs to be passed in both general and
9909 floating point registers. */
9910 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9911 /* If we are doing soft-float with portable runtime, then there
9912 is no need to worry about FP regs. */
9913 && !TARGET_SOFT_FLOAT
9914 /* The parameter must be some kind of scalar float, else we just
9915 pass it in integer registers. */
9916 && GET_MODE_CLASS (mode) == MODE_FLOAT
9917 /* The target function must not have a prototype. */
9918 && cum->nargs_prototype <= 0
9919 /* libcalls do not need to pass items in both FP and general
9920 registers. */
9921 && type != NULL_TREE
9922 /* All this hair applies to "outgoing" args only. This includes
9923 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9924 && !cum->incoming)
9925 /* Also pass outgoing floating arguments in both registers in indirect
9926 calls with the 32 bit ABI and the HP assembler since there is no
9927 way to the specify argument locations in static functions. */
9928 || (!TARGET_64BIT
9929 && !TARGET_GAS
9930 && !cum->incoming
9931 && cum->indirect
9932 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9934 retval
9935 = gen_rtx_PARALLEL
9936 (mode,
9937 gen_rtvec (2,
9938 gen_rtx_EXPR_LIST (VOIDmode,
9939 gen_rtx_REG (mode, fpr_reg_base),
9940 const0_rtx),
9941 gen_rtx_EXPR_LIST (VOIDmode,
9942 gen_rtx_REG (mode, gpr_reg_base),
9943 const0_rtx)));
9945 else
9947 /* See if we should pass this parameter in a general register. */
9948 if (TARGET_SOFT_FLOAT
9949 /* Indirect calls in the normal 32bit ABI require all arguments
9950 to be passed in general registers. */
9951 || (!TARGET_PORTABLE_RUNTIME
9952 && !TARGET_64BIT
9953 && !TARGET_ELF32
9954 && cum->indirect)
9955 /* If the parameter is not a scalar floating-point parameter,
9956 then it belongs in GPRs. */
9957 || GET_MODE_CLASS (mode) != MODE_FLOAT
9958 /* Structure with single SFmode field belongs in GPR. */
9959 || (type && AGGREGATE_TYPE_P (type)))
9960 retval = gen_rtx_REG (mode, gpr_reg_base);
9961 else
9962 retval = gen_rtx_REG (mode, fpr_reg_base);
9964 return retval;
9967 /* Arguments larger than one word are double word aligned. */
9969 static unsigned int
9970 pa_function_arg_boundary (machine_mode mode, const_tree type)
9972 bool singleword = (type
9973 ? (integer_zerop (TYPE_SIZE (type))
9974 || !TREE_CONSTANT (TYPE_SIZE (type))
9975 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9976 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9978 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9981 /* If this arg would be passed totally in registers or totally on the stack,
9982 then this routine should return zero. */
9984 static int
9985 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9987 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9988 unsigned int max_arg_words = 8;
9989 unsigned int offset = 0;
9990 int arg_size;
9992 if (!TARGET_64BIT)
9993 return 0;
9995 arg_size = pa_function_arg_size (arg.mode, arg.type);
9996 if (arg_size > 1 && (cum->words & 1))
9997 offset = 1;
9999 if (cum->words + offset + arg_size <= max_arg_words)
10000 /* Arg fits fully into registers. */
10001 return 0;
10002 else if (cum->words + offset >= max_arg_words)
10003 /* Arg fully on the stack. */
10004 return 0;
10005 else
10006 /* Arg is split. */
10007 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
10011 /* A get_unnamed_section callback for switching to the text section.
10013 This function is only used with SOM. Because we don't support
10014 named subspaces, we can only create a new subspace or switch back
10015 to the default text subspace. */
10017 static void
10018 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10020 gcc_assert (TARGET_SOM);
10021 if (TARGET_GAS)
10023 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10025 /* We only want to emit a .nsubspa directive once at the
10026 start of the function. */
10027 cfun->machine->in_nsubspa = 1;
10029 /* Create a new subspace for the text. This provides
10030 better stub placement and one-only functions. */
10031 if (cfun->decl
10032 && DECL_ONE_ONLY (cfun->decl)
10033 && !DECL_WEAK (cfun->decl))
10035 output_section_asm_op ("\t.SPACE $TEXT$\n"
10036 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10037 "ACCESS=44,SORT=24,COMDAT");
10038 return;
10041 else
10043 /* There isn't a current function or the body of the current
10044 function has been completed. So, we are changing to the
10045 text section to output debugging information. Thus, we
10046 need to forget that we are in the text section so that
10047 varasm.cc will call us when text_section is selected again. */
10048 gcc_assert (!cfun || !cfun->machine
10049 || cfun->machine->in_nsubspa == 2);
10050 in_section = NULL;
10052 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10053 return;
10055 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10058 /* A get_unnamed_section callback for switching to comdat data
10059 sections. This function is only used with SOM. */
10061 static void
10062 som_output_comdat_data_section_asm_op (const char *data)
10064 in_section = NULL;
10065 output_section_asm_op (data);
10068 /* Implement TARGET_ASM_INIT_SECTIONS. */
10070 static void
10071 pa_som_asm_init_sections (void)
10073 text_section
10074 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10076 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10077 is not being generated. */
10078 som_readonly_data_section
10079 = get_unnamed_section (0, output_section_asm_op,
10080 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10082 /* When secondary definitions are not supported, SOM makes readonly
10083 data one-only by creating a new $LIT$ subspace in $TEXT$ with
10084 the comdat flag. */
10085 som_one_only_readonly_data_section
10086 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10087 "\t.SPACE $TEXT$\n"
10088 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10089 "ACCESS=0x2c,SORT=16,COMDAT");
10092 /* When secondary definitions are not supported, SOM makes data one-only
10093 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
10094 som_one_only_data_section
10095 = get_unnamed_section (SECTION_WRITE,
10096 som_output_comdat_data_section_asm_op,
10097 "\t.SPACE $PRIVATE$\n"
10098 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10099 "ACCESS=31,SORT=24,COMDAT");
10101 if (flag_tm)
10102 som_tm_clone_table_section
10103 = get_unnamed_section (0, output_section_asm_op,
10104 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10106 /* HPUX ld generates incorrect GOT entries for "T" fixups which
10107 reference data within the $TEXT$ space (for example constant
10108 strings in the $LIT$ subspace).
10110 The assemblers (GAS and HP as) both have problems with handling
10111 the difference of two symbols. This is the other correct way to
10112 reference constant data during PIC code generation.
10114 Thus, we can't put constant data needing relocation in the $TEXT$
10115 space during PIC generation.
10117 Previously, we placed all constant data into the $DATA$ subspace
10118 when generating PIC code. This reduces sharing, but it works
10119 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
10120 This puts constant data not needing relocation into the $TEXT$ space. */
10121 readonly_data_section = som_readonly_data_section;
10123 /* We must not have a reference to an external symbol defined in a
10124 shared library in a readonly section, else the SOM linker will
10125 complain.
10127 So, we force exception information into the data section. */
10128 exception_section = data_section;
10131 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
10133 static section *
10134 pa_som_tm_clone_table_section (void)
10136 return som_tm_clone_table_section;
10139 /* On hpux10, the linker will give an error if we have a reference
10140 in the read-only data section to a symbol defined in a shared
10141 library. Therefore, expressions that might require a reloc
10142 cannot be placed in the read-only data section. */
10144 static section *
10145 pa_select_section (tree exp, int reloc,
10146 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10148 if (TREE_CODE (exp) == VAR_DECL
10149 && TREE_READONLY (exp)
10150 && !TREE_THIS_VOLATILE (exp)
10151 && DECL_INITIAL (exp)
10152 && (DECL_INITIAL (exp) == error_mark_node
10153 || TREE_CONSTANT (DECL_INITIAL (exp)))
10154 && !(reloc & pa_reloc_rw_mask ()))
10156 if (TARGET_SOM
10157 && DECL_ONE_ONLY (exp)
10158 && !DECL_WEAK (exp))
10159 return som_one_only_readonly_data_section;
10160 else
10161 return readonly_data_section;
10163 else if (CONSTANT_CLASS_P (exp)
10164 && !(reloc & pa_reloc_rw_mask ()))
10165 return readonly_data_section;
10166 else if (TARGET_SOM
10167 && TREE_CODE (exp) == VAR_DECL
10168 && DECL_ONE_ONLY (exp)
10169 && !DECL_WEAK (exp))
10170 return som_one_only_data_section;
10171 else
10172 return data_section;
10175 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10176 and the function is in a COMDAT group, place the plabel reference in the
10177 .data.rel.ro.local section. The linker ignores references to symbols in
10178 discarded sections from this section. */
10180 static section *
10181 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10182 unsigned HOST_WIDE_INT align)
10184 if (function_label_operand (x, VOIDmode))
10186 tree decl = SYMBOL_REF_DECL (x);
10188 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10189 return get_named_section (NULL, ".data.rel.ro.local", 1);
10192 return default_elf_select_rtx_section (mode, x, align);
10195 /* Implement pa_reloc_rw_mask. */
10197 static int
10198 pa_reloc_rw_mask (void)
10200 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10201 return 3;
10203 /* HP linker does not support global relocs in readonly memory. */
10204 return TARGET_SOM ? 2 : 0;
10207 static void
10208 pa_globalize_label (FILE *stream, const char *name)
10210 /* We only handle DATA objects here, functions are globalized in
10211 ASM_DECLARE_FUNCTION_NAME. */
10212 if (! FUNCTION_NAME_P (name))
10214 fputs ("\t.EXPORT ", stream);
10215 assemble_name (stream, name);
10216 fputs (",DATA\n", stream);
10220 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10222 static rtx
10223 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10224 int incoming ATTRIBUTE_UNUSED)
10226 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10229 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10231 bool
10232 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10234 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10235 PA64 ABI says that objects larger than 128 bits are returned in memory.
10236 Note, int_size_in_bytes can return -1 if the size of the object is
10237 variable or larger than the maximum value that can be expressed as
10238 a HOST_WIDE_INT. It can also return zero for an empty type. The
10239 simplest way to handle variable and empty types is to pass them in
10240 memory. This avoids problems in defining the boundaries of argument
10241 slots, allocating registers, etc. */
10242 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10243 || int_size_in_bytes (type) <= 0);
10246 /* Structure to hold declaration and name of external symbols that are
10247 emitted by GCC. We generate a vector of these symbols and output them
10248 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10249 This avoids putting out names that are never really used. */
10251 typedef struct GTY(()) extern_symbol
10253 tree decl;
10254 const char *name;
10255 } extern_symbol;
10257 /* Define gc'd vector type for extern_symbol. */
10259 /* Vector of extern_symbol pointers. */
10260 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10262 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10263 /* Mark DECL (name NAME) as an external reference (assembler output
10264 file FILE). This saves the names to output at the end of the file
10265 if actually referenced. */
10267 void
10268 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10270 gcc_assert (file == asm_out_file);
10271 extern_symbol p = {decl, name};
10272 vec_safe_push (extern_symbols, p);
10274 #endif
10276 /* Output text required at the end of an assembler file.
10277 This includes deferred plabels and .import directives for
10278 all external symbols that were actually referenced. */
10280 static void
10281 pa_file_end (void)
10283 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10284 unsigned int i;
10285 extern_symbol *p;
10287 if (!NO_DEFERRED_PROFILE_COUNTERS)
10288 output_deferred_profile_counters ();
10289 #endif
10291 output_deferred_plabels ();
10293 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10294 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10296 tree decl = p->decl;
10298 if (!TREE_ASM_WRITTEN (decl)
10299 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10300 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10303 vec_free (extern_symbols);
10304 #endif
10306 if (NEED_INDICATE_EXEC_STACK)
10307 file_end_indicate_exec_stack ();
10310 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10312 static bool
10313 pa_can_change_mode_class (machine_mode from, machine_mode to,
10314 reg_class_t rclass)
10316 if (from == to)
10317 return true;
10319 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10320 return true;
10322 /* Reject changes to/from modes with zero size. */
10323 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10324 return false;
10326 /* Reject changes to/from complex and vector modes. */
10327 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10328 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10329 return false;
10331 /* There is no way to load QImode or HImode values directly from memory
10332 to a FP register. SImode loads to the FP registers are not zero
10333 extended. On the 64-bit target, this conflicts with the definition
10334 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10335 except for DImode to SImode on the 64-bit target. It is handled by
10336 register renaming in pa_print_operand. */
10337 if (MAYBE_FP_REG_CLASS_P (rclass))
10338 return TARGET_64BIT && from == DImode && to == SImode;
10340 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10341 in specific sets of registers. Thus, we cannot allow changing
10342 to a larger mode when it's larger than a word. */
10343 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10344 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10345 return false;
10347 return true;
10350 /* Implement TARGET_MODES_TIEABLE_P.
10352 We should return FALSE for QImode and HImode because these modes
10353 are not ok in the floating-point registers. However, this prevents
10354 tieing these modes to SImode and DImode in the general registers.
10355 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10356 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10357 in the floating-point registers. */
10359 static bool
10360 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10362 /* Don't tie modes in different classes. */
10363 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10364 return false;
10366 return true;
10370 /* Length in units of the trampoline instruction code. */
10372 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10375 /* Output assembler code for a block containing the constant parts
10376 of a trampoline, leaving space for the variable parts.\
10378 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10379 and then branches to the specified routine.
10381 This code template is copied from text segment to stack location
10382 and then patched with pa_trampoline_init to contain valid values,
10383 and then entered as a subroutine.
10385 It is best to keep this as small as possible to avoid having to
10386 flush multiple lines in the cache. */
10388 static void
10389 pa_asm_trampoline_template (FILE *f)
10391 if (!TARGET_64BIT)
10393 if (TARGET_PA_20)
10395 fputs ("\tmfia %r20\n", f);
10396 fputs ("\tldw 48(%r20),%r22\n", f);
10397 fputs ("\tcopy %r22,%r21\n", f);
10398 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10399 fputs ("\tdepwi 0,31,2,%r22\n", f);
10400 fputs ("\tldw 0(%r22),%r21\n", f);
10401 fputs ("\tldw 4(%r22),%r19\n", f);
10402 fputs ("\tbve (%r21)\n", f);
10403 fputs ("\tldw 52(%r1),%r29\n", f);
10404 fputs ("\t.word 0\n", f);
10405 fputs ("\t.word 0\n", f);
10406 fputs ("\t.word 0\n", f);
10408 else
10410 if (ASSEMBLER_DIALECT == 0)
10412 fputs ("\tbl .+8,%r20\n", f);
10413 fputs ("\tdepi 0,31,2,%r20\n", f);
10415 else
10417 fputs ("\tb,l .+8,%r20\n", f);
10418 fputs ("\tdepwi 0,31,2,%r20\n", f);
10420 fputs ("\tldw 40(%r20),%r22\n", f);
10421 fputs ("\tcopy %r22,%r21\n", f);
10422 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10423 if (ASSEMBLER_DIALECT == 0)
10424 fputs ("\tdepi 0,31,2,%r22\n", f);
10425 else
10426 fputs ("\tdepwi 0,31,2,%r22\n", f);
10427 fputs ("\tldw 0(%r22),%r21\n", f);
10428 fputs ("\tldw 4(%r22),%r19\n", f);
10429 fputs ("\tldsid (%r21),%r1\n", f);
10430 fputs ("\tmtsp %r1,%sr0\n", f);
10431 fputs ("\tbe 0(%sr0,%r21)\n", f);
10432 fputs ("\tldw 44(%r20),%r29\n", f);
10434 fputs ("\t.word 0\n", f);
10435 fputs ("\t.word 0\n", f);
10436 fputs ("\t.word 0\n", f);
10437 fputs ("\t.word 0\n", f);
10439 else
10441 fputs ("\t.dword 0\n", f);
10442 fputs ("\t.dword 0\n", f);
10443 fputs ("\t.dword 0\n", f);
10444 fputs ("\t.dword 0\n", f);
10445 fputs ("\tmfia %r31\n", f);
10446 fputs ("\tldd 24(%r31),%r27\n", f);
10447 fputs ("\tldd 32(%r31),%r31\n", f);
10448 fputs ("\tldd 16(%r27),%r1\n", f);
10449 fputs ("\tbve (%r1)\n", f);
10450 fputs ("\tldd 24(%r27),%r27\n", f);
10451 fputs ("\t.dword 0 ; fptr\n", f);
10452 fputs ("\t.dword 0 ; static link\n", f);
10456 /* Emit RTL insns to initialize the variable parts of a trampoline.
10457 FNADDR is an RTX for the address of the function's pure code.
10458 CXT is an RTX for the static chain value for the function.
10460 Move the function address to the trampoline template at offset 48.
10461 Move the static chain value to trampoline template at offset 52.
10462 Move the trampoline address to trampoline template at offset 56.
10463 Move r19 to trampoline template at offset 60. The latter two
10464 words create a plabel for the indirect call to the trampoline.
10466 A similar sequence is used for the 64-bit port but the plabel is
10467 at the beginning of the trampoline.
10469 Finally, the cache entries for the trampoline code are flushed.
10470 This is necessary to ensure that the trampoline instruction sequence
10471 is written to memory prior to any attempts at prefetching the code
10472 sequence. */
10474 static void
10475 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10477 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10478 rtx start_addr = gen_reg_rtx (Pmode);
10479 rtx end_addr = gen_reg_rtx (Pmode);
10480 rtx line_length = gen_reg_rtx (Pmode);
10481 rtx r_tramp, tmp;
10483 emit_block_move (m_tramp, assemble_trampoline_template (),
10484 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10485 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10487 if (!TARGET_64BIT)
10489 tmp = adjust_address (m_tramp, Pmode, 48);
10490 emit_move_insn (tmp, fnaddr);
10491 tmp = adjust_address (m_tramp, Pmode, 52);
10492 emit_move_insn (tmp, chain_value);
10494 /* Create a fat pointer for the trampoline. */
10495 tmp = adjust_address (m_tramp, Pmode, 56);
10496 emit_move_insn (tmp, r_tramp);
10497 tmp = adjust_address (m_tramp, Pmode, 60);
10498 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10500 /* fdc and fic only use registers for the address to flush,
10501 they do not accept integer displacements. We align the
10502 start and end addresses to the beginning of their respective
10503 cache lines to minimize the number of lines flushed. */
10504 emit_insn (gen_andsi3 (start_addr, r_tramp,
10505 GEN_INT (-MIN_CACHELINE_SIZE)));
10506 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10507 TRAMPOLINE_CODE_SIZE-1));
10508 emit_insn (gen_andsi3 (end_addr, tmp,
10509 GEN_INT (-MIN_CACHELINE_SIZE)));
10510 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10511 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10512 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10513 gen_reg_rtx (Pmode),
10514 gen_reg_rtx (Pmode)));
10516 else
10518 tmp = adjust_address (m_tramp, Pmode, 56);
10519 emit_move_insn (tmp, fnaddr);
10520 tmp = adjust_address (m_tramp, Pmode, 64);
10521 emit_move_insn (tmp, chain_value);
10523 /* Create a fat pointer for the trampoline. */
10524 tmp = adjust_address (m_tramp, Pmode, 16);
10525 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10526 r_tramp, 32)));
10527 tmp = adjust_address (m_tramp, Pmode, 24);
10528 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10530 /* fdc and fic only use registers for the address to flush,
10531 they do not accept integer displacements. We align the
10532 start and end addresses to the beginning of their respective
10533 cache lines to minimize the number of lines flushed. */
10534 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10535 emit_insn (gen_anddi3 (start_addr, tmp,
10536 GEN_INT (-MIN_CACHELINE_SIZE)));
10537 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10538 TRAMPOLINE_CODE_SIZE - 1));
10539 emit_insn (gen_anddi3 (end_addr, tmp,
10540 GEN_INT (-MIN_CACHELINE_SIZE)));
10541 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10542 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10543 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10544 gen_reg_rtx (Pmode),
10545 gen_reg_rtx (Pmode)));
10548 #ifdef HAVE_ENABLE_EXECUTE_STACK
10549 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10550 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10551 #endif
10554 /* Perform any machine-specific adjustment in the address of the trampoline.
10555 ADDR contains the address that was passed to pa_trampoline_init.
10556 Adjust the trampoline address to point to the plabel at offset 56. */
10558 static rtx
10559 pa_trampoline_adjust_address (rtx addr)
10561 if (!TARGET_64BIT)
10562 addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10563 return addr;
10566 static rtx
10567 pa_delegitimize_address (rtx orig_x)
10569 rtx x = delegitimize_mem_from_attrs (orig_x);
10571 if (GET_CODE (x) == LO_SUM
10572 && GET_CODE (XEXP (x, 1)) == UNSPEC
10573 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10574 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10575 return x;
10578 static rtx
10579 pa_internal_arg_pointer (void)
10581 /* The argument pointer and the hard frame pointer are the same in
10582 the 32-bit runtime, so we don't need a copy. */
10583 if (TARGET_64BIT)
10584 return copy_to_reg (virtual_incoming_args_rtx);
10585 else
10586 return virtual_incoming_args_rtx;
10589 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10590 Frame pointer elimination is automatically handled. */
10592 static bool
10593 pa_can_eliminate (const int from, const int to)
10595 /* The argument cannot be eliminated in the 64-bit runtime. */
10596 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10597 return false;
10599 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10600 ? ! frame_pointer_needed
10601 : true);
10604 /* Define the offset between two registers, FROM to be eliminated and its
10605 replacement TO, at the start of a routine. */
10606 HOST_WIDE_INT
10607 pa_initial_elimination_offset (int from, int to)
10609 HOST_WIDE_INT offset;
10611 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10612 && to == STACK_POINTER_REGNUM)
10613 offset = -pa_compute_frame_size (get_frame_size (), 0);
10614 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10615 offset = 0;
10616 else
10617 gcc_unreachable ();
10619 return offset;
10622 static void
10623 pa_conditional_register_usage (void)
10625 int i;
10627 if (!TARGET_64BIT && !TARGET_PA_11)
10629 for (i = 56; i <= FP_REG_LAST; i++)
10630 fixed_regs[i] = call_used_regs[i] = 1;
10631 for (i = 33; i < 56; i += 2)
10632 fixed_regs[i] = call_used_regs[i] = 1;
10634 if (TARGET_SOFT_FLOAT)
10636 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10637 fixed_regs[i] = call_used_regs[i] = 1;
10639 if (flag_pic)
10640 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10643 /* Target hook for c_mode_for_suffix. */
10645 static machine_mode
10646 pa_c_mode_for_suffix (char suffix)
10648 if (HPUX_LONG_DOUBLE_LIBRARY)
10650 if (suffix == 'q')
10651 return TFmode;
10654 return VOIDmode;
10657 /* Target hook for function_section. */
10659 static section *
10660 pa_function_section (tree decl, enum node_frequency freq,
10661 bool startup, bool exit)
10663 /* Put functions in text section if target doesn't have named sections. */
10664 if (!targetm_common.have_named_sections)
10665 return text_section;
10667 /* Force nested functions into the same section as the containing
10668 function. */
10669 if (decl
10670 && DECL_SECTION_NAME (decl) == NULL
10671 && DECL_CONTEXT (decl) != NULL_TREE
10672 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10673 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10674 return function_section (DECL_CONTEXT (decl));
10676 /* Otherwise, use the default function section. */
10677 return default_function_section (decl, freq, startup, exit);
10680 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10682 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10683 that need more than three instructions to load prior to reload. This
10684 limit is somewhat arbitrary. It takes three instructions to load a
10685 CONST_INT from memory but two are memory accesses. It may be better
10686 to increase the allowed range for CONST_INTS. We may also be able
10687 to handle CONST_DOUBLES. */
10689 static bool
10690 pa_legitimate_constant_p (machine_mode mode, rtx x)
10692 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10693 return false;
10695 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10696 return false;
10698 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10699 legitimate constants. The other variants can't be handled by
10700 the move patterns after reload starts. */
10701 if (tls_referenced_p (x))
10702 return false;
10704 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10705 return false;
10707 if (TARGET_64BIT
10708 && HOST_BITS_PER_WIDE_INT > 32
10709 && GET_CODE (x) == CONST_INT
10710 && !reload_in_progress
10711 && !reload_completed
10712 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10713 && !pa_cint_ok_for_move (UINTVAL (x)))
10714 return false;
10716 if (function_label_operand (x, mode))
10717 return false;
10719 return true;
10722 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10724 static unsigned int
10725 pa_section_type_flags (tree decl, const char *name, int reloc)
10727 unsigned int flags;
10729 flags = default_section_type_flags (decl, name, reloc);
10731 /* Function labels are placed in the constant pool. This can
10732 cause a section conflict if decls are put in ".data.rel.ro"
10733 or ".data.rel.ro.local" using the __attribute__ construct. */
10734 if (strcmp (name, ".data.rel.ro") == 0
10735 || strcmp (name, ".data.rel.ro.local") == 0)
10736 flags |= SECTION_WRITE | SECTION_RELRO;
10738 return flags;
10741 /* pa_legitimate_address_p recognizes an RTL expression that is a
10742 valid memory address for an instruction. The MODE argument is the
10743 machine mode for the MEM expression that wants to use this address.
10745 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10746 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10747 available with floating point loads and stores, and integer loads.
10748 We get better code by allowing indexed addresses in the initial
10749 RTL generation.
10751 The acceptance of indexed addresses as legitimate implies that we
10752 must provide patterns for doing indexed integer stores, or the move
10753 expanders must force the address of an indexed store to a register.
10754 We have adopted the latter approach.
10756 Another function of pa_legitimate_address_p is to ensure that
10757 the base register is a valid pointer for indexed instructions.
10758 On targets that have non-equivalent space registers, we have to
10759 know at the time of assembler output which register in a REG+REG
10760 pair is the base register. The REG_POINTER flag is sometimes lost
10761 in reload and the following passes, so it can't be relied on during
10762 code generation. Thus, we either have to canonicalize the order
10763 of the registers in REG+REG indexed addresses, or treat REG+REG
10764 addresses separately and provide patterns for both permutations.
10766 The latter approach requires several hundred additional lines of
10767 code in pa.md. The downside to canonicalizing is that a PLUS
10768 in the wrong order can't combine to form to make a scaled indexed
10769 memory operand. As we won't need to canonicalize the operands if
10770 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10772 We initially break out scaled indexed addresses in canonical order
10773 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10774 scaled indexed addresses during RTL generation. However, fold_rtx
10775 has its own opinion on how the operands of a PLUS should be ordered.
10776 If one of the operands is equivalent to a constant, it will make
10777 that operand the second operand. As the base register is likely to
10778 be equivalent to a SYMBOL_REF, we have made it the second operand.
10780 pa_legitimate_address_p accepts REG+REG as legitimate when the
10781 operands are in the order INDEX+BASE on targets with non-equivalent
10782 space registers, and in any order on targets with equivalent space
10783 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10785 We treat a SYMBOL_REF as legitimate if it is part of the current
10786 function's constant-pool, because such addresses can actually be
10787 output as REG+SMALLINT. */
10789 static bool
10790 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10792 if ((REG_P (x)
10793 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10794 : REG_OK_FOR_BASE_P (x)))
10795 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10796 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10797 && REG_P (XEXP (x, 0))
10798 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10799 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10800 return true;
10802 if (GET_CODE (x) == PLUS)
10804 rtx base, index;
10806 /* For REG+REG, the base register should be in XEXP (x, 1),
10807 so check it first. */
10808 if (REG_P (XEXP (x, 1))
10809 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10810 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10811 base = XEXP (x, 1), index = XEXP (x, 0);
10812 else if (REG_P (XEXP (x, 0))
10813 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10814 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10815 base = XEXP (x, 0), index = XEXP (x, 1);
10816 else
10817 return false;
10819 if (GET_CODE (index) == CONST_INT)
10821 if (INT_5_BITS (index))
10822 return true;
10824 /* When INT14_OK_STRICT is false, a secondary reload is needed
10825 to adjust the displacement of SImode and DImode floating point
10826 instructions but this may fail when the register also needs
10827 reloading. So, we return false when STRICT is true. We
10828 also reject long displacements for float mode addresses since
10829 the majority of accesses will use floating point instructions
10830 that don't support 14-bit offsets. */
10831 if (!INT14_OK_STRICT
10832 && (strict || !(reload_in_progress || reload_completed))
10833 && mode != QImode
10834 && mode != HImode)
10835 return false;
10837 return base14_operand (index, mode);
10840 if (!TARGET_DISABLE_INDEXING
10841 /* Only accept the "canonical" INDEX+BASE operand order
10842 on targets with non-equivalent space registers. */
10843 && (TARGET_NO_SPACE_REGS
10844 ? REG_P (index)
10845 : (base == XEXP (x, 1) && REG_P (index)
10846 && (reload_completed
10847 || (reload_in_progress && HARD_REGISTER_P (base))
10848 || REG_POINTER (base))
10849 && (reload_completed
10850 || (reload_in_progress && HARD_REGISTER_P (index))
10851 || !REG_POINTER (index))))
10852 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10853 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10854 : REG_OK_FOR_INDEX_P (index))
10855 && borx_reg_operand (base, Pmode)
10856 && borx_reg_operand (index, Pmode))
10857 return true;
10859 if (!TARGET_DISABLE_INDEXING
10860 && GET_CODE (index) == MULT
10861 /* Only accept base operands with the REG_POINTER flag prior to
10862 reload on targets with non-equivalent space registers. */
10863 && (TARGET_NO_SPACE_REGS
10864 || (base == XEXP (x, 1)
10865 && (reload_completed
10866 || (reload_in_progress && HARD_REGISTER_P (base))
10867 || REG_POINTER (base))))
10868 && REG_P (XEXP (index, 0))
10869 && GET_MODE (XEXP (index, 0)) == Pmode
10870 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10871 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10872 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10873 && GET_CODE (XEXP (index, 1)) == CONST_INT
10874 && INTVAL (XEXP (index, 1))
10875 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10876 && borx_reg_operand (base, Pmode))
10877 return true;
10879 return false;
10882 if (GET_CODE (x) == LO_SUM)
10884 rtx y = XEXP (x, 0);
10886 if (GET_CODE (y) == SUBREG)
10887 y = SUBREG_REG (y);
10889 if (REG_P (y)
10890 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10891 : REG_OK_FOR_BASE_P (y)))
10893 /* Needed for -fPIC */
10894 if (mode == Pmode
10895 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10896 return true;
10898 if (!INT14_OK_STRICT
10899 && (strict || !(reload_in_progress || reload_completed))
10900 && mode != QImode
10901 && mode != HImode)
10902 return false;
10904 if (CONSTANT_P (XEXP (x, 1)))
10905 return true;
10907 return false;
10910 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10911 return true;
10913 return false;
10916 /* Look for machine dependent ways to make the invalid address AD a
10917 valid address.
10919 For the PA, transform:
10921 memory(X + <large int>)
10923 into:
10925 if (<large int> & mask) >= 16
10926 Y = (<large int> & ~mask) + mask + 1 Round up.
10927 else
10928 Y = (<large int> & ~mask) Round down.
10929 Z = X + Y
10930 memory (Z + (<large int> - Y));
10932 This makes reload inheritance and reload_cse work better since Z
10933 can be reused.
10935 There may be more opportunities to improve code with this hook. */
10938 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10939 int opnum, int type,
10940 int ind_levels ATTRIBUTE_UNUSED)
10942 long offset, newoffset, mask;
10943 rtx new_rtx, temp = NULL_RTX;
10945 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10946 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10948 if (optimize && GET_CODE (ad) == PLUS)
10949 temp = simplify_binary_operation (PLUS, Pmode,
10950 XEXP (ad, 0), XEXP (ad, 1));
10952 new_rtx = temp ? temp : ad;
10954 if (optimize
10955 && GET_CODE (new_rtx) == PLUS
10956 && GET_CODE (XEXP (new_rtx, 0)) == REG
10957 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10959 offset = INTVAL (XEXP ((new_rtx), 1));
10961 /* Choose rounding direction. Round up if we are >= halfway. */
10962 if ((offset & mask) >= ((mask + 1) / 2))
10963 newoffset = (offset & ~mask) + mask + 1;
10964 else
10965 newoffset = offset & ~mask;
10967 /* Ensure that long displacements are aligned. */
10968 if (mask == 0x3fff
10969 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10970 || (TARGET_64BIT && (mode) == DImode)))
10971 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10973 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10975 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10976 GEN_INT (newoffset));
10977 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10978 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10979 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10980 opnum, (enum reload_type) type);
10981 return ad;
10985 return NULL_RTX;
10988 /* Output address vector. */
10990 void
10991 pa_output_addr_vec (rtx lab, rtx body)
10993 int idx, vlen = XVECLEN (body, 0);
10995 if (!TARGET_SOM)
10996 fputs ("\t.align 4\n", asm_out_file);
10997 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10998 if (TARGET_GAS)
10999 fputs ("\t.begin_brtab\n", asm_out_file);
11000 for (idx = 0; idx < vlen; idx++)
11002 ASM_OUTPUT_ADDR_VEC_ELT
11003 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
11005 if (TARGET_GAS)
11006 fputs ("\t.end_brtab\n", asm_out_file);
11009 /* Output address difference vector. */
11011 void
11012 pa_output_addr_diff_vec (rtx lab, rtx body)
11014 rtx base = XEXP (XEXP (body, 0), 0);
11015 int idx, vlen = XVECLEN (body, 1);
11017 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11018 if (TARGET_GAS)
11019 fputs ("\t.begin_brtab\n", asm_out_file);
11020 for (idx = 0; idx < vlen; idx++)
11022 ASM_OUTPUT_ADDR_DIFF_ELT
11023 (asm_out_file,
11024 body,
11025 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11026 CODE_LABEL_NUMBER (base));
11028 if (TARGET_GAS)
11029 fputs ("\t.end_brtab\n", asm_out_file);
11032 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
11033 arguments passed by hidden reference in the 32-bit HP runtime. Users
11034 can override this behavior for better compatibility with openmp at the
11035 risk of library incompatibilities. Arguments are always passed by value
11036 in the 64-bit HP runtime. */
11038 static bool
11039 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11041 return !TARGET_CALLER_COPIES;
11044 /* Implement TARGET_HARD_REGNO_NREGS. */
11046 static unsigned int
11047 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11049 return PA_HARD_REGNO_NREGS (regno, mode);
11052 /* Implement TARGET_HARD_REGNO_MODE_OK. */
11054 static bool
11055 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11057 return PA_HARD_REGNO_MODE_OK (regno, mode);
11060 /* Implement TARGET_STARTING_FRAME_OFFSET.
11062 On the 32-bit ports, we reserve one slot for the previous frame
11063 pointer and one fill slot. The fill slot is for compatibility
11064 with HP compiled programs. On the 64-bit ports, we reserve one
11065 slot for the previous frame pointer. */
11067 static HOST_WIDE_INT
11068 pa_starting_frame_offset (void)
11070 return 8;
11073 /* Figure out the size in words of the function argument. */
11076 pa_function_arg_size (machine_mode mode, const_tree type)
11078 HOST_WIDE_INT size;
11080 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11082 /* The 64-bit runtime does not restrict the size of stack frames,
11083 but the gcc calling conventions limit argument sizes to 1G. Our
11084 prologue/epilogue code limits frame sizes to just under 32 bits.
11085 1G is also the maximum frame size that can be handled by the HPUX
11086 unwind descriptor. Since very large TYPE_SIZE_UNIT values can
11087 occur for (parallel:BLK []), we need to ignore large arguments
11088 passed by value. */
11089 if (size >= (1 << (HOST_BITS_PER_INT - 2)))
11090 size = 0;
11091 return (int) CEIL (size, UNITS_PER_WORD);
11094 #include "gt-pa.h"