/* Commit: Revise -mdisable-fpregs option and add new -msoft-mult option.
   File: gcc/config/pa/pa.c  */
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2021 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
                                  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
                                     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
                                           unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
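
/* For example, "-mfixed-range=fr4-fr31" (the kernel use case mentioned
   above) removes fr4 through fr31 from allocation; when that range
   covers every allocatable FP register, the loop above turns on
   MASK_SOFT_FLOAT as well.  */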

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
         specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
        dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
        dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "%<-freorder-blocks-and-partition%> does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */

int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */

int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */

int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
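
/* For example, ival == 0x12345000 satisfies both conditions: its low
   11 bits are zero and bits 31 and up are all zero, so x == 0 above.
   In contrast, ival == 0x12345678 fails because 0x678 & 0x7ff != 0.  */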

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */

int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
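
/* For example, x == 0x00f80000 (a run of five ones) is accepted:
   lsb_mask == 0x00080000 and t == ((x >> 4) + lsb_mask)
   & ~(lsb_mask - 1) == 0x00100000, a power of two.  A value such as
   0x00f00f00 has two separate runs of ones and is rejected.  */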

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */

int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */

int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
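
/* Both helpers test for a single contiguous run of ones: adding the
   lowest set bit collapses the run, leaving either zero or a single
   bit.  For example, pa_ior_mask_p (0xf0) computes 0xf0 + 0x10 == 0x100,
   a power of two, so the mask is accepted; 0x90 is not contiguous and
   is rejected.  */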

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);
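
      /* For instance, with offset == 0x5432 and mask == 0x3fff,
         offset & mask == 0x1432, which is below the 0x2000 halfway
         point, so we round down to newoffset == 0x4000; the residual
         0x1432 still fits in the 14-bit displacement of an ldo.  */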

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine cannot
         handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
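
/* For example, (plus (ashift (reg) (const_int 2)) (reg)) and its
   non-canonical twin (plus (mult (reg) (const_int 4)) (reg)) both
   match, each corresponding to a single sh2add instruction.  */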

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (outer_code == SET)
        *total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
        }
      else if (mode == DImode)
        {
          if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
            *total = COSTS_N_INSNS (25);
          else
            *total = COSTS_N_INSNS (80);
        }
      else
        {
          if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
            *total = COSTS_N_INSNS (8);
          else
            *total = COSTS_N_INSNS (20);
        }
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
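
      /* A note on the MULT costs above: the cheap estimates apply when
         the PA 1.1 FPU's integer multiply (xmpyu) can be used.  With
         -msoft-float, or with the new -msoft-mult option from this
         change, that path is unavailable and multiplication is
         presumably expanded to a millicode or library call, hence the
         much larger values.  */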

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return false;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
        *total = COSTS_N_INSNS (240);
      else
        *total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (mode == DImode)
        {
          if (TARGET_64BIT)
            {
              *total = COSTS_N_INSNS (1);
              /* Handle shladd,l instructions.  */
              if (hppa_rtx_costs_shadd_p (x))
                return true;
            }
          else
            *total = COSTS_N_INSNS (2);
        }
      else
        {
          *total = COSTS_N_INSNS (1);
          /* Handle shNadd instructions.  */
          if (hppa_rtx_costs_shadd_p (x))
            return true;
        }
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (3);
          else if (speed)
            *total = COSTS_N_INSNS (13);
          else
            *total = COSTS_N_INSNS (18);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (3);
          else if (speed)
            *total = COSTS_N_INSNS (14);
          else
            *total = COSTS_N_INSNS (19);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else
            *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      if (mode == DImode)
        {
          if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
            {
              if (TARGET_64BIT)
                *total = COSTS_N_INSNS (1);
              else
                *total = COSTS_N_INSNS (2);
              return true;
            }
          else if (TARGET_64BIT)
            *total = COSTS_N_INSNS (2);
          else if (speed)
            *total = COSTS_N_INSNS (12);
          else
            *total = COSTS_N_INSNS (15);
        }
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (TARGET_64BIT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
             && (REG_P (XEXP (x, 1))
                 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  rtx operand0 = operands[0];
  rtx operand1 = operands[1];
  rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases, and reloads for other unsupported
     memory operands.  */
  if (scratch_reg
      && FP_REG_P (operand0)
      && (MEM_P (operand1)
          || (GET_CODE (operand1) == SUBREG
              && MEM_P (XEXP (operand1, 0)))))
    {
      rtx op1 = operand1;

      if (GET_CODE (op1) == SUBREG)
        op1 = XEXP (op1, 0);

      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op1, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op1, 0));
              op1 = replace_equiv_address (op1, scratch_reg);
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
               || IS_INDEX_ADDR_P (XEXP (op1, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
          scratch_reg = force_mode (word_mode, scratch_reg);
          emit_move_insn (scratch_reg, XEXP (op1, 0));
          op1 = replace_equiv_address (op1, scratch_reg);
        }
      emit_insn (gen_rtx_SET (operand0, op1));
      return 1;
    }
  else if (scratch_reg
           && FP_REG_P (operand1)
           && (MEM_P (operand0)
               || (GET_CODE (operand0) == SUBREG
                   && MEM_P (XEXP (operand0, 0)))))
    {
      rtx op0 = operand0;

      if (GET_CODE (op0) == SUBREG)
        op0 = XEXP (op0, 0);

      if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op0, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op0, 0));
              op0 = replace_equiv_address (op0, scratch_reg);
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1949 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1950 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1952 /* Load memory address into SCRATCH_REG. */
1953 scratch_reg = force_mode (word_mode, scratch_reg);
1954 emit_move_insn (scratch_reg, XEXP (op0, 0));
1955 op0 = replace_equiv_address (op0, scratch_reg);
1957 emit_insn (gen_rtx_SET (op0, operand1));
1958 return 1;
1960 /* Handle secondary reloads for loads of FP registers from constant
1961 expressions by forcing the constant into memory. For the most part,
1962 this is only necessary for SImode and DImode.
1964 Use scratch_reg to hold the address of the memory location. */
1965 else if (scratch_reg
1966 && CONSTANT_P (operand1)
1967 && FP_REG_P (operand0))
1969 rtx const_mem, xoperands[2];
1971 if (operand1 == CONST0_RTX (mode))
1973 emit_insn (gen_rtx_SET (operand0, operand1));
1974 return 1;
1977 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1978 it in WORD_MODE regardless of what mode it was originally given
1979 to us. */
1980 scratch_reg = force_mode (word_mode, scratch_reg);
1982 /* Force the constant into memory and put the address of the
1983 memory location into scratch_reg. */
1984 const_mem = force_const_mem (mode, operand1);
1985 xoperands[0] = scratch_reg;
1986 xoperands[1] = XEXP (const_mem, 0);
1987 pa_emit_move_sequence (xoperands, Pmode, 0);
1989 /* Now load the destination register. */
1990 emit_insn (gen_rtx_SET (operand0,
1991 replace_equiv_address (const_mem, scratch_reg)));
1992 return 1;
1994 /* Handle secondary reloads for SAR. These occur when trying to load
1995 the SAR from memory or a constant. */
1996 else if (scratch_reg
1997 && GET_CODE (operand0) == REG
1998 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1999 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2000 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2002 /* D might not fit in 14 bits either; for such cases load D into
2003 scratch reg. */
2004 if (GET_CODE (operand1) == MEM
2005 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2007 /* We are reloading the address into the scratch register, so we
2008 want to make sure the scratch register is a full register. */
2009 scratch_reg = force_mode (word_mode, scratch_reg);
2011 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2012 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2013 0)),
2014 Pmode,
2015 XEXP (XEXP (operand1, 0),
2016 0),
2017 scratch_reg));
2019 /* Now we are going to load the scratch register from memory;
2020 we want to load it in the same width as the original MEM,
2021 which must be the same as the width of the ultimate destination,
2022 OPERAND0. */
2023 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2025 emit_move_insn (scratch_reg,
2026 replace_equiv_address (operand1, scratch_reg));
2028 else
2030 /* We want to load the scratch register using the same mode as
2031 the ultimate destination. */
2032 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2034 emit_move_insn (scratch_reg, operand1);
2037 /* And emit the insn to set the ultimate destination. We know that
2038 the scratch register has the same mode as the destination at this
2039 point. */
2040 emit_move_insn (operand0, scratch_reg);
2041 return 1;
2044 /* Handle the most common case: storing into a register. */
2045 if (register_operand (operand0, mode))
2047 /* Legitimize TLS symbol references. This happens for references
2048 that aren't a legitimate constant. */
2049 if (PA_SYMBOL_REF_TLS_P (operand1))
2050 operand1 = legitimize_tls_address (operand1);
2052 if (register_operand (operand1, mode)
2053 || (GET_CODE (operand1) == CONST_INT
2054 && pa_cint_ok_for_move (UINTVAL (operand1)))
2055 || (operand1 == CONST0_RTX (mode))
2056 || (GET_CODE (operand1) == HIGH
2057 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2058 /* Only `general_operands' can come here, so MEM is ok. */
2059 || GET_CODE (operand1) == MEM)
2061 /* Various sets are created during RTL generation which don't
2062 have the REG_POINTER flag correctly set. After the CSE pass,
2063 instruction recognition can fail if we don't consistently
2064 set this flag when performing register copies. This should
2065 also improve the opportunities for creating insns that use
2066 unscaled indexing. */
2067 if (REG_P (operand0) && REG_P (operand1))
2069 if (REG_POINTER (operand1)
2070 && !REG_POINTER (operand0)
2071 && !HARD_REGISTER_P (operand0))
2072 copy_reg_pointer (operand0, operand1);
2075 /* When MEMs are broken out, the REG_POINTER flag doesn't
2076 get set. In some cases, we can set the REG_POINTER flag
2077 from the declaration for the MEM. */
2078 if (REG_P (operand0)
2079 && GET_CODE (operand1) == MEM
2080 && !REG_POINTER (operand0))
2082 tree decl = MEM_EXPR (operand1);
2084 /* Set the register pointer flag and register alignment
2085 if the declaration for this memory reference is a
2086 pointer type. */
2087 if (decl)
2089 tree type;
2091 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2092 tree operand 1. */
2093 if (TREE_CODE (decl) == COMPONENT_REF)
2094 decl = TREE_OPERAND (decl, 1);
2096 type = TREE_TYPE (decl);
2097 type = strip_array_types (type);
2099 if (POINTER_TYPE_P (type))
2100 mark_reg_pointer (operand0, BITS_PER_UNIT);
2104 emit_insn (gen_rtx_SET (operand0, operand1));
2105 return 1;
2108 else if (GET_CODE (operand0) == MEM)
2110 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2111 && !(reload_in_progress || reload_completed))
2113 rtx temp = gen_reg_rtx (DFmode);
2115 emit_insn (gen_rtx_SET (temp, operand1));
2116 emit_insn (gen_rtx_SET (operand0, temp));
2117 return 1;
2119 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2121 /* Run this case quickly. */
2122 emit_insn (gen_rtx_SET (operand0, operand1));
2123 return 1;
2125 if (! (reload_in_progress || reload_completed))
2127 operands[0] = validize_mem (operand0);
2128 operands[1] = operand1 = force_reg (mode, operand1);
2132 /* Simplify the source if we need to.
2133 Note we do have to handle function labels here, even though we do
2134 not consider them legitimate constants. Loop optimizations can
2135 call the emit_move_xxx with one as a source. */
2136 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2137 || (GET_CODE (operand1) == HIGH
2138 && symbolic_operand (XEXP (operand1, 0), mode))
2139 || function_label_operand (operand1, VOIDmode)
2140 || tls_referenced_p (operand1))
2142 int ishighonly = 0;
2144 if (GET_CODE (operand1) == HIGH)
2146 ishighonly = 1;
2147 operand1 = XEXP (operand1, 0);
2149 if (symbolic_operand (operand1, mode))
2151 /* Argh. The assembler and linker can't handle arithmetic
2152 involving plabels.
2154 So we force the plabel into memory, load operand0 from
2155 the memory location, then add in the constant part. */
2156 if ((GET_CODE (operand1) == CONST
2157 && GET_CODE (XEXP (operand1, 0)) == PLUS
2158 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2159 VOIDmode))
2160 || function_label_operand (operand1, VOIDmode))
2162 rtx temp, const_part;
2164 /* Figure out what (if any) scratch register to use. */
2165 if (reload_in_progress || reload_completed)
2167 scratch_reg = scratch_reg ? scratch_reg : operand0;
2168 /* SCRATCH_REG will hold an address and maybe the actual
2169 data. We want it in WORD_MODE regardless of what mode it
2170 was originally given to us. */
2171 scratch_reg = force_mode (word_mode, scratch_reg);
2173 else if (flag_pic)
2174 scratch_reg = gen_reg_rtx (Pmode);
2176 if (GET_CODE (operand1) == CONST)
2178 /* Save away the constant part of the expression. */
2179 const_part = XEXP (XEXP (operand1, 0), 1);
2180 gcc_assert (GET_CODE (const_part) == CONST_INT);
2182 /* Force the function label into memory. */
2183 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2185 else
2187 /* No constant part. */
2188 const_part = NULL_RTX;
2190 /* Force the function label into memory. */
2191 temp = force_const_mem (mode, operand1);
2195 /* Get the address of the memory location. PIC-ify it if
2196 necessary. */
2197 temp = XEXP (temp, 0);
2198 if (flag_pic)
2199 temp = legitimize_pic_address (temp, mode, scratch_reg);
2201 /* Put the address of the memory location into our destination
2202 register. */
2203 operands[1] = temp;
2204 pa_emit_move_sequence (operands, mode, scratch_reg);
2206 /* Now load from the memory location into our destination
2207 register. */
2208 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2209 pa_emit_move_sequence (operands, mode, scratch_reg);
2211 /* And add back in the constant part. */
2212 if (const_part != NULL_RTX)
2213 expand_inc (operand0, const_part);
2215 return 1;
2218 if (flag_pic)
2220 rtx_insn *insn;
2221 rtx temp;
2223 if (reload_in_progress || reload_completed)
2225 temp = scratch_reg ? scratch_reg : operand0;
2226 /* TEMP will hold an address and maybe the actual
2227 data. We want it in WORD_MODE regardless of what mode it
2228 was originally given to us. */
2229 temp = force_mode (word_mode, temp);
2231 else
2232 temp = gen_reg_rtx (Pmode);
2234 /* Force (const (plus (symbol) (const_int))) to memory
2235 if the const_int will not fit in 14 bits. Although
2236 this requires a relocation, the instruction sequence
2237 needed to load the value is shorter. */
2238 if (GET_CODE (operand1) == CONST
2239 && GET_CODE (XEXP (operand1, 0)) == PLUS
2240 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2241 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2243 rtx x, m = force_const_mem (mode, operand1);
2245 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2246 x = replace_equiv_address (m, x);
2247 insn = emit_move_insn (operand0, x);
2249 else
2251 operands[1] = legitimize_pic_address (operand1, mode, temp);
2252 if (REG_P (operand0) && REG_P (operands[1]))
2253 copy_reg_pointer (operand0, operands[1]);
2254 insn = emit_move_insn (operand0, operands[1]);
2257 /* Put a REG_EQUAL note on this insn. */
2258 set_unique_reg_note (insn, REG_EQUAL, operand1);
2260 /* On the HPPA, references to data space are supposed to use dp,
2261 register 27, but showing it in the RTL inhibits various cse
2262 and loop optimizations. */
2263 else
2265 rtx temp, set;
2267 if (reload_in_progress || reload_completed)
2269 temp = scratch_reg ? scratch_reg : operand0;
2270 /* TEMP will hold an address and maybe the actual
2271 data. We want it in WORD_MODE regardless of what mode it
2272 was originally given to us. */
2273 temp = force_mode (word_mode, temp);
2275 else
2276 temp = gen_reg_rtx (mode);
2278 /* Loading a SYMBOL_REF into a register makes that register
2279 safe to be used as the base in an indexed address.
2281 Don't mark hard registers though. That loses. */
2282 if (GET_CODE (operand0) == REG
2283 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2284 mark_reg_pointer (operand0, BITS_PER_UNIT);
2285 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2286 mark_reg_pointer (temp, BITS_PER_UNIT);
2288 if (ishighonly)
2289 set = gen_rtx_SET (operand0, temp);
2290 else
2291 set = gen_rtx_SET (operand0,
2292 gen_rtx_LO_SUM (mode, temp, operand1));
2294 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2295 emit_insn (set);
2298 return 1;
2300 else if (tls_referenced_p (operand1))
2302 rtx tmp = operand1;
2303 rtx addend = NULL;
2305 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2307 addend = XEXP (XEXP (tmp, 0), 1);
2308 tmp = XEXP (XEXP (tmp, 0), 0);
2311 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2312 tmp = legitimize_tls_address (tmp);
2313 if (addend)
2315 tmp = gen_rtx_PLUS (mode, tmp, addend);
2316 tmp = force_operand (tmp, operands[0]);
2318 operands[1] = tmp;
2320 else if (GET_CODE (operand1) != CONST_INT
2321 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2323 rtx temp;
2324 rtx_insn *insn;
2325 rtx op1 = operand1;
2326 HOST_WIDE_INT value = 0;
2327 HOST_WIDE_INT insv = 0;
2328 int insert = 0;
2330 if (GET_CODE (operand1) == CONST_INT)
2331 value = INTVAL (operand1);
2333 if (TARGET_64BIT
2334 && GET_CODE (operand1) == CONST_INT
2335 && HOST_BITS_PER_WIDE_INT > 32
2336 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2338 HOST_WIDE_INT nval;
2340 /* Extract the low order 32 bits of the value and sign extend.
2341 If the new value is the same as the original value, we can
2342 use the original value as-is. If the new value is
2343 different, we use it and insert the most-significant 32-bits
2344 of the original value into the final result. */
2345 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2346 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2347 if (value != nval)
2349 #if HOST_BITS_PER_WIDE_INT > 32
2350 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2351 #endif
2352 insert = 1;
2353 value = nval;
2354 operand1 = GEN_INT (nval);
2358 if (reload_in_progress || reload_completed)
2359 temp = scratch_reg ? scratch_reg : operand0;
2360 else
2361 temp = gen_reg_rtx (mode);
2363 /* We don't directly split DImode constants on 32-bit targets
2364 because PLUS uses an 11-bit immediate and the insn sequence
2365 generated is not as efficient as the one using HIGH/LO_SUM. */
2366 if (GET_CODE (operand1) == CONST_INT
2367 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2368 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2369 && !insert)
2371 /* Directly break constant into high and low parts. This
2372 provides better optimization opportunities because various
2373 passes recognize constants split with PLUS but not LO_SUM.
2374 We use a 14-bit signed low part except when the addition
2375 of 0x4000 to the high part might change the sign of the
2376 high part. */
2377 HOST_WIDE_INT low = value & 0x3fff;
2378 HOST_WIDE_INT high = value & ~ 0x3fff;
2380 if (low >= 0x2000)
2382 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2383 high += 0x2000;
2384 else
2385 high += 0x4000;
2388 low = value - high;
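/* Illustrative example (editor's sketch, not from the original source):
   for value = 0x12347000, low starts as 0x3000, which exceeds the
   signed 14-bit range [-0x2000, 0x1fff], so high becomes
   0x12344000 + 0x4000 = 0x12348000 and the recomputed
   low = value - high = -0x1000 fits the 14-bit displacement. */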
2390 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2391 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2393 else
2395 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2396 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2399 insn = emit_move_insn (operands[0], operands[1]);
2401 /* Now insert the most significant 32 bits of the value
2402 into the register. When we don't have a second register
2403 available, it could take up to nine instructions to load
2404 a 64-bit integer constant. Prior to reload, we force
2405 constants that would take more than three instructions
2406 to load to the constant pool. During and after reload,
2407 we have to handle all possible values. */
2408 if (insert)
2410 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2411 register and the value to be inserted is outside the
2412 range that can be loaded with three depdi instructions. */
2413 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2415 operand1 = GEN_INT (insv);
2417 emit_insn (gen_rtx_SET (temp,
2418 gen_rtx_HIGH (mode, operand1)));
2419 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2420 if (mode == DImode)
2421 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2422 const0_rtx, temp));
2423 else
2424 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2425 const0_rtx, temp));
2427 else
2429 int len = 5, pos = 27;
2431 /* Insert the bits using the depdi instruction. */
2432 while (pos >= 0)
2434 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2435 HOST_WIDE_INT sign = v5 < 0;
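/* Illustrative note (editor's sketch): V5 sign extends the low five
   bits of INSV, e.g. (insv & 31) == 0x1f gives v5 = (15 - 16) = -1,
   while (insv & 31) == 0x07 gives v5 = (23 - 16) = 7. */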
2437 /* Left extend the insertion. */
2438 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2439 while (pos > 0 && (insv & 1) == sign)
2441 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2442 len += 1;
2443 pos -= 1;
2446 if (mode == DImode)
2447 insn = emit_insn (gen_insvdi (operand0,
2448 GEN_INT (len),
2449 GEN_INT (pos),
2450 GEN_INT (v5)));
2451 else
2452 insn = emit_insn (gen_insvsi (operand0,
2453 GEN_INT (len),
2454 GEN_INT (pos),
2455 GEN_INT (v5)));
2457 len = pos > 0 && pos < 5 ? pos : 5;
2458 pos -= len;
2463 set_unique_reg_note (insn, REG_EQUAL, op1);
2465 return 1;
2468 /* Now have insn-emit do whatever it normally does. */
2469 return 0;
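/* Editor's illustrative sketch, not part of the port: the low-order
   32-bit sign extension used above for 64-bit constants, restated with
   plain C types. Splitting VALUE into a sign-extended low part NVAL
   and high part INSV lets an insv-style deposit of INSV into bits
   32..63 of NVAL reconstruct VALUE. */

static long long
example_split_64bit_constant (long long value, long long *insv)
{
  /* Sign extend the low-order 32 bits, as in the nval computation. */
  long long nval = ((value & ((2LL << 31) - 1)) ^ (1LL << 31))
		   - (1LL << 31);

  /* Portable arithmetic shift of VALUE, as in the insv computation. */
  *insv = value >= 0 ? value >> 32 : ~(~value >> 32);

  /* Depositing *INSV over the upper half reconstructs the constant:
     (*insv << 32) | (nval & 0xffffffffLL) == value. */
  return nval;
}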
2472 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2473 it will need a link/runtime reloc). */
2476 pa_reloc_needed (tree exp)
2478 int reloc = 0;
2480 switch (TREE_CODE (exp))
2482 case ADDR_EXPR:
2483 return 1;
2485 case POINTER_PLUS_EXPR:
2486 case PLUS_EXPR:
2487 case MINUS_EXPR:
2488 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2489 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2490 break;
2492 CASE_CONVERT:
2493 case NON_LVALUE_EXPR:
2494 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2495 break;
2497 case CONSTRUCTOR:
2499 tree value;
2500 unsigned HOST_WIDE_INT ix;
2502 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2503 if (value)
2504 reloc |= pa_reloc_needed (value);
2506 break;
2508 case ERROR_MARK:
2509 break;
2511 default:
2512 break;
2514 return reloc;
2518 /* Return the best assembler insn template
2519 for moving operands[1] into operands[0] as a fullword. */
2520 const char *
2521 pa_singlemove_string (rtx *operands)
2523 HOST_WIDE_INT intval;
2525 if (GET_CODE (operands[0]) == MEM)
2526 return "stw %r1,%0";
2527 if (GET_CODE (operands[1]) == MEM)
2528 return "ldw %1,%0";
2529 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2531 long i;
2533 gcc_assert (GET_MODE (operands[1]) == SFmode);
2535 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2536 bit pattern. */
2537 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2539 operands[1] = GEN_INT (i);
2540 /* Fall through to CONST_INT case. */
2542 if (GET_CODE (operands[1]) == CONST_INT)
2544 intval = INTVAL (operands[1]);
2546 if (VAL_14_BITS_P (intval))
2547 return "ldi %1,%0";
2548 else if ((intval & 0x7ff) == 0)
2549 return "ldil L'%1,%0";
2550 else if (pa_zdepi_cint_p (intval))
2551 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2552 else
2553 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2555 return "copy %1,%0";
2559 /* Compute position (in OP[1]) and width (in OP[2])
2560 useful for copying IMM to a register using the zdepi
2561 instructions. Store the immediate value to insert in OP[0]. */
2562 static void
2563 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2565 int lsb, len;
2567 /* Find the least significant set bit in IMM. */
2568 for (lsb = 0; lsb < 32; lsb++)
2570 if ((imm & 1) != 0)
2571 break;
2572 imm >>= 1;
2575 /* Choose variants based on *sign* of the 5-bit field. */
2576 if ((imm & 0x10) == 0)
2577 len = (lsb <= 28) ? 4 : 32 - lsb;
2578 else
2580 /* Find the width of the bitstring in IMM. */
2581 for (len = 5; len < 32 - lsb; len++)
2583 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2584 break;
2587 /* Sign extend IMM as a 5-bit value. */
2588 imm = (imm & 0xf) - 0x10;
2591 op[0] = imm;
2592 op[1] = 31 - lsb;
2593 op[2] = len;
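/* Editor's illustrative check, not part of the port: reconstruct the
   constant from the operands computed above. OP[0] is the 5-bit
   immediate in two's complement, OP[1] the PA position 31 - lsb, and
   OP[2] the field length; zdepi sign extends the immediate to the
   field length and deposits it LSB bits up from the bottom. */

static unsigned int
example_zdepwi_value (const unsigned *op)
{
  unsigned int lsb = 31 - op[1];
  unsigned int mask = op[2] >= 32 ? ~0U : (1U << op[2]) - 1;

  /* Masking the two's complement immediate to the field length yields
     the sign-extended field, e.g. op = {-1, 27, 7} for 0x7f0. */
  return (op[0] & mask) << lsb;
}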
2596 /* Compute position (in OP[1]) and width (in OP[2])
2597 useful for copying IMM to a register using the depdi,z
2598 instructions. Store the immediate value to insert in OP[0]. */
2600 static void
2601 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2603 int lsb, len, maxlen;
2605 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2607 /* Find the least significant set bit in IMM. */
2608 for (lsb = 0; lsb < maxlen; lsb++)
2610 if ((imm & 1) != 0)
2611 break;
2612 imm >>= 1;
2615 /* Choose variants based on *sign* of the 5-bit field. */
2616 if ((imm & 0x10) == 0)
2617 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2618 else
2620 /* Find the width of the bitstring in IMM. */
2621 for (len = 5; len < maxlen - lsb; len++)
2623 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2624 break;
2627 /* Extend length if host is narrow and IMM is negative. */
2628 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2629 len += 32;
2631 /* Sign extend IMM as a 5-bit value. */
2632 imm = (imm & 0xf) - 0x10;
2635 op[0] = imm;
2636 op[1] = 63 - lsb;
2637 op[2] = len;
2640 /* Output assembler code to perform a doubleword move insn
2641 with operands OPERANDS. */
2643 const char *
2644 pa_output_move_double (rtx *operands)
2646 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2647 rtx latehalf[2];
2648 rtx addreg0 = 0, addreg1 = 0;
2649 int highonly = 0;
2651 /* First classify both operands. */
2653 if (REG_P (operands[0]))
2654 optype0 = REGOP;
2655 else if (offsettable_memref_p (operands[0]))
2656 optype0 = OFFSOP;
2657 else if (GET_CODE (operands[0]) == MEM)
2658 optype0 = MEMOP;
2659 else
2660 optype0 = RNDOP;
2662 if (REG_P (operands[1]))
2663 optype1 = REGOP;
2664 else if (CONSTANT_P (operands[1]))
2665 optype1 = CNSTOP;
2666 else if (offsettable_memref_p (operands[1]))
2667 optype1 = OFFSOP;
2668 else if (GET_CODE (operands[1]) == MEM)
2669 optype1 = MEMOP;
2670 else
2671 optype1 = RNDOP;
2673 /* Check for the cases that the operand constraints are not
2674 supposed to allow to happen. */
2675 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2677 /* Handle copies between general and floating registers. */
2679 if (optype0 == REGOP && optype1 == REGOP
2680 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2682 if (FP_REG_P (operands[0]))
2684 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2685 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2686 return "{fldds|fldd} -16(%%sp),%0";
2688 else
2690 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2691 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2692 return "{ldws|ldw} -12(%%sp),%R0";
2696 /* Handle auto decrementing and incrementing loads and stores
2697 specifically, since the structure of the function doesn't work
2698 for them without major modification. Do it better when we teach
2699 this port about the general inc/dec addressing of the PA.
2700 (This was written by tege. Chide him if it doesn't work.) */
2702 if (optype0 == MEMOP)
2704 /* We have to output the address syntax ourselves, since print_operand
2705 doesn't deal with the addresses we want to use. Fix this later. */
2707 rtx addr = XEXP (operands[0], 0);
2708 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2710 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2712 operands[0] = XEXP (addr, 0);
2713 gcc_assert (GET_CODE (operands[1]) == REG
2714 && GET_CODE (operands[0]) == REG);
2716 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2718 /* No overlap between high target register and address
2719 register. (We do this in a non-obvious way to
2720 save a register file writeback) */
2721 if (GET_CODE (addr) == POST_INC)
2722 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2723 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2725 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2727 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2729 operands[0] = XEXP (addr, 0);
2730 gcc_assert (GET_CODE (operands[1]) == REG
2731 && GET_CODE (operands[0]) == REG);
2733 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2734 /* No overlap between high target register and address
2735 register. (We do this in a non-obvious way to save a
2736 register file writeback) */
2737 if (GET_CODE (addr) == PRE_INC)
2738 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2739 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2742 if (optype1 == MEMOP)
2744 /* We have to output the address syntax ourselves, since print_operand
2745 doesn't deal with the addresses we want to use. Fix this later. */
2747 rtx addr = XEXP (operands[1], 0);
2748 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2750 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2752 operands[1] = XEXP (addr, 0);
2753 gcc_assert (GET_CODE (operands[0]) == REG
2754 && GET_CODE (operands[1]) == REG);
2756 if (!reg_overlap_mentioned_p (high_reg, addr))
2758 /* No overlap between high target register and address
2759 register. (We do this in a non-obvious way to
2760 save a register file writeback) */
2761 if (GET_CODE (addr) == POST_INC)
2762 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2763 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2765 else
2767 /* This is an undefined situation. We should load into the
2768 address register *and* update that register. Probably
2769 we don't need to handle this at all. */
2770 if (GET_CODE (addr) == POST_INC)
2771 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2772 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2775 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2777 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2779 operands[1] = XEXP (addr, 0);
2780 gcc_assert (GET_CODE (operands[0]) == REG
2781 && GET_CODE (operands[1]) == REG);
2783 if (!reg_overlap_mentioned_p (high_reg, addr))
2785 /* No overlap between high target register and address
2786 register. (We do this in a non-obvious way to
2787 save a register file writeback) */
2788 if (GET_CODE (addr) == PRE_INC)
2789 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2790 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2792 else
2794 /* This is an undefined situation. We should load into the
2795 address register *and* update that register. Probably
2796 we don't need to handle this at all. */
2797 if (GET_CODE (addr) == PRE_INC)
2798 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2799 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2802 else if (GET_CODE (addr) == PLUS
2803 && GET_CODE (XEXP (addr, 0)) == MULT)
2805 rtx xoperands[4];
2807 /* Load address into left half of destination register. */
2808 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2809 xoperands[1] = XEXP (addr, 1);
2810 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2811 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2812 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2813 xoperands);
2814 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2816 else if (GET_CODE (addr) == PLUS
2817 && REG_P (XEXP (addr, 0))
2818 && REG_P (XEXP (addr, 1)))
2820 rtx xoperands[3];
2822 /* Load address into left half of destination register. */
2823 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2824 xoperands[1] = XEXP (addr, 0);
2825 xoperands[2] = XEXP (addr, 1);
2826 output_asm_insn ("{addl|add,l} %1,%2,%0",
2827 xoperands);
2828 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2832 /* If an operand is an unoffsettable memory ref, find a register
2833 we can increment temporarily to make it refer to the second word. */
2835 if (optype0 == MEMOP)
2836 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2838 if (optype1 == MEMOP)
2839 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2841 /* Ok, we can do one word at a time.
2842 Normally we do the low-numbered word first.
2844 In either case, set up in LATEHALF the operands to use
2845 for the high-numbered word and in some cases alter the
2846 operands in OPERANDS to be suitable for the low-numbered word. */
2848 if (optype0 == REGOP)
2849 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2850 else if (optype0 == OFFSOP)
2851 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2852 else
2853 latehalf[0] = operands[0];
2855 if (optype1 == REGOP)
2856 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2857 else if (optype1 == OFFSOP)
2858 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2859 else if (optype1 == CNSTOP)
2861 if (GET_CODE (operands[1]) == HIGH)
2863 operands[1] = XEXP (operands[1], 0);
2864 highonly = 1;
2866 split_double (operands[1], &operands[1], &latehalf[1]);
2868 else
2869 latehalf[1] = operands[1];
2871 /* If the first move would clobber the source of the second one,
2872 do them in the other order.
2874 This can happen in two cases:
2876 mem -> register where the first half of the destination register
2877 is the same register used in the memory's address. Reload
2878 can create such insns.
2880 mem in this case will be either register indirect or register
2881 indirect plus a valid offset.
2883 register -> register move where REGNO(dst) == REGNO(src + 1)
2884 someone (Tim/Tege?) claimed this can happen for parameter loads.
2886 Handle mem -> register case first. */
2887 if (optype0 == REGOP
2888 && (optype1 == MEMOP || optype1 == OFFSOP)
2889 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2891 /* Do the late half first. */
2892 if (addreg1)
2893 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2894 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2896 /* Then clobber. */
2897 if (addreg1)
2898 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2899 return pa_singlemove_string (operands);
2902 /* Now handle register -> register case. */
2903 if (optype0 == REGOP && optype1 == REGOP
2904 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2906 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2907 return pa_singlemove_string (operands);
2910 /* Normal case: do the two words, low-numbered first. */
2912 output_asm_insn (pa_singlemove_string (operands), operands);
2914 /* Make any unoffsettable addresses point at high-numbered word. */
2915 if (addreg0)
2916 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2917 if (addreg1)
2918 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2920 /* Do high-numbered word. */
2921 if (highonly)
2922 output_asm_insn ("ldil L'%1,%0", latehalf);
2923 else
2924 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2926 /* Undo the adds we just did. */
2927 if (addreg0)
2928 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2929 if (addreg1)
2930 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2932 return "";
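/* Editor's illustrative sketch, not part of the port: why the
   register -> register case above moves the late half first when
   REGNO (dst) == REGNO (src) + 1. With a tiny register-file model,
   copying the low word first would clobber the source of the high
   word. Returns nonzero when the order had to be reversed. */

static int
example_double_move_order (unsigned regs[], unsigned dst, unsigned src)
{
  if (dst == src + 1)
    {
      /* regs[src + 1] is regs[dst]; read it before it is overwritten. */
      regs[dst + 1] = regs[src + 1];
      regs[dst] = regs[src];
      return 1;
    }

  /* Normal order: low-numbered word first. */
  regs[dst] = regs[src];
  regs[dst + 1] = regs[src + 1];
  return 0;
}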
2935 const char *
2936 pa_output_fp_move_double (rtx *operands)
2938 if (FP_REG_P (operands[0]))
2940 if (FP_REG_P (operands[1])
2941 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2942 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2943 else
2944 output_asm_insn ("fldd%F1 %1,%0", operands);
2946 else if (FP_REG_P (operands[1]))
2948 output_asm_insn ("fstd%F0 %1,%0", operands);
2950 else
2952 rtx xoperands[2];
2954 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2956 /* This is a pain. You have to be prepared to deal with an
2957 arbitrary address here including pre/post increment/decrement.
2959 So avoid this in the MD. */
2960 gcc_assert (GET_CODE (operands[0]) == REG);
2962 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2963 xoperands[0] = operands[0];
2964 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2966 return "";
2969 /* Return a REG that occurs in ADDR with coefficient 1.
2970 ADDR can be effectively incremented by incrementing REG. */
2972 static rtx
2973 find_addr_reg (rtx addr)
2975 while (GET_CODE (addr) == PLUS)
2977 if (GET_CODE (XEXP (addr, 0)) == REG)
2978 addr = XEXP (addr, 0);
2979 else if (GET_CODE (XEXP (addr, 1)) == REG)
2980 addr = XEXP (addr, 1);
2981 else if (CONSTANT_P (XEXP (addr, 0)))
2982 addr = XEXP (addr, 1);
2983 else if (CONSTANT_P (XEXP (addr, 1)))
2984 addr = XEXP (addr, 0);
2985 else
2986 gcc_unreachable ();
2988 gcc_assert (GET_CODE (addr) == REG);
2989 return addr;
2992 /* Emit code to perform a block move.
2994 OPERANDS[0] is the destination pointer as a REG, clobbered.
2995 OPERANDS[1] is the source pointer as a REG, clobbered.
2996 OPERANDS[2] is a register for temporary storage.
2997 OPERANDS[3] is a register for temporary storage.
2998 OPERANDS[4] is the size as a CONST_INT
2999 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3000 OPERANDS[6] is another temporary register. */
3002 const char *
3003 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3005 int align = INTVAL (operands[5]);
3006 unsigned long n_bytes = INTVAL (operands[4]);
3008 /* We can't move more than a word at a time because the PA
3009 has no integer move insns for anything wider. (Could use fp mem ops?) */
3010 if (align > (TARGET_64BIT ? 8 : 4))
3011 align = (TARGET_64BIT ? 8 : 4);
3013 /* Note that we know each loop below will execute at least twice
3014 (else we would have open-coded the copy). */
3015 switch (align)
3017 case 8:
3018 /* Pre-adjust the loop counter. */
3019 operands[4] = GEN_INT (n_bytes - 16);
3020 output_asm_insn ("ldi %4,%2", operands);
3022 /* Copying loop. */
3023 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3024 output_asm_insn ("ldd,ma 8(%1),%6", operands);
3025 output_asm_insn ("std,ma %3,8(%0)", operands);
3026 output_asm_insn ("addib,>= -16,%2,.-12", operands);
3027 output_asm_insn ("std,ma %6,8(%0)", operands);
3029 /* Handle the residual. There could be up to 15 bytes of
3030 residual to copy! */
3031 if (n_bytes % 16 != 0)
3033 operands[4] = GEN_INT (n_bytes % 8);
3034 if (n_bytes % 16 >= 8)
3035 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3036 if (n_bytes % 8 != 0)
3037 output_asm_insn ("ldd 0(%1),%6", operands);
3038 if (n_bytes % 16 >= 8)
3039 output_asm_insn ("std,ma %3,8(%0)", operands);
3040 if (n_bytes % 8 != 0)
3041 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3043 return "";
3045 case 4:
3046 /* Pre-adjust the loop counter. */
3047 operands[4] = GEN_INT (n_bytes - 8);
3048 output_asm_insn ("ldi %4,%2", operands);
3050 /* Copying loop. */
3051 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3052 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3053 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3054 output_asm_insn ("addib,>= -8,%2,.-12", operands);
3055 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3057 /* Handle the residual. There could be up to 7 bytes of
3058 residual to copy! */
3059 if (n_bytes % 8 != 0)
3061 operands[4] = GEN_INT (n_bytes % 4);
3062 if (n_bytes % 8 >= 4)
3063 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3064 if (n_bytes % 4 != 0)
3065 output_asm_insn ("ldw 0(%1),%6", operands);
3066 if (n_bytes % 8 >= 4)
3067 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3068 if (n_bytes % 4 != 0)
3069 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3071 return "";
3073 case 2:
3074 /* Pre-adjust the loop counter. */
3075 operands[4] = GEN_INT (n_bytes - 4);
3076 output_asm_insn ("ldi %4,%2", operands);
3078 /* Copying loop. */
3079 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3080 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3081 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3082 output_asm_insn ("addib,>= -4,%2,.-12", operands);
3083 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3085 /* Handle the residual. */
3086 if (n_bytes % 4 != 0)
3088 if (n_bytes % 4 >= 2)
3089 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3090 if (n_bytes % 2 != 0)
3091 output_asm_insn ("ldb 0(%1),%6", operands);
3092 if (n_bytes % 4 >= 2)
3093 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3094 if (n_bytes % 2 != 0)
3095 output_asm_insn ("stb %6,0(%0)", operands);
3097 return "";
3099 case 1:
3100 /* Pre-adjust the loop counter. */
3101 operands[4] = GEN_INT (n_bytes - 2);
3102 output_asm_insn ("ldi %4,%2", operands);
3104 /* Copying loop. */
3105 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3106 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3107 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3108 output_asm_insn ("addib,>= -2,%2,.-12", operands);
3109 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3111 /* Handle the residual. */
3112 if (n_bytes % 2 != 0)
3114 output_asm_insn ("ldb 0(%1),%3", operands);
3115 output_asm_insn ("stb %3,0(%0)", operands);
3117 return "";
3119 default:
3120 gcc_unreachable ();
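/* Editor's illustrative check, not part of the port: byte accounting
   for the 4-byte aligned case above. The copying loop moves two words
   per iteration, the residual code moves one more word when at least
   four bytes remain, and stby finishes the last one to three bytes,
   so the counts always sum back to N_BYTES. */

static unsigned long
example_block_move_bytes (unsigned long n_bytes)
{
  unsigned long copied = n_bytes - n_bytes % 8;	/* copying loop */

  if (n_bytes % 8 >= 4)
    copied += 4;		/* one residual word */
  copied += n_bytes % 4;	/* stby handles 1-3 trailing bytes */

  return copied;		/* always equals n_bytes */
}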
3124 /* Count the number of insns necessary to handle this block move.
3126 Basic structure is the same as emit_block_move, except that we
3127 count insns rather than emit them. */
3129 static int
3130 compute_cpymem_length (rtx_insn *insn)
3132 rtx pat = PATTERN (insn);
3133 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3134 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3135 unsigned int n_insns = 0;
3137 /* We can't move more than a word at a time because the PA
3138 has no integer move insns for anything wider. (Could use fp mem ops?) */
3139 if (align > (TARGET_64BIT ? 8 : 4))
3140 align = (TARGET_64BIT ? 8 : 4);
3142 /* The basic copying loop. */
3143 n_insns = 6;
3145 /* Residuals. */
3146 if (n_bytes % (2 * align) != 0)
3148 if ((n_bytes % (2 * align)) >= align)
3149 n_insns += 2;
3151 if ((n_bytes % align) != 0)
3152 n_insns += 2;
3155 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3156 return n_insns * 4;
3159 /* Emit code to perform a block clear.
3161 OPERANDS[0] is the destination pointer as a REG, clobbered.
3162 OPERANDS[1] is a register for temporary storage.
3163 OPERANDS[2] is the size as a CONST_INT
3164 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3166 const char *
3167 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3169 int align = INTVAL (operands[3]);
3170 unsigned long n_bytes = INTVAL (operands[2]);
3172 /* We can't clear more than a word at a time because the PA
3173 has no integer move insns for anything wider. */
3174 if (align > (TARGET_64BIT ? 8 : 4))
3175 align = (TARGET_64BIT ? 8 : 4);
3177 /* Note that we know each loop below will execute at least twice
3178 (else we would have open-coded the copy). */
3179 switch (align)
3181 case 8:
3182 /* Pre-adjust the loop counter. */
3183 operands[2] = GEN_INT (n_bytes - 16);
3184 output_asm_insn ("ldi %2,%1", operands);
3186 /* Loop. */
3187 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3188 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3189 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3191 /* Handle the residual. There could be up to 15 bytes of
3192 residual to clear! */
3193 if (n_bytes % 16 != 0)
3195 operands[2] = GEN_INT (n_bytes % 8);
3196 if (n_bytes % 16 >= 8)
3197 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3198 if (n_bytes % 8 != 0)
3199 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3201 return "";
3203 case 4:
3204 /* Pre-adjust the loop counter. */
3205 operands[2] = GEN_INT (n_bytes - 8);
3206 output_asm_insn ("ldi %2,%1", operands);
3208 /* Loop. */
3209 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3210 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3211 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3213 /* Handle the residual. There could be up to 7 bytes of
3214 residual to clear! */
3215 if (n_bytes % 8 != 0)
3217 operands[2] = GEN_INT (n_bytes % 4);
3218 if (n_bytes % 8 >= 4)
3219 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3220 if (n_bytes % 4 != 0)
3221 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3223 return "";
3225 case 2:
3226 /* Pre-adjust the loop counter. */
3227 operands[2] = GEN_INT (n_bytes - 4);
3228 output_asm_insn ("ldi %2,%1", operands);
3230 /* Loop. */
3231 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3232 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3233 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3235 /* Handle the residual. */
3236 if (n_bytes % 4 != 0)
3238 if (n_bytes % 4 >= 2)
3239 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3240 if (n_bytes % 2 != 0)
3241 output_asm_insn ("stb %%r0,0(%0)", operands);
3243 return "";
3245 case 1:
3246 /* Pre-adjust the loop counter. */
3247 operands[2] = GEN_INT (n_bytes - 2);
3248 output_asm_insn ("ldi %2,%1", operands);
3250 /* Loop. */
3251 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3252 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3253 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3255 /* Handle the residual. */
3256 if (n_bytes % 2 != 0)
3257 output_asm_insn ("stb %%r0,0(%0)", operands);
3259 return "";
3261 default:
3262 gcc_unreachable ();
3266 /* Count the number of insns necessary to handle this block clear.
3268 Basic structure is the same as emit_block_move, except that we
3269 count insns rather than emit them. */
3271 static int
3272 compute_clrmem_length (rtx_insn *insn)
3274 rtx pat = PATTERN (insn);
3275 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3276 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3277 unsigned int n_insns = 0;
3279 /* We can't clear more than a word at a time because the PA
3280 has no integer move insns for anything wider. */
3281 if (align > (TARGET_64BIT ? 8 : 4))
3282 align = (TARGET_64BIT ? 8 : 4);
3284 /* The basic loop. */
3285 n_insns = 4;
3287 /* Residuals. */
3288 if (n_bytes % (2 * align) != 0)
3290 if ((n_bytes % (2 * align)) >= align)
3291 n_insns++;
3293 if ((n_bytes % align) != 0)
3294 n_insns++;
3297 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3298 return n_insns * 4;
3302 const char *
3303 pa_output_and (rtx *operands)
3305 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3307 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3308 int ls0, ls1, ms0, p, len;
3310 for (ls0 = 0; ls0 < 32; ls0++)
3311 if ((mask & (1 << ls0)) == 0)
3312 break;
3314 for (ls1 = ls0; ls1 < 32; ls1++)
3315 if ((mask & (1 << ls1)) != 0)
3316 break;
3318 for (ms0 = ls1; ms0 < 32; ms0++)
3319 if ((mask & (1 << ms0)) == 0)
3320 break;
3322 gcc_assert (ms0 == 32);
3324 if (ls1 == 32)
3326 len = ls0;
3328 gcc_assert (len);
3330 operands[2] = GEN_INT (len);
3331 return "{extru|extrw,u} %1,31,%2,%0";
3333 else
3335 /* We could use this `depi' for the case above as well, but `depi'
3336 requires one more register file access than an `extru'. */
3338 p = 31 - ls0;
3339 len = ls1 - ls0;
3341 operands[2] = GEN_INT (p);
3342 operands[3] = GEN_INT (len);
3343 return "{depi|depwi} 0,%2,%3,%0";
3346 else
3347 return "and %1,%2,%0";
3350 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3351 storing the result in operands[0]. */
3352 const char *
3353 pa_output_64bit_and (rtx *operands)
3355 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3357 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3358 int ls0, ls1, ms0, p, len;
3360 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3361 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3362 break;
3364 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3365 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3366 break;
3368 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3369 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3370 break;
3372 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3374 if (ls1 == HOST_BITS_PER_WIDE_INT)
3376 len = ls0;
3378 gcc_assert (len);
3380 operands[2] = GEN_INT (len);
3381 return "extrd,u %1,63,%2,%0";
3383 else
3385 /* We could use this `depdi' for the case above as well, but `depdi'
3386 requires one more register file access than an `extrd,u'. */
3388 p = 63 - ls0;
3389 len = ls1 - ls0;
3391 operands[2] = GEN_INT (p);
3392 operands[3] = GEN_INT (len);
3393 return "depdi 0,%2,%3,%0";
3396 else
3397 return "and %1,%2,%0";
3400 const char *
3401 pa_output_ior (rtx *operands)
3403 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3404 int bs0, bs1, p, len;
3406 if (INTVAL (operands[2]) == 0)
3407 return "copy %1,%0";
3409 for (bs0 = 0; bs0 < 32; bs0++)
3410 if ((mask & (1 << bs0)) != 0)
3411 break;
3413 for (bs1 = bs0; bs1 < 32; bs1++)
3414 if ((mask & (1 << bs1)) == 0)
3415 break;
3417 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3419 p = 31 - bs0;
3420 len = bs1 - bs0;
3422 operands[2] = GEN_INT (p);
3423 operands[3] = GEN_INT (len);
3424 return "{depi|depwi} -1,%2,%3,%0";
3427 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3428 storing the result in operands[0]. */
3429 const char *
3430 pa_output_64bit_ior (rtx *operands)
3432 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3433 int bs0, bs1, p, len;
3435 if (INTVAL (operands[2]) == 0)
3436 return "copy %1,%0";
3438 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3439 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3440 break;
3442 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3443 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3444 break;
3446 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3447 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3449 p = 63 - bs0;
3450 len = bs1 - bs0;
3452 operands[2] = GEN_INT (p);
3453 operands[3] = GEN_INT (len);
3454 return "depdi -1,%2,%3,%0";
3457 /* Target hook for assembling integer objects. This code handles
3458 aligned SI and DI integers specially since function references
3459 must be preceded by P%. */
3461 static bool
3462 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3464 bool result;
3465 tree decl = NULL;
3467 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3468 assemble_external and set the SYMBOL_REF_DECL to NULL before
3469 calling output_addr_const. Otherwise, it may call assemble_external
3470 in the midst of outputting the assembler code for the SYMBOL_REF.
3471 We restore the SYMBOL_REF_DECL after the output is done. */
3472 if (GET_CODE (x) == SYMBOL_REF)
3474 decl = SYMBOL_REF_DECL (x);
3475 if (decl)
3477 assemble_external (decl);
3478 SET_SYMBOL_REF_DECL (x, NULL);
3482 if (size == UNITS_PER_WORD
3483 && aligned_p
3484 && function_label_operand (x, VOIDmode))
3486 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3488 /* We don't want an OPD when generating fast indirect calls. */
3489 if (!TARGET_FAST_INDIRECT_CALLS)
3490 fputs ("P%", asm_out_file);
3492 output_addr_const (asm_out_file, x);
3493 fputc ('\n', asm_out_file);
3494 result = true;
3496 else
3497 result = default_assemble_integer (x, size, aligned_p);
3499 if (decl)
3500 SET_SYMBOL_REF_DECL (x, decl);
3502 return result;
3505 /* Output an ascii string. */
3506 void
3507 pa_output_ascii (FILE *file, const char *p, int size)
3509 int i;
3510 int chars_output;
3511 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3513 /* The HP assembler can only take strings of 256 characters at one
3514 time. This is a limitation on input line length, *not* the
3515 length of the string. Sigh. Even worse, it seems that the
3516 restriction is in number of input characters (see \xnn &
3517 \whatever). So we have to do this very carefully. */
3519 fputs ("\t.STRING \"", file);
3521 chars_output = 0;
3522 for (i = 0; i < size; i += 4)
3524 int co = 0;
3525 int io = 0;
3526 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3528 unsigned int c = (unsigned char) p[i + io];
3530 if (c == '\"' || c == '\\')
3531 partial_output[co++] = '\\';
3532 if (c >= ' ' && c < 0177)
3533 partial_output[co++] = c;
3534 else
3536 unsigned int hexd;
3537 partial_output[co++] = '\\';
3538 partial_output[co++] = 'x';
3539 hexd = c / 16 - 0 + '0';
3540 if (hexd > '9')
3541 hexd -= '9' - 'a' + 1;
3542 partial_output[co++] = hexd;
3543 hexd = c % 16 - 0 + '0';
3544 if (hexd > '9')
3545 hexd -= '9' - 'a' + 1;
3546 partial_output[co++] = hexd;
3549 if (chars_output + co > 243)
3551 fputs ("\"\n\t.STRING \"", file);
3552 chars_output = 0;
3554 fwrite (partial_output, 1, (size_t) co, file);
3555 chars_output += co;
3556 co = 0;
3558 fputs ("\"\n", file);
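/* Editor's illustrative sketch, not part of the port: the escaping
   rules above factored into a helper. Writes at most four bytes into
   BUF and returns the count, mirroring the partial_output logic. */

static int
example_ascii_escape (unsigned char *buf, unsigned int c)
{
  int co = 0;

  if (c == '\"' || c == '\\')
    buf[co++] = '\\';
  if (c >= ' ' && c < 0177)
    buf[co++] = c;
  else
    {
      unsigned int hexd;

      buf[co++] = '\\';
      buf[co++] = 'x';
      hexd = c / 16 + '0';
      if (hexd > '9')
	hexd -= '9' - 'a' + 1;	/* skip from '9' up to 'a' */
      buf[co++] = hexd;
      hexd = c % 16 + '0';
      if (hexd > '9')
	hexd -= '9' - 'a' + 1;
      buf[co++] = hexd;
    }

  return co;
}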
3561 /* Try to rewrite floating point comparisons & branches to avoid
3562 useless add,tr insns.
3564 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3565 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3566 first attempt to remove useless add,tr insns. It is zero
3567 for the second pass as reorg sometimes leaves bogus REG_DEAD
3568 notes lying around.
3570 When CHECK_NOTES is zero we can only eliminate add,tr insns
3571 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3572 instructions. */
3573 static void
3574 remove_useless_addtr_insns (int check_notes)
3576 rtx_insn *insn;
3577 static int pass = 0;
3579 /* This is fairly cheap, so always run it when optimizing. */
3580 if (optimize > 0)
3582 int fcmp_count = 0;
3583 int fbranch_count = 0;
3585 /* Walk all the insns in this function looking for fcmp & fbranch
3586 instructions. Keep track of how many of each we find. */
3587 for (insn = get_insns (); insn; insn = next_insn (insn))
3589 rtx tmp;
3591 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3592 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3593 continue;
3595 tmp = PATTERN (insn);
3597 /* It must be a set. */
3598 if (GET_CODE (tmp) != SET)
3599 continue;
3601 /* If the destination is CCFP, then we've found an fcmp insn. */
3602 tmp = SET_DEST (tmp);
3603 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3605 fcmp_count++;
3606 continue;
3609 tmp = PATTERN (insn);
3610 /* If this is an fbranch instruction, bump the fbranch counter. */
3611 if (GET_CODE (tmp) == SET
3612 && SET_DEST (tmp) == pc_rtx
3613 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3614 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3615 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3616 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3618 fbranch_count++;
3619 continue;
3624 /* Find all floating point compare + branch insns. If possible,
3625 reverse the comparison & the branch to avoid add,tr insns. */
3626 for (insn = get_insns (); insn; insn = next_insn (insn))
3628 rtx tmp;
3629 rtx_insn *next;
3631 /* Ignore anything that isn't an INSN. */
3632 if (! NONJUMP_INSN_P (insn))
3633 continue;
3635 tmp = PATTERN (insn);
3637 /* It must be a set. */
3638 if (GET_CODE (tmp) != SET)
3639 continue;
3641 /* The destination must be CCFP, which is register zero. */
3642 tmp = SET_DEST (tmp);
3643 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3644 continue;
3646 /* INSN should be a set of CCFP.
3648 See if the result of this insn is used in a reversed FP
3649 conditional branch. If so, reverse our condition and
3650 the branch. Doing so avoids useless add,tr insns. */
3651 next = next_insn (insn);
3652 while (next)
3654 /* Jumps, calls and labels stop our search. */
3655 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3656 break;
3658 /* As does another fcmp insn. */
3659 if (NONJUMP_INSN_P (next)
3660 && GET_CODE (PATTERN (next)) == SET
3661 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3662 && REGNO (SET_DEST (PATTERN (next))) == 0)
3663 break;
3665 next = next_insn (next);
3668 /* Is NEXT_INSN a branch? */
3669 if (next && JUMP_P (next))
3671 rtx pattern = PATTERN (next);
3673 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3674 and CCFP dies, then reverse our conditional and the branch
3675 to avoid the add,tr. */
3676 if (GET_CODE (pattern) == SET
3677 && SET_DEST (pattern) == pc_rtx
3678 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3679 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3680 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3681 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3682 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3683 && (fcmp_count == fbranch_count
3684 || (check_notes
3685 && find_regno_note (next, REG_DEAD, 0))))
3687 /* Reverse the branch. */
3688 tmp = XEXP (SET_SRC (pattern), 1);
3689 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3690 XEXP (SET_SRC (pattern), 2) = tmp;
3691 INSN_CODE (next) = -1;
3693 /* Reverse our condition. */
3694 tmp = PATTERN (insn);
3695 PUT_CODE (XEXP (tmp, 1),
3696 (reverse_condition_maybe_unordered
3697 (GET_CODE (XEXP (tmp, 1)))));
3703 pass = !pass;
3707 /* You may have trouble believing this, but this is the 32 bit HP-PA
3708 stack layout. Wow.
3710 Offset Contents
3712 Variable arguments (optional; any number may be allocated)
3714 SP-(4*(N+9)) arg word N
3716 SP-56 arg word 5
3717 SP-52 arg word 4
3719 Fixed arguments (must be allocated; may remain unused)
3721 SP-48 arg word 3
3722 SP-44 arg word 2
3723 SP-40 arg word 1
3724 SP-36 arg word 0
3726 Frame Marker
3728 SP-32 External Data Pointer (DP)
3729 SP-28 External sr4
3730 SP-24 External/stub RP (RP')
3731 SP-20 Current RP
3732 SP-16 Static Link
3733 SP-12 Clean up
3734 SP-8 Calling Stub RP (RP'')
3735 SP-4 Previous SP
3737 Top of Frame
3739 SP-0 Stack Pointer (points to next available address)
3743 /* This function saves registers as follows. Registers marked with ' are
3744 this function's registers (as opposed to the previous function's).
3745 If a frame_pointer isn't needed, r4 is saved as a general register;
3746 the space for the frame pointer is still allocated, though, to keep
3747 things simple.
3750 Top of Frame
3752 SP (FP') Previous FP
3753 SP + 4 Alignment filler (sigh)
3754 SP + 8 Space for locals reserved here.
3758 SP + n All call saved registers used.
3762 SP + o All call saved fp registers used.
3766 SP + p (SP') points to next available address.
3770 /* Global variables set by output_function_prologue(). */
3771 /* Size of frame. Need to know this to emit return insns from
3772 leaf procedures. */
3773 static HOST_WIDE_INT actual_fsize, local_fsize;
3774 static int save_fregs;
3776 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3777 Handle case where DISP > 8k by using the add_high_const patterns.
3779 Note in DISP > 8k case, we will leave the high part of the address
3780 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3782 static void
3783 store_reg (int reg, HOST_WIDE_INT disp, int base)
3785 rtx dest, src, basereg;
3786 rtx_insn *insn;
3788 src = gen_rtx_REG (word_mode, reg);
3789 basereg = gen_rtx_REG (Pmode, base);
3790 if (VAL_14_BITS_P (disp))
3792 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3793 insn = emit_move_insn (dest, src);
3795 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3797 rtx delta = GEN_INT (disp);
3798 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3800 emit_move_insn (tmpreg, delta);
3801 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3802 if (DO_FRAME_NOTES)
3804 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3805 gen_rtx_SET (tmpreg,
3806 gen_rtx_PLUS (Pmode, basereg, delta)));
3807 RTX_FRAME_RELATED_P (insn) = 1;
3809 dest = gen_rtx_MEM (word_mode, tmpreg);
3810 insn = emit_move_insn (dest, src);
3812 else
3814 rtx delta = GEN_INT (disp);
3815 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3816 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3818 emit_move_insn (tmpreg, high);
3819 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3820 insn = emit_move_insn (dest, src);
3821 if (DO_FRAME_NOTES)
3822 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3823 gen_rtx_SET (gen_rtx_MEM (word_mode,
3824 gen_rtx_PLUS (word_mode,
3825 basereg,
3826 delta)),
3827 src));
3830 if (DO_FRAME_NOTES)
3831 RTX_FRAME_RELATED_P (insn) = 1;
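/* For illustration: with a large DISP such as 0x12345 in 32-bit code,
   the final arm above emits roughly

	addil L'0x12345,%basereg	; %r1 = basereg + left part
	stw %srcreg,R'0x12345(%r1)	; store at the low-part offset

   where L'/R' are the assembler's left/right field selectors.  This is
   a hand-worked sketch of the sequence, not captured output.  */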
3834 /* Emit RTL to store REG at the memory location specified by BASE and then
3835 add MOD to BASE. MOD must be <= 8k. */
3837 static void
3838 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3840 rtx basereg, srcreg, delta;
3841 rtx_insn *insn;
3843 gcc_assert (VAL_14_BITS_P (mod));
3845 basereg = gen_rtx_REG (Pmode, base);
3846 srcreg = gen_rtx_REG (word_mode, reg);
3847 delta = GEN_INT (mod);
3849 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3850 if (DO_FRAME_NOTES)
3852 RTX_FRAME_RELATED_P (insn) = 1;
3854 /* RTX_FRAME_RELATED_P must be set on each frame related set
3855 in a parallel with more than one element. */
3856 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3857 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3861 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3862 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3863 whether to add a frame note or not.
3865 In the DISP > 8k case, we leave the high part of the address in %r1.
3866 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3868 static void
3869 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3871 rtx_insn *insn;
3873 if (VAL_14_BITS_P (disp))
3875 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3876 plus_constant (Pmode,
3877 gen_rtx_REG (Pmode, base), disp));
3879 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3881 rtx basereg = gen_rtx_REG (Pmode, base);
3882 rtx delta = GEN_INT (disp);
3883 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3885 emit_move_insn (tmpreg, delta);
3886 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3887 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3888 if (DO_FRAME_NOTES)
3889 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3890 gen_rtx_SET (tmpreg,
3891 gen_rtx_PLUS (Pmode, basereg, delta)));
3893 else
3895 rtx basereg = gen_rtx_REG (Pmode, base);
3896 rtx delta = GEN_INT (disp);
3897 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3899 emit_move_insn (tmpreg,
3900 gen_rtx_PLUS (Pmode, basereg,
3901 gen_rtx_HIGH (Pmode, delta)));
3902 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3903 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3906 if (DO_FRAME_NOTES && note)
3907 RTX_FRAME_RELATED_P (insn) = 1;
3910 HOST_WIDE_INT
3911 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3913 int freg_saved = 0;
3914 int i, j;
3916 /* The code in pa_expand_prologue and pa_expand_epilogue must
3917 be consistent with the rounding and size calculation done here.
3918 Change them at the same time. */
3920 /* We do our own stack alignment. First, round the size of the
3921 stack locals up to a word boundary. */
3922 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3924 /* Space for previous frame pointer + filler. If any frame is
3925 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3926 waste some space here for the sake of HP compatibility. The
3927 first slot is only used when the frame pointer is needed. */
3928 if (size || frame_pointer_needed)
3929 size += pa_starting_frame_offset ();
3931 /* If the current function calls __builtin_eh_return, then we need
3932 to allocate stack space for registers that will hold data for
3933 the exception handler. */
3934 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3936 unsigned int i;
3938 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3939 continue;
3940 size += i * UNITS_PER_WORD;
3943 /* Account for space used by the callee general register saves. */
3944 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3945 if (df_regs_ever_live_p (i))
3946 size += UNITS_PER_WORD;
3948 /* Account for space used by the callee floating point register saves. */
3949 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3950 if (df_regs_ever_live_p (i)
3951 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3953 freg_saved = 1;
3955 /* We always save both halves of the FP register, so always
3956 increment the frame size by 8 bytes. */
3957 size += 8;
3960 /* If any of the floating registers are saved, account for the
3961 alignment needed for the floating point register save block. */
3962 if (freg_saved)
3964 size = (size + 7) & ~7;
3965 if (fregs_live)
3966 *fregs_live = 1;
3969 /* The various ABIs include space for the outgoing parameters in the
3970 size of the current function's stack frame. We don't need to align
3971 for the outgoing arguments as their alignment is set by the final
3972 rounding for the frame as a whole. */
3973 size += crtl->outgoing_args_size;
3975 /* Allocate space for the fixed frame marker. This space must be
3976 allocated for any function that makes calls or allocates
3977 stack space. */
3978 if (!crtl->is_leaf || size)
3979 size += TARGET_64BIT ? 48 : 32;
3981 /* Finally, round to the preferred stack boundary. */
3982 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3983 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
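/* Worked example of the final rounding above, assuming the 32-bit
   preferred stack boundary of 512 bits (64 bytes): a raw size of 100
   becomes (100 + 63) & ~63 == 128.  (Illustrative arithmetic only.)  */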
3986 /* Output function label, and associated .PROC and .CALLINFO statements. */
3988 void
3989 pa_output_function_label (FILE *file)
3991 /* The function's label and associated .PROC must never be
3992 separated and must be output *after* any profiling declarations
3993 to avoid changing spaces/subspaces within a procedure. */
3994 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3995 fputs ("\t.PROC\n", file);
3997 /* pa_expand_prologue does the dirty work now. We just need
3998 to output the assembler directives which denote the start
3999 of a function. */
4000 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4001 if (crtl->is_leaf)
4002 fputs (",NO_CALLS", file);
4003 else
4004 fputs (",CALLS", file);
4005 if (rp_saved)
4006 fputs (",SAVE_RP", file);
4008 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4009 at the beginning of the frame and that it is used as the frame
4010 pointer for the frame. We do this because our current frame
4011 layout doesn't conform to that specified in the HP runtime
4012 documentation and we need a way to indicate to programs such as
4013 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4014 isn't used by HP compilers but is supported by the assembler.
4015 However, SAVE_SP is supposed to indicate that the previous stack
4016 pointer has been saved in the frame marker. */
4017 if (frame_pointer_needed)
4018 fputs (",SAVE_SP", file);
4020 /* Pass on information about the number of callee register saves
4021 performed in the prologue.
4023 The compiler is supposed to pass the highest register number
4024 saved, the assembler then has to adjust that number before
4025 entering it into the unwind descriptor (to account for any
4026 caller saved registers with lower register numbers than the
4027 first callee saved register). */
4028 if (gr_saved)
4029 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4031 if (fr_saved)
4032 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4034 fputs ("\n\t.ENTRY\n", file);
4037 /* Output function prologue. */
4039 static void
4040 pa_output_function_prologue (FILE *file)
4042 pa_output_function_label (file);
4043 remove_useless_addtr_insns (0);
4046 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
4048 static void
4049 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4051 remove_useless_addtr_insns (0);
4054 void
4055 pa_expand_prologue (void)
4057 int merge_sp_adjust_with_store = 0;
4058 HOST_WIDE_INT size = get_frame_size ();
4059 HOST_WIDE_INT offset;
4060 int i;
4061 rtx tmpreg;
4062 rtx_insn *insn;
4064 gr_saved = 0;
4065 fr_saved = 0;
4066 save_fregs = 0;
4068 /* Compute total size for frame pointer, filler, locals and rounding to
4069 the next word boundary. Similar code appears in pa_compute_frame_size
4070 and must be changed in tandem with this code. */
4071 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4072 if (local_fsize || frame_pointer_needed)
4073 local_fsize += pa_starting_frame_offset ();
4075 actual_fsize = pa_compute_frame_size (size, &save_fregs);
4076 if (flag_stack_usage_info)
4077 current_function_static_stack_size = actual_fsize;
4079 /* Compute a few things we will use often. */
4080 tmpreg = gen_rtx_REG (word_mode, 1);
4082 /* Save RP first. The calling conventions manual states RP will
4083 always be stored into the caller's frame at sp - 20 or sp - 16
4084 depending on which ABI is in use. */
4085 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4087 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4088 rp_saved = true;
4090 else
4091 rp_saved = false;
4093 /* Allocate the local frame and set up the frame pointer if needed. */
4094 if (actual_fsize != 0)
4096 if (frame_pointer_needed)
4098 /* Copy the old frame pointer temporarily into %r1. Set up the
4099 new stack pointer, then store away the saved old frame pointer
4100 into the stack at sp and at the same time update the stack
4101 pointer by actual_fsize bytes. Two versions: the first
4102 handles small (<8k) frames, the second handles large (>=8k)
4103 frames. */
4104 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4105 if (DO_FRAME_NOTES)
4106 RTX_FRAME_RELATED_P (insn) = 1;
4108 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4109 if (DO_FRAME_NOTES)
4110 RTX_FRAME_RELATED_P (insn) = 1;
4112 if (VAL_14_BITS_P (actual_fsize))
4113 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4114 else
4116 /* It is incorrect to store the saved frame pointer at *sp,
4117 then increment sp (writes beyond the current stack boundary).
4119 So instead use stwm to store at *sp and post-increment the
4120 stack pointer as an atomic operation. Then increment sp to
4121 finish allocating the new frame. */
4122 HOST_WIDE_INT adjust1 = 8192 - 64;
4123 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4125 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4126 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4127 adjust2, 1);
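/* Worked example: for actual_fsize == 10000, adjust1 is
   8192 - 64 == 8128 and adjust2 is 10000 - 8128 == 1872
   (illustrative arithmetic only).  */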
4130 /* We set SAVE_SP in frames that need a frame pointer. Thus,
4131 we need to store the previous stack pointer (frame pointer)
4132 into the frame marker on targets that use the HP unwind
4133 library. This allows the HP unwind library to be used to
4134 unwind GCC frames. However, we are not fully compatible
4135 with the HP library because our frame layout differs from
4136 that specified in the HP runtime specification.
4138 We don't want a frame note on this instruction as the frame
4139 marker moves during dynamic stack allocation.
4141 This instruction also serves as a blockage to prevent
4142 register spills from being scheduled before the stack
4143 pointer is raised. This is necessary as we store
4144 registers using the frame pointer as a base register,
4145 and the frame pointer is set before sp is raised. */
4146 if (TARGET_HPUX_UNWIND_LIBRARY)
4148 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4149 GEN_INT (TARGET_64BIT ? -8 : -4));
4151 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4152 hard_frame_pointer_rtx);
4154 else
4155 emit_insn (gen_blockage ());
4157 /* No frame pointer needed. */
4158 else
4160 /* In some cases we can perform the first callee register save
4161 and allocating the stack frame at the same time. If so, just
4162 make a note of it and defer allocating the frame until saving
4163 the callee registers. */
4164 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4165 merge_sp_adjust_with_store = 1;
4166 /* Cannot optimize. Adjust the stack frame by actual_fsize
4167 bytes. */
4168 else
4169 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4170 actual_fsize, 1);
4174 /* Normal register save.
4176 Do not save the frame pointer in the frame_pointer_needed case. It
4177 was done earlier. */
4178 if (frame_pointer_needed)
4180 offset = local_fsize;
4182 /* Saving the EH return data registers in the frame is the simplest
4183 way to get the frame unwind information emitted. We put them
4184 just before the general registers. */
4185 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4187 unsigned int i, regno;
4189 for (i = 0; ; ++i)
4191 regno = EH_RETURN_DATA_REGNO (i);
4192 if (regno == INVALID_REGNUM)
4193 break;
4195 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4196 offset += UNITS_PER_WORD;
4200 for (i = 18; i >= 4; i--)
4201 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4203 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4204 offset += UNITS_PER_WORD;
4205 gr_saved++;
4207 /* Account for %r3 which is saved in a special place. */
4208 gr_saved++;
4210 /* No frame pointer needed. */
4211 else
4213 offset = local_fsize - actual_fsize;
4215 /* Saving the EH return data registers in the frame is the simplest
4216 way to get the frame unwind information emitted. */
4217 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4219 unsigned int i, regno;
4221 for (i = 0; ; ++i)
4223 regno = EH_RETURN_DATA_REGNO (i);
4224 if (regno == INVALID_REGNUM)
4225 break;
4227 /* If merge_sp_adjust_with_store is nonzero, then we can
4228 optimize the first save. */
4229 if (merge_sp_adjust_with_store)
4231 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4232 merge_sp_adjust_with_store = 0;
4234 else
4235 store_reg (regno, offset, STACK_POINTER_REGNUM);
4236 offset += UNITS_PER_WORD;
4240 for (i = 18; i >= 3; i--)
4241 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4243 /* If merge_sp_adjust_with_store is nonzero, then we can
4244 optimize the first GR save. */
4245 if (merge_sp_adjust_with_store)
4247 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4248 merge_sp_adjust_with_store = 0;
4250 else
4251 store_reg (i, offset, STACK_POINTER_REGNUM);
4252 offset += UNITS_PER_WORD;
4253 gr_saved++;
4256 /* If we wanted to merge the SP adjustment with a GR save, but we never
4257 did any GR saves, then just emit the adjustment here. */
4258 if (merge_sp_adjust_with_store)
4259 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4260 actual_fsize, 1);
4263 /* The hppa calling conventions say that %r19, the pic offset
4264 register, is saved at sp - 32 (in this function's frame)
4265 when generating PIC code. FIXME: What is the correct thing
4266 to do for functions which make no calls and allocate no
4267 frame? Do we need to allocate a frame, or can we just omit
4268 the save? For now we'll just omit the save.
4270 We don't want a note on this insn as the frame marker can
4271 move if there is a dynamic stack allocation. */
4272 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4274 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4276 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4280 /* Align pointer properly (doubleword boundary). */
4281 offset = (offset + 7) & ~7;
4283 /* Floating point register store. */
4284 if (save_fregs)
4286 rtx base;
4288 /* First get the frame or stack pointer to the start of the FP register
4289 save area. */
4290 if (frame_pointer_needed)
4292 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4293 base = hard_frame_pointer_rtx;
4295 else
4297 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4298 base = stack_pointer_rtx;
4301 /* Now actually save the FP registers. */
4302 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4304 if (df_regs_ever_live_p (i)
4305 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4307 rtx addr, reg;
4308 rtx_insn *insn;
4309 addr = gen_rtx_MEM (DFmode,
4310 gen_rtx_POST_INC (word_mode, tmpreg));
4311 reg = gen_rtx_REG (DFmode, i);
4312 insn = emit_move_insn (addr, reg);
4313 if (DO_FRAME_NOTES)
4315 RTX_FRAME_RELATED_P (insn) = 1;
4316 if (TARGET_64BIT)
4318 rtx mem = gen_rtx_MEM (DFmode,
4319 plus_constant (Pmode, base,
4320 offset));
4321 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4322 gen_rtx_SET (mem, reg));
4324 else
4326 rtx meml = gen_rtx_MEM (SFmode,
4327 plus_constant (Pmode, base,
4328 offset));
4329 rtx memr = gen_rtx_MEM (SFmode,
4330 plus_constant (Pmode, base,
4331 offset + 4));
4332 rtx regl = gen_rtx_REG (SFmode, i);
4333 rtx regr = gen_rtx_REG (SFmode, i + 1);
4334 rtx setl = gen_rtx_SET (meml, regl);
4335 rtx setr = gen_rtx_SET (memr, regr);
4336 rtvec vec;
4338 RTX_FRAME_RELATED_P (setl) = 1;
4339 RTX_FRAME_RELATED_P (setr) = 1;
4340 vec = gen_rtvec (2, setl, setr);
4341 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4342 gen_rtx_SEQUENCE (VOIDmode, vec));
4345 offset += GET_MODE_SIZE (DFmode);
4346 fr_saved++;
4352 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4353 Handle case where DISP > 8k by using the add_high_const patterns. */
4355 static void
4356 load_reg (int reg, HOST_WIDE_INT disp, int base)
4358 rtx dest = gen_rtx_REG (word_mode, reg);
4359 rtx basereg = gen_rtx_REG (Pmode, base);
4360 rtx src;
4362 if (VAL_14_BITS_P (disp))
4363 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4364 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4366 rtx delta = GEN_INT (disp);
4367 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4369 emit_move_insn (tmpreg, delta);
4370 if (TARGET_DISABLE_INDEXING)
4372 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4373 src = gen_rtx_MEM (word_mode, tmpreg);
4375 else
4376 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4378 else
4380 rtx delta = GEN_INT (disp);
4381 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4382 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4384 emit_move_insn (tmpreg, high);
4385 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4388 emit_move_insn (dest, src);
4391 /* Update the total code bytes output to the text section. */
4393 static void
4394 update_total_code_bytes (unsigned int nbytes)
4396 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4397 && !IN_NAMED_SECTION_P (cfun->decl))
4399 unsigned int old_total = total_code_bytes;
4401 total_code_bytes += nbytes;
4403 /* Be prepared to handle overflows. */
4404 if (old_total > total_code_bytes)
4405 total_code_bytes = UINT_MAX;
4409 /* This function generates the assembly code for function exit.
4410 Args are as for output_function_prologue ().
4412 The function epilogue should not depend on the current stack
4413 pointer! It should use the frame pointer only. This is mandatory
4414 because of alloca; we also take advantage of it to omit stack
4415 adjustments before returning. */
4417 static void
4418 pa_output_function_epilogue (FILE *file)
4420 rtx_insn *insn = get_last_insn ();
4421 bool extra_nop;
4423 /* pa_expand_epilogue does the dirty work now. We just need
4424 to output the assembler directives which denote the end
4425 of a function.
4427 To make debuggers happy, emit a nop if the epilogue was completely
4428 eliminated due to a volatile call as the last insn in the
4429 current function. That way the return address (in %r2) will
4430 always point to a valid instruction in the current function. */
4432 /* Get the last real insn. */
4433 if (NOTE_P (insn))
4434 insn = prev_real_insn (insn);
4436 /* If it is a sequence, then look inside. */
4437 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4438 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4440 /* If insn is a CALL_INSN, then it must be a call to a volatile
4441 function (otherwise there would be epilogue insns). */
4442 if (insn && CALL_P (insn))
4444 fputs ("\tnop\n", file);
4445 extra_nop = true;
4447 else
4448 extra_nop = false;
4450 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4452 if (TARGET_SOM && TARGET_GAS)
4454 /* We are done with this subspace except possibly for some additional
4455 debug information. Forget that we are in this subspace to ensure
4456 that the next function is output in its own subspace. */
4457 in_section = NULL;
4458 cfun->machine->in_nsubspa = 2;
4461 /* Thunks do their own insn accounting. */
4462 if (cfun->is_thunk)
4463 return;
4465 if (INSN_ADDRESSES_SET_P ())
4467 last_address = extra_nop ? 4 : 0;
4468 insn = get_last_nonnote_insn ();
4469 if (insn)
4471 last_address += INSN_ADDRESSES (INSN_UID (insn));
4472 if (INSN_P (insn))
4473 last_address += insn_default_length (insn);
4475 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4476 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4478 else
4479 last_address = UINT_MAX;
4481 /* Finally, update the total number of code bytes output so far. */
4482 update_total_code_bytes (last_address);
4485 void
4486 pa_expand_epilogue (void)
4488 rtx tmpreg;
4489 HOST_WIDE_INT offset;
4490 HOST_WIDE_INT ret_off = 0;
4491 int i;
4492 int merge_sp_adjust_with_load = 0;
4494 /* We will use this often. */
4495 tmpreg = gen_rtx_REG (word_mode, 1);
4497 /* Try to restore RP early to avoid load/use interlocks when
4498 RP gets used in the return (bv) instruction. This appears to still
4499 be necessary even when we schedule the prologue and epilogue. */
4500 if (rp_saved)
4502 ret_off = TARGET_64BIT ? -16 : -20;
4503 if (frame_pointer_needed)
4505 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4506 ret_off = 0;
4508 else
4510 /* No frame pointer, and stack is smaller than 8k. */
4511 if (VAL_14_BITS_P (ret_off - actual_fsize))
4513 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4514 ret_off = 0;
4519 /* General register restores. */
4520 if (frame_pointer_needed)
4522 offset = local_fsize;
4524 /* If the current function calls __builtin_eh_return, then we need
4525 to restore the saved EH data registers. */
4526 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4528 unsigned int i, regno;
4530 for (i = 0; ; ++i)
4532 regno = EH_RETURN_DATA_REGNO (i);
4533 if (regno == INVALID_REGNUM)
4534 break;
4536 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4537 offset += UNITS_PER_WORD;
4541 for (i = 18; i >= 4; i--)
4542 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4544 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4545 offset += UNITS_PER_WORD;
4548 else
4550 offset = local_fsize - actual_fsize;
4552 /* If the current function calls __builtin_eh_return, then we need
4553 to restore the saved EH data registers. */
4554 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4556 unsigned int i, regno;
4558 for (i = 0; ; ++i)
4560 regno = EH_RETURN_DATA_REGNO (i);
4561 if (regno == INVALID_REGNUM)
4562 break;
4564 /* Only for the first load.
4565 merge_sp_adjust_with_load holds the register load
4566 with which we will merge the sp adjustment. */
4567 if (merge_sp_adjust_with_load == 0
4568 && local_fsize == 0
4569 && VAL_14_BITS_P (-actual_fsize))
4570 merge_sp_adjust_with_load = regno;
4571 else
4572 load_reg (regno, offset, STACK_POINTER_REGNUM);
4573 offset += UNITS_PER_WORD;
4577 for (i = 18; i >= 3; i--)
4579 if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4581 /* Only for the first load.
4582 merge_sp_adjust_with_load holds the register load
4583 with which we will merge the sp adjustment. */
4584 if (merge_sp_adjust_with_load == 0
4585 && local_fsize == 0
4586 && VAL_14_BITS_P (-actual_fsize))
4587 merge_sp_adjust_with_load = i;
4588 else
4589 load_reg (i, offset, STACK_POINTER_REGNUM);
4590 offset += UNITS_PER_WORD;
4595 /* Align pointer properly (doubleword boundary). */
4596 offset = (offset + 7) & ~7;
4598 /* FP register restores. */
4599 if (save_fregs)
4601 /* Adjust the register to index off of. */
4602 if (frame_pointer_needed)
4603 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4604 else
4605 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4607 /* Actually do the restores now. */
4608 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4609 if (df_regs_ever_live_p (i)
4610 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4612 rtx src = gen_rtx_MEM (DFmode,
4613 gen_rtx_POST_INC (word_mode, tmpreg));
4614 rtx dest = gen_rtx_REG (DFmode, i);
4615 emit_move_insn (dest, src);
4619 /* Emit a blockage insn here to keep these insns from being moved to
4620 an earlier spot in the epilogue, or into the main instruction stream.
4622 This is necessary as we must not cut the stack back before all the
4623 restores are finished. */
4624 emit_insn (gen_blockage ());
4626 /* Reset stack pointer (and possibly frame pointer). The stack
4627 pointer is initially set to fp + 64 to avoid a race condition. */
4628 if (frame_pointer_needed)
4630 rtx delta = GEN_INT (-64);
4632 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4633 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4634 stack_pointer_rtx, delta));
4636 /* If we were deferring a callee register restore, do it now. */
4637 else if (merge_sp_adjust_with_load)
4639 rtx delta = GEN_INT (-actual_fsize);
4640 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4642 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4644 else if (actual_fsize != 0)
4645 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4646 - actual_fsize, 0);
4648 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4649 frame greater than 8k), do so now. */
4650 if (ret_off != 0)
4651 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4653 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4655 rtx sa = EH_RETURN_STACKADJ_RTX;
4657 emit_insn (gen_blockage ());
4658 emit_insn (TARGET_64BIT
4659 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4660 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4664 bool
4665 pa_can_use_return_insn (void)
4667 if (!reload_completed)
4668 return false;
4670 if (frame_pointer_needed)
4671 return false;
4673 if (df_regs_ever_live_p (2))
4674 return false;
4676 if (crtl->profile)
4677 return false;
4679 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4682 rtx
4683 hppa_pic_save_rtx (void)
4685 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4688 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4689 #define NO_DEFERRED_PROFILE_COUNTERS 0
4690 #endif
4693 /* Vector of funcdef numbers. */
4694 static vec<int> funcdef_nos;
4696 /* Output deferred profile counters. */
4697 static void
4698 output_deferred_profile_counters (void)
4700 unsigned int i;
4701 int align, n;
4703 if (funcdef_nos.is_empty ())
4704 return;
4706 switch_to_section (data_section);
4707 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4708 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4710 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4712 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4713 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4716 funcdef_nos.release ();
4719 void
4720 hppa_profile_hook (int label_no)
4722 rtx_code_label *label_rtx = gen_label_rtx ();
4723 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4724 rtx arg_bytes, begin_label_rtx, mcount, sym;
4725 rtx_insn *call_insn;
4726 char begin_label_name[16];
4727 bool use_mcount_pcrel_call;
4729 /* Set up call destination. */
4730 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4731 pa_encode_label (sym);
4732 mcount = gen_rtx_MEM (Pmode, sym);
4734 /* If we can reach _mcount with a pc-relative call, we can optimize
4735 loading the address of the current function. This requires linker
4736 long branch stub support. */
4737 if (!TARGET_PORTABLE_RUNTIME
4738 && !TARGET_LONG_CALLS
4739 && (TARGET_SOM || flag_function_sections))
4740 use_mcount_pcrel_call = TRUE;
4741 else
4742 use_mcount_pcrel_call = FALSE;
4744 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4745 label_no);
4746 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4748 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4750 if (!use_mcount_pcrel_call)
4752 /* The address of the function is loaded into %r25 with an instruction-
4753 relative sequence that avoids the use of relocations. We use SImode
4754 for the address of the function in both 32 and 64-bit code to avoid
4755 having to provide DImode versions of the lcla2 pattern. */
4756 if (TARGET_PA_20)
4757 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4758 else
4759 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4762 if (!NO_DEFERRED_PROFILE_COUNTERS)
4764 rtx count_label_rtx, addr, r24;
4765 char count_label_name[16];
4767 funcdef_nos.safe_push (label_no);
4768 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4769 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4770 ggc_strdup (count_label_name));
4772 addr = force_reg (Pmode, count_label_rtx);
4773 r24 = gen_rtx_REG (Pmode, 24);
4774 emit_move_insn (r24, addr);
4776 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4777 if (use_mcount_pcrel_call)
4778 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4779 begin_label_rtx));
4780 else
4781 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4783 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4785 else
4787 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4788 if (use_mcount_pcrel_call)
4789 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4790 begin_label_rtx));
4791 else
4792 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4795 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4796 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4798 /* Indicate the _mcount call cannot throw, nor will it execute a
4799 non-local goto. */
4800 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4802 /* Allocate space for fixed arguments. */
4803 if (reg_parm_stack_space > crtl->outgoing_args_size)
4804 crtl->outgoing_args_size = reg_parm_stack_space;
4807 /* Fetch the return address for the frame COUNT steps up from
4808 the current frame, after the prologue. FRAMEADDR is the
4809 frame pointer of the COUNT frame.
4811 We want to ignore any export stub remnants here. To handle this,
4812 we examine the code at the return address, and if it is an export
4813 stub, we return a memory rtx for the stub return address stored
4814 at frame-24.
4816 The value returned is used in two different ways:
4818 1. To find a function's caller.
4820 2. To change the return address for a function.
4822 This function handles most instances of case 1; however, it will
4823 fail if there are two levels of stubs to execute on the return
4824 path. The only way I believe that can happen is if the return value
4825 needs a parameter relocation, which never happens for C code.
4827 This function handles most instances of case 2; however, it will
4828 fail if we did not originally have stub code on the return path
4829 but will need stub code on the new return path. This can happen if
4830 the caller & callee are both in the main program, but the new
4831 return location is in a shared library. */
4833 rtx
4834 pa_return_addr_rtx (int count, rtx frameaddr)
4836 rtx label;
4837 rtx rp;
4838 rtx saved_rp;
4839 rtx ins;
4841 /* The instruction stream at the return address of a PA1.X export stub is:
4843 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4844 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4845 0x00011820 | stub+16: mtsp r1,sr0
4846 0xe0400002 | stub+20: be,n 0(sr0,rp)
4848 0xe0400002 must be specified as -532676606 so that it won't be
4849 rejected as an invalid immediate operand on 64-bit hosts.
4851 The instruction stream at the return address of a PA2.0 export stub is:
4853 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4854 0xe840d002 | stub+12: bve,n (rp)
4857 HOST_WIDE_INT insns[4];
4858 int i, len;
4860 if (count != 0)
4861 return NULL_RTX;
4863 rp = get_hard_reg_initial_val (Pmode, 2);
4865 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4866 return rp;
4868 /* If there is no export stub then just use the value saved from
4869 the return pointer register. */
4871 saved_rp = gen_reg_rtx (Pmode);
4872 emit_move_insn (saved_rp, rp);
4874 /* Get pointer to the instruction stream. We have to mask out the
4875 privilege level from the two low order bits of the return address
4876 pointer here so that ins will point to the start of the first
4877 instruction that would have been executed if we returned. */
4878 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4879 label = gen_label_rtx ();
4881 if (TARGET_PA_20)
4883 insns[0] = 0x4bc23fd1;
4884 insns[1] = -398405630;
4885 len = 2;
4887 else
4889 insns[0] = 0x4bc23fd1;
4890 insns[1] = 0x004010a1;
4891 insns[2] = 0x00011820;
4892 insns[3] = -532676606;
4893 len = 4;
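/* Sanity check on the constants above (hand-worked arithmetic):
   0xe0400002 is 3762290690 unsigned, and 3762290690 - 2**32 is
   -532676606; likewise 0xe840d002 - 2**32 is -398405630, the
   PA2.0 value used earlier.  */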
4896 /* Check the instruction stream at the normal return address for the
4897 export stub. If it is an export stub, then our return address is
4898 really in -24[frameaddr]. */
4900 for (i = 0; i < len; i++)
4902 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4903 rtx op1 = GEN_INT (insns[i]);
4904 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4907 /* Here we know that our return address points to an export
4908 stub. We don't want to return the address of the export stub,
4909 but rather the return address of the export stub. That return
4910 address is stored at -24[frameaddr]. */
4912 emit_move_insn (saved_rp,
4913 gen_rtx_MEM (Pmode,
4914 memory_address (Pmode,
4915 plus_constant (Pmode, frameaddr,
4916 -24))));
4918 emit_label (label);
4920 return saved_rp;
4923 void
4924 pa_emit_bcond_fp (rtx operands[])
4926 enum rtx_code code = GET_CODE (operands[0]);
4927 rtx operand0 = operands[1];
4928 rtx operand1 = operands[2];
4929 rtx label = operands[3];
4931 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4932 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4934 emit_jump_insn (gen_rtx_SET (pc_rtx,
4935 gen_rtx_IF_THEN_ELSE (VOIDmode,
4936 gen_rtx_fmt_ee (NE,
4937 VOIDmode,
4938 gen_rtx_REG (CCFPmode, 0),
4939 const0_rtx),
4940 gen_rtx_LABEL_REF (VOIDmode, label),
4941 pc_rtx)));
4945 /* Adjust the cost of a scheduling dependency. Return the new cost of
4946 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4948 static int
4949 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4950 unsigned int)
4952 enum attr_type attr_type;
4954 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4955 true dependencies as they are described with bypasses now. */
4956 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4957 return cost;
4959 if (! recog_memoized (insn))
4960 return 0;
4962 attr_type = get_attr_type (insn);
4964 switch (dep_type)
4966 case REG_DEP_ANTI:
4967 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4968 cycles later. */
4970 if (attr_type == TYPE_FPLOAD)
4972 rtx pat = PATTERN (insn);
4973 rtx dep_pat = PATTERN (dep_insn);
4974 if (GET_CODE (pat) == PARALLEL)
4976 /* This happens for the fldXs,mb patterns. */
4977 pat = XVECEXP (pat, 0, 0);
4979 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4980 /* If this happens, we have to extend this to schedule
4981 optimally. Return 0 for now. */
4982 return 0;
4984 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4986 if (! recog_memoized (dep_insn))
4987 return 0;
4988 switch (get_attr_type (dep_insn))
4990 case TYPE_FPALU:
4991 case TYPE_FPMULSGL:
4992 case TYPE_FPMULDBL:
4993 case TYPE_FPDIVSGL:
4994 case TYPE_FPDIVDBL:
4995 case TYPE_FPSQRTSGL:
4996 case TYPE_FPSQRTDBL:
4997 /* A fpload can't be issued until one cycle before a
4998 preceding arithmetic operation has finished if
4999 the target of the fpload is any of the sources
5000 (or destination) of the arithmetic operation. */
5001 return insn_default_latency (dep_insn) - 1;
5003 default:
5004 return 0;
5008 else if (attr_type == TYPE_FPALU)
5010 rtx pat = PATTERN (insn);
5011 rtx dep_pat = PATTERN (dep_insn);
5012 if (GET_CODE (pat) == PARALLEL)
5014 /* This happens for the fldXs,mb patterns. */
5015 pat = XVECEXP (pat, 0, 0);
5017 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5018 /* If this happens, we have to extend this to schedule
5019 optimally. Return 0 for now. */
5020 return 0;
5022 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5024 if (! recog_memoized (dep_insn))
5025 return 0;
5026 switch (get_attr_type (dep_insn))
5028 case TYPE_FPDIVSGL:
5029 case TYPE_FPDIVDBL:
5030 case TYPE_FPSQRTSGL:
5031 case TYPE_FPSQRTDBL:
5032 /* An ALU flop can't be issued until two cycles before a
5033 preceding divide or sqrt operation has finished if
5034 the target of the ALU flop is any of the sources
5035 (or destination) of the divide or sqrt operation. */
5036 return insn_default_latency (dep_insn) - 2;
5038 default:
5039 return 0;
5044 /* For other anti dependencies, the cost is 0. */
5045 return 0;
5047 case REG_DEP_OUTPUT:
5048 /* Output dependency; DEP_INSN writes a register that INSN writes some
5049 cycles later. */
5050 if (attr_type == TYPE_FPLOAD)
5052 rtx pat = PATTERN (insn);
5053 rtx dep_pat = PATTERN (dep_insn);
5054 if (GET_CODE (pat) == PARALLEL)
5056 /* This happens for the fldXs,mb patterns. */
5057 pat = XVECEXP (pat, 0, 0);
5059 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5060 /* If this happens, we have to extend this to schedule
5061 optimally. Return 0 for now. */
5062 return 0;
5064 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5066 if (! recog_memoized (dep_insn))
5067 return 0;
5068 switch (get_attr_type (dep_insn))
5070 case TYPE_FPALU:
5071 case TYPE_FPMULSGL:
5072 case TYPE_FPMULDBL:
5073 case TYPE_FPDIVSGL:
5074 case TYPE_FPDIVDBL:
5075 case TYPE_FPSQRTSGL:
5076 case TYPE_FPSQRTDBL:
5077 /* A fpload can't be issued until one cycle before a
5078 preceding arithmetic operation has finished if
5079 the target of the fpload is the destination of the
5080 arithmetic operation.
5082 Exception: For PA7100LC, PA7200 and PA7300, the cost
5083 is 3 cycles, unless they bundle together. We also
5084 pay the penalty if the second insn is a fpload. */
5085 return insn_default_latency (dep_insn) - 1;
5087 default:
5088 return 0;
5092 else if (attr_type == TYPE_FPALU)
5094 rtx pat = PATTERN (insn);
5095 rtx dep_pat = PATTERN (dep_insn);
5096 if (GET_CODE (pat) == PARALLEL)
5098 /* This happens for the fldXs,mb patterns. */
5099 pat = XVECEXP (pat, 0, 0);
5101 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5102 /* If this happens, we have to extend this to schedule
5103 optimally. Return 0 for now. */
5104 return 0;
5106 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5108 if (! recog_memoized (dep_insn))
5109 return 0;
5110 switch (get_attr_type (dep_insn))
5112 case TYPE_FPDIVSGL:
5113 case TYPE_FPDIVDBL:
5114 case TYPE_FPSQRTSGL:
5115 case TYPE_FPSQRTDBL:
5116 /* An ALU flop can't be issued until two cycles before a
5117 preceding divide or sqrt operation has finished if
5118 the target of the ALU flop is also the target of
5119 the divide or sqrt operation. */
5120 return insn_default_latency (dep_insn) - 2;
5122 default:
5123 return 0;
5128 /* For other output dependencies, the cost is 0. */
5129 return 0;
5131 default:
5132 gcc_unreachable ();
5136 /* The 700 can only issue a single insn at a time.
5137 The 7XXX processors can issue two insns at a time.
5138 The 8000 can issue 4 insns at a time. */
5139 static int
5140 pa_issue_rate (void)
5142 switch (pa_cpu)
5144 case PROCESSOR_700: return 1;
5145 case PROCESSOR_7100: return 2;
5146 case PROCESSOR_7100LC: return 2;
5147 case PROCESSOR_7200: return 2;
5148 case PROCESSOR_7300: return 2;
5149 case PROCESSOR_8000: return 4;
5151 default:
5152 gcc_unreachable ();
5158 /* Return any length plus adjustment needed by INSN which already has
5159 its length computed as LENGTH. Return LENGTH if no adjustment is
5160 necessary.
5162 Also compute the length of an inline block move here as it is too
5163 complicated to express as a length attribute in pa.md. */
5164 int
5165 pa_adjust_insn_length (rtx_insn *insn, int length)
5167 rtx pat = PATTERN (insn);
5169 /* If length is negative or undefined, provide initial length. */
5170 if ((unsigned int) length >= INT_MAX)
5172 if (GET_CODE (pat) == SEQUENCE)
5173 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5175 switch (get_attr_type (insn))
5177 case TYPE_MILLI:
5178 length = pa_attr_length_millicode_call (insn);
5179 break;
5180 case TYPE_CALL:
5181 length = pa_attr_length_call (insn, 0);
5182 break;
5183 case TYPE_SIBCALL:
5184 length = pa_attr_length_call (insn, 1);
5185 break;
5186 case TYPE_DYNCALL:
5187 length = pa_attr_length_indirect_call (insn);
5188 break;
5189 case TYPE_SH_FUNC_ADRS:
5190 length = pa_attr_length_millicode_call (insn) + 20;
5191 break;
5192 default:
5193 gcc_unreachable ();
5197 /* Block move pattern. */
5198 if (NONJUMP_INSN_P (insn)
5199 && GET_CODE (pat) == PARALLEL
5200 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5201 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5202 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5203 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5204 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5205 length += compute_cpymem_length (insn) - 4;
5206 /* Block clear pattern. */
5207 else if (NONJUMP_INSN_P (insn)
5208 && GET_CODE (pat) == PARALLEL
5209 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5210 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5211 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5212 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5213 length += compute_clrmem_length (insn) - 4;
5214 /* Conditional branch with an unfilled delay slot. */
5215 else if (JUMP_P (insn) && ! simplejump_p (insn))
5217 /* Adjust a short backwards conditional with an unfilled delay slot. */
5218 if (GET_CODE (pat) == SET
5219 && length == 4
5220 && JUMP_LABEL (insn) != NULL_RTX
5221 && ! forward_branch_p (insn))
5222 length += 4;
5223 else if (GET_CODE (pat) == PARALLEL
5224 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5225 && length == 4)
5226 length += 4;
5227 /* Adjust dbra insn with short backwards conditional branch with
5228 unfilled delay slot -- only for the case where the counter is in a
5229 general register. */
5230 else if (GET_CODE (pat) == PARALLEL
5231 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5232 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5233 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5234 && length == 4
5235 && ! forward_branch_p (insn))
5236 length += 4;
5238 return length;
5241 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5243 static bool
5244 pa_print_operand_punct_valid_p (unsigned char code)
5246 if (code == '@'
5247 || code == '#'
5248 || code == '*'
5249 || code == '^')
5250 return true;
5252 return false;
5255 /* Print operand X (an rtx) in assembler syntax to file FILE.
5256 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5257 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5259 void
5260 pa_print_operand (FILE *file, rtx x, int code)
5262 switch (code)
5264 case '#':
5265 /* Output a 'nop' if there's nothing for the delay slot. */
5266 if (dbr_sequence_length () == 0)
5267 fputs ("\n\tnop", file);
5268 return;
5269 case '*':
5270 /* Output a nullification completer if there's nothing for the
5271 delay slot or nullification is requested. */
5272 if (dbr_sequence_length () == 0 ||
5273 (final_sequence &&
5274 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5275 fputs (",n", file);
5276 return;
5277 case 'R':
5278 /* Print out the second register name of a register pair.
5279 I.e., R (6) => 7. */
5280 fputs (reg_names[REGNO (x) + 1], file);
5281 return;
5282 case 'r':
5283 /* A register or zero. */
5284 if (x == const0_rtx
5285 || (x == CONST0_RTX (DFmode))
5286 || (x == CONST0_RTX (SFmode)))
5288 fputs ("%r0", file);
5289 return;
5291 else
5292 break;
5293 case 'f':
5294 /* A register or zero (floating point). */
5295 if (x == const0_rtx
5296 || (x == CONST0_RTX (DFmode))
5297 || (x == CONST0_RTX (SFmode)))
5299 fputs ("%fr0", file);
5300 return;
5302 else
5303 break;
5304 case 'A':
5306 rtx xoperands[2];
5308 xoperands[0] = XEXP (XEXP (x, 0), 0);
5309 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5310 pa_output_global_address (file, xoperands[1], 0);
5311 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5312 return;
5315 case 'C': /* Plain (C)ondition */
5316 case 'X':
5317 switch (GET_CODE (x))
5319 case EQ:
5320 fputs ("=", file); break;
5321 case NE:
5322 fputs ("<>", file); break;
5323 case GT:
5324 fputs (">", file); break;
5325 case GE:
5326 fputs (">=", file); break;
5327 case GEU:
5328 fputs (">>=", file); break;
5329 case GTU:
5330 fputs (">>", file); break;
5331 case LT:
5332 fputs ("<", file); break;
5333 case LE:
5334 fputs ("<=", file); break;
5335 case LEU:
5336 fputs ("<<=", file); break;
5337 case LTU:
5338 fputs ("<<", file); break;
5339 default:
5340 gcc_unreachable ();
5342 return;
5343 case 'N': /* Condition, (N)egated */
5344 switch (GET_CODE (x))
5346 case EQ:
5347 fputs ("<>", file); break;
5348 case NE:
5349 fputs ("=", file); break;
5350 case GT:
5351 fputs ("<=", file); break;
5352 case GE:
5353 fputs ("<", file); break;
5354 case GEU:
5355 fputs ("<<", file); break;
5356 case GTU:
5357 fputs ("<<=", file); break;
5358 case LT:
5359 fputs (">=", file); break;
5360 case LE:
5361 fputs (">", file); break;
5362 case LEU:
5363 fputs (">>", file); break;
5364 case LTU:
5365 fputs (">>=", file); break;
5366 default:
5367 gcc_unreachable ();
5369 return;
5370 /* For floating point comparisons. Note that the output
5371 predicates are the complement of the desired mode. The
5372 conditions for GT, GE, LT, LE and LTGT cause an invalid
5373 operation exception if the result is unordered and this
5374 exception is enabled in the floating-point status register. */
5375 case 'Y':
5376 switch (GET_CODE (x))
5378 case EQ:
5379 fputs ("!=", file); break;
5380 case NE:
5381 fputs ("=", file); break;
5382 case GT:
5383 fputs ("!>", file); break;
5384 case GE:
5385 fputs ("!>=", file); break;
5386 case LT:
5387 fputs ("!<", file); break;
5388 case LE:
5389 fputs ("!<=", file); break;
5390 case LTGT:
5391 fputs ("!<>", file); break;
5392 case UNLE:
5393 fputs ("!?<=", file); break;
5394 case UNLT:
5395 fputs ("!?<", file); break;
5396 case UNGE:
5397 fputs ("!?>=", file); break;
5398 case UNGT:
5399 fputs ("!?>", file); break;
5400 case UNEQ:
5401 fputs ("!?=", file); break;
5402 case UNORDERED:
5403 fputs ("!?", file); break;
5404 case ORDERED:
5405 fputs ("?", file); break;
5406 default:
5407 gcc_unreachable ();
5409 return;
5410 case 'S': /* Condition, operands are (S)wapped. */
5411 switch (GET_CODE (x))
5413 case EQ:
5414 fputs ("=", file); break;
5415 case NE:
5416 fputs ("<>", file); break;
5417 case GT:
5418 fputs ("<", file); break;
5419 case GE:
5420 fputs ("<=", file); break;
5421 case GEU:
5422 fputs ("<<=", file); break;
5423 case GTU:
5424 fputs ("<<", file); break;
5425 case LT:
5426 fputs (">", file); break;
5427 case LE:
5428 fputs (">=", file); break;
5429 case LEU:
5430 fputs (">>=", file); break;
5431 case LTU:
5432 fputs (">>", file); break;
5433 default:
5434 gcc_unreachable ();
5436 return;
5437 case 'B': /* Condition, (B)oth swapped and negate. */
5438 switch (GET_CODE (x))
5440 case EQ:
5441 fputs ("<>", file); break;
5442 case NE:
5443 fputs ("=", file); break;
5444 case GT:
5445 fputs (">=", file); break;
5446 case GE:
5447 fputs (">", file); break;
5448 case GEU:
5449 fputs (">>", file); break;
5450 case GTU:
5451 fputs (">>=", file); break;
5452 case LT:
5453 fputs ("<=", file); break;
5454 case LE:
5455 fputs ("<", file); break;
5456 case LEU:
5457 fputs ("<<", file); break;
5458 case LTU:
5459 fputs ("<<=", file); break;
5460 default:
5461 gcc_unreachable ();
5463 return;
5464 case 'k':
5465 gcc_assert (GET_CODE (x) == CONST_INT);
5466 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5467 return;
5468 case 'Q':
5469 gcc_assert (GET_CODE (x) == CONST_INT);
5470 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5471 return;
5472 case 'L':
5473 gcc_assert (GET_CODE (x) == CONST_INT);
5474 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5475 return;
5476 case 'o':
5477 gcc_assert (GET_CODE (x) == CONST_INT
5478 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5480 return;
5481 case 'O':
5482 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5483 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5484 return;
5485 case 'p':
5486 gcc_assert (GET_CODE (x) == CONST_INT);
5487 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5488 return;
5489 case 'P':
5490 gcc_assert (GET_CODE (x) == CONST_INT);
5491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5492 return;
5493 case 'I':
5494 if (GET_CODE (x) == CONST_INT)
5495 fputs ("i", file);
5496 return;
5497 case 'M':
5498 case 'F':
5499 switch (GET_CODE (XEXP (x, 0)))
5501 case PRE_DEC:
5502 case PRE_INC:
5503 if (ASSEMBLER_DIALECT == 0)
5504 fputs ("s,mb", file);
5505 else
5506 fputs (",mb", file);
5507 break;
5508 case POST_DEC:
5509 case POST_INC:
5510 if (ASSEMBLER_DIALECT == 0)
5511 fputs ("s,ma", file);
5512 else
5513 fputs (",ma", file);
5514 break;
5515 case PLUS:
5516 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5517 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5519 if (ASSEMBLER_DIALECT == 0)
5520 fputs ("x", file);
5522 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5523 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5525 if (ASSEMBLER_DIALECT == 0)
5526 fputs ("x,s", file);
5527 else
5528 fputs (",s", file);
5530 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5531 fputs ("s", file);
5532 break;
5533 default:
5534 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5535 fputs ("s", file);
5536 break;
5538 return;
5539 case 'G':
5540 pa_output_global_address (file, x, 0);
5541 return;
5542 case 'H':
5543 pa_output_global_address (file, x, 1);
5544 return;
5545 case 0: /* Don't do anything special */
5546 break;
5547 case 'Z':
5549 unsigned op[3];
5550 compute_zdepwi_operands (INTVAL (x), op);
5551 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5552 return;
5554 case 'z':
5556 unsigned op[3];
5557 compute_zdepdi_operands (INTVAL (x), op);
5558 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5559 return;
5561 case 'c':
5562 /* We can get here from a .vtable_inherit due to our
5563 CONSTANT_ADDRESS_P rejecting perfectly good constant
5564 addresses. */
5565 break;
5566 default:
5567 gcc_unreachable ();
5569 if (GET_CODE (x) == REG)
5571 fputs (reg_names [REGNO (x)], file);
5572 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5574 fputs ("R", file);
5575 return;
5577 if (FP_REG_P (x)
5578 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5579 && (REGNO (x) & 1) == 0)
5580 fputs ("L", file);
5582 else if (GET_CODE (x) == MEM)
5584 int size = GET_MODE_SIZE (GET_MODE (x));
5585 rtx base = NULL_RTX;
5586 switch (GET_CODE (XEXP (x, 0)))
5588 case PRE_DEC:
5589 case POST_DEC:
5590 base = XEXP (XEXP (x, 0), 0);
5591 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5592 break;
5593 case PRE_INC:
5594 case POST_INC:
5595 base = XEXP (XEXP (x, 0), 0);
5596 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5597 break;
5598 case PLUS:
5599 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5600 fprintf (file, "%s(%s)",
5601 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5602 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5603 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5604 fprintf (file, "%s(%s)",
5605 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5606 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5607 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5608 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5610 /* Because the REG_POINTER flag can get lost during reload,
5611 pa_legitimate_address_p canonicalizes the order of the
5612 index and base registers in the combined move patterns. */
5613 rtx base = XEXP (XEXP (x, 0), 1);
5614 rtx index = XEXP (XEXP (x, 0), 0);
5616 fprintf (file, "%s(%s)",
5617 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5619 else
5620 output_address (GET_MODE (x), XEXP (x, 0));
5621 break;
5622 default:
5623 output_address (GET_MODE (x), XEXP (x, 0));
5624 break;
5627 else
5628 output_addr_const (file, x);
5631 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5633 void
5634 pa_output_global_address (FILE *file, rtx x, int round_constant)
5637 /* Imagine (high (const (plus ...))). */
5638 if (GET_CODE (x) == HIGH)
5639 x = XEXP (x, 0);
5641 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5642 output_addr_const (file, x);
5643 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5645 output_addr_const (file, x);
5646 fputs ("-$global$", file);
5648 else if (GET_CODE (x) == CONST)
5650 const char *sep = "";
5651 int offset = 0; /* assembler wants -$global$ at end */
5652 rtx base = NULL_RTX;
5654 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5656 case LABEL_REF:
5657 case SYMBOL_REF:
5658 base = XEXP (XEXP (x, 0), 0);
5659 output_addr_const (file, base);
5660 break;
5661 case CONST_INT:
5662 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5663 break;
5664 default:
5665 gcc_unreachable ();
5668 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5670 case LABEL_REF:
5671 case SYMBOL_REF:
5672 base = XEXP (XEXP (x, 0), 1);
5673 output_addr_const (file, base);
5674 break;
5675 case CONST_INT:
5676 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5677 break;
5678 default:
5679 gcc_unreachable ();
5682 /* How bogus. The compiler is apparently responsible for
5683 rounding the constant if it uses an LR field selector.
5685 The linker and/or assembler seem a better place since
5686 they have to do this kind of thing already.
5688 If we fail to do this, HP's optimizing linker may eliminate
5689 an addil, but not update the ldw/stw/ldo instruction that
5690 uses the result of the addil. */
5691 if (round_constant)
5692 offset = ((offset + 0x1000) & ~0x1fff);
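/* E.g., an offset of 0x1234 becomes (0x2234 & ~0x1fff) == 0x2000;
   that is, the constant is rounded to the nearest 8k multiple
   (illustrative arithmetic only).  */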
5694 switch (GET_CODE (XEXP (x, 0)))
5696 case PLUS:
5697 if (offset < 0)
5699 offset = -offset;
5700 sep = "-";
5702 else
5703 sep = "+";
5704 break;
5706 case MINUS:
5707 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5708 sep = "-";
5709 break;
5711 default:
5712 gcc_unreachable ();
5715 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5716 fputs ("-$global$", file);
5717 if (offset)
5718 fprintf (file, "%s%d", sep, offset);
5720 else
5721 output_addr_const (file, x);
5724 /* Output boilerplate text to appear at the beginning of the file.
5725 There are several possible versions. */
5726 #define aputs(x) fputs(x, asm_out_file)
5727 static inline void
5728 pa_file_start_level (void)
5730 if (TARGET_64BIT)
5731 aputs ("\t.LEVEL 2.0w\n");
5732 else if (TARGET_PA_20)
5733 aputs ("\t.LEVEL 2.0\n");
5734 else if (TARGET_PA_11)
5735 aputs ("\t.LEVEL 1.1\n");
5736 else
5737 aputs ("\t.LEVEL 1.0\n");
5740 static inline void
5741 pa_file_start_space (int sortspace)
5743 aputs ("\t.SPACE $PRIVATE$");
5744 if (sortspace)
5745 aputs (",SORT=16");
5746 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5747 if (flag_tm)
5748 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5749 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5750 "\n\t.SPACE $TEXT$");
5751 if (sortspace)
5752 aputs (",SORT=8");
5753 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5754 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5757 static inline void
5758 pa_file_start_file (int want_version)
5760 if (write_symbols != NO_DEBUG)
5762 output_file_directive (asm_out_file, main_input_filename);
5763 if (want_version)
5764 aputs ("\t.version\t\"01.01\"\n");
5768 static inline void
5769 pa_file_start_mcount (const char *aswhat)
5771 if (profile_flag)
5772 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5775 static void
5776 pa_elf_file_start (void)
5778 pa_file_start_level ();
5779 pa_file_start_mcount ("ENTRY");
5780 pa_file_start_file (0);
5783 static void
5784 pa_som_file_start (void)
5786 pa_file_start_level ();
5787 pa_file_start_space (0);
5788 aputs ("\t.IMPORT $global$,DATA\n"
5789 "\t.IMPORT $$dyncall,MILLICODE\n");
5790 pa_file_start_mcount ("CODE");
5791 pa_file_start_file (0);
5794 static void
5795 pa_linux_file_start (void)
5797 pa_file_start_file (1);
5798 pa_file_start_level ();
5799 pa_file_start_mcount ("CODE");
5802 static void
5803 pa_hpux64_gas_file_start (void)
5805 pa_file_start_level ();
5806 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5807 if (profile_flag)
5808 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5809 #endif
5810 pa_file_start_file (1);
5813 static void
5814 pa_hpux64_hpas_file_start (void)
5816 pa_file_start_level ();
5817 pa_file_start_space (1);
5818 pa_file_start_mcount ("CODE");
5819 pa_file_start_file (0);
5821 #undef aputs
5823 /* Search the deferred plabel list for SYMBOL and return its internal
5824 label. If an entry for SYMBOL is not found, a new entry is created. */
5826 rtx
5827 pa_get_deferred_plabel (rtx symbol)
5829 const char *fname = XSTR (symbol, 0);
5830 size_t i;
5832 /* See if we have already put this function on the list of deferred
5833 plabels. This list is generally small, so a linear search is not
5834 too ugly. If it proves too slow, replace it with something faster. */
5835 for (i = 0; i < n_deferred_plabels; i++)
5836 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5837 break;
5839 /* If the deferred plabel list is empty, or this entry was not found
5840 on the list, create a new entry on the list. */
5841 if (deferred_plabels == NULL || i == n_deferred_plabels)
5843 tree id;
5845 if (deferred_plabels == 0)
5846 deferred_plabels = ggc_alloc<deferred_plabel> ();
5847 else
5848 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5849 deferred_plabels,
5850 n_deferred_plabels + 1);
5852 i = n_deferred_plabels++;
5853 deferred_plabels[i].internal_label = gen_label_rtx ();
5854 deferred_plabels[i].symbol = symbol;
5856 /* Gross. We have just implicitly taken the address of this
5857 function. Mark it in the same manner as assemble_name. */
5858 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5859 if (id)
5860 mark_referenced (id);
5863 return deferred_plabels[i].internal_label;
5866 static void
5867 output_deferred_plabels (void)
5869 size_t i;
5871 /* If we have some deferred plabels, then we need to switch into the
5872 data or readonly data section, and align it to a 4 byte boundary
5873 (8 bytes in the 64-bit runtime) before outputting the deferred plabels. */
5874 if (n_deferred_plabels)
5876 switch_to_section (flag_pic ? data_section : readonly_data_section);
5877 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5880 /* Now output the deferred plabels. */
5881 for (i = 0; i < n_deferred_plabels; i++)
5883 targetm.asm_out.internal_label (asm_out_file, "L",
5884 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5885 assemble_integer (deferred_plabels[i].symbol,
5886 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
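/* A sketch of the output (label name hypothetical; the exact
   directive spelling depends on the assembler): each deferred plabel
   becomes an internal label followed by a pointer-sized word holding
   the function symbol, roughly
	L$0042:
		.word	function_symbol  */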
5890 /* Initialize optabs to point to emulation routines. */
5892 static void
5893 pa_init_libfuncs (void)
5895 if (HPUX_LONG_DOUBLE_LIBRARY)
5897 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5898 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5899 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5900 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5901 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5902 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5903 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5904 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5905 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5907 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5908 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5909 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5910 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5911 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5912 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5913 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5915 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5916 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5917 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5918 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5920 set_conv_libfunc (sfix_optab, SImode, TFmode,
5921 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5922 : "_U_Qfcnvfxt_quad_to_sgl");
5923 set_conv_libfunc (sfix_optab, DImode, TFmode,
5924 "_U_Qfcnvfxt_quad_to_dbl");
5925 set_conv_libfunc (ufix_optab, SImode, TFmode,
5926 "_U_Qfcnvfxt_quad_to_usgl");
5927 set_conv_libfunc (ufix_optab, DImode, TFmode,
5928 "_U_Qfcnvfxt_quad_to_udbl");
5930 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5931 "_U_Qfcnvxf_sgl_to_quad");
5932 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5933 "_U_Qfcnvxf_dbl_to_quad");
5934 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5935 "_U_Qfcnvxf_usgl_to_quad");
5936 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5937 "_U_Qfcnvxf_udbl_to_quad");
5940 if (TARGET_SYNC_LIBCALL)
5941 init_sync_libfuncs (8);
5944 /* HP's millicode routines mean something special to the assembler.
5945 Keep track of which ones we have used. */
5947 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5948 static void import_milli (enum millicodes);
5949 static char imported[(int) end1000];
5950 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5951 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5952 #define MILLI_START 10
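/* For example, the four dots in IMPORT_STRING begin at offset
   MILLI_START (10), so importing mulI patches the template into
   ".IMPORT $$mulI,MILLICODE" before it is output.  */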
5954 static void
5955 import_milli (enum millicodes code)
5957 char str[sizeof (import_string)];
5959 if (!imported[(int) code])
5961 imported[(int) code] = 1;
5962 strcpy (str, import_string);
5963 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5964 output_asm_insn (str, 0);
5968 /* The register constraints have put the operands and return value in
5969 the proper registers. */
5971 const char *
5972 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5974 import_milli (mulI);
5975 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5978 /* Emit the rtl for doing a division by a constant. */
5980 /* Do magic division millicodes exist for this value? */
5981 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
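/* The nonzero entries above correspond to divisors 3, 5, 6, 7, 9,
   10, 12, 14 and 15; pa_output_div_insn emits a call to the matching
   $$divI_<n> or $$divU_<n> millicode routine for these values.  */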
5983 /* We'll use an array to keep track of the magic millicodes and
5984 whether or not we've used them already. [n][0] is signed, [n][1] is
5985 unsigned. */
5987 static int div_milli[16][2];
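/* A sketch of the millicode convention assumed below: the dividend
   is passed in %r26, the quotient is returned in %r29, and the
   millicode return pointer lives in %r31 (%r2 in the 64-bit
   runtime), which is why those registers appear as clobbers in the
   PARALLEL built by pa_emit_hpdiv_const.  */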
5989 int
5990 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5992 if (GET_CODE (operands[2]) == CONST_INT
5993 && INTVAL (operands[2]) > 0
5994 && INTVAL (operands[2]) < 16
5995 && pa_magic_milli[INTVAL (operands[2])])
5997 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5999 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6000 emit
6001 (gen_rtx_PARALLEL
6002 (VOIDmode,
6003 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6004 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6005 SImode,
6006 gen_rtx_REG (SImode, 26),
6007 operands[2])),
6008 gen_rtx_CLOBBER (VOIDmode, operands[4]),
6009 gen_rtx_CLOBBER (VOIDmode, operands[3]),
6010 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6011 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6012 gen_rtx_CLOBBER (VOIDmode, ret))));
6013 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6014 return 1;
6016 return 0;
6019 const char *
6020 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6022 int divisor;
6024 /* If the divisor is a constant, try to use one of the special
6025 opcodes. */
6026 if (GET_CODE (operands[0]) == CONST_INT)
6028 static char buf[100];
6029 divisor = INTVAL (operands[0]);
6030 if (!div_milli[divisor][unsignedp])
6032 div_milli[divisor][unsignedp] = 1;
6033 if (unsignedp)
6034 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6035 else
6036 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6038 if (unsignedp)
6040 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6041 INTVAL (operands[0]));
6042 return pa_output_millicode_call (insn,
6043 gen_rtx_SYMBOL_REF (SImode, buf));
6045 else
6047 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6048 INTVAL (operands[0]));
6049 return pa_output_millicode_call (insn,
6050 gen_rtx_SYMBOL_REF (SImode, buf));
6053 /* Divisor isn't a special constant. */
6054 else
6056 if (unsignedp)
6058 import_milli (divU);
6059 return pa_output_millicode_call (insn,
6060 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6062 else
6064 import_milli (divI);
6065 return pa_output_millicode_call (insn,
6066 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6071 /* Output a $$rem millicode to do mod. */
6073 const char *
6074 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6076 if (unsignedp)
6078 import_milli (remU);
6079 return pa_output_millicode_call (insn,
6080 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6082 else
6084 import_milli (remI);
6085 return pa_output_millicode_call (insn,
6086 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6090 void
6091 pa_output_arg_descriptor (rtx_insn *call_insn)
6093 const char *arg_regs[4];
6094 machine_mode arg_mode;
6095 rtx link;
6096 int i, output_flag = 0;
6097 int regno;
6099 /* We neither need nor want argument location descriptors for the
6100 64bit runtime environment or the ELF32 environment. */
6101 if (TARGET_64BIT || TARGET_ELF32)
6102 return;
6104 for (i = 0; i < 4; i++)
6105 arg_regs[i] = 0;
6107 /* Specify explicitly that no argument relocations should take place
6108 if using the portable runtime calling conventions. */
6109 if (TARGET_PORTABLE_RUNTIME)
6111 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6112 asm_out_file);
6113 return;
6116 gcc_assert (CALL_P (call_insn));
6117 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6118 link; link = XEXP (link, 1))
6120 rtx use = XEXP (link, 0);
6122 if (! (GET_CODE (use) == USE
6123 && GET_CODE (XEXP (use, 0)) == REG
6124 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6125 continue;
6127 arg_mode = GET_MODE (XEXP (use, 0));
6128 regno = REGNO (XEXP (use, 0));
6129 if (regno >= 23 && regno <= 26)
6131 arg_regs[26 - regno] = "GR";
6132 if (arg_mode == DImode)
6133 arg_regs[25 - regno] = "GR";
6135 else if (regno >= 32 && regno <= 39)
6137 if (arg_mode == SFmode)
6138 arg_regs[(regno - 32) / 2] = "FR";
6139 else
6141 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6142 arg_regs[(regno - 34) / 2] = "FR";
6143 arg_regs[(regno - 34) / 2 + 1] = "FU";
6144 #else
6145 arg_regs[(regno - 34) / 2] = "FU";
6146 arg_regs[(regno - 34) / 2 + 1] = "FR";
6147 #endif
6151 fputs ("\t.CALL ", asm_out_file);
6152 for (i = 0; i < 4; i++)
6154 if (arg_regs[i])
6156 if (output_flag++)
6157 fputc (',', asm_out_file);
6158 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6161 fputc ('\n', asm_out_file);
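/* An illustrative (hypothetical) descriptor: an SImode argument in
   %r26 together with a DImode argument starting at %r23 produces
   "\t.CALL ARGW0=GR,ARGW2=GR,ARGW3=GR\n"; FP arguments instead mark
   FR/FU word pairs as set up above.  */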
6164 /* Inform reload about cases where moving X with a mode MODE to or from
6165 a register in RCLASS requires an extra scratch or immediate register.
6166 Return the class needed for the immediate register. */
6168 static reg_class_t
6169 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6170 machine_mode mode, secondary_reload_info *sri)
6172 int regno;
6173 enum reg_class rclass = (enum reg_class) rclass_i;
6175 /* Handle the easy stuff first. */
6176 if (rclass == R1_REGS)
6177 return NO_REGS;
6179 if (REG_P (x))
6181 regno = REGNO (x);
6182 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6183 return NO_REGS;
6185 else
6186 regno = -1;
6188 /* If we have something like (mem (mem (...)), we can safely assume the
6189 inner MEM will end up in a general register after reloading, so there's
6190 no need for a secondary reload. */
6191 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6192 return NO_REGS;
6194 /* Trying to load a constant into a FP register during PIC code
6195 generation requires %r1 as a scratch register. For float modes,
6196 the only legitimate constant is CONST0_RTX. However, there are
6197 a few patterns that accept constant double operands. */
6198 if (flag_pic
6199 && FP_REG_CLASS_P (rclass)
6200 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6202 switch (mode)
6204 case E_SImode:
6205 sri->icode = CODE_FOR_reload_insi_r1;
6206 break;
6208 case E_DImode:
6209 sri->icode = CODE_FOR_reload_indi_r1;
6210 break;
6212 case E_SFmode:
6213 sri->icode = CODE_FOR_reload_insf_r1;
6214 break;
6216 case E_DFmode:
6217 sri->icode = CODE_FOR_reload_indf_r1;
6218 break;
6220 default:
6221 gcc_unreachable ();
6223 return NO_REGS;
6226 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6227 register when we're generating PIC code or when the operand isn't
6228 readonly. */
6229 if (pa_symbolic_expression_p (x))
6231 if (GET_CODE (x) == HIGH)
6232 x = XEXP (x, 0);
6234 if (flag_pic || !read_only_operand (x, VOIDmode))
6236 switch (mode)
6238 case E_SImode:
6239 sri->icode = CODE_FOR_reload_insi_r1;
6240 break;
6242 case E_DImode:
6243 sri->icode = CODE_FOR_reload_indi_r1;
6244 break;
6246 default:
6247 gcc_unreachable ();
6249 return NO_REGS;
6253 /* Profiling showed the PA port spends about 1.3% of its compilation
6254 time in true_regnum from calls inside pa_secondary_reload_class. */
6255 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6256 regno = true_regnum (x);
6258 /* Handle reloads for floating point loads and stores. */
6259 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6260 && FP_REG_CLASS_P (rclass))
6262 if (MEM_P (x))
6264 x = XEXP (x, 0);
6266 /* We don't need a secondary reload for indexed memory addresses.
6268 When INT14_OK_STRICT is true, it might appear that we could
6269 directly allow register indirect memory addresses. However,
6270 this doesn't work because we don't support SUBREGs in
6271 floating-point register copies and reload doesn't tell us
6272 when it's going to use a SUBREG. */
6273 if (IS_INDEX_ADDR_P (x))
6274 return NO_REGS;
6277 /* Request a secondary reload with a general scratch register
6278 for everything else. ??? Could symbolic operands be handled
6279 directly when generating non-pic PA 2.0 code? */
6280 sri->icode = (in_p
6281 ? direct_optab_handler (reload_in_optab, mode)
6282 : direct_optab_handler (reload_out_optab, mode));
6283 return NO_REGS;
6286 /* A SAR<->FP register copy requires an intermediate general register
6287 and secondary memory. We need a secondary reload with a general
6288 scratch register for spills. */
6289 if (rclass == SHIFT_REGS)
6291 /* Handle spill. */
6292 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6294 sri->icode = (in_p
6295 ? direct_optab_handler (reload_in_optab, mode)
6296 : direct_optab_handler (reload_out_optab, mode));
6297 return NO_REGS;
6300 /* Handle FP copy. */
6301 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6302 return GENERAL_REGS;
6305 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6306 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6307 && FP_REG_CLASS_P (rclass))
6308 return GENERAL_REGS;
6310 return NO_REGS;
6313 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6315 static bool
6316 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6317 reg_class_t class1 ATTRIBUTE_UNUSED,
6318 reg_class_t class2 ATTRIBUTE_UNUSED)
6320 #ifdef PA_SECONDARY_MEMORY_NEEDED
6321 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6322 #else
6323 return false;
6324 #endif
6327 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6328 is only marked as live on entry by df-scan when it is a fixed
6329 register. It isn't a fixed register in the 64-bit runtime,
6330 so we need to mark it here. */
6332 static void
6333 pa_extra_live_on_entry (bitmap regs)
6335 if (TARGET_64BIT)
6336 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6339 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6340 to prevent it from being deleted. */
6342 rtx
6343 pa_eh_return_handler_rtx (void)
6345 rtx tmp;
6347 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6348 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6349 tmp = gen_rtx_MEM (word_mode, tmp);
6350 tmp->volatil = 1;
6351 return tmp;
6354 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6355 by invisible reference. As a GCC extension, we also pass anything
6356 with a zero or variable size by reference.
6358 The 64-bit runtime does not describe passing any types by invisible
6359 reference. The internals of GCC can't currently handle passing
6360 empty structures, and zero or variable length arrays when they are
6361 not passed entirely on the stack or by reference. Thus, as a GCC
6362 extension, we pass these types by reference. The HP compiler doesn't
6363 support these types, so hopefully there shouldn't be any compatibility
6364 issues. This may have to be revisited when HP releases a C99 compiler
6365 or updates the ABI. */
6367 static bool
6368 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6370 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6371 if (TARGET_64BIT)
6372 return size <= 0;
6373 else
6374 return size <= 0 || size > 8;
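/* For example, in the 32-bit runtime an int (size 4) is passed by
   value while a 12 byte struct (size > 8) is passed by invisible
   reference; in the 64-bit runtime only zero and variable sized
   types (size <= 0 above) are passed by reference.  */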
6377 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6379 static pad_direction
6380 pa_function_arg_padding (machine_mode mode, const_tree type)
6382 if (mode == BLKmode
6383 || (TARGET_64BIT
6384 && type
6385 && (AGGREGATE_TYPE_P (type)
6386 || TREE_CODE (type) == COMPLEX_TYPE
6387 || TREE_CODE (type) == VECTOR_TYPE)))
6389 /* Return PAD_NONE if justification is not required. */
6390 if (type
6391 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6392 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6393 return PAD_NONE;
6395 /* The directions set here are ignored when a BLKmode argument larger
6396 than a word is placed in a register. Different code is used for
6397 the stack and registers. This makes it difficult to have a
6398 consistent data representation for both the stack and registers.
6399 For both runtimes, the justification and padding for arguments on
6400 the stack and in registers should be identical. */
6401 if (TARGET_64BIT)
6402 /* The 64-bit runtime specifies left justification for aggregates. */
6403 return PAD_UPWARD;
6404 else
6405 /* The 32-bit runtime architecture specifies right justification.
6406 When the argument is passed on the stack, the argument is padded
6407 with garbage on the left. The HP compiler pads with zeros. */
6408 return PAD_DOWNWARD;
6411 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6412 return PAD_DOWNWARD;
6413 else
6414 return PAD_NONE;
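/* For example, a 3 byte BLKmode aggregate is right justified
   (PAD_DOWNWARD) in its word in the 32-bit runtime, but left
   justified (PAD_UPWARD) in the 64-bit runtime.  */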
6418 /* Do what is necessary for `va_start'. We look at the current function
6419 to determine if stdargs or varargs is used and fill in an initial
6420 va_list. A pointer to this constructor is returned. */
6422 static rtx
6423 hppa_builtin_saveregs (void)
6425 rtx offset, dest;
6426 tree fntype = TREE_TYPE (current_function_decl);
6427 int argadj = ((!stdarg_p (fntype))
6428 ? UNITS_PER_WORD : 0);
6430 if (argadj)
6431 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6432 else
6433 offset = crtl->args.arg_offset_rtx;
6435 if (TARGET_64BIT)
6437 int i, off;
6439 /* Adjust for varargs/stdarg differences. */
6440 if (argadj)
6441 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6442 else
6443 offset = crtl->args.arg_offset_rtx;
6445 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6446 from the incoming arg pointer and growing to larger addresses. */
6447 for (i = 26, off = -64; i >= 19; i--, off += 8)
6448 emit_move_insn (gen_rtx_MEM (word_mode,
6449 plus_constant (Pmode,
6450 arg_pointer_rtx, off)),
6451 gen_rtx_REG (word_mode, i));
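/* Thus the loop above stores %r26 at -64, %r25 at -56, and so on
   through %r19 at -8, relative to the incoming arg pointer.  */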
6453 /* The incoming args pointer points just beyond the flushback area;
6454 normally this is not a serious concern. However, when we are doing
6455 varargs/stdargs we want to make the arg pointer point to the start
6456 of the incoming argument area. */
6457 emit_move_insn (virtual_incoming_args_rtx,
6458 plus_constant (Pmode, arg_pointer_rtx, -64));
6460 /* Now return a pointer to the first anonymous argument. */
6461 return copy_to_reg (expand_binop (Pmode, add_optab,
6462 virtual_incoming_args_rtx,
6463 offset, 0, 0, OPTAB_LIB_WIDEN));
6466 /* Store general registers on the stack. */
6467 dest = gen_rtx_MEM (BLKmode,
6468 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6469 -16));
6470 set_mem_alias_set (dest, get_varargs_alias_set ());
6471 set_mem_align (dest, BITS_PER_WORD);
6472 move_block_from_reg (23, dest, 4);
6474 /* move_block_from_reg will emit code to store the argument registers
6475 individually as scalar stores.
6477 However, other insns may later load from the same addresses for
6478 a structure load (passing a struct to a varargs routine).
6480 The alias code assumes that such aliasing can never happen, so we
6481 have to keep memory referencing insns from moving up beyond the
6482 last argument register store. So we emit a blockage insn here. */
6483 emit_insn (gen_blockage ());
6485 return copy_to_reg (expand_binop (Pmode, add_optab,
6486 crtl->args.internal_arg_pointer,
6487 offset, 0, 0, OPTAB_LIB_WIDEN));
6490 static void
6491 hppa_va_start (tree valist, rtx nextarg)
6493 nextarg = expand_builtin_saveregs ();
6494 std_expand_builtin_va_start (valist, nextarg);
6497 static tree
6498 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6499 gimple_seq *post_p)
6501 if (TARGET_64BIT)
6503 /* Args grow upward. We can use the generic routines. */
6504 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6506 else /* !TARGET_64BIT */
6508 tree ptr = build_pointer_type (type);
6509 tree valist_type;
6510 tree t, u;
6511 unsigned int size, ofs;
6512 bool indirect;
6514 indirect = pass_va_arg_by_reference (type);
6515 if (indirect)
6517 type = ptr;
6518 ptr = build_pointer_type (type);
6520 size = int_size_in_bytes (type);
6521 valist_type = TREE_TYPE (valist);
6523 /* Args grow down. Not handled by generic routines. */
6525 u = fold_convert (sizetype, size_in_bytes (type));
6526 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6527 t = fold_build_pointer_plus (valist, u);
6529 /* Align to 4 or 8 byte boundary depending on argument size. */
6531 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6532 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6533 t = fold_convert (valist_type, t);
6535 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6537 ofs = (8 - size) % 4;
6538 if (ofs != 0)
6539 t = fold_build_pointer_plus_hwi (t, ofs);
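/* For example, a 1 byte argument gives ofs = (8 - 1) % 4 = 3,
   stepping the pointer past the pad bytes to the right-justified
   value in its word slot; 4 and 8 byte arguments give ofs = 0.  */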
6541 t = fold_convert (ptr, t);
6542 t = build_va_arg_indirect_ref (t);
6544 if (indirect)
6545 t = build_va_arg_indirect_ref (t);
6547 return t;
6551 /* True if MODE is valid for the target. By "valid", we mean able to
6552 be manipulated in non-trivial ways. In particular, this means all
6553 the arithmetic is supported.
6555 Currently, TImode is not valid as the HP 64-bit runtime documentation
6556 doesn't document the alignment and calling conventions for this type.
6557 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6558 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6560 static bool
6561 pa_scalar_mode_supported_p (scalar_mode mode)
6563 int precision = GET_MODE_PRECISION (mode);
6565 switch (GET_MODE_CLASS (mode))
6567 case MODE_PARTIAL_INT:
6568 case MODE_INT:
6569 if (precision == CHAR_TYPE_SIZE)
6570 return true;
6571 if (precision == SHORT_TYPE_SIZE)
6572 return true;
6573 if (precision == INT_TYPE_SIZE)
6574 return true;
6575 if (precision == LONG_TYPE_SIZE)
6576 return true;
6577 if (precision == LONG_LONG_TYPE_SIZE)
6578 return true;
6579 return false;
6581 case MODE_FLOAT:
6582 if (precision == FLOAT_TYPE_SIZE)
6583 return true;
6584 if (precision == DOUBLE_TYPE_SIZE)
6585 return true;
6586 if (precision == LONG_DOUBLE_TYPE_SIZE)
6587 return true;
6588 return false;
6590 case MODE_DECIMAL_FLOAT:
6591 return false;
6593 default:
6594 gcc_unreachable ();
6598 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6599 it branches into the delay slot. Otherwise, return FALSE. */
6601 static bool
6602 branch_to_delay_slot_p (rtx_insn *insn)
6604 rtx_insn *jump_insn;
6606 if (dbr_sequence_length ())
6607 return FALSE;
6609 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6610 while (insn)
6612 insn = next_active_insn (insn);
6613 if (jump_insn == insn)
6614 return TRUE;
6616 /* We can't rely on the length of asms. So, we return FALSE when
6617 the branch is followed by an asm. */
6618 if (!insn
6619 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6620 || asm_noperands (PATTERN (insn)) >= 0
6621 || get_attr_length (insn) > 0)
6622 break;
6625 return FALSE;
6628 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6630 This occurs when INSN has an unfilled delay slot and is followed
6631 by an asm. Disaster can occur if the asm is empty and the jump
6632 branches into the delay slot. So, we add a nop in the delay slot
6633 when this occurs. */
6635 static bool
6636 branch_needs_nop_p (rtx_insn *insn)
6638 rtx_insn *jump_insn;
6640 if (dbr_sequence_length ())
6641 return FALSE;
6643 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6644 while (insn)
6646 insn = next_active_insn (insn);
6647 if (!insn || jump_insn == insn)
6648 return TRUE;
6650 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6651 || asm_noperands (PATTERN (insn)) >= 0)
6652 && get_attr_length (insn) > 0)
6653 break;
6656 return FALSE;
6659 /* Return TRUE if INSN, a forward jump insn, can use nullification
6660 to skip the following instruction. This avoids an extra cycle due
6661 to a mis-predicted branch when we fall through. */
6663 static bool
6664 use_skip_p (rtx_insn *insn)
6666 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6668 while (insn)
6670 insn = next_active_insn (insn);
6672 /* We can't rely on the length of asms, so we can't skip asms. */
6673 if (!insn
6674 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6675 || asm_noperands (PATTERN (insn)) >= 0)
6676 break;
6677 if (get_attr_length (insn) == 4
6678 && jump_insn == next_active_insn (insn))
6679 return TRUE;
6680 if (get_attr_length (insn) > 0)
6681 break;
6684 return FALSE;
6687 /* This routine handles all the normal conditional branch sequences we
6688 might need to generate. It handles compare immediate vs compare
6689 register, nullification of delay slots, varying length branches,
6690 negated branches, and all combinations of the above. It returns the
6691 output appropriate to emit the branch corresponding to all given
6692 parameters. */
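/* A note on the templates below: constructs such as
   "{com%I2b,|cmp%I2b,}" use the assembler dialect braces, selecting
   the PA 1.x "comb" spelling or the PA 2.0 "cmpb" spelling according
   to the dialect in effect.  */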
6694 const char *
6695 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6697 static char buf[100];
6698 bool useskip;
6699 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6700 int length = get_attr_length (insn);
6701 int xdelay;
6703 /* A conditional branch to the following instruction (e.g. the delay slot)
6704 is asking for a disaster. This can happen when not optimizing and
6705 when jump optimization fails.
6707 While it is usually safe to emit nothing, this can fail if the
6708 preceding instruction is a nullified branch with an empty delay
6709 slot and the same branch target as this branch. We could check
6710 for this but jump optimization should eliminate nop jumps. It
6711 is always safe to emit a nop. */
6712 if (branch_to_delay_slot_p (insn))
6713 return "nop";
6715 /* The doubleword form of the cmpib instruction doesn't have the LEU
6716 and GTU conditions while the cmpb instruction does. Since we accept
6717 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6718 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6719 operands[2] = gen_rtx_REG (DImode, 0);
6720 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6721 operands[1] = gen_rtx_REG (DImode, 0);
6723 /* If this is a long branch with its delay slot unfilled, set `nullify'
6724 as it can nullify the delay slot and save a nop. */
6725 if (length == 8 && dbr_sequence_length () == 0)
6726 nullify = 1;
6728 /* If this is a short forward conditional branch which did not get
6729 its delay slot filled, the delay slot can still be nullified. */
6730 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6731 nullify = forward_branch_p (insn);
6733 /* A forward branch over a single nullified insn can be done with a
6734 comclr instruction. This avoids a single cycle penalty due to
6735 mis-predicted branch if we fall through (branch not taken). */
6736 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6738 switch (length)
6740 /* All short conditional branches except backwards with an unfilled
6741 delay slot. */
6742 case 4:
6743 if (useskip)
6744 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6745 else
6746 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6747 if (GET_MODE (operands[1]) == DImode)
6748 strcat (buf, "*");
6749 if (negated)
6750 strcat (buf, "%B3");
6751 else
6752 strcat (buf, "%S3");
6753 if (useskip)
6754 strcat (buf, " %2,%r1,%%r0");
6755 else if (nullify)
6757 if (branch_needs_nop_p (insn))
6758 strcat (buf, ",n %2,%r1,%0%#");
6759 else
6760 strcat (buf, ",n %2,%r1,%0");
6762 else
6763 strcat (buf, " %2,%r1,%0");
6764 break;
6766 /* All long conditionals. Note a short backward branch with an
6767 unfilled delay slot is treated just like a long backward branch
6768 with an unfilled delay slot. */
6769 case 8:
6770 /* Handle weird backwards branch with a filled delay slot
6771 which is nullified. */
6772 if (dbr_sequence_length () != 0
6773 && ! forward_branch_p (insn)
6774 && nullify)
6776 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6777 if (GET_MODE (operands[1]) == DImode)
6778 strcat (buf, "*");
6779 if (negated)
6780 strcat (buf, "%S3");
6781 else
6782 strcat (buf, "%B3");
6783 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6785 /* Handle short backwards branch with an unfilled delay slot.
6786 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6787 taken and untaken branches. */
6788 else if (dbr_sequence_length () == 0
6789 && ! forward_branch_p (insn)
6790 && INSN_ADDRESSES_SET_P ()
6791 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6792 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6794 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6795 if (GET_MODE (operands[1]) == DImode)
6796 strcat (buf, "*");
6797 if (negated)
6798 strcat (buf, "%B3 %2,%r1,%0%#");
6799 else
6800 strcat (buf, "%S3 %2,%r1,%0%#");
6802 else
6804 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6805 if (GET_MODE (operands[1]) == DImode)
6806 strcat (buf, "*");
6807 if (negated)
6808 strcat (buf, "%S3");
6809 else
6810 strcat (buf, "%B3");
6811 if (nullify)
6812 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6813 else
6814 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6816 break;
6818 default:
6819 /* The reversed conditional branch must branch over one additional
6820 instruction if the delay slot is filled and needs to be extracted
6821 by pa_output_lbranch. If the delay slot is empty or this is a
6822 nullified forward branch, the instruction after the reversed
6823 condition branch must be nullified. */
6824 if (dbr_sequence_length () == 0
6825 || (nullify && forward_branch_p (insn)))
6827 nullify = 1;
6828 xdelay = 0;
6829 operands[4] = GEN_INT (length);
6831 else
6833 xdelay = 1;
6834 operands[4] = GEN_INT (length + 4);
6837 /* Create a reversed conditional branch which branches around
6838 the following insns. */
6839 if (GET_MODE (operands[1]) != DImode)
6841 if (nullify)
6843 if (negated)
6844 strcpy (buf,
6845 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6846 else
6847 strcpy (buf,
6848 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6850 else
6852 if (negated)
6853 strcpy (buf,
6854 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6855 else
6856 strcpy (buf,
6857 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6860 else
6862 if (nullify)
6864 if (negated)
6865 strcpy (buf,
6866 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6867 else
6868 strcpy (buf,
6869 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6871 else
6873 if (negated)
6874 strcpy (buf,
6875 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6876 else
6877 strcpy (buf,
6878 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6882 output_asm_insn (buf, operands);
6883 return pa_output_lbranch (operands[0], insn, xdelay);
6885 return buf;
6888 /* Output a PIC pc-relative instruction sequence to load the address of
6889 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6890 or a code label. OPERANDS[1] specifies the register to use to load
6891 the program counter. OPERANDS[3] may be used for label generation.
6892 The sequence is always three instructions in length. The program
6893 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6894 Register %r1 is clobbered. */
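/* An illustrative expansion ("sym" standing in for OPERANDS[0], %r1
   for OPERANDS[1] and %r2 for OPERANDS[2]) of the PA 2.0 GAS variant
   below:
	mfia %r1
	addil L'sym-$PIC_pcrel$0+12,%r1
	ldo R'sym-$PIC_pcrel$0+16(%r1),%r2  */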
6896 static void
6897 pa_output_pic_pcrel_sequence (rtx *operands)
6899 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6900 if (TARGET_PA_20)
6902 /* We can use mfia to determine the current program counter. */
6903 if (TARGET_SOM || !TARGET_GAS)
6905 operands[3] = gen_label_rtx ();
6906 targetm.asm_out.internal_label (asm_out_file, "L",
6907 CODE_LABEL_NUMBER (operands[3]));
6908 output_asm_insn ("mfia %1", operands);
6909 output_asm_insn ("addil L'%0-%l3,%1", operands);
6910 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6912 else
6914 output_asm_insn ("mfia %1", operands);
6915 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6916 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6919 else
6921 /* We need to use a branch to determine the current program counter. */
6922 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6923 if (TARGET_SOM || !TARGET_GAS)
6925 operands[3] = gen_label_rtx ();
6926 output_asm_insn ("addil L'%0-%l3,%1", operands);
6927 targetm.asm_out.internal_label (asm_out_file, "L",
6928 CODE_LABEL_NUMBER (operands[3]));
6929 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6931 else
6933 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6934 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6939 /* This routine handles output of long unconditional branches that
6940 exceed the maximum range of a simple branch instruction. Since
6941 we don't have a register available for the branch, we save register
6942 %r1 in the frame marker, load the branch destination DEST into %r1,
6943 execute the branch, and restore %r1 in the delay slot of the branch.
6945 Since long branches may have an insn in the delay slot and the
6946 delay slot is used to restore %r1, we in general need to extract
6947 this insn and execute it before the branch. However, to facilitate
6948 use of this function by conditional branches, we also provide an
6949 option to not extract the delay insn so that it will be emitted
6950 after the long branch. So, if there is an insn in the delay slot,
6951 it is extracted if XDELAY is nonzero.
6953 The lengths of the various long-branch sequences are 20, 16 and 24
6954 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
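/* A sketch of the 16 byte non-PIC sequence when no frame is needed
   ("dest" standing in for the branch target):
	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)	; the long branch proper
	ldw -20(%r30),%r1	; restore %r1 in the delay slot  */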
6956 const char *
6957 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6959 rtx xoperands[4];
6961 xoperands[0] = dest;
6963 /* First, free up the delay slot. */
6964 if (xdelay && dbr_sequence_length () != 0)
6966 /* We can't handle a jump in the delay slot. */
6967 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6969 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6970 optimize, 0, NULL);
6972 /* Now delete the delay insn. */
6973 SET_INSN_DELETED (NEXT_INSN (insn));
6976 /* Output an insn to save %r1. The runtime documentation doesn't
6977 specify whether the "Clean Up" slot in the caller's frame can
6978 be clobbered by the callee. It isn't copied by HP's builtin
6979 alloca, so this suggests that it can be clobbered if necessary.
6980 The "Static Link" location is copied by HP builtin alloca, so
6981 we avoid using it. Using the cleanup slot might be a problem
6982 if we have to interoperate with languages that pass cleanup
6983 information. However, it should be possible to handle these
6984 situations with GCC's asm feature.
6986 The "Current RP" slot is reserved for the called procedure, so
6987 we try to use it when we don't have a frame of our own. It's
6988 rather unlikely that we won't have a frame when we need to emit
6989 a very long branch.
6991 Really the way to go long term is a register scavenger; go to
6992 the target of the jump and find a register which we can use
6993 as a scratch to hold the value in %r1. Then, we wouldn't have
6994 to free up the delay slot or clobber a slot that may be needed
6995 for other purposes. */
6996 if (TARGET_64BIT)
6998 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6999 /* Use the return pointer slot in the frame marker. */
7000 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7001 else
7002 /* Use the slot at -40 in the frame marker since HP builtin
7003 alloca doesn't copy it. */
7004 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7006 else
7008 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7009 /* Use the return pointer slot in the frame marker. */
7010 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7011 else
7012 /* Use the "Clean Up" slot in the frame marker. In GCC,
7013 the only other use of this location is for copying a
7014 floating point double argument from a floating-point
7015 register to two general registers. The copy is done
7016 as an "atomic" operation when outputting a call, so it
7017 won't interfere with our using the location here. */
7018 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7021 if (TARGET_PORTABLE_RUNTIME)
7023 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7024 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7025 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7027 else if (flag_pic)
7029 xoperands[1] = gen_rtx_REG (Pmode, 1);
7030 xoperands[2] = xoperands[1];
7031 pa_output_pic_pcrel_sequence (xoperands);
7032 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7034 else
7035 /* Now output a very long branch to the original target. */
7036 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7038 /* Now restore the value of %r1 in the delay slot. */
7039 if (TARGET_64BIT)
7041 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7042 return "ldd -16(%%r30),%%r1";
7043 else
7044 return "ldd -40(%%r30),%%r1";
7046 else
7048 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7049 return "ldw -20(%%r30),%%r1";
7050 else
7051 return "ldw -12(%%r30),%%r1";
7055 /* This routine handles all the branch-on-bit conditional branch sequences we
7056 might need to generate. It handles nullification of delay slots,
7057 varying length branches, negated branches and all combinations of the
7058 above. It returns the appropriate output template to emit the branch. */
7060 const char *
7061 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7063 static char buf[100];
7064 bool useskip;
7065 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7066 int length = get_attr_length (insn);
7067 int xdelay;
7069 /* A conditional branch to the following instruction (e.g. the delay slot) is
7070 asking for a disaster. I do not think this can happen as this pattern
7071 is only used when optimizing; jump optimization should eliminate the
7072 jump. But be prepared just in case. */
7074 if (branch_to_delay_slot_p (insn))
7075 return "nop";
7077 /* If this is a long branch with its delay slot unfilled, set `nullify'
7078 as it can nullify the delay slot and save a nop. */
7079 if (length == 8 && dbr_sequence_length () == 0)
7080 nullify = 1;
7082 /* If this is a short forward conditional branch which did not get
7083 its delay slot filled, the delay slot can still be nullified. */
7084 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7085 nullify = forward_branch_p (insn);
7087 /* A forward branch over a single nullified insn can be done with an
7088 extrs instruction. This avoids a single cycle penalty due to
7089 mis-predicted branch if we fall through (branch not taken). */
7090 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7092 switch (length)
7095 /* All short conditional branches except backwards with an unfilled
7096 delay slot. */
7097 case 4:
7098 if (useskip)
7099 strcpy (buf, "{extrs,|extrw,s,}");
7100 else
7101 strcpy (buf, "bb,");
7102 if (useskip && GET_MODE (operands[0]) == DImode)
7103 strcpy (buf, "extrd,s,*");
7104 else if (GET_MODE (operands[0]) == DImode)
7105 strcpy (buf, "bb,*");
7106 if ((which == 0 && negated)
7107 || (which == 1 && ! negated))
7108 strcat (buf, ">=");
7109 else
7110 strcat (buf, "<");
7111 if (useskip)
7112 strcat (buf, " %0,%1,1,%%r0");
7113 else if (nullify && negated)
7115 if (branch_needs_nop_p (insn))
7116 strcat (buf, ",n %0,%1,%3%#");
7117 else
7118 strcat (buf, ",n %0,%1,%3");
7120 else if (nullify && ! negated)
7122 if (branch_needs_nop_p (insn))
7123 strcat (buf, ",n %0,%1,%2%#");
7124 else
7125 strcat (buf, ",n %0,%1,%2");
7127 else if (! nullify && negated)
7128 strcat (buf, " %0,%1,%3");
7129 else if (! nullify && ! negated)
7130 strcat (buf, " %0,%1,%2");
7131 break;
7133 /* All long conditionals. Note a short backward branch with an
7134 unfilled delay slot is treated just like a long backward branch
7135 with an unfilled delay slot. */
7136 case 8:
7137 /* Handle weird backwards branch with a filled delay slot
7138 which is nullified. */
7139 if (dbr_sequence_length () != 0
7140 && ! forward_branch_p (insn)
7141 && nullify)
7143 strcpy (buf, "bb,");
7144 if (GET_MODE (operands[0]) == DImode)
7145 strcat (buf, "*");
7146 if ((which == 0 && negated)
7147 || (which == 1 && ! negated))
7148 strcat (buf, "<");
7149 else
7150 strcat (buf, ">=");
7151 if (negated)
7152 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7153 else
7154 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7156 /* Handle short backwards branch with an unfilled delay slot.
7157 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7158 taken and untaken branches. */
7159 else if (dbr_sequence_length () == 0
7160 && ! forward_branch_p (insn)
7161 && INSN_ADDRESSES_SET_P ()
7162 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7163 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7165 strcpy (buf, "bb,");
7166 if (GET_MODE (operands[0]) == DImode)
7167 strcat (buf, "*");
7168 if ((which == 0 && negated)
7169 || (which == 1 && ! negated))
7170 strcat (buf, ">=");
7171 else
7172 strcat (buf, "<");
7173 if (negated)
7174 strcat (buf, " %0,%1,%3%#");
7175 else
7176 strcat (buf, " %0,%1,%2%#");
7178 else
7180 if (GET_MODE (operands[0]) == DImode)
7181 strcpy (buf, "extrd,s,*");
7182 else
7183 strcpy (buf, "{extrs,|extrw,s,}");
7184 if ((which == 0 && negated)
7185 || (which == 1 && ! negated))
7186 strcat (buf, "<");
7187 else
7188 strcat (buf, ">=");
7189 if (nullify && negated)
7190 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7191 else if (nullify && ! negated)
7192 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7193 else if (negated)
7194 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7195 else
7196 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7198 break;
7200 default:
7201 /* The reversed conditional branch must branch over one additional
7202 instruction if the delay slot is filled and needs to be extracted
7203 by pa_output_lbranch. If the delay slot is empty or this is a
7204 nullified forward branch, the instruction after the reversed
7205 condition branch must be nullified. */
7206 if (dbr_sequence_length () == 0
7207 || (nullify && forward_branch_p (insn)))
7209 nullify = 1;
7210 xdelay = 0;
7211 operands[4] = GEN_INT (length);
7213 else
7215 xdelay = 1;
7216 operands[4] = GEN_INT (length + 4);
7219 if (GET_MODE (operands[0]) == DImode)
7220 strcpy (buf, "bb,*");
7221 else
7222 strcpy (buf, "bb,");
7223 if ((which == 0 && negated)
7224 || (which == 1 && !negated))
7225 strcat (buf, "<");
7226 else
7227 strcat (buf, ">=");
7228 if (nullify)
7229 strcat (buf, ",n %0,%1,.+%4");
7230 else
7231 strcat (buf, " %0,%1,.+%4");
7232 output_asm_insn (buf, operands);
7233 return pa_output_lbranch (negated ? operands[3] : operands[2],
7234 insn, xdelay);
7236 return buf;
7239 /* This routine handles all the branch-on-variable-bit conditional branch
7240 sequences we might need to generate. It handles nullification of delay
7241 slots, varying length branches, negated branches and all combinations
7242 of the above. It returns the appropriate output template to emit the
7243 branch. */
7245 const char *
7246 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7247 int which)
7249 static char buf[100];
7250 bool useskip;
7251 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7252 int length = get_attr_length (insn);
7253 int xdelay;
7255 /* A conditional branch to the following instruction (e.g. the delay slot) is
7256 asking for a disaster. I do not think this can happen as this pattern
7257 is only used when optimizing; jump optimization should eliminate the
7258 jump. But be prepared just in case. */
7260 if (branch_to_delay_slot_p (insn))
7261 return "nop";
7263 /* If this is a long branch with its delay slot unfilled, set `nullify'
7264 as it can nullify the delay slot and save a nop. */
7265 if (length == 8 && dbr_sequence_length () == 0)
7266 nullify = 1;
7268 /* If this is a short forward conditional branch which did not get
7269 its delay slot filled, the delay slot can still be nullified. */
7270 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7271 nullify = forward_branch_p (insn);
7273 /* A forward branch over a single nullified insn can be done with an
7274 extrs instruction. This avoids a single cycle penalty due to
7275 mis-predicted branch if we fall through (branch not taken). */
7276 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7278 switch (length)
7281 /* All short conditional branches except backwards with an unfilled
7282 delay slot. */
7283 case 4:
7284 if (useskip)
7285 strcpy (buf, "{vextrs,|extrw,s,}");
7286 else
7287 strcpy (buf, "{bvb,|bb,}");
7288 if (useskip && GET_MODE (operands[0]) == DImode)
7289 strcpy (buf, "extrd,s,*");
7290 else if (GET_MODE (operands[0]) == DImode)
7291 strcpy (buf, "bb,*");
7292 if ((which == 0 && negated)
7293 || (which == 1 && ! negated))
7294 strcat (buf, ">=");
7295 else
7296 strcat (buf, "<");
7297 if (useskip)
7298 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7299 else if (nullify && negated)
7301 if (branch_needs_nop_p (insn))
7302 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7303 else
7304 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7306 else if (nullify && ! negated)
7308 if (branch_needs_nop_p (insn))
7309 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7310 else
7311 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7313 else if (! nullify && negated)
7314 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7315 else if (! nullify && ! negated)
7316 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7317 break;
7319 /* All long conditionals. Note a short backward branch with an
7320 unfilled delay slot is treated just like a long backward branch
7321 with an unfilled delay slot. */
7322 case 8:
7323 /* Handle weird backwards branch with a filled delay slot
7324 which is nullified. */
7325 if (dbr_sequence_length () != 0
7326 && ! forward_branch_p (insn)
7327 && nullify)
7329 strcpy (buf, "{bvb,|bb,}");
7330 if (GET_MODE (operands[0]) == DImode)
7331 strcat (buf, "*");
7332 if ((which == 0 && negated)
7333 || (which == 1 && ! negated))
7334 strcat (buf, "<");
7335 else
7336 strcat (buf, ">=");
7337 if (negated)
7338 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7339 else
7340 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7342 /* Handle short backwards branch with an unfilled delay slot.
7343 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7344 taken and untaken branches. */
7345 else if (dbr_sequence_length () == 0
7346 && ! forward_branch_p (insn)
7347 && INSN_ADDRESSES_SET_P ()
7348 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7349 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7351 strcpy (buf, "{bvb,|bb,}");
7352 if (GET_MODE (operands[0]) == DImode)
7353 strcat (buf, "*");
7354 if ((which == 0 && negated)
7355 || (which == 1 && ! negated))
7356 strcat (buf, ">=");
7357 else
7358 strcat (buf, "<");
7359 if (negated)
7360 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7361 else
7362 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7364 else
7366 strcpy (buf, "{vextrs,|extrw,s,}");
7367 if (GET_MODE (operands[0]) == DImode)
7368 strcpy (buf, "extrd,s,*");
7369 if ((which == 0 && negated)
7370 || (which == 1 && ! negated))
7371 strcat (buf, "<");
7372 else
7373 strcat (buf, ">=");
7374 if (nullify && negated)
7375 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7376 else if (nullify && ! negated)
7377 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7378 else if (negated)
7379 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7380 else
7381 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7383 break;
7385 default:
7386 /* The reversed conditional branch must branch over one additional
7387 instruction if the delay slot is filled and needs to be extracted
7388 by pa_output_lbranch. If the delay slot is empty or this is a
7389 nullified forward branch, the instruction after the reversed
7390 condition branch must be nullified. */
7391 if (dbr_sequence_length () == 0
7392 || (nullify && forward_branch_p (insn)))
7394 nullify = 1;
7395 xdelay = 0;
7396 operands[4] = GEN_INT (length);
7398 else
7400 xdelay = 1;
7401 operands[4] = GEN_INT (length + 4);
7404 if (GET_MODE (operands[0]) == DImode)
7405 strcpy (buf, "bb,*");
7406 else
7407 strcpy (buf, "{bvb,|bb,}");
7408 if ((which == 0 && negated)
7409 || (which == 1 && !negated))
7410 strcat (buf, "<");
7411 else
7412 strcat (buf, ">=");
7413 if (nullify)
7414 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7415 else
7416 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7417 output_asm_insn (buf, operands);
7418 return pa_output_lbranch (negated ? operands[3] : operands[2],
7419 insn, xdelay);
7421 return buf;
7424 /* Return the output template for emitting a dbra type insn.
7426 Note it may perform some output operations on its own before
7427 returning the final output string. */
7428 const char *
7429 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7431 int length = get_attr_length (insn);
7433 /* A conditional branch to the following instruction (e.g. the delay slot) is
7434 asking for a disaster. Be prepared! */
7436 if (branch_to_delay_slot_p (insn))
7438 if (which_alternative == 0)
7439 return "ldo %1(%0),%0";
7440 else if (which_alternative == 1)
7442 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7443 output_asm_insn ("ldw -16(%%r30),%4", operands);
7444 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7445 return "{fldws|fldw} -16(%%r30),%0";
7447 else
7449 output_asm_insn ("ldw %0,%4", operands);
7450 return "ldo %1(%4),%4\n\tstw %4,%0";
7454 if (which_alternative == 0)
7456 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7457 int xdelay;
7459 /* If this is a long branch with its delay slot unfilled, set `nullify'
7460 as it can nullify the delay slot and save a nop. */
7461 if (length == 8 && dbr_sequence_length () == 0)
7462 nullify = 1;
7464 /* If this is a short forward conditional branch which did not get
7465 its delay slot filled, the delay slot can still be nullified. */
7466 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7467 nullify = forward_branch_p (insn);
7469 switch (length)
7471 case 4:
7472 if (nullify)
7474 if (branch_needs_nop_p (insn))
7475 return "addib,%C2,n %1,%0,%3%#";
7476 else
7477 return "addib,%C2,n %1,%0,%3";
7479 else
7480 return "addib,%C2 %1,%0,%3";
7482 case 8:
7483 /* Handle weird backwards branch with a filled delay slot
7484 which is nullified. */
7485 if (dbr_sequence_length () != 0
7486 && ! forward_branch_p (insn)
7487 && nullify)
7488 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7489 /* Handle short backwards branch with an unfilled delay slot.
7490 Using an addb;nop rather than addi;bl saves 1 cycle for both
7491 taken and untaken branches. */
7492 else if (dbr_sequence_length () == 0
7493 && ! forward_branch_p (insn)
7494 && INSN_ADDRESSES_SET_P ()
7495 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7496 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7497 return "addib,%C2 %1,%0,%3%#";
7499 /* Handle normal cases. */
7500 if (nullify)
7501 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7502 else
7503 return "addi,%N2 %1,%0,%0\n\tb %3";
7505 default:
7506 /* The reversed conditional branch must branch over one additional
7507 instruction if the delay slot is filled and needs to be extracted
7508 by pa_output_lbranch. If the delay slot is empty or this is a
7509 nullified forward branch, the instruction after the reversed
7510 condition branch must be nullified. */
7511 if (dbr_sequence_length () == 0
7512 || (nullify && forward_branch_p (insn)))
7514 nullify = 1;
7515 xdelay = 0;
7516 operands[4] = GEN_INT (length);
7518 else
7520 xdelay = 1;
7521 operands[4] = GEN_INT (length + 4);
7524 if (nullify)
7525 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7526 else
7527 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7529 return pa_output_lbranch (operands[3], insn, xdelay);
7533 /* Deal with gross reload from FP register case. */
7534 else if (which_alternative == 1)
7536 /* Move loop counter from FP register to MEM then into a GR,
7537 increment the GR, store the GR into MEM, and finally reload
7538 the FP register from MEM from within the branch's delay slot. */
7539 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7540 operands);
7541 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7542 if (length == 24)
7543 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7544 else if (length == 28)
7545 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7546 else
7548 operands[5] = GEN_INT (length - 16);
7549 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7550 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7551 return pa_output_lbranch (operands[3], insn, 0);
7554 /* Deal with gross reload from memory case. */
7555 else
7557 /* Reload loop counter from memory, the store back to memory
7558 happens in the branch's delay slot. */
7559 output_asm_insn ("ldw %0,%4", operands);
7560 if (length == 12)
7561 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7562 else if (length == 16)
7563 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7564 else
7566 operands[5] = GEN_INT (length - 4);
7567 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7568 return pa_output_lbranch (operands[3], insn, 0);
7573 /* Return the output template for emitting a movb type insn.
7575 Note it may perform some output operations on its own before
7576 returning the final output string. */
7577 const char *
7578 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7579 int reverse_comparison)
7581 int length = get_attr_length (insn);
7583 /* A conditional branch to the following instruction (e.g. the delay slot) is
7584 asking for a disaster. Be prepared! */
7586 if (branch_to_delay_slot_p (insn))
7588 if (which_alternative == 0)
7589 return "copy %1,%0";
7590 else if (which_alternative == 1)
7592 output_asm_insn ("stw %1,-16(%%r30)", operands);
7593 return "{fldws|fldw} -16(%%r30),%0";
7595 else if (which_alternative == 2)
7596 return "stw %1,%0";
7597 else
7598 return "mtsar %r1";
7601 /* Support the second variant. */
7602 if (reverse_comparison)
7603 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7605 if (which_alternative == 0)
7607 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7608 int xdelay;
7610 /* If this is a long branch with its delay slot unfilled, set `nullify'
7611 as it can nullify the delay slot and save a nop. */
7612 if (length == 8 && dbr_sequence_length () == 0)
7613 nullify = 1;
7615 /* If this is a short forward conditional branch which did not get
7616 its delay slot filled, the delay slot can still be nullified. */
7617 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7618 nullify = forward_branch_p (insn);
7620 switch (length)
7622 case 4:
7623 if (nullify)
7625 if (branch_needs_nop_p (insn))
7626 return "movb,%C2,n %1,%0,%3%#";
7627 else
7628 return "movb,%C2,n %1,%0,%3";
7630 else
7631 return "movb,%C2 %1,%0,%3";
7633 case 8:
7634 /* Handle weird backwards branch with a filled delay slot
7635 which is nullified. */
7636 if (dbr_sequence_length () != 0
7637 && ! forward_branch_p (insn)
7638 && nullify)
7639 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7641 /* Handle short backwards branch with an unfilled delay slot.
7642 Using a movb;nop rather than or;bl saves 1 cycle for both
7643 taken and untaken branches. */
7644 else if (dbr_sequence_length () == 0
7645 && ! forward_branch_p (insn)
7646 && INSN_ADDRESSES_SET_P ()
7647 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7648 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7649 return "movb,%C2 %1,%0,%3%#";
7650 /* Handle normal cases. */
7651 if (nullify)
7652 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7653 else
7654 return "or,%N2 %1,%%r0,%0\n\tb %3";
7656 default:
7657 /* The reversed conditional branch must branch over one additional
7658 instruction if the delay slot is filled and needs to be extracted
7659 by pa_output_lbranch. If the delay slot is empty or this is a
7660 nullified forward branch, the instruction after the reversed
7661 conditional branch must be nullified. */
7662 if (dbr_sequence_length () == 0
7663 || (nullify && forward_branch_p (insn)))
7665 nullify = 1;
7666 xdelay = 0;
7667 operands[4] = GEN_INT (length);
7669 else
7671 xdelay = 1;
7672 operands[4] = GEN_INT (length + 4);
7675 if (nullify)
7676 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7677 else
7678 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7680 return pa_output_lbranch (operands[3], insn, xdelay);
7683 /* Deal with gross reload for FP destination register case. */
7684 else if (which_alternative == 1)
7686 /* Move source register to MEM, perform the branch test, then
7687 finally load the FP register from MEM from within the branch's
7688 delay slot. */
7689 output_asm_insn ("stw %1,-16(%%r30)", operands);
7690 if (length == 12)
7691 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7692 else if (length == 16)
7693 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7694 else
7696 operands[4] = GEN_INT (length - 4);
7697 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7698 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7699 return pa_output_lbranch (operands[3], insn, 0);
7702 /* Deal with gross reload from memory case. */
7703 else if (which_alternative == 2)
7705 /* Reload loop counter from memory; the store back to memory
7706 happens in the branch's delay slot. */
7707 if (length == 8)
7708 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7709 else if (length == 12)
7710 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7711 else
7713 operands[4] = GEN_INT (length);
7714 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7715 operands);
7716 return pa_output_lbranch (operands[3], insn, 0);
7719 /* Handle SAR as a destination. */
7720 else
7722 if (length == 8)
7723 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7724 else if (length == 12)
7725 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7726 else
7728 operands[4] = GEN_INT (length);
7729 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7730 operands);
7731 return pa_output_lbranch (operands[3], insn, 0);
7736 /* Copy any FP arguments in INSN into integer registers. */
7737 static void
7738 copy_fp_args (rtx_insn *insn)
7740 rtx link;
7741 rtx xoperands[2];
7743 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7745 int arg_mode, regno;
7746 rtx use = XEXP (link, 0);
7748 if (! (GET_CODE (use) == USE
7749 && GET_CODE (XEXP (use, 0)) == REG
7750 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7751 continue;
7753 arg_mode = GET_MODE (XEXP (use, 0));
7754 regno = REGNO (XEXP (use, 0));
7756 /* Is it a floating point register? */
7757 if (regno >= 32 && regno <= 39)
7759 /* Copy the FP register into an integer register via memory. */
7760 if (arg_mode == SFmode)
7762 xoperands[0] = XEXP (use, 0);
7763 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7764 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7765 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7767 else
7769 xoperands[0] = XEXP (use, 0);
7770 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7771 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7772 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7773 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7779 /* Compute length of the FP argument copy sequence for INSN. */
7780 static int
7781 length_fp_args (rtx_insn *insn)
7783 int length = 0;
7784 rtx link;
7786 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7788 int arg_mode, regno;
7789 rtx use = XEXP (link, 0);
7791 if (! (GET_CODE (use) == USE
7792 && GET_CODE (XEXP (use, 0)) == REG
7793 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7794 continue;
7796 arg_mode = GET_MODE (XEXP (use, 0));
7797 regno = REGNO (XEXP (use, 0));
7799 /* Is it a floating point register? */
7800 if (regno >= 32 && regno <= 39)
7802 if (arg_mode == SFmode)
7803 length += 8;
7804 else
7805 length += 12;
7809 return length;
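/* Worked example: each SFmode argument costs two insns (fstw plus ldw,
   8 bytes) and each DFmode argument three (fstd plus two ldw, 12
   bytes), so a call passing one SFmode and one DFmode value in FP
   registers gets an 8 + 12 = 20 byte copy sequence.  */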
7812 /* Return the attribute length for the millicode call instruction INSN.
7813 The length must match the code generated by pa_output_millicode_call.
7814 We include the delay slot in the returned length as it is better to
7815 overestimate the length than to underestimate it. */
7817 int
7818 pa_attr_length_millicode_call (rtx_insn *insn)
7820 unsigned long distance = -1;
7821 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7823 if (INSN_ADDRESSES_SET_P ())
7825 distance = (total + insn_current_reference_address (insn));
7826 if (distance < total)
7827 distance = -1;
7830 if (TARGET_64BIT)
7832 if (!TARGET_LONG_CALLS && distance < 7600000)
7833 return 8;
7835 return 20;
7837 else if (TARGET_PORTABLE_RUNTIME)
7838 return 24;
7839 else
7841 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7842 return 8;
7844 if (!flag_pic)
7845 return 12;
7847 return 24;
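/* For example, on a 32-bit target without -mlong-calls: a millicode
   call within MAX_PCREL17F_OFFSET of its target is estimated at 8
   bytes (branch plus delay slot), an out-of-range non-PIC call at 12,
   and the PIC and portable-runtime sequences at 24.  */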
7851 /* INSN is a function call.
7853 CALL_DEST is the routine we are calling. */
7855 const char *
7856 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7858 int attr_length = get_attr_length (insn);
7859 int seq_length = dbr_sequence_length ();
7860 rtx xoperands[4];
7862 xoperands[0] = call_dest;
7864 /* Handle the common case where we are sure that the branch will
7865 reach the beginning of the $CODE$ subspace. The within reach
7866 form of the $$sh_func_adrs call has a length of 28. Because it
7867 has an attribute type of sh_func_adrs, it never has a nonzero
7868 sequence length (i.e., the delay slot is never filled). */
7869 if (!TARGET_LONG_CALLS
7870 && (attr_length == 8
7871 || (attr_length == 28
7872 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7874 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7875 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7877 else
7879 if (TARGET_64BIT)
7881 /* It might seem that one insn could be saved by accessing
7882 the millicode function using the linkage table. However,
7883 this doesn't work in shared libraries and other dynamically
7884 loaded objects. Using a pc-relative sequence also avoids
7885 problems related to the implicit use of the gp register. */
7886 xoperands[1] = gen_rtx_REG (Pmode, 1);
7887 xoperands[2] = xoperands[1];
7888 pa_output_pic_pcrel_sequence (xoperands);
7889 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7891 else if (TARGET_PORTABLE_RUNTIME)
7893 /* Pure portable runtime doesn't allow be/ble; we also don't
7894 have PIC support in the assembler/linker, so this sequence
7895 is needed. */
7897 /* Get the address of our target into %r1. */
7898 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7899 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7901 /* Get our return address into %r31. */
7902 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7903 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7905 /* Jump to our target address in %r1. */
7906 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7908 else if (!flag_pic)
7910 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7911 if (TARGET_PA_20)
7912 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7913 else
7914 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7916 else
7918 xoperands[1] = gen_rtx_REG (Pmode, 31);
7919 xoperands[2] = gen_rtx_REG (Pmode, 1);
7920 pa_output_pic_pcrel_sequence (xoperands);
7922 /* Adjust return address. */
7923 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7925 /* Jump to our target address in %r1. */
7926 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7930 if (seq_length == 0)
7931 output_asm_insn ("nop", xoperands);
7933 return "";
7936 /* Return the attribute length of the call instruction INSN. The SIBCALL
7937 flag indicates whether INSN is a regular call or a sibling call. The
7938 length returned must be longer than the code actually generated by
7939 pa_output_call. Since branch shortening is done before delay branch
7940 sequencing, there is no way to determine whether or not the delay
7941 slot will be filled during branch shortening. Even when the delay
7942 slot is filled, we may have to add a nop if the delay slot contains
7943 a branch that can't reach its target. Thus, we always have to include
7944 the delay slot in the length estimate. This used to be done in
7945 pa_adjust_insn_length but we do it here now as some sequences always
7946 fill the delay slot and we can save four bytes in the estimate for
7947 these sequences. */
7949 int
7950 pa_attr_length_call (rtx_insn *insn, int sibcall)
7952 int local_call;
7953 rtx call, call_dest;
7954 tree call_decl;
7955 int length = 0;
7956 rtx pat = PATTERN (insn);
7957 unsigned long distance = -1;
7959 gcc_assert (CALL_P (insn));
7961 if (INSN_ADDRESSES_SET_P ())
7963 unsigned long total;
7965 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7966 distance = (total + insn_current_reference_address (insn));
7967 if (distance < total)
7968 distance = -1;
7971 gcc_assert (GET_CODE (pat) == PARALLEL);
7973 /* Get the call rtx. */
7974 call = XVECEXP (pat, 0, 0);
7975 if (GET_CODE (call) == SET)
7976 call = SET_SRC (call);
7978 gcc_assert (GET_CODE (call) == CALL);
7980 /* Determine if this is a local call. */
7981 call_dest = XEXP (XEXP (call, 0), 0);
7982 call_decl = SYMBOL_REF_DECL (call_dest);
7983 local_call = call_decl && targetm.binds_local_p (call_decl);
7985 /* pc-relative branch. */
7986 if (!TARGET_LONG_CALLS
7987 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7988 || distance < MAX_PCREL17F_OFFSET))
7989 length += 8;
7991 /* 64-bit plabel sequence. */
7992 else if (TARGET_64BIT && !local_call)
7993 length += 24;
7995 /* non-pic long absolute branch sequence. */
7996 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7997 length += 12;
7999 /* long pc-relative branch sequence. */
8000 else if (TARGET_LONG_PIC_SDIFF_CALL
8001 || (TARGET_GAS && !TARGET_SOM && local_call))
8003 length += 20;
8005 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8006 length += 8;
8009 /* 32-bit plabel sequence. */
8010 else
8012 length += 32;
8014 if (TARGET_SOM)
8015 length += length_fp_args (insn);
8017 if (flag_pic)
8018 length += 4;
8020 if (!TARGET_PA_20)
8022 if (!sibcall)
8023 length += 8;
8025 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8026 length += 8;
8030 return length;
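/* A worked example of the 32-bit plabel case: a non-local PIC call on
   a pre-2.0 SOM target with space registers enabled and no FP register
   arguments is estimated at 32 (base) + 4 (PIC) + 8 (non-sibcall
   return pointer setup) + 8 (space register fixup) = 52 bytes.  */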
8033 /* INSN is a function call.
8035 CALL_DEST is the routine we are calling. */
8037 const char *
8038 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8040 int seq_length = dbr_sequence_length ();
8041 tree call_decl = SYMBOL_REF_DECL (call_dest);
8042 int local_call = call_decl && targetm.binds_local_p (call_decl);
8043 rtx xoperands[4];
8045 xoperands[0] = call_dest;
8047 /* Handle the common case where we're sure that the branch will reach
8048 the beginning of the "$CODE$" subspace. This is the beginning of
8049 the current function if we are in a named section. */
8050 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8052 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8053 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8055 else
8057 if (TARGET_64BIT && !local_call)
8059 /* ??? As far as I can tell, the HP linker doesn't support the
8060 long pc-relative sequence described in the 64-bit runtime
8061 architecture. So, we use a slightly longer indirect call. */
8062 xoperands[0] = pa_get_deferred_plabel (call_dest);
8063 xoperands[1] = gen_label_rtx ();
8065 /* Put the load of %r27 into the delay slot. We don't need to
8066 do anything when generating fast indirect calls. */
8067 if (seq_length != 0)
8069 final_scan_insn (NEXT_INSN (insn), asm_out_file,
8070 optimize, 0, NULL);
8072 /* Now delete the delay insn. */
8073 SET_INSN_DELETED (NEXT_INSN (insn));
8076 output_asm_insn ("addil LT'%0,%%r27", xoperands);
8077 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8078 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8079 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8080 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8081 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8082 seq_length = 1;
8084 else
8086 int indirect_call = 0;
8088 /* Emit a long call. There are several different sequences
8089 of increasing length and complexity. In most cases,
8090 they don't allow an instruction in the delay slot. */
8091 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8092 && !TARGET_LONG_PIC_SDIFF_CALL
8093 && !(TARGET_GAS && !TARGET_SOM && local_call)
8094 && !TARGET_64BIT)
8095 indirect_call = 1;
8097 if (seq_length != 0
8098 && !sibcall
8099 && (!TARGET_PA_20
8100 || indirect_call
8101 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8103 /* A non-jump insn in the delay slot. By definition we can
8104 emit this insn before the call (and in fact before argument
8105 relocation). */
8106 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8107 NULL);
8109 /* Now delete the delay insn. */
8110 SET_INSN_DELETED (NEXT_INSN (insn));
8111 seq_length = 0;
8114 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8116 /* This is the best sequence for making long calls in
8117 non-pic code. Unfortunately, GNU ld doesn't provide
8118 the stub needed for external calls, and GAS's support
8119 for this with the SOM linker is buggy. It is safe
8120 to use this for local calls. */
8121 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8122 if (sibcall)
8123 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8124 else
8126 if (TARGET_PA_20)
8127 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8128 xoperands);
8129 else
8130 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8132 output_asm_insn ("copy %%r31,%%r2", xoperands);
8133 seq_length = 1;
8136 else
8138 /* The HP assembler and linker can handle relocations for
8139 the difference of two symbols. The HP assembler
8140 recognizes the sequence as a pc-relative call and
8141 the linker provides stubs when needed. */
8143 /* GAS currently can't generate the relocations that
8144 are needed for the SOM linker under HP-UX using this
8145 sequence. The GNU linker doesn't generate the stubs
8146 that are needed for external calls on TARGET_ELF32
8147 with this sequence. For now, we have to use a longer
8148 plabel sequence when using GAS for non local calls. */
8149 if (TARGET_LONG_PIC_SDIFF_CALL
8150 || (TARGET_GAS && !TARGET_SOM && local_call))
8152 xoperands[1] = gen_rtx_REG (Pmode, 1);
8153 xoperands[2] = xoperands[1];
8154 pa_output_pic_pcrel_sequence (xoperands);
8156 else
8158 /* Emit a long plabel-based call sequence. This is
8159 essentially an inline implementation of $$dyncall.
8160 We don't actually try to call $$dyncall as this is
8161 as difficult as calling the function itself. */
8162 xoperands[0] = pa_get_deferred_plabel (call_dest);
8163 xoperands[1] = gen_label_rtx ();
8165 /* Since the call is indirect, FP arguments in registers
8166 need to be copied to the general registers. Then, the
8167 argument relocation stub will copy them back. */
8168 if (TARGET_SOM)
8169 copy_fp_args (insn);
8171 if (flag_pic)
8173 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8174 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8175 output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8177 else
8179 output_asm_insn ("addil LR'%0-$global$,%%r27",
8180 xoperands);
8181 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8182 xoperands);
8185 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8186 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8187 /* Should this be an ordered load to ensure the target
8188 address is loaded before the global pointer? */
8189 output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8190 output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8192 if (!sibcall && !TARGET_PA_20)
8194 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8195 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8196 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8197 else
8198 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8202 if (TARGET_PA_20)
8204 if (sibcall)
8205 output_asm_insn ("bve (%%r1)", xoperands);
8206 else
8208 if (indirect_call)
8210 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8211 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8212 seq_length = 1;
8214 else
8215 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8218 else
8220 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8221 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8222 xoperands);
8224 if (sibcall)
8226 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8227 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8228 else
8229 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8231 else
8233 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8234 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8235 else
8236 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8238 if (indirect_call)
8239 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8240 else
8241 output_asm_insn ("copy %%r31,%%r2", xoperands);
8242 seq_length = 1;
8249 if (seq_length == 0)
8250 output_asm_insn ("nop", xoperands);
8252 return "";
8255 /* Return the attribute length of the indirect call instruction INSN.
8256 The length must match the code generated by pa_output_indirect_call.
8257 The returned length includes the delay slot. Currently, the delay
8258 slot of an indirect call sequence is not exposed and it is used by
8259 the sequence itself. */
8261 int
8262 pa_attr_length_indirect_call (rtx_insn *insn)
8264 unsigned long distance = -1;
8265 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8267 if (INSN_ADDRESSES_SET_P ())
8269 distance = (total + insn_current_reference_address (insn));
8270 if (distance < total)
8271 distance = -1;
8274 if (TARGET_64BIT)
8275 return 12;
8277 if (TARGET_FAST_INDIRECT_CALLS)
8278 return 8;
8280 if (TARGET_PORTABLE_RUNTIME)
8281 return 16;
8283 if (!TARGET_LONG_CALLS
8284 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8285 || distance < MAX_PCREL17F_OFFSET))
8286 return 8;
8288 /* Out of reach; we can use ble. */
8289 if (!flag_pic)
8290 return 12;
8292 /* Inline versions of $$dyncall. */
8293 if (!optimize_size)
8295 if (TARGET_NO_SPACE_REGS)
8296 return 28;
8298 if (TARGET_PA_20)
8299 return 32;
8302 /* Long PIC pc-relative call. */
8303 return 20;
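/* For example: 64-bit indirect calls are always estimated at 12 bytes,
   the -mfast-indirect-calls sequence at 8, the inline $$dyncall
   versions at 28 (no space registers) and 32 (PA 2.0), and a 32-bit
   PIC call that reaches $$dyncall through the long pc-relative
   sequence at 20.  */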
8306 const char *
8307 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8309 rtx xoperands[4];
8310 int length;
8312 if (TARGET_64BIT)
8314 xoperands[0] = call_dest;
8315 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8316 "bve,l (%%r2),%%r2\n\t"
8317 "ldd 24(%0),%%r27", xoperands);
8318 return "";
8321 /* First the special case for kernels, level 0 systems, etc. */
8322 if (TARGET_FAST_INDIRECT_CALLS)
8324 pa_output_arg_descriptor (insn);
8325 if (TARGET_PA_20)
8326 return "bve,l,n (%%r22),%%r2\n\tnop";
8327 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8330 if (TARGET_PORTABLE_RUNTIME)
8332 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8333 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8334 pa_output_arg_descriptor (insn);
8335 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8338 /* Now the normal case -- we can reach $$dyncall directly or
8339 we're sure that we can get there via a long-branch stub.
8341 No need to check target flags as the length uniquely identifies
8342 the remaining cases. */
8343 length = pa_attr_length_indirect_call (insn);
8344 if (length == 8)
8346 pa_output_arg_descriptor (insn);
8348 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8349 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8350 variant of the B,L instruction can't be used on the SOM target. */
8351 if (TARGET_PA_20 && !TARGET_SOM)
8352 return "b,l,n $$dyncall,%%r2\n\tnop";
8353 else
8354 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8357 /* Long millicode call, but we are not generating PIC or portable runtime
8358 code. */
8359 if (length == 12)
8361 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8362 pa_output_arg_descriptor (insn);
8363 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8366 /* The long PIC pc-relative call sequence is five instructions. So,
8367 let's use an inline version of $$dyncall when the calling sequence
8368 has a roughly similar number of instructions and we are not optimizing
8369 for size. We need two instructions to load the return pointer plus
8370 the $$dyncall implementation. */
8371 if (!optimize_size)
8373 if (TARGET_NO_SPACE_REGS)
8375 pa_output_arg_descriptor (insn);
8376 output_asm_insn ("bl .+8,%%r2\n\t"
8377 "ldo 20(%%r2),%%r2\n\t"
8378 "extru,<> %%r22,30,1,%%r0\n\t"
8379 "bv,n %%r0(%%r22)\n\t"
8380 "ldw -2(%%r22),%%r21\n\t"
8381 "bv %%r0(%%r21)\n\t"
8382 "ldw 2(%%r22),%%r19", xoperands);
8383 return "";
8385 if (TARGET_PA_20)
8387 pa_output_arg_descriptor (insn);
8388 output_asm_insn ("bl .+8,%%r2\n\t"
8389 "ldo 24(%%r2),%%r2\n\t"
8390 "stw %%r2,-24(%%sp)\n\t"
8391 "extru,<> %r22,30,1,%%r0\n\t"
8392 "bve,n (%%r22)\n\t"
8393 "ldw -2(%%r22),%%r21\n\t"
8394 "bve (%%r21)\n\t"
8395 "ldw 2(%%r22),%%r19", xoperands);
8396 return "";
8400 /* We need a long PIC call to $$dyncall. */
8401 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8402 xoperands[1] = gen_rtx_REG (Pmode, 2);
8403 xoperands[2] = gen_rtx_REG (Pmode, 1);
8404 pa_output_pic_pcrel_sequence (xoperands);
8405 pa_output_arg_descriptor (insn);
8406 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8409 /* In HP-UX 8.0's shared library scheme, special relocations are needed
8410 for function labels if they might be passed to a function
8411 in a shared library (because shared libraries don't live in code
8412 space), and special magic is needed to construct their address. */
8414 void
8415 pa_encode_label (rtx sym)
8417 const char *str = XSTR (sym, 0);
8418 int len = strlen (str) + 1;
8419 char *newstr, *p;
8421 p = newstr = XALLOCAVEC (char, len + 1);
8422 *p++ = '@';
8423 strcpy (p, str);
8425 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
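/* A minimal usage sketch (the symbol name is hypothetical):

	rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
	pa_encode_label (sym);

   after which XSTR (sym, 0) is "@foo".  The '@' prefix is recognized
   and removed again by pa_strip_name_encoding below.  */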
8428 static void
8429 pa_encode_section_info (tree decl, rtx rtl, int first)
8431 int old_referenced = 0;
8433 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8434 old_referenced
8435 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8437 default_encode_section_info (decl, rtl, first);
8439 if (first && TEXT_SPACE_P (decl))
8441 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8442 if (TREE_CODE (decl) == FUNCTION_DECL)
8443 pa_encode_label (XEXP (rtl, 0));
8445 else if (old_referenced)
8446 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8449 /* This is roughly the inverse of pa_encode_section_info. */
8451 static const char *
8452 pa_strip_name_encoding (const char *str)
8454 str += (*str == '@');
8455 str += (*str == '*');
8456 return str;
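/* Worked examples: "@foo" and "*foo" both strip to "foo".  At most one
   '@' and then one '*' are skipped, so "@*foo" strips to "foo" while
   "**foo" only strips to "*foo".  */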
8459 /* Returns 1 if OP is a function label involved in a simple addition
8460 with a constant. Used to keep certain patterns from matching
8461 during instruction combination. */
8462 int
8463 pa_is_function_label_plus_const (rtx op)
8465 /* Strip off any CONST. */
8466 if (GET_CODE (op) == CONST)
8467 op = XEXP (op, 0);
8469 return (GET_CODE (op) == PLUS
8470 && function_label_operand (XEXP (op, 0), VOIDmode)
8471 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8474 /* Output the assembler code for a thunk function. THUNK_DECL is the
8475 declaration for the thunk function itself, FUNCTION is the decl for
8476 the target function. DELTA is an immediate constant offset to be
8477 added to THIS. If VCALL_OFFSET is nonzero, the word at
8478 *(*this + vcall_offset) should be added to THIS. */
8480 static void
8481 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8482 HOST_WIDE_INT vcall_offset, tree function)
8484 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8485 static unsigned int current_thunk_number;
8486 int val_14 = VAL_14_BITS_P (delta);
8487 unsigned int old_last_address = last_address, nbytes = 0;
8488 char label[17];
8489 rtx xoperands[4];
8491 xoperands[0] = XEXP (DECL_RTL (function), 0);
8492 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8493 xoperands[2] = GEN_INT (delta);
8495 assemble_start_function (thunk_fndecl, fnname);
8496 final_start_function (emit_barrier (), file, 1);
8498 if (!vcall_offset)
8500 /* Output the thunk. We know that the function is in the same
8501 translation unit (i.e., the same space) as the thunk, and that
8502 thunks are output after their method. Thus, we don't need an
8503 external branch to reach the function. With SOM and GAS,
8504 functions and thunks are effectively in different sections.
8505 Thus, we can always use an IA-relative branch and the linker
8506 will add a long branch stub if necessary.
8508 However, we have to be careful when generating PIC code on the
8509 SOM port to ensure that the sequence does not transfer to an
8510 import stub for the target function as this could clobber the
8511 return value saved at SP-24. This would also apply to the
8512 32-bit linux port if the multi-space model is implemented. */
8513 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8514 && !(flag_pic && TREE_PUBLIC (function))
8515 && (TARGET_GAS || last_address < 262132))
8516 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8517 && ((targetm_common.have_named_sections
8518 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8519 /* The GNU 64-bit linker has rather poor stub management.
8520 So, we use a long branch from thunks that aren't in
8521 the same section as the target function. */
8522 && ((!TARGET_64BIT
8523 && (DECL_SECTION_NAME (thunk_fndecl)
8524 != DECL_SECTION_NAME (function)))
8525 || ((DECL_SECTION_NAME (thunk_fndecl)
8526 == DECL_SECTION_NAME (function))
8527 && last_address < 262132)))
8528 /* In this case, we need to be able to reach the start of
8529 the stub table even though the function is likely closer
8530 and can be jumped to directly. */
8531 || (targetm_common.have_named_sections
8532 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8533 && DECL_SECTION_NAME (function) == NULL
8534 && total_code_bytes < MAX_PCREL17F_OFFSET)
8535 /* Likewise. */
8536 || (!targetm_common.have_named_sections
8537 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8539 if (!val_14)
8540 output_asm_insn ("addil L'%2,%%r26", xoperands);
8542 output_asm_insn ("b %0", xoperands);
8544 if (val_14)
8546 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8547 nbytes += 8;
8549 else
8551 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8552 nbytes += 12;
8555 else if (TARGET_64BIT)
8557 rtx xop[4];
8559 /* We only have one call-clobbered scratch register, so we can't
8560 make use of the delay slot if delta doesn't fit in 14 bits. */
8561 if (!val_14)
8563 output_asm_insn ("addil L'%2,%%r26", xoperands);
8564 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8567 /* Load function address into %r1. */
8568 xop[0] = xoperands[0];
8569 xop[1] = gen_rtx_REG (Pmode, 1);
8570 xop[2] = xop[1];
8571 pa_output_pic_pcrel_sequence (xop);
8573 if (val_14)
8575 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8576 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8577 nbytes += 20;
8579 else
8581 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8582 nbytes += 24;
8585 else if (TARGET_PORTABLE_RUNTIME)
8587 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8588 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8590 if (!val_14)
8591 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8593 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8595 if (val_14)
8597 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8598 nbytes += 16;
8600 else
8602 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8603 nbytes += 20;
8606 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8608 /* The function is accessible from outside this module. The only
8609 way to avoid an import stub between the thunk and function is to
8610 call the function directly with an indirect sequence similar to
8611 that used by $$dyncall. This is possible because $$dyncall acts
8612 as the import stub in an indirect call. */
8613 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8614 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8615 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8616 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8617 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8618 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8619 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8620 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8621 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8623 if (!val_14)
8625 output_asm_insn ("addil L'%2,%%r26", xoperands);
8626 nbytes += 4;
8629 if (TARGET_PA_20)
8631 output_asm_insn ("bve (%%r22)", xoperands);
8632 nbytes += 36;
8634 else if (TARGET_NO_SPACE_REGS)
8636 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8637 nbytes += 36;
8639 else
8641 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8642 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8643 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8644 nbytes += 44;
8647 if (val_14)
8648 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8649 else
8650 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8652 else if (flag_pic)
8654 rtx xop[4];
8656 /* Load function address into %r22. */
8657 xop[0] = xoperands[0];
8658 xop[1] = gen_rtx_REG (Pmode, 1);
8659 xop[2] = gen_rtx_REG (Pmode, 22);
8660 pa_output_pic_pcrel_sequence (xop);
8662 if (!val_14)
8663 output_asm_insn ("addil L'%2,%%r26", xoperands);
8665 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8667 if (val_14)
8669 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8670 nbytes += 20;
8672 else
8674 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8675 nbytes += 24;
8678 else
8680 if (!val_14)
8681 output_asm_insn ("addil L'%2,%%r26", xoperands);
8683 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8684 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8686 if (val_14)
8688 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8689 nbytes += 12;
8691 else
8693 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8694 nbytes += 16;
8698 else
8700 rtx xop[4];
8702 /* Add DELTA to THIS. */
8703 if (val_14)
8705 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8706 nbytes += 4;
8708 else
8710 output_asm_insn ("addil L'%2,%%r26", xoperands);
8711 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8712 nbytes += 8;
8715 if (TARGET_64BIT)
8717 /* Load *(THIS + DELTA) to %r1. */
8718 output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8720 val_14 = VAL_14_BITS_P (vcall_offset);
8721 xoperands[2] = GEN_INT (vcall_offset);
8723 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8724 if (val_14)
8726 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8727 nbytes += 8;
8729 else
8731 output_asm_insn ("addil L'%2,%%r1", xoperands);
8732 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8733 nbytes += 12;
8736 else
8738 /* Load *(THIS + DELTA) to %r1. */
8739 output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8741 val_14 = VAL_14_BITS_P (vcall_offset);
8742 xoperands[2] = GEN_INT (vcall_offset);
8744 /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1. */
8745 if (val_14)
8747 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8748 nbytes += 8;
8750 else
8752 output_asm_insn ("addil L'%2,%%r1", xoperands);
8753 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8754 nbytes += 12;
8758 /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible. */
8759 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8760 && !(flag_pic && TREE_PUBLIC (function))
8761 && (TARGET_GAS || last_address < 262132))
8762 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8763 && ((targetm_common.have_named_sections
8764 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8765 /* The GNU 64-bit linker has rather poor stub management.
8766 So, we use a long branch from thunks that aren't in
8767 the same section as the target function. */
8768 && ((!TARGET_64BIT
8769 && (DECL_SECTION_NAME (thunk_fndecl)
8770 != DECL_SECTION_NAME (function)))
8771 || ((DECL_SECTION_NAME (thunk_fndecl)
8772 == DECL_SECTION_NAME (function))
8773 && last_address < 262132)))
8774 /* In this case, we need to be able to reach the start of
8775 the stub table even though the function is likely closer
8776 and can be jumped to directly. */
8777 || (targetm_common.have_named_sections
8778 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8779 && DECL_SECTION_NAME (function) == NULL
8780 && total_code_bytes < MAX_PCREL17F_OFFSET)
8781 /* Likewise. */
8782 || (!targetm_common.have_named_sections
8783 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8785 nbytes += 4;
8786 output_asm_insn ("b %0", xoperands);
8788 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8789 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8791 else if (TARGET_64BIT)
8793 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8794 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8796 /* Load function address into %r1. */
8797 nbytes += 16;
8798 xop[0] = xoperands[0];
8799 xop[1] = gen_rtx_REG (Pmode, 1);
8800 xop[2] = xop[1];
8801 pa_output_pic_pcrel_sequence (xop);
8803 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8805 else if (TARGET_PORTABLE_RUNTIME)
8807 /* Load function address into %r22. */
8808 nbytes += 12;
8809 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8810 output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8812 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8814 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8815 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8817 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8819 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8820 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8822 /* The function is accessible from outside this module. The only
8823 way to avoid an import stub between the thunk and function is to
8824 call the function directly with an indirect sequence similar to
8825 that used by $$dyncall. This is possible because $$dyncall acts
8826 as the import stub in an indirect call. */
8827 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8828 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8829 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8830 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8831 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8832 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8833 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8834 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8835 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8837 if (TARGET_PA_20)
8839 output_asm_insn ("bve,n (%%r22)", xoperands);
8840 nbytes += 32;
8842 else if (TARGET_NO_SPACE_REGS)
8844 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8845 nbytes += 32;
8847 else
8849 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8850 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8851 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8852 nbytes += 40;
8855 else if (flag_pic)
8857 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8858 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8860 /* Load function address into %r1. */
8861 nbytes += 16;
8862 xop[0] = xoperands[0];
8863 xop[1] = gen_rtx_REG (Pmode, 1);
8864 xop[2] = xop[1];
8865 pa_output_pic_pcrel_sequence (xop);
8867 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8869 else
8871 /* Load function address into %r22. */
8872 nbytes += 8;
8873 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8874 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8876 /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS. */
8877 output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8881 final_end_function ();
8883 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8885 switch_to_section (data_section);
8886 output_asm_insn (".align 4", xoperands);
8887 ASM_OUTPUT_LABEL (file, label);
8888 output_asm_insn (".word P'%0", xoperands);
8891 current_thunk_number++;
8892 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8893 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8894 last_address += nbytes;
8895 if (old_last_address > last_address)
8896 last_address = UINT_MAX;
8897 update_total_code_bytes (nbytes);
8898 assemble_end_function (thunk_fndecl, fnname);
8901 /* Only direct calls to static functions are allowed to be sibling (tail)
8902 call optimized.
8904 This restriction is necessary because some linker-generated stubs will
8905 store return pointers into rp' in some cases which might clobber a
8906 live value already in rp'.
8908 In a sibcall the current function and the target function share stack
8909 space. Thus if the path to the current function and the path to the
8910 target function save a value in rp', they save the value into the
8911 same stack slot, which has undesirable consequences.
8913 Because of the deferred binding nature of shared libraries any function
8914 with external scope could be in a different load module and thus require
8915 rp' to be saved when calling that function. So sibcall optimizations
8916 can only be safe for static functions.
8918 Note that GCC never needs return value relocations, so we don't have to
8919 worry about static calls with return value relocations (which require
8920 saving rp').
8922 It is safe to perform a sibcall optimization when the target function
8923 will never return. */
8924 static bool
8925 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8927 /* Sibcalls are not ok because the arg pointer register is not a fixed
8928 register. This prevents the sibcall optimization from occurring. In
8929 addition, there are problems with stub placement using GNU ld. This
8930 is because a normal sibcall branch uses a 17-bit relocation while
8931 a regular call branch uses a 22-bit relocation. As a result, more
8932 care needs to be taken in the placement of long-branch stubs. */
8933 if (TARGET_64BIT)
8934 return false;
8936 if (TARGET_PORTABLE_RUNTIME)
8937 return false;
8939 /* Sibcalls are only ok within a translation unit. */
8940 return decl && targetm.binds_local_p (decl);
8943 /* ??? Addition is not commutative on the PA due to the weird implicit
8944 space register selection rules for memory addresses. Therefore, we
8945 don't consider a + b == b + a, as this might be inside a MEM. */
8946 static bool
8947 pa_commutative_p (const_rtx x, int outer_code)
8949 return (COMMUTATIVE_P (x)
8950 && (TARGET_NO_SPACE_REGS
8951 || (outer_code != UNKNOWN && outer_code != MEM)
8952 || GET_CODE (x) != PLUS));
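/* Concretely: within a MEM, the implicit space register is selected
   from the base operand of the address, so (plus (reg 26) (reg 25))
   and (plus (reg 25) (reg 26)) may reference different spaces and
   cannot be treated as equal unless space registers are disabled.  */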
8955 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8956 use in fmpyadd instructions. */
8957 int
8958 pa_fmpyaddoperands (rtx *operands)
8960 machine_mode mode = GET_MODE (operands[0]);
8962 /* Must be a floating point mode. */
8963 if (mode != SFmode && mode != DFmode)
8964 return 0;
8966 /* All modes must be the same. */
8967 if (! (mode == GET_MODE (operands[1])
8968 && mode == GET_MODE (operands[2])
8969 && mode == GET_MODE (operands[3])
8970 && mode == GET_MODE (operands[4])
8971 && mode == GET_MODE (operands[5])))
8972 return 0;
8974 /* All operands must be registers. */
8975 if (! (GET_CODE (operands[1]) == REG
8976 && GET_CODE (operands[2]) == REG
8977 && GET_CODE (operands[3]) == REG
8978 && GET_CODE (operands[4]) == REG
8979 && GET_CODE (operands[5]) == REG))
8980 return 0;
8982 /* Only 2 real operands to the addition. One of the input operands must
8983 be the same as the output operand. */
8984 if (! rtx_equal_p (operands[3], operands[4])
8985 && ! rtx_equal_p (operands[3], operands[5]))
8986 return 0;
8988 /* Inout operand of add cannot conflict with any operands from multiply. */
8989 if (rtx_equal_p (operands[3], operands[0])
8990 || rtx_equal_p (operands[3], operands[1])
8991 || rtx_equal_p (operands[3], operands[2]))
8992 return 0;
8994 /* multiply cannot feed into addition operands. */
8995 if (rtx_equal_p (operands[4], operands[0])
8996 || rtx_equal_p (operands[5], operands[0]))
8997 return 0;
8999 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
9000 if (mode == SFmode
9001 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9002 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9003 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9004 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9005 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9006 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9007 return 0;
9009 /* Passed. Operands are suitable for fmpyadd. */
9010 return 1;
9013 #if !defined(USE_COLLECT2)
9014 static void
9015 pa_asm_out_constructor (rtx symbol, int priority)
9017 if (!function_label_operand (symbol, VOIDmode))
9018 pa_encode_label (symbol);
9020 #ifdef CTORS_SECTION_ASM_OP
9021 default_ctor_section_asm_out_constructor (symbol, priority);
9022 #else
9023 # ifdef TARGET_ASM_NAMED_SECTION
9024 default_named_section_asm_out_constructor (symbol, priority);
9025 # else
9026 default_stabs_asm_out_constructor (symbol, priority);
9027 # endif
9028 #endif
9031 static void
9032 pa_asm_out_destructor (rtx symbol, int priority)
9034 if (!function_label_operand (symbol, VOIDmode))
9035 pa_encode_label (symbol);
9037 #ifdef DTORS_SECTION_ASM_OP
9038 default_dtor_section_asm_out_destructor (symbol, priority);
9039 #else
9040 # ifdef TARGET_ASM_NAMED_SECTION
9041 default_named_section_asm_out_destructor (symbol, priority);
9042 # else
9043 default_stabs_asm_out_destructor (symbol, priority);
9044 # endif
9045 #endif
9047 #endif
9049 /* This function places uninitialized global data in the bss section.
9050 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9051 function on the SOM port to prevent uninitialized global data from
9052 being placed in the data section. */
9054 void
9055 pa_asm_output_aligned_bss (FILE *stream,
9056 const char *name,
9057 unsigned HOST_WIDE_INT size,
9058 unsigned int align)
9060 switch_to_section (bss_section);
9062 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9063 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9064 #endif
9066 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9067 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9068 #endif
9070 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9071 ASM_OUTPUT_LABEL (stream, name);
9072 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9075 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9076 that doesn't allow the alignment of global common storage to be directly
9077 specified. The SOM linker aligns common storage based on the rounded
9078 value of the NUM_BYTES parameter in the .comm directive. It's not
9079 possible to use the .align directive as it doesn't affect the alignment
9080 of the label associated with a .comm directive. */
9082 void
9083 pa_asm_output_aligned_common (FILE *stream,
9084 const char *name,
9085 unsigned HOST_WIDE_INT size,
9086 unsigned int align)
9088 unsigned int max_common_align;
9090 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9091 if (align > max_common_align)
9093 /* Alignment exceeds maximum alignment for global common data. */
9094 align = max_common_align;
9097 switch_to_section (bss_section);
9099 assemble_name (stream, name);
9100 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9101 MAX (size, align / BITS_PER_UNIT));
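/* Example output (hypothetical symbol): pa_asm_output_aligned_common
   ("x", 4, 64) emits

	x	.comm 8

   where the size is bumped to the 8-byte alignment so that the SOM
   linker's rounding yields the requested alignment.  */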
9104 /* We can't use .comm for local common storage as the SOM linker effectively
9105 treats the symbol as universal and uses the same storage for local symbols
9106 with the same name in different object files. The .block directive
9107 reserves an uninitialized block of storage. However, it's not common
9108 storage. Fortunately, GCC never requests common storage with the same
9109 name in any given translation unit. */
9111 void
9112 pa_asm_output_aligned_local (FILE *stream,
9113 const char *name,
9114 unsigned HOST_WIDE_INT size,
9115 unsigned int align)
9117 switch_to_section (bss_section);
9118 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9120 #ifdef LOCAL_ASM_OP
9121 fprintf (stream, "%s", LOCAL_ASM_OP);
9122 assemble_name (stream, name);
9123 fprintf (stream, "\n");
9124 #endif
9126 ASM_OUTPUT_LABEL (stream, name);
9127 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9130 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9131 use in fmpysub instructions. */
9132 int
9133 pa_fmpysuboperands (rtx *operands)
9135 machine_mode mode = GET_MODE (operands[0]);
9137 /* Must be a floating point mode. */
9138 if (mode != SFmode && mode != DFmode)
9139 return 0;
9141 /* All modes must be the same. */
9142 if (! (mode == GET_MODE (operands[1])
9143 && mode == GET_MODE (operands[2])
9144 && mode == GET_MODE (operands[3])
9145 && mode == GET_MODE (operands[4])
9146 && mode == GET_MODE (operands[5])))
9147 return 0;
9149 /* All operands must be registers. */
9150 if (! (GET_CODE (operands[1]) == REG
9151 && GET_CODE (operands[2]) == REG
9152 && GET_CODE (operands[3]) == REG
9153 && GET_CODE (operands[4]) == REG
9154 && GET_CODE (operands[5]) == REG))
9155 return 0;
9157 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9158 operation, so operands[4] must be the same as operands[3]. */
9159 if (! rtx_equal_p (operands[3], operands[4]))
9160 return 0;
9162 /* multiply cannot feed into subtraction. */
9163 if (rtx_equal_p (operands[5], operands[0]))
9164 return 0;
9166 /* Inout operand of sub cannot conflict with any operands from multiply. */
9167 if (rtx_equal_p (operands[3], operands[0])
9168 || rtx_equal_p (operands[3], operands[1])
9169 || rtx_equal_p (operands[3], operands[2]))
9170 return 0;
9172 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
9173 if (mode == SFmode
9174 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9175 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9176 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9177 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9178 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9179 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9180 return 0;
9182 /* Passed. Operands are suitable for fmpysub. */
9183 return 1;
9186 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9187 constants for a MULT embedded inside a memory address. */
9188 int
9189 pa_mem_shadd_constant_p (int val)
9191 if (val == 2 || val == 4 || val == 8)
9192 return 1;
9193 else
9194 return 0;
9197 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9198 constants for shadd instructions. */
9199 int
9200 pa_shadd_constant_p (int val)
9202 if (val == 1 || val == 2 || val == 3)
9203 return 1;
9204 else
9205 return 0;
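/* These constants are shift counts: sh1add, sh2add and sh3add shift
   their first operand left by 1, 2 or 3 bits (a scale of 2, 4 or 8)
   before adding.  For example, "sh2add %r26,%r25,%r28" computes
   %r28 = 4 * %r26 + %r25, which is why 2, 4 and 8 are the factors
   accepted for a MULT inside a memory address.  */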
9208 /* Return TRUE if INSN branches forward. */
9210 static bool
9211 forward_branch_p (rtx_insn *insn)
9213 rtx lab = JUMP_LABEL (insn);
9215 /* The INSN must have a jump label. */
9216 gcc_assert (lab != NULL_RTX);
9218 if (INSN_ADDRESSES_SET_P ())
9219 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9221 while (insn)
9223 if (insn == lab)
9224 return true;
9225 else
9226 insn = NEXT_INSN (insn);
9229 return false;
9232 /* Output an unconditional move and branch insn. */
9234 const char *
9235 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9237 int length = get_attr_length (insn);
9239 /* These are the cases in which we win. */
9240 if (length == 4)
9241 return "mov%I1b,tr %1,%0,%2";
9243 /* None of the following cases win, but they don't lose either. */
9244 if (length == 8)
9246 if (dbr_sequence_length () == 0)
9248 /* Nothing in the delay slot, fake it by putting the combined
9249 insn (the copy or add) in the delay slot of a bl. */
9250 if (GET_CODE (operands[1]) == CONST_INT)
9251 return "b %2\n\tldi %1,%0";
9252 else
9253 return "b %2\n\tcopy %1,%0";
9255 else
9257 /* Something in the delay slot, but we've got a long branch. */
9258 if (GET_CODE (operands[1]) == CONST_INT)
9259 return "ldi %1,%0\n\tb %2";
9260 else
9261 return "copy %1,%0\n\tb %2";
9265 if (GET_CODE (operands[1]) == CONST_INT)
9266 output_asm_insn ("ldi %1,%0", operands);
9267 else
9268 output_asm_insn ("copy %1,%0", operands);
9269 return pa_output_lbranch (operands[2], insn, 1);
9272 /* Output an unconditional add and branch insn. */
9274 const char *
9275 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9277 int length = get_attr_length (insn);
9279 /* To make life easy we want operand0 to be the shared input/output
9280 operand and operand1 to be the read-only operand. */
9281 if (operands[0] == operands[1])
9282 operands[1] = operands[2];
9284 /* These are the cases in which we win. */
9285 if (length == 4)
9286 return "add%I1b,tr %1,%0,%3";
9288 /* None of the following cases win, but they don't lose either. */
9289 if (length == 8)
9291 if (dbr_sequence_length () == 0)
9292 /* Nothing in the delay slot, fake it by putting the combined
9293 insn (the copy or add) in the delay slot of a bl. */
9294 return "b %3\n\tadd%I1 %1,%0,%0";
9295 else
9296 /* Something in the delay slot, but we've got a long branch. */
9297 return "add%I1 %1,%0,%0\n\tb %3";
9300 output_asm_insn ("add%I1 %1,%0,%0", operands);
9301 return pa_output_lbranch (operands[3], insn, 1);
9304 /* We use this hook to perform a PA specific optimization which is difficult
9305 to do in earlier passes. */
9307 static void
9308 pa_reorg (void)
9310 remove_useless_addtr_insns (1);
9312 if (pa_cpu < PROCESSOR_8000)
9313 pa_combine_instructions ();
9316 /* The PA has a number of odd instructions which can perform multiple
9317 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9318 it may be profitable to combine two instructions into one instruction
9319 with two outputs. It's not profitable on PA2.0 machines because the
9320 two outputs would take two slots in the reorder buffers.
9322 This routine finds instructions which can be combined and combines
9323 them. We only support some of the potential combinations, and we
9324 only try common ways to find suitable instructions.
9326 * addb can add two registers or a register and a small integer
9327 and jump to a nearby (+-8k) location. Normally the jump to the
9328 nearby location is conditional on the result of the add, but by
9329 using the "true" condition we can make the jump unconditional.
9330 Thus addb can perform two independent operations in one insn.
9332 * movb is similar to addb in that it can perform a reg->reg
9333 or small immediate->reg copy and jump to a nearby (+-8k) location.
9335 * fmpyadd and fmpysub can perform a FP multiply and either an
9336 FP add or FP sub if the operands of the multiply and add/sub are
9337 independent (there are other minor restrictions). Note both
9338 the fmpy and fadd/fsub can in theory move to better spots according
9339 to data dependencies, but for now we require the fmpy stay at a
9340 fixed location.
9342 * Many of the memory operations can perform pre & post updates
9343 of index registers. GCC's pre/post increment/decrement addressing
9344 is far too simple to take advantage of all the possibilities. This
9345 pass may not be suitable since those insns may not be independent.
9347 * comclr can compare two ints or an int and a register, nullify
9348 the following instruction and zero some other register. This
9349 is more difficult to use as it's harder to find an insn which
9350 will generate a comclr than finding something like an unconditional
9351 branch. (conditional moves & long branches create comclr insns).
9353 * Most arithmetic operations can conditionally skip the next
9354 instruction. They can be viewed as "perform this operation
9355 and conditionally jump to this nearby location" (where nearby
9356 is an insn away). These are difficult to use due to the
9357 branch length restrictions. */
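/* A sketch of the addb case (registers and label hypothetical): the
   pass can rewrite the pair

	add %r25,%r26,%r26
	b L$17

   as the single instruction

	addb,tr %r25,%r26,L$17

   where the always-true condition makes the branch unconditional.  */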
9359 static void
9360 pa_combine_instructions (void)
9362 rtx_insn *anchor;
9364 /* This can get expensive since the basic algorithm is on the
9365 order of O(n^2) (or worse). Only do it for -O2 or higher
9366 levels of optimization. */
9367 if (optimize < 2)
9368 return;
9370 /* Walk down the list of insns looking for "anchor" insns which
9371 may be combined with "floating" insns. As the name implies,
9372 "anchor" instructions don't move, while "floating" insns may
9373 move around. */
9374 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9375 rtx_insn *new_rtx = make_insn_raw (par);
9377 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9379 enum attr_pa_combine_type anchor_attr;
9380 enum attr_pa_combine_type floater_attr;
9382 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9383 Also ignore any special USE insns. */
9384 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9385 || GET_CODE (PATTERN (anchor)) == USE
9386 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9387 continue;
9389 anchor_attr = get_attr_pa_combine_type (anchor);
9390 /* See if anchor is an insn suitable for combination. */
9391 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9392 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9393 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9394 && ! forward_branch_p (anchor)))
9396 rtx_insn *floater;
9398 for (floater = PREV_INSN (anchor);
9399 floater;
9400 floater = PREV_INSN (floater))
9402 if (NOTE_P (floater)
9403 || (NONJUMP_INSN_P (floater)
9404 && (GET_CODE (PATTERN (floater)) == USE
9405 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9406 continue;
9408 /* Anything except a regular INSN will stop our search. */
9409 if (! NONJUMP_INSN_P (floater))
9411 floater = NULL;
9412 break;
9415 /* See if FLOATER is suitable for combination with the
9416 anchor. */
9417 floater_attr = get_attr_pa_combine_type (floater);
9418 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9419 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9420 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9421 && floater_attr == PA_COMBINE_TYPE_FMPY))
9423 /* If ANCHOR and FLOATER can be combined, then we're
9424 done with this pass. */
9425 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9426 SET_DEST (PATTERN (floater)),
9427 XEXP (SET_SRC (PATTERN (floater)), 0),
9428 XEXP (SET_SRC (PATTERN (floater)), 1)))
9429 break;
9432 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9433 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9435 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9437 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9438 SET_DEST (PATTERN (floater)),
9439 XEXP (SET_SRC (PATTERN (floater)), 0),
9440 XEXP (SET_SRC (PATTERN (floater)), 1)))
9441 break;
9443 else
9445 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9446 SET_DEST (PATTERN (floater)),
9447 SET_SRC (PATTERN (floater)),
9448 SET_SRC (PATTERN (floater))))
9449 break;
9454 /* If we didn't find anything on the backwards scan try forwards. */
9455 if (!floater
9456 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9457 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9459 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9461 if (NOTE_P (floater)
9462 || (NONJUMP_INSN_P (floater)
9463 && (GET_CODE (PATTERN (floater)) == USE
9464 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9466 continue;
9468 /* Anything except a regular INSN will stop our search. */
9469 if (! NONJUMP_INSN_P (floater))
9471 floater = NULL;
9472 break;
9475 /* See if FLOATER is suitable for combination with the
9476 anchor. */
9477 floater_attr = get_attr_pa_combine_type (floater);
9478 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9479 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9480 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9481 && floater_attr == PA_COMBINE_TYPE_FMPY))
9483 /* If ANCHOR and FLOATER can be combined, then we're
9484 done with this pass. */
9485 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9486 SET_DEST (PATTERN (floater)),
9487 XEXP (SET_SRC (PATTERN (floater)),
9488 0),
9489 XEXP (SET_SRC (PATTERN (floater)),
9490 1)))
9491 break;
9496 /* FLOATER will be nonzero if we found a suitable floating
9497 insn for combination with ANCHOR. */
9498 if (floater
9499 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9500 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9502 /* Emit the new instruction and delete the old anchor. */
9503 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9504 copy_rtx (PATTERN (floater)));
9505 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9506 emit_insn_before (temp, anchor);
9508 SET_INSN_DELETED (anchor);
9510 /* Emit a special USE insn for FLOATER, then delete
9511 the floating insn. */
9512 temp = copy_rtx (PATTERN (floater));
9513 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9514 delete_insn (floater);
9516 continue;
9518 else if (floater
9519 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9521 /* Emit the new_jump instruction and delete the old anchor. */
9522 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9523 copy_rtx (PATTERN (floater)));
9524 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9525 temp = emit_jump_insn_before (temp, anchor);
9527 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9528 SET_INSN_DELETED (anchor);
9530 /* Emit a special USE insn for FLOATER, then delete
9531 the floating insn. */
9532 temp = copy_rtx (PATTERN (floater));
9533 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9534 delete_insn (floater);
9535 continue;
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
                  int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
          !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider: one output and
     two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */

int
pa_insn_refs_are_delayed (rtx_insn *insn)
{
  return ((NONJUMP_INSN_P (insn)
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}

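/* For illustration only (this sequence is produced by pa.md and the
   millicode library, not by this file): given a millicode multiply,
   the hook above lets reorg move an argument setup insn into the
   delay slot of the branch and link:

	bl $$mulI,%r31		; millicode call, return pointer in %r31
	ldi 7,%r25		; argument setup in the delay slot

   The routine name and registers are illustrative; see pa.md and
   milli64.S for the authoritative millicode definitions.  */
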
/* Promote the return value, but not the arguments.  */

static machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

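/* Example of the asymmetry above (illustrative, not emitted code): for

     short f (void) { return -1; }

   the return value is promoted, so the caller may assume a full
   sign-extended word in %r28; an incoming short argument, by contrast,
   is left unpromoted because FOR_RETURN is 0 for arguments.  */
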
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
        return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (valsize > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

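/* Illustrative mapping implied by pa_function_value on the 32-bit port
   (for exposition only; the code above is authoritative):

     int f (void);		-> %r28
     long long f (void);	-> %r28-%r29
     double f (void);		-> %fr4 (register 32)
     double f (void);		-> %r28-%r29 with -msoft-float

   TFmode results are excluded from the FP return register by the
   check above.  */
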
/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
                  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over argument ARG.  */

static void
pa_function_arg_advance (cumulative_args_t cum_v,
                         const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

  cum->nargs_prototype--;
  cum->words += (arg_size
                 + ((cum->words & 01)
                    && arg.type != NULL_TREE
                    && arg_size > 1));
}

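/* Worked example (illustrative): on the 64-bit target with a prototyped
   16-byte argument (arg_size == 2) and cum->words == 1, the
   (cum->words & 01) test adds one pad word, so cum->words advances to
   1 + 2 + 1 == 4.  A one-word argument in the same position would
   advance it to 2 with no padding.  */
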
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */

static rtx
pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (arg.end_marker_p ())
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }

  return retval;
}

/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
                     ? (integer_zerop (TYPE_SIZE (type))
                        || !TREE_CONSTANT (TYPE_SIZE (type))
                        || int_size_in_bytes (type) <= UNITS_PER_WORD)
                     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

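/* Examples (illustrative): on the 32-bit port an SImode argument returns
   PARM_BOUNDARY (32 bits) while DImode and DFmode arguments return
   MAX_PARM_BOUNDARY (64 bits), producing the double-word argument slot
   alignment assumed by pa_function_arg above.  */
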
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
      <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}

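/* Worked example (illustrative): on the 64-bit target with cum->words == 6
   and a 32-byte structure (size 4 words, no pad since 6 is even), the
   argument neither fits fully in registers (6 + 4 > 8) nor starts on the
   stack (6 < 8), so (8 - 6) * UNITS_PER_WORD == 16 bytes go in registers
   and the remaining 16 bytes on the stack.  */
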
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
                             "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols.  This is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask () for section selection.
     This puts constant data not needing relocation into the $TEXT$ space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !(reloc & pa_reloc_rw_mask ()))
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp)
           && !(reloc & pa_reloc_rw_mask ()))
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

/* Implement pa_elf_select_rtx_section.  If X is a function label operand
   and the function is in a COMDAT group, place the plabel reference in the
   .data.rel.ro.local section.  The linker ignores references to symbols in
   discarded sections from this section.  */

static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
                           unsigned HOST_WIDE_INT align)
{
  if (function_label_operand (x, VOIDmode))
    {
      tree decl = SYMBOL_REF_DECL (x);

      if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
        return get_named_section (NULL, ".data.rel.ro.local", 1);
    }

  return default_elf_select_rtx_section (mode, x, align);
}

/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
    return 3;

  /* HP linker does not support global relocs in readonly memory.  */
  return TARGET_SOM ? 2 : 0;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}

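/* Examples (illustrative): a 12-byte structure is returned in memory on
   the 32-bit ports (larger than 8 bytes) but in registers on the 64-bit
   port (not larger than 16 bytes).  A variably sized type makes
   int_size_in_bytes return -1 and an empty type makes it return 0; both
   are forced into memory by the <= 0 test above.  */
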
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
pa_can_change_mode_class (machine_mode from, machine_mode to,
                          reg_class_t rclass)
{
  if (from == to)
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return false;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
     except for DImode to SImode on the 64-bit target.  It is handled by
     register renaming in pa_print_operand.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return TARGET_64BIT && from == DImode && to == SImode;

  /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return false;

  return true;
}

/* Implement TARGET_MODES_TIEABLE_P.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
   TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

static bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}

/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      if (TARGET_PA_20)
        {
          fputs ("\tmfia %r20\n", f);
          fputs ("\tldw 48(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 52(%r20),%r29\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
          fputs ("\t.word 0\n", f);
        }
      else
        {
          if (ASSEMBLER_DIALECT == 0)
            {
              fputs ("\tbl .+8,%r20\n", f);
              fputs ("\tdepi 0,31,2,%r20\n", f);
            }
          else
            {
              fputs ("\tb,l .+8,%r20\n", f);
              fputs ("\tdepwi 0,31,2,%r20\n", f);
            }
          fputs ("\tldw 40(%r20),%r22\n", f);
          fputs ("\tcopy %r22,%r21\n", f);
          fputs ("\tbb,>=,n %r22,30,.+16\n", f);
          if (ASSEMBLER_DIALECT == 0)
            fputs ("\tdepi 0,31,2,%r22\n", f);
          else
            fputs ("\tdepwi 0,31,2,%r22\n", f);
          fputs ("\tldw 0(%r22),%r21\n", f);
          fputs ("\tldw 4(%r22),%r19\n", f);
          fputs ("\tldsid (%r21),%r1\n", f);
          fputs ("\tmtsp %r1,%sr0\n", f);
          fputs ("\tbe 0(%sr0,%r21)\n", f);
          fputs ("\tldw 44(%r20),%r29\n", f);
        }
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r27\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\tldd 16(%r27),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 24(%r27),%r27\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 48.
   Move the static chain value to the trampoline template at offset 52.
   Move the trampoline address to the trampoline template at offset 56.
   Move r19 to the trampoline template at offset 60.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
                                                            r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
                                             TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 56.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
        return TFmode;
    }

  return VOIDmode;
}

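/* Example (illustrative): with HPUX_LONG_DOUBLE_LIBRARY, a constant
   written as 1.0q in C source gets TFmode, the 128-bit format handled
   by the HP-UX long double library; any other suffix falls through to
   the default handling via VOIDmode.  */
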
/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
                     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
                  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
           || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
          && REG_P (XEXP (x, 0))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
         so check it first.  */
      if (REG_P (XEXP (x, 1))
          && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
                     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
        base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
               && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
                          : REG_OK_FOR_BASE_P (XEXP (x, 0))))
        base = XEXP (x, 0), index = XEXP (x, 1);
      else
        return false;

      if (GET_CODE (index) == CONST_INT)
        {
          if (INT_5_BITS (index))
            return true;

          /* When INT14_OK_STRICT is false, a secondary reload is needed
             to adjust the displacement of SImode and DImode floating point
             instructions but this may fail when the register also needs
             reloading.  So, we return false when STRICT is true.  We
             also reject long displacements for float mode addresses since
             the majority of accesses will use floating point instructions
             that don't support 14-bit offsets.  */
          if (!INT14_OK_STRICT
              && (strict || !(reload_in_progress || reload_completed))
              && mode != QImode
              && mode != HImode)
            return false;

          return base14_operand (index, mode);
        }

      if (!TARGET_DISABLE_INDEXING
          /* Only accept the "canonical" INDEX+BASE operand order
             on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              ? REG_P (index)
              : (base == XEXP (x, 1) && REG_P (index)
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (base))
                     || REG_POINTER (base))
                 && (reload_completed
                     || (reload_in_progress && HARD_REGISTER_P (index))
                     || !REG_POINTER (index))))
          && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
                     : REG_OK_FOR_INDEX_P (index))
          && borx_reg_operand (base, Pmode)
          && borx_reg_operand (index, Pmode))
        return true;

      if (!TARGET_DISABLE_INDEXING
          && GET_CODE (index) == MULT
          /* Only accept base operands with the REG_POINTER flag prior to
             reload on targets with non-equivalent space registers.  */
          && (TARGET_NO_SPACE_REGS
              || (base == XEXP (x, 1)
                  && (reload_completed
                      || (reload_in_progress && HARD_REGISTER_P (base))
                      || REG_POINTER (base))))
          && REG_P (XEXP (index, 0))
          && GET_MODE (XEXP (index, 0)) == Pmode
          && MODE_OK_FOR_SCALED_INDEXING_P (mode)
          && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
                     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
          && GET_CODE (XEXP (index, 1)) == CONST_INT
          && INTVAL (XEXP (index, 1))
             == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
          && borx_reg_operand (base, Pmode))
        return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
        y = SUBREG_REG (y);

      if (REG_P (y)
          && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
                     : REG_OK_FOR_BASE_P (y)))
        {
          /* Needed for -fPIC */
          if (mode == Pmode
              && GET_CODE (XEXP (x, 1)) == UNSPEC)
            return true;

          if (!INT14_OK_STRICT
              && (strict || !(reload_in_progress || reload_completed))
              && mode != QImode
              && mode != HImode)
            return false;

          if (CONSTANT_P (XEXP (x, 1)))
            return true;
        }
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}

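/* Illustrative addresses accepted above on the 32-bit port (assuming
   REG_POINTER is set on the base register; for exposition only):

     (mem:SI (reg %r26))				  base register
     (mem:SI (plus (reg %r26) (const_int 12)))		  REG+SMALLINT
     (mem:SF (plus (reg %r19) (reg %r26)))		  REG+REG, index first
     (mem:SF (plus (mult (reg %r19) (const_int 4))
		   (reg %r26)))				  scaled index

   A long displacement such as (plus (reg) (const_int 4096)) is accepted
   for SImode via base14_operand but rejected for SFmode and DFmode when
   INT14_OK_STRICT is false, per the comment in the CONST_INT case.  */
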
/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory (X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
                              int opnum, int type,
                              int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
          && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
                                      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP (new_rtx, 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~mask) + mask + 1;
      else
        newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
          && (GET_MODE_CLASS (mode) == MODE_FLOAT
              || (TARGET_64BIT && (mode) == DImode)))
        newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
        {
          temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
                               GEN_INT (newoffset));
          ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
          push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          return ad;
        }
    }

  return NULL_RTX;
}

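/* Worked example (illustrative): for a DFmode access at (%r3 + 0x155)
   when INT14_OK_STRICT is false, mask is 0x1f.  Since 0x155 & 0x1f is
   0x15 >= 0x10, we round up to Y = 0x160 and reload

     Z = %r3 + 0x160
     memory (Z + (-0xb))

   The residual -0xb fits the 5-bit FP displacement, and Z can be
   inherited by later reloads of nearby stack slots.  */
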
/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  if (!TARGET_SOM)
    fputs ("\t.align 4\n", asm_out_file);
  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
        (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* This is a helper function for the other atomic operations.  This function
   emits a loop that contains SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
                                       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
                                       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
                           GET_MODE (success), 1, label,
                           profile_probability::guessed_never ());
  return true;
}

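/* Minimal usage sketch (hypothetical, not code from this file): an atomic
   increment could be built on the loop above roughly as follows, assuming
   MEM is the memory operand and MODE its mode:

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg = gen_reg_rtx (mode);
     start_sequence ();
     rtx val = expand_simple_binop (mode, PLUS, old_reg, const1_rtx,
				    new_reg, 1, OPTAB_LIB_WIDEN);
     if (val != new_reg)
       emit_move_insn (new_reg, val);
     rtx seq = get_insns ();
     end_sequence ();
     pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   SEQ must only read OLD_REG and write NEW_REG, exactly as described
   in the comment above.  */
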
/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
        target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
        return target;
    }

  return NULL_RTX;
}

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t, const function_arg_info &)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  The size
   returned by this function should always be greater than zero because
   we pass variable and zero sized objects by reference.  */

HOST_WIDE_INT
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
  return CEIL (size, UNITS_PER_WORD);
}

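/* Examples (illustrative): on the 32-bit port, SImode yields 1 word,
   DFmode yields 2 words, and a BLKmode structure of 10 bytes yields
   CEIL (10, 4) == 3 words.  */
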
#include "gt-pa.h"