/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2016 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "df.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "optabs.h"
32 #include "regs.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35 #include "diagnostic-core.h"
36 #include "insn-attr.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "calls.h"
42 #include "output.h"
43 #include "except.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "reload.h"
47 #include "common/common-target.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "opts.h"
51 #include "builtins.h"
53 /* This file should be included last. */
54 #include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
402 /* Parse the -mfixed-range= option string. */
404 static void
405 fix_range (const char *const_str)
407 int i, first, last;
408 char *str, *dash, *comma;
410 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
411 REG2 are either register names or register numbers. The effect
412 of this option is to mark the registers in the range from REG1 to
413 REG2 as ``fixed'' so they won't be used by the compiler. This is
414 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
416 i = strlen (const_str);
417 str = (char *) alloca (i + 1);
418 memcpy (str, const_str, i + 1);
420 while (1)
422 dash = strchr (str, '-');
423 if (!dash)
425 warning (0, "value of -mfixed-range must have form REG1-REG2");
426 return;
428 *dash = '\0';
430 comma = strchr (dash + 1, ',');
431 if (comma)
432 *comma = '\0';
434 first = decode_reg_name (str);
435 if (first < 0)
437 warning (0, "unknown register name: %s", str);
438 return;
441 last = decode_reg_name (dash + 1);
442 if (last < 0)
444 warning (0, "unknown register name: %s", dash + 1);
445 return;
448 *dash = '-';
450 if (first > last)
452 warning (0, "%s-%s is an empty range", str, dash + 1);
453 return;
456 for (i = first; i <= last; ++i)
457 fixed_regs[i] = call_used_regs[i] = 1;
459 if (!comma)
460 break;
462 *comma = ',';
463 str = comma + 1;
466 /* Check if all floating point registers have been fixed. */
467 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
468 if (!fixed_regs[i])
469 break;
471 if (i > FP_REG_LAST)
472 target_flags |= MASK_DISABLE_FPREGS;
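
/* Illustrative example (a sketch, not from the original sources): with
   -mfixed-range=fr4-fr31 every register in that range is marked fixed
   and call-used; if that range covers FP_REG_FIRST through FP_REG_LAST,
   the loop above then turns on MASK_DISABLE_FPREGS as a side effect.
   Several ranges can be combined, e.g. -mfixed-range=r5-r6,fr4-fr31.  */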
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
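
/* Illustrative user-level use of the builtins registered above (a
   sketch, not part of this file; assumes HPUX_LONG_DOUBLE_LIBRARY):

     long double x = __builtin_infq ();               expanded inline
     long double y = __builtin_fabsq (-x);            lowers to _U_Qfabs
     long double z = __builtin_copysignq (y, -1.0L);  lowers to _U_Qfcopysign  */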
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
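
/* Worked example (illustrative, assuming a 64-bit HOST_WIDE_INT): the
   test mask is 0xffffffff800007ff.  ival = 0x12345800 masks to zero
   (low 11 bits clear, bit 31 clear), so ldil alone can load it;
   ival = 0x12345678 leaves 0x678 behind and is rejected, since ldil
   only sets the most significant 21 bits of a word.  */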
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
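
/* Worked example (illustrative, not from the original sources): for
   x = 0x7f0, lsb_mask = 0x10, (x >> 4) + lsb_mask = 0x8f, and clearing
   the bits below lsb_mask leaves t = 0x80, a power of two, so the value
   is accepted (deposit the sign-extended value -1 in a 7-bit field
   starting at bit 4).  For x = 0x101, t = 0x11 is not a power of two,
   so it is rejected.  */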
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
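
/* Worked example (illustrative, assuming a 64-bit HOST_WIDE_INT):
   mask = 0xff (pattern 0....01....1): ~mask is a block of ones above a
   block of zeros, and adding its lowest set bit (0x100) carries all the
   way out, leaving 0, which passes the power-of-two test (extru can
   extract the low byte).  mask = 0xf0f fails: ~mask + 0x10 still has
   scattered bits set.  */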
/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
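
/* Similarly (illustrative): mask = 0x3c, a contiguous run of ones,
   gives 0x3c + 0x4 = 0x40, a power of two, so depi can set those bits;
   mask = 0x5 gives 0x6 and is rejected because its set bits are not
   contiguous.  */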
754 /* Legitimize PIC addresses. If the address is already
755 position-independent, we return ORIG. Newly generated
756 position-independent addresses go to REG. If we need more
757 than one register, we lose. */
759 static rtx
760 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
762 rtx pic_ref = orig;
764 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
766 /* Labels need special handling. */
767 if (pic_label_operand (orig, mode))
769 rtx_insn *insn;
771 /* We do not want to go through the movXX expanders here since that
772 would create recursion.
774 Nor do we really want to call a generator for a named pattern
775 since that requires multiple patterns if we want to support
776 multiple word sizes.
778 So instead we just emit the raw set, which avoids the movXX
779 expanders completely. */
780 mark_reg_pointer (reg, BITS_PER_UNIT);
781 insn = emit_insn (gen_rtx_SET (reg, orig));
783 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
784 add_reg_note (insn, REG_EQUAL, orig);
786 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
787 and update LABEL_NUSES because this is not done automatically. */
788 if (reload_in_progress || reload_completed)
790 /* Extract LABEL_REF. */
791 if (GET_CODE (orig) == CONST)
792 orig = XEXP (XEXP (orig, 0), 0);
793 /* Extract CODE_LABEL. */
794 orig = XEXP (orig, 0);
795 add_reg_note (insn, REG_LABEL_OPERAND, orig);
796 /* Make sure we have label and not a note. */
797 if (LABEL_P (orig))
798 LABEL_NUSES (orig)++;
800 crtl->uses_pic_offset_table = 1;
801 return reg;
803 if (GET_CODE (orig) == SYMBOL_REF)
805 rtx_insn *insn;
806 rtx tmp_reg;
808 gcc_assert (reg);
810 /* Before reload, allocate a temporary register for the intermediate
811 result. This allows the sequence to be deleted when the final
812 result is unused and the insns are trivially dead. */
813 tmp_reg = ((reload_in_progress || reload_completed)
814 ? reg : gen_reg_rtx (Pmode));
816 if (function_label_operand (orig, VOIDmode))
818 /* Force function label into memory in word mode. */
819 orig = XEXP (force_const_mem (word_mode, orig), 0);
820 /* Load plabel address from DLT. */
821 emit_move_insn (tmp_reg,
822 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
823 gen_rtx_HIGH (word_mode, orig)));
824 pic_ref
825 = gen_const_mem (Pmode,
826 gen_rtx_LO_SUM (Pmode, tmp_reg,
827 gen_rtx_UNSPEC (Pmode,
828 gen_rtvec (1, orig),
829 UNSPEC_DLTIND14R)));
830 emit_move_insn (reg, pic_ref);
831 /* Now load address of function descriptor. */
832 pic_ref = gen_rtx_MEM (Pmode, reg);
834 else
836 /* Load symbol reference from DLT. */
837 emit_move_insn (tmp_reg,
838 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
839 gen_rtx_HIGH (word_mode, orig)));
840 pic_ref
841 = gen_const_mem (Pmode,
842 gen_rtx_LO_SUM (Pmode, tmp_reg,
843 gen_rtx_UNSPEC (Pmode,
844 gen_rtvec (1, orig),
845 UNSPEC_DLTIND14R)));
848 crtl->uses_pic_offset_table = 1;
849 mark_reg_pointer (reg, BITS_PER_UNIT);
850 insn = emit_move_insn (reg, pic_ref);
852 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
853 set_unique_reg_note (insn, REG_EQUAL, orig);
855 return reg;
857 else if (GET_CODE (orig) == CONST)
859 rtx base;
861 if (GET_CODE (XEXP (orig, 0)) == PLUS
862 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
863 return orig;
865 gcc_assert (reg);
866 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
868 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
869 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
870 base == reg ? 0 : reg);
872 if (GET_CODE (orig) == CONST_INT)
874 if (INT_14_BITS (orig))
875 return plus_constant (Pmode, base, INTVAL (orig));
876 orig = force_reg (Pmode, orig);
878 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
879 /* Likewise, should we set special REG_NOTEs here? */
882 return pic_ref;
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
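
/* A worked instance of the transformation above (illustrative, not from
   the original sources): for a MODE_INT reference to X + 0x4321 the mask
   is 0x3fff; 0x4321 & 0x3fff = 0x321 is below halfway, so Y rounds down
   to 0x4000, Z = X + 0x4000 is put in a register, and the final address
   is Z + 0x321, a displacement that fits in 14 bits.  */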
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }
  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
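
/* Cost example (illustrative, not from the original sources): a DImode
   multiply on a 32-bit target has factor = 8 / 4 = 2, so with FP
   hardware available (TARGET_PA_11, FP regs enabled) it is costed at
   2 * 2 * COSTS_N_INSNS (8), four times the SImode multiply cost,
   matching the quadratic growth in partial products.  */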
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
1593 /* Emit insns to move operands[1] into operands[0].
1595 Return 1 if we have written out everything that needs to be done to
1596 do the move. Otherwise, return 0 and the caller will emit the move
1597 normally.
1599 Note SCRATCH_REG may not be in the proper mode depending on how it
1600 will be used. This routine is responsible for creating a new copy
1601 of SCRATCH_REG in the proper mode. */
1604 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1606 register rtx operand0 = operands[0];
1607 register rtx operand1 = operands[1];
1608 register rtx tem;
1610 /* We can only handle indexed addresses in the destination operand
1611 of floating point stores. Thus, we need to break out indexed
1612 addresses from the destination operand. */
1613 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1615 gcc_assert (can_create_pseudo_p ());
1617 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1618 operand0 = replace_equiv_address (operand0, tem);
1621 /* On targets with non-equivalent space registers, break out unscaled
1622 indexed addresses from the source operand before the final CSE.
1623 We have to do this because the REG_POINTER flag is not correctly
1624 carried through various optimization passes and CSE may substitute
1625 a pseudo without the pointer set for one with the pointer set. As
1626 a result, we loose various opportunities to create insns with
1627 unscaled indexed addresses. */
1628 if (!TARGET_NO_SPACE_REGS
1629 && !cse_not_expected
1630 && GET_CODE (operand1) == MEM
1631 && GET_CODE (XEXP (operand1, 0)) == PLUS
1632 && REG_P (XEXP (XEXP (operand1, 0), 0))
1633 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1634 operand1
1635 = replace_equiv_address (operand1,
1636 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1638 if (scratch_reg
1639 && reload_in_progress && GET_CODE (operand0) == REG
1640 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1641 operand0 = reg_equiv_mem (REGNO (operand0));
1642 else if (scratch_reg
1643 && reload_in_progress && GET_CODE (operand0) == SUBREG
1644 && GET_CODE (SUBREG_REG (operand0)) == REG
1645 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1647 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1648 the code which tracks sets/uses for delete_output_reload. */
1649 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1650 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1651 SUBREG_BYTE (operand0));
1652 operand0 = alter_subreg (&temp, true);
1655 if (scratch_reg
1656 && reload_in_progress && GET_CODE (operand1) == REG
1657 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1658 operand1 = reg_equiv_mem (REGNO (operand1));
1659 else if (scratch_reg
1660 && reload_in_progress && GET_CODE (operand1) == SUBREG
1661 && GET_CODE (SUBREG_REG (operand1)) == REG
1662 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1664 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1665 the code which tracks sets/uses for delete_output_reload. */
1666 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1667 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1668 SUBREG_BYTE (operand1));
1669 operand1 = alter_subreg (&temp, true);
1672 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1673 && ((tem = find_replacement (&XEXP (operand0, 0)))
1674 != XEXP (operand0, 0)))
1675 operand0 = replace_equiv_address (operand0, tem);
1677 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1678 && ((tem = find_replacement (&XEXP (operand1, 0)))
1679 != XEXP (operand1, 0)))
1680 operand1 = replace_equiv_address (operand1, tem);
1682 /* Handle secondary reloads for loads/stores of FP registers from
1683 REG+D addresses where D does not fit in 5 or 14 bits, including
1684 (subreg (mem (addr))) cases, and reloads for other unsupported
1685 memory operands. */
1686 if (scratch_reg
1687 && FP_REG_P (operand0)
1688 && (MEM_P (operand1)
1689 || (GET_CODE (operand1) == SUBREG
1690 && MEM_P (XEXP (operand1, 0)))))
1692 rtx op1 = operand1;
1694 if (GET_CODE (op1) == SUBREG)
1695 op1 = XEXP (op1, 0);
1697 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1699 if (!(TARGET_PA_20
1700 && !TARGET_ELF32
1701 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1702 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1704 /* SCRATCH_REG will hold an address and maybe the actual data.
1705 We want it in WORD_MODE regardless of what mode it was
1706 originally given to us. */
1707 scratch_reg = force_mode (word_mode, scratch_reg);
1709 /* D might not fit in 14 bits either; for such cases load D
1710 into scratch reg. */
1711 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1713 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1714 emit_move_insn (scratch_reg,
1715 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1716 Pmode,
1717 XEXP (XEXP (op1, 0), 0),
1718 scratch_reg));
1720 else
1721 emit_move_insn (scratch_reg, XEXP (op1, 0));
1722 emit_insn (gen_rtx_SET (operand0,
1723 replace_equiv_address (op1, scratch_reg)));
1724 return 1;
1727 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1728 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1729 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1731 /* Load memory address into SCRATCH_REG. */
1732 scratch_reg = force_mode (word_mode, scratch_reg);
1733 emit_move_insn (scratch_reg, XEXP (op1, 0));
1734 emit_insn (gen_rtx_SET (operand0,
1735 replace_equiv_address (op1, scratch_reg)));
1736 return 1;
1739 else if (scratch_reg
1740 && FP_REG_P (operand1)
1741 && (MEM_P (operand0)
1742 || (GET_CODE (operand0) == SUBREG
1743 && MEM_P (XEXP (operand0, 0)))))
1745 rtx op0 = operand0;
1747 if (GET_CODE (op0) == SUBREG)
1748 op0 = XEXP (op0, 0);
1750 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1752 if (!(TARGET_PA_20
1753 && !TARGET_ELF32
1754 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1755 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1757 /* SCRATCH_REG will hold an address and maybe the actual data.
1758 We want it in WORD_MODE regardless of what mode it was
1759 originally given to us. */
1760 scratch_reg = force_mode (word_mode, scratch_reg);
1762 /* D might not fit in 14 bits either; for such cases load D
1763 into scratch reg. */
1764 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1766 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1767 emit_move_insn (scratch_reg,
1768 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1769 Pmode,
1770 XEXP (XEXP (op0, 0), 0),
1771 scratch_reg));
1773 else
1774 emit_move_insn (scratch_reg, XEXP (op0, 0));
1775 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1776 operand1));
1777 return 1;
1780 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1781 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1782 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1784 /* Load memory address into SCRATCH_REG. */
1785 scratch_reg = force_mode (word_mode, scratch_reg);
1786 emit_move_insn (scratch_reg, XEXP (op0, 0));
1787 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1788 operand1));
1789 return 1;
1792 /* Handle secondary reloads for loads of FP registers from constant
1793 expressions by forcing the constant into memory. For the most part,
1794 this is only necessary for SImode and DImode.
1796 Use scratch_reg to hold the address of the memory location. */
1797 else if (scratch_reg
1798 && CONSTANT_P (operand1)
1799 && FP_REG_P (operand0))
1801 rtx const_mem, xoperands[2];
1803 if (operand1 == CONST0_RTX (mode))
1805 emit_insn (gen_rtx_SET (operand0, operand1));
1806 return 1;
1809 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1810 it in WORD_MODE regardless of what mode it was originally given
1811 to us. */
1812 scratch_reg = force_mode (word_mode, scratch_reg);
1814 /* Force the constant into memory and put the address of the
1815 memory location into scratch_reg. */
1816 const_mem = force_const_mem (mode, operand1);
1817 xoperands[0] = scratch_reg;
1818 xoperands[1] = XEXP (const_mem, 0);
1819 pa_emit_move_sequence (xoperands, Pmode, 0);
1821 /* Now load the destination register. */
1822 emit_insn (gen_rtx_SET (operand0,
1823 replace_equiv_address (const_mem, scratch_reg)));
1824 return 1;
1826 /* Handle secondary reloads for SAR. These occur when trying to load
1827 the SAR from memory or a constant. */
1828 else if (scratch_reg
1829 && GET_CODE (operand0) == REG
1830 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1831 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1832 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1834 /* D might not fit in 14 bits either; for such cases load D into
1835 scratch reg. */
1836 if (GET_CODE (operand1) == MEM
1837 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1839 /* We are reloading the address into the scratch register, so we
1840 want to make sure the scratch register is a full register. */
1841 scratch_reg = force_mode (word_mode, scratch_reg);
1843 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1844 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1845 0)),
1846 Pmode,
1847 XEXP (XEXP (operand1, 0),
1848 0),
1849 scratch_reg));
1851 /* Now we are going to load the scratch register from memory,
1852 we want to load it in the same width as the original MEM,
1853 which must be the same as the width of the ultimate destination,
1854 OPERAND0. */
1855 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1857 emit_move_insn (scratch_reg,
1858 replace_equiv_address (operand1, scratch_reg));
1860 else
1862 /* We want to load the scratch register using the same mode as
1863 the ultimate destination. */
1864 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1866 emit_move_insn (scratch_reg, operand1);
1869 /* And emit the insn to set the ultimate destination. We know that
1870 the scratch register has the same mode as the destination at this
1871 point. */
1872 emit_move_insn (operand0, scratch_reg);
1873 return 1;
1876 /* Handle the most common case: storing into a register. */
1877 if (register_operand (operand0, mode))
1879 /* Legitimize TLS symbol references. This happens for references
1880 that aren't legitimate constants. */
1881 if (PA_SYMBOL_REF_TLS_P (operand1))
1882 operand1 = legitimize_tls_address (operand1);
1884 if (register_operand (operand1, mode)
1885 || (GET_CODE (operand1) == CONST_INT
1886 && pa_cint_ok_for_move (UINTVAL (operand1)))
1887 || (operand1 == CONST0_RTX (mode))
1888 || (GET_CODE (operand1) == HIGH
1889 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1890 /* Only `general_operands' can come here, so MEM is ok. */
1891 || GET_CODE (operand1) == MEM)
1893 /* Various sets are created during RTL generation which don't
1894 have the REG_POINTER flag correctly set. After the CSE pass,
1895 instruction recognition can fail if we don't consistently
1896 set this flag when performing register copies. This should
1897 also improve the opportunities for creating insns that use
1898 unscaled indexing. */
1899 if (REG_P (operand0) && REG_P (operand1))
1901 if (REG_POINTER (operand1)
1902 && !REG_POINTER (operand0)
1903 && !HARD_REGISTER_P (operand0))
1904 copy_reg_pointer (operand0, operand1);
1907 /* When MEMs are broken out, the REG_POINTER flag doesn't
1908 get set. In some cases, we can set the REG_POINTER flag
1909 from the declaration for the MEM. */
1910 if (REG_P (operand0)
1911 && GET_CODE (operand1) == MEM
1912 && !REG_POINTER (operand0))
1914 tree decl = MEM_EXPR (operand1);
1916 /* Set the register pointer flag and register alignment
1917 if the declaration for this memory reference is a
1918 pointer type. */
1919 if (decl)
1921 tree type;
1923 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1924 tree operand 1. */
1925 if (TREE_CODE (decl) == COMPONENT_REF)
1926 decl = TREE_OPERAND (decl, 1);
1928 type = TREE_TYPE (decl);
1929 type = strip_array_types (type);
1931 if (POINTER_TYPE_P (type))
1933 int align;
1935 type = TREE_TYPE (type);
1936 /* Using TYPE_ALIGN_OK is rather conservative as
1937 only the Ada front end actually sets it. */
1938 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1939 : BITS_PER_UNIT);
1940 mark_reg_pointer (operand0, align);
1945 emit_insn (gen_rtx_SET (operand0, operand1));
1946 return 1;
1949 else if (GET_CODE (operand0) == MEM)
1951 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1952 && !(reload_in_progress || reload_completed))
1954 rtx temp = gen_reg_rtx (DFmode);
1956 emit_insn (gen_rtx_SET (temp, operand1));
1957 emit_insn (gen_rtx_SET (operand0, temp));
1958 return 1;
1960 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1962 /* Run this case quickly. */
1963 emit_insn (gen_rtx_SET (operand0, operand1));
1964 return 1;
1966 if (! (reload_in_progress || reload_completed))
1968 operands[0] = validize_mem (operand0);
1969 operands[1] = operand1 = force_reg (mode, operand1);
1973 /* Simplify the source if we need to.
1974 Note we do have to handle function labels here, even though we do
1975 not consider them legitimate constants. Loop optimizations can
1976 call the emit_move_xxx routines with one as a source. */
1977 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1978 || (GET_CODE (operand1) == HIGH
1979 && symbolic_operand (XEXP (operand1, 0), mode))
1980 || function_label_operand (operand1, VOIDmode)
1981 || tls_referenced_p (operand1))
1983 int ishighonly = 0;
1985 if (GET_CODE (operand1) == HIGH)
1987 ishighonly = 1;
1988 operand1 = XEXP (operand1, 0);
1990 if (symbolic_operand (operand1, mode))
1992 /* Argh. The assembler and linker can't handle arithmetic
1993 involving plabels.
1995 So we force the plabel into memory, load operand0 from
1996 the memory location, then add in the constant part. */
1997 if ((GET_CODE (operand1) == CONST
1998 && GET_CODE (XEXP (operand1, 0)) == PLUS
1999 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2000 VOIDmode))
2001 || function_label_operand (operand1, VOIDmode))
2003 rtx temp, const_part;
2005 /* Figure out what (if any) scratch register to use. */
2006 if (reload_in_progress || reload_completed)
2008 scratch_reg = scratch_reg ? scratch_reg : operand0;
2009 /* SCRATCH_REG will hold an address and maybe the actual
2010 data. We want it in WORD_MODE regardless of what mode it
2011 was originally given to us. */
2012 scratch_reg = force_mode (word_mode, scratch_reg);
2014 else if (flag_pic)
2015 scratch_reg = gen_reg_rtx (Pmode);
2017 if (GET_CODE (operand1) == CONST)
2019 /* Save away the constant part of the expression. */
2020 const_part = XEXP (XEXP (operand1, 0), 1);
2021 gcc_assert (GET_CODE (const_part) == CONST_INT);
2023 /* Force the function label into memory. */
2024 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2026 else
2028 /* No constant part. */
2029 const_part = NULL_RTX;
2031 /* Force the function label into memory. */
2032 temp = force_const_mem (mode, operand1);
2036 /* Get the address of the memory location. PIC-ify it if
2037 necessary. */
2038 temp = XEXP (temp, 0);
2039 if (flag_pic)
2040 temp = legitimize_pic_address (temp, mode, scratch_reg);
2042 /* Put the address of the memory location into our destination
2043 register. */
2044 operands[1] = temp;
2045 pa_emit_move_sequence (operands, mode, scratch_reg);
2047 /* Now load from the memory location into our destination
2048 register. */
2049 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2050 pa_emit_move_sequence (operands, mode, scratch_reg);
2052 /* And add back in the constant part. */
2053 if (const_part != NULL_RTX)
2054 expand_inc (operand0, const_part);
2056 return 1;
2059 if (flag_pic)
2061 rtx_insn *insn;
2062 rtx temp;
2064 if (reload_in_progress || reload_completed)
2066 temp = scratch_reg ? scratch_reg : operand0;
2067 /* TEMP will hold an address and maybe the actual
2068 data. We want it in WORD_MODE regardless of what mode it
2069 was originally given to us. */
2070 temp = force_mode (word_mode, temp);
2072 else
2073 temp = gen_reg_rtx (Pmode);
2075 /* Force (const (plus (symbol) (const_int))) to memory
2076 if the const_int will not fit in 14 bits. Although
2077 this requires a relocation, the instruction sequence
2078 needed to load the value is shorter. */
2079 if (GET_CODE (operand1) == CONST
2080 && GET_CODE (XEXP (operand1, 0)) == PLUS
2081 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2082 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2084 rtx x, m = force_const_mem (mode, operand1);
2086 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2087 x = replace_equiv_address (m, x);
2088 insn = emit_move_insn (operand0, x);
2090 else
2092 operands[1] = legitimize_pic_address (operand1, mode, temp);
2093 if (REG_P (operand0) && REG_P (operands[1]))
2094 copy_reg_pointer (operand0, operands[1]);
2095 insn = emit_move_insn (operand0, operands[1]);
2098 /* Put a REG_EQUAL note on this insn. */
2099 set_unique_reg_note (insn, REG_EQUAL, operand1);
2101 /* On the HPPA, references to data space are supposed to use dp,
2102 register 27, but showing it in the RTL inhibits various cse
2103 and loop optimizations. */
2104 else
2106 rtx temp, set;
2108 if (reload_in_progress || reload_completed)
2110 temp = scratch_reg ? scratch_reg : operand0;
2111 /* TEMP will hold an address and maybe the actual
2112 data. We want it in WORD_MODE regardless of what mode it
2113 was originally given to us. */
2114 temp = force_mode (word_mode, temp);
2116 else
2117 temp = gen_reg_rtx (mode);
2119 /* Loading a SYMBOL_REF into a register makes that register
2120 safe to be used as the base in an indexed address.
2122 Don't mark hard registers though. That loses. */
2123 if (GET_CODE (operand0) == REG
2124 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2125 mark_reg_pointer (operand0, BITS_PER_UNIT);
2126 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2127 mark_reg_pointer (temp, BITS_PER_UNIT);
2129 if (ishighonly)
2130 set = gen_rtx_SET (operand0, temp);
2131 else
2132 set = gen_rtx_SET (operand0,
2133 gen_rtx_LO_SUM (mode, temp, operand1));
2135 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2136 emit_insn (set);
2139 return 1;
2141 else if (tls_referenced_p (operand1))
2143 rtx tmp = operand1;
2144 rtx addend = NULL;
2146 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2148 addend = XEXP (XEXP (tmp, 0), 1);
2149 tmp = XEXP (XEXP (tmp, 0), 0);
2152 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2153 tmp = legitimize_tls_address (tmp);
2154 if (addend)
2156 tmp = gen_rtx_PLUS (mode, tmp, addend);
2157 tmp = force_operand (tmp, operands[0]);
2159 operands[1] = tmp;
2161 else if (GET_CODE (operand1) != CONST_INT
2162 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2164 rtx temp;
2165 rtx_insn *insn;
2166 rtx op1 = operand1;
2167 HOST_WIDE_INT value = 0;
2168 HOST_WIDE_INT insv = 0;
2169 int insert = 0;
2171 if (GET_CODE (operand1) == CONST_INT)
2172 value = INTVAL (operand1);
2174 if (TARGET_64BIT
2175 && GET_CODE (operand1) == CONST_INT
2176 && HOST_BITS_PER_WIDE_INT > 32
2177 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2179 HOST_WIDE_INT nval;
2181 /* Extract the low order 32 bits of the value and sign extend.
2182 If the new value is the same as the original value, we can
2183 use the original value as-is. If the new value is
2184 different, we use it and insert the most-significant 32-bits
2185 of the original value into the final result. */
2186 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2187 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2188 if (value != nval)
2190 #if HOST_BITS_PER_WIDE_INT > 32
2191 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2192 #endif
2193 insert = 1;
2194 value = nval;
2195 operand1 = GEN_INT (nval);
2199 if (reload_in_progress || reload_completed)
2200 temp = scratch_reg ? scratch_reg : operand0;
2201 else
2202 temp = gen_reg_rtx (mode);
2204 /* We don't directly split DImode constants on 32-bit targets
2205 because PLUS uses an 11-bit immediate and the insn sequence
2206 generated is not as efficient as the one using HIGH/LO_SUM. */
2207 if (GET_CODE (operand1) == CONST_INT
2208 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2209 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2210 && !insert)
2212 /* Directly break constant into high and low parts. This
2213 provides better optimization opportunities because various
2214 passes recognize constants split with PLUS but not LO_SUM.
2215 We use a 14-bit signed low part except when the addition
2216 of 0x4000 to the high part might change the sign of the
2217 high part. */
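/* Worked example (hypothetical value): for value == 0x12347000,
   low = 0x3000 >= 0x2000 and high == 0x12344000, so high is bumped
   to 0x12348000 and low becomes value - high == -0x1000.  Both
   parts are then representable: the high part loads with ldil and
   -4096 fits in the 14-bit signed displacement of ldo.  */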
2218 HOST_WIDE_INT low = value & 0x3fff;
2219 HOST_WIDE_INT high = value & ~ 0x3fff;
2221 if (low >= 0x2000)
2223 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2224 high += 0x2000;
2225 else
2226 high += 0x4000;
2229 low = value - high;
2231 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2232 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2234 else
2236 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2237 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2240 insn = emit_move_insn (operands[0], operands[1]);
2242 /* Now insert the most significant 32 bits of the value
2243 into the register. When we don't have a second register
2244 available, it could take up to nine instructions to load
2245 a 64-bit integer constant. Prior to reload, we force
2246 constants that would take more than three instructions
2247 to load to the constant pool. During and after reload,
2248 we have to handle all possible values. */
2249 if (insert)
2251 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2252 register and the value to be inserted is outside the
2253 range that can be loaded with three depdi instructions. */
2254 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2256 operand1 = GEN_INT (insv);
2258 emit_insn (gen_rtx_SET (temp,
2259 gen_rtx_HIGH (mode, operand1)));
2260 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2261 if (mode == DImode)
2262 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2263 const0_rtx, temp));
2264 else
2265 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2266 const0_rtx, temp));
2268 else
2270 int len = 5, pos = 27;
2272 /* Insert the bits using the depdi instruction. */
2273 while (pos >= 0)
2275 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2276 HOST_WIDE_INT sign = v5 < 0;
2278 /* Left extend the insertion. */
2279 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2280 while (pos > 0 && (insv & 1) == sign)
2282 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2283 len += 1;
2284 pos -= 1;
2287 if (mode == DImode)
2288 insn = emit_insn (gen_insvdi (operand0,
2289 GEN_INT (len),
2290 GEN_INT (pos),
2291 GEN_INT (v5)));
2292 else
2293 insn = emit_insn (gen_insvsi (operand0,
2294 GEN_INT (len),
2295 GEN_INT (pos),
2296 GEN_INT (v5)));
2298 len = pos > 0 && pos < 5 ? pos : 5;
2299 pos -= len;
2304 set_unique_reg_note (insn, REG_EQUAL, op1);
2306 return 1;
2309 /* Now have insn-emit do whatever it normally does. */
2310 return 0;
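/* Usage sketch (an illustration, not quoted from pa.md): the move
   expanders call this routine and finish early when it emits the
   whole sequence itself, e.g.

     if (pa_emit_move_sequence (operands, SImode, 0))
       DONE;

   A nonzero return means the insns were emitted here; zero lets the
   standard move pattern handle the operands unchanged.  */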
2313 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2314 it will need a link/runtime reloc). */
2316 int
2317 pa_reloc_needed (tree exp)
2319 int reloc = 0;
2321 switch (TREE_CODE (exp))
2323 case ADDR_EXPR:
2324 return 1;
2326 case POINTER_PLUS_EXPR:
2327 case PLUS_EXPR:
2328 case MINUS_EXPR:
2329 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2330 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2331 break;
2333 CASE_CONVERT:
2334 case NON_LVALUE_EXPR:
2335 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2336 break;
2338 case CONSTRUCTOR:
2340 tree value;
2341 unsigned HOST_WIDE_INT ix;
2343 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2344 if (value)
2345 reloc |= pa_reloc_needed (value);
2347 break;
2349 case ERROR_MARK:
2350 break;
2352 default:
2353 break;
2355 return reloc;
2359 /* Return the best assembler insn template
2360 for moving operands[1] into operands[0] as a fullword. */
2361 const char *
2362 pa_singlemove_string (rtx *operands)
2364 HOST_WIDE_INT intval;
2366 if (GET_CODE (operands[0]) == MEM)
2367 return "stw %r1,%0";
2368 if (GET_CODE (operands[1]) == MEM)
2369 return "ldw %1,%0";
2370 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2372 long i;
2374 gcc_assert (GET_MODE (operands[1]) == SFmode);
2376 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2377 bit pattern. */
2378 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2380 operands[1] = GEN_INT (i);
2381 /* Fall through to CONST_INT case. */
2383 if (GET_CODE (operands[1]) == CONST_INT)
2385 intval = INTVAL (operands[1]);
2387 if (VAL_14_BITS_P (intval))
2388 return "ldi %1,%0";
2389 else if ((intval & 0x7ff) == 0)
2390 return "ldil L'%1,%0";
2391 else if (pa_zdepi_cint_p (intval))
2392 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2393 else
2394 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2396 return "copy %1,%0";
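/* Illustrative values: 0x1e00 fits in 14 bits and selects "ldi";
   0x12345800 has its low 11 bits clear and selects "ldil"; a
   bitstring such as 0x00fe0000 satisfies pa_zdepi_cint_p and takes
   the depwi,z form; anything else needs the two-insn ldil/ldo
   pair.  */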
2400 /* Compute position (in OP[1]) and width (in OP[2])
2401 useful for copying IMM to a register using the zdepi
2402 instructions. Store the immediate value to insert in OP[0]. */
2403 static void
2404 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2406 int lsb, len;
2408 /* Find the least significant set bit in IMM. */
2409 for (lsb = 0; lsb < 32; lsb++)
2411 if ((imm & 1) != 0)
2412 break;
2413 imm >>= 1;
2416 /* Choose variants based on *sign* of the 5-bit field. */
2417 if ((imm & 0x10) == 0)
2418 len = (lsb <= 28) ? 4 : 32 - lsb;
2419 else
2421 /* Find the width of the bitstring in IMM. */
2422 for (len = 5; len < 32 - lsb; len++)
2424 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2425 break;
2428 /* Sign extend IMM as a 5-bit value. */
2429 imm = (imm & 0xf) - 0x10;
2432 op[0] = imm;
2433 op[1] = 31 - lsb;
2434 op[2] = len;
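/* Worked example (hypothetical input): imm == 0x00fe0000 has its
   least significant set bit at 17, so OP becomes {-1, 14, 7};
   depositing the sign-extended 5-bit value -1 with length 7 at
   position 31 - 17 == 14 regenerates the run of ones in
   bits 17..23.  */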
2437 /* Compute position (in OP[1]) and width (in OP[2])
2438 useful for copying IMM to a register using the depdi,z
2439 instructions. Store the immediate value to insert in OP[0]. */
2441 static void
2442 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2444 int lsb, len, maxlen;
2446 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2448 /* Find the least significant set bit in IMM. */
2449 for (lsb = 0; lsb < maxlen; lsb++)
2451 if ((imm & 1) != 0)
2452 break;
2453 imm >>= 1;
2456 /* Choose variants based on *sign* of the 5-bit field. */
2457 if ((imm & 0x10) == 0)
2458 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2459 else
2461 /* Find the width of the bitstring in IMM. */
2462 for (len = 5; len < maxlen - lsb; len++)
2464 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2465 break;
2468 /* Extend length if host is narrow and IMM is negative. */
2469 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2470 len += 32;
2472 /* Sign extend IMM as a 5-bit value. */
2473 imm = (imm & 0xf) - 0x10;
2476 op[0] = imm;
2477 op[1] = 63 - lsb;
2478 op[2] = len;
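/* Similarly for the 64-bit variant (assuming a 64-bit host):
   imm == (HOST_WIDE_INT) 0x3f << 40 gives OP == {-1, 23, 6},
   i.e. a depdi,z of -1 with length 6 at position 63 - 40 == 23.  */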
2481 /* Output assembler code to perform a doubleword move insn
2482 with operands OPERANDS. */
2484 const char *
2485 pa_output_move_double (rtx *operands)
2487 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2488 rtx latehalf[2];
2489 rtx addreg0 = 0, addreg1 = 0;
2490 int highonly = 0;
2492 /* First classify both operands. */
2494 if (REG_P (operands[0]))
2495 optype0 = REGOP;
2496 else if (offsettable_memref_p (operands[0]))
2497 optype0 = OFFSOP;
2498 else if (GET_CODE (operands[0]) == MEM)
2499 optype0 = MEMOP;
2500 else
2501 optype0 = RNDOP;
2503 if (REG_P (operands[1]))
2504 optype1 = REGOP;
2505 else if (CONSTANT_P (operands[1]))
2506 optype1 = CNSTOP;
2507 else if (offsettable_memref_p (operands[1]))
2508 optype1 = OFFSOP;
2509 else if (GET_CODE (operands[1]) == MEM)
2510 optype1 = MEMOP;
2511 else
2512 optype1 = RNDOP;
2514 /* Check for the cases that the operand constraints are not
2515 supposed to allow. */
2516 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2518 /* Handle copies between general and floating registers. */
2520 if (optype0 == REGOP && optype1 == REGOP
2521 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2523 if (FP_REG_P (operands[0]))
2525 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2526 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2527 return "{fldds|fldd} -16(%%sp),%0";
2529 else
2531 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2532 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2533 return "{ldws|ldw} -12(%%sp),%R0";
2537 /* Handle auto decrementing and incrementing loads and stores
2538 specifically, since the structure of the function doesn't work
2539 for them without major modification. Do it better once this
2540 port knows about the general inc/dec addressing of the PA.
2541 (This was written by tege. Chide him if it doesn't work.) */
2543 if (optype0 == MEMOP)
2545 /* We have to output the address syntax ourselves, since print_operand
2546 doesn't deal with the addresses we want to use. Fix this later. */
2548 rtx addr = XEXP (operands[0], 0);
2549 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2551 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2553 operands[0] = XEXP (addr, 0);
2554 gcc_assert (GET_CODE (operands[1]) == REG
2555 && GET_CODE (operands[0]) == REG);
2557 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2559 /* No overlap between high target register and address
2560 register. (We do this in a non-obvious way to
2561 save a register file writeback) */
2562 if (GET_CODE (addr) == POST_INC)
2563 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2564 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2566 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2568 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2570 operands[0] = XEXP (addr, 0);
2571 gcc_assert (GET_CODE (operands[1]) == REG
2572 && GET_CODE (operands[0]) == REG);
2574 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2575 /* No overlap between high target register and address
2576 register. (We do this in a non-obvious way to save a
2577 register file writeback) */
2578 if (GET_CODE (addr) == PRE_INC)
2579 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2580 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2583 if (optype1 == MEMOP)
2585 /* We have to output the address syntax ourselves, since print_operand
2586 doesn't deal with the addresses we want to use. Fix this later. */
2588 rtx addr = XEXP (operands[1], 0);
2589 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2591 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2593 operands[1] = XEXP (addr, 0);
2594 gcc_assert (GET_CODE (operands[0]) == REG
2595 && GET_CODE (operands[1]) == REG);
2597 if (!reg_overlap_mentioned_p (high_reg, addr))
2599 /* No overlap between high target register and address
2600 register. (We do this in a non-obvious way to
2601 save a register file writeback) */
2602 if (GET_CODE (addr) == POST_INC)
2603 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2604 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2606 else
2608 /* This is an undefined situation. We should load into the
2609 address register *and* update that register. Probably
2610 we don't need to handle this at all. */
2611 if (GET_CODE (addr) == POST_INC)
2612 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2613 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2616 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2618 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2620 operands[1] = XEXP (addr, 0);
2621 gcc_assert (GET_CODE (operands[0]) == REG
2622 && GET_CODE (operands[1]) == REG);
2624 if (!reg_overlap_mentioned_p (high_reg, addr))
2626 /* No overlap between high target register and address
2627 register. (We do this in a non-obvious way to
2628 save a register file writeback) */
2629 if (GET_CODE (addr) == PRE_INC)
2630 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2631 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2633 else
2635 /* This is an undefined situation. We should load into the
2636 address register *and* update that register. Probably
2637 we don't need to handle this at all. */
2638 if (GET_CODE (addr) == PRE_INC)
2639 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2640 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2643 else if (GET_CODE (addr) == PLUS
2644 && GET_CODE (XEXP (addr, 0)) == MULT)
2646 rtx xoperands[4];
2648 /* Load address into left half of destination register. */
2649 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2650 xoperands[1] = XEXP (addr, 1);
2651 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2652 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2653 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2654 xoperands);
2655 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2657 else if (GET_CODE (addr) == PLUS
2658 && REG_P (XEXP (addr, 0))
2659 && REG_P (XEXP (addr, 1)))
2661 rtx xoperands[3];
2663 /* Load address into left half of destination register. */
2664 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2665 xoperands[1] = XEXP (addr, 0);
2666 xoperands[2] = XEXP (addr, 1);
2667 output_asm_insn ("{addl|add,l} %1,%2,%0",
2668 xoperands);
2669 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2673 /* If an operand is an unoffsettable memory ref, find a register
2674 we can increment temporarily to make it refer to the second word. */
2676 if (optype0 == MEMOP)
2677 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2679 if (optype1 == MEMOP)
2680 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2682 /* Ok, we can do one word at a time.
2683 Normally we do the low-numbered word first.
2685 In either case, set up in LATEHALF the operands to use
2686 for the high-numbered word and in some cases alter the
2687 operands in OPERANDS to be suitable for the low-numbered word. */
2689 if (optype0 == REGOP)
2690 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2691 else if (optype0 == OFFSOP)
2692 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2693 else
2694 latehalf[0] = operands[0];
2696 if (optype1 == REGOP)
2697 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2698 else if (optype1 == OFFSOP)
2699 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2700 else if (optype1 == CNSTOP)
2702 if (GET_CODE (operands[1]) == HIGH)
2704 operands[1] = XEXP (operands[1], 0);
2705 highonly = 1;
2707 split_double (operands[1], &operands[1], &latehalf[1]);
2709 else
2710 latehalf[1] = operands[1];
2712 /* If the first move would clobber the source of the second one,
2713 do them in the other order.
2715 This can happen in two cases:
2717 mem -> register where the first half of the destination register
2718 is the same register used in the memory's address. Reload
2719 can create such insns.
2721 mem in this case will be either register indirect or register
2722 indirect plus a valid offset.
2724 register -> register move where REGNO(dst) == REGNO(src) + 1;
2725 someone (Tim/Tege?) claimed this can happen for parameter loads.
2727 Handle mem -> register case first. */
2728 if (optype0 == REGOP
2729 && (optype1 == MEMOP || optype1 == OFFSOP)
2730 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2732 /* Do the late half first. */
2733 if (addreg1)
2734 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2735 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2737 /* Then clobber. */
2738 if (addreg1)
2739 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2740 return pa_singlemove_string (operands);
2743 /* Now handle register -> register case. */
2744 if (optype0 == REGOP && optype1 == REGOP
2745 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2747 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2748 return pa_singlemove_string (operands);
2751 /* Normal case: do the two words, low-numbered first. */
2753 output_asm_insn (pa_singlemove_string (operands), operands);
2755 /* Make any unoffsettable addresses point at high-numbered word. */
2756 if (addreg0)
2757 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2758 if (addreg1)
2759 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2761 /* Do high-numbered word. */
2762 if (highonly)
2763 output_asm_insn ("ldil L'%1,%0", latehalf);
2764 else
2765 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2767 /* Undo the adds we just did. */
2768 if (addreg0)
2769 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2770 if (addreg1)
2771 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2773 return "";
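/* As an illustration of the overlap handling above (register
   numbers invented): for a register-to-register double move with
   REGNO (dst) == REGNO (src) + 1, say %r4:%r5 := %r3:%r4, the
   late half "copy %r4,%r5" must be emitted before "copy %r3,%r4"
   or the second word would be clobbered.  */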
2776 const char *
2777 pa_output_fp_move_double (rtx *operands)
2779 if (FP_REG_P (operands[0]))
2781 if (FP_REG_P (operands[1])
2782 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2783 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2784 else
2785 output_asm_insn ("fldd%F1 %1,%0", operands);
2787 else if (FP_REG_P (operands[1]))
2789 output_asm_insn ("fstd%F0 %1,%0", operands);
2791 else
2793 rtx xoperands[2];
2795 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2797 /* This is a pain. You have to be prepared to deal with an
2798 arbitrary address here including pre/post increment/decrement.
2800 So avoid this in the MD. */
2801 gcc_assert (GET_CODE (operands[0]) == REG);
2803 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2804 xoperands[0] = operands[0];
2805 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2807 return "";
2810 /* Return a REG that occurs in ADDR with coefficient 1.
2811 ADDR can be effectively incremented by incrementing REG. */
2813 static rtx
2814 find_addr_reg (rtx addr)
2816 while (GET_CODE (addr) == PLUS)
2818 if (GET_CODE (XEXP (addr, 0)) == REG)
2819 addr = XEXP (addr, 0);
2820 else if (GET_CODE (XEXP (addr, 1)) == REG)
2821 addr = XEXP (addr, 1);
2822 else if (CONSTANT_P (XEXP (addr, 0)))
2823 addr = XEXP (addr, 1);
2824 else if (CONSTANT_P (XEXP (addr, 1)))
2825 addr = XEXP (addr, 0);
2826 else
2827 gcc_unreachable ();
2829 gcc_assert (GET_CODE (addr) == REG);
2830 return addr;
2833 /* Emit code to perform a block move.
2835 OPERANDS[0] is the destination pointer as a REG, clobbered.
2836 OPERANDS[1] is the source pointer as a REG, clobbered.
2837 OPERANDS[2] is a register for temporary storage.
2838 OPERANDS[3] is a register for temporary storage.
2839 OPERANDS[4] is the size as a CONST_INT
2840 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2841 OPERANDS[6] is another temporary register. */
2843 const char *
2844 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2846 int align = INTVAL (operands[5]);
2847 unsigned long n_bytes = INTVAL (operands[4]);
2849 /* We can't move more than a word at a time because the PA
2850 has no integer move insns wider than a word. (Could use fp mem ops?) */
2851 if (align > (TARGET_64BIT ? 8 : 4))
2852 align = (TARGET_64BIT ? 8 : 4);
2854 /* Note that we know each loop below will execute at least twice
2855 (else we would have open-coded the copy). */
2856 switch (align)
2858 case 8:
2859 /* Pre-adjust the loop counter. */
2860 operands[4] = GEN_INT (n_bytes - 16);
2861 output_asm_insn ("ldi %4,%2", operands);
2863 /* Copying loop. */
2864 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2865 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2866 output_asm_insn ("std,ma %3,8(%0)", operands);
2867 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2868 output_asm_insn ("std,ma %6,8(%0)", operands);
2870 /* Handle the residual. There could be up to 15 bytes of
2871 residual to copy! */
2872 if (n_bytes % 16 != 0)
2874 operands[4] = GEN_INT (n_bytes % 8);
2875 if (n_bytes % 16 >= 8)
2876 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2877 if (n_bytes % 8 != 0)
2878 output_asm_insn ("ldd 0(%1),%6", operands);
2879 if (n_bytes % 16 >= 8)
2880 output_asm_insn ("std,ma %3,8(%0)", operands);
2881 if (n_bytes % 8 != 0)
2882 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2884 return "";
2886 case 4:
2887 /* Pre-adjust the loop counter. */
2888 operands[4] = GEN_INT (n_bytes - 8);
2889 output_asm_insn ("ldi %4,%2", operands);
2891 /* Copying loop. */
2892 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2893 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2894 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2895 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2896 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2898 /* Handle the residual. There could be up to 7 bytes of
2899 residual to copy! */
2900 if (n_bytes % 8 != 0)
2902 operands[4] = GEN_INT (n_bytes % 4);
2903 if (n_bytes % 8 >= 4)
2904 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2905 if (n_bytes % 4 != 0)
2906 output_asm_insn ("ldw 0(%1),%6", operands);
2907 if (n_bytes % 8 >= 4)
2908 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2909 if (n_bytes % 4 != 0)
2910 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2912 return "";
2914 case 2:
2915 /* Pre-adjust the loop counter. */
2916 operands[4] = GEN_INT (n_bytes - 4);
2917 output_asm_insn ("ldi %4,%2", operands);
2919 /* Copying loop. */
2920 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2921 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2922 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2923 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2924 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2926 /* Handle the residual. */
2927 if (n_bytes % 4 != 0)
2929 if (n_bytes % 4 >= 2)
2930 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2931 if (n_bytes % 2 != 0)
2932 output_asm_insn ("ldb 0(%1),%6", operands);
2933 if (n_bytes % 4 >= 2)
2934 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2935 if (n_bytes % 2 != 0)
2936 output_asm_insn ("stb %6,0(%0)", operands);
2938 return "";
2940 case 1:
2941 /* Pre-adjust the loop counter. */
2942 operands[4] = GEN_INT (n_bytes - 2);
2943 output_asm_insn ("ldi %4,%2", operands);
2945 /* Copying loop. */
2946 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2947 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2948 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2949 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2950 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2952 /* Handle the residual. */
2953 if (n_bytes % 2 != 0)
2955 output_asm_insn ("ldb 0(%1),%3", operands);
2956 output_asm_insn ("stb %3,0(%0)", operands);
2958 return "";
2960 default:
2961 gcc_unreachable ();
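/* Illustrative expansion (register numbers invented): a 24-byte
   copy with 4-byte alignment pre-loads the counter with 16 and
   emits roughly

     ldi 16,%r20
     ldw,ma 4(%r26),%r21
     ldw,ma 4(%r26),%r22
     stw,ma %r21,4(%r25)
     addib,>= -8,%r20,.-12
     stw,ma %r22,4(%r25)

   iterating three times (counter 16, 8, 0) with the final store in
   the addib delay slot, and leaving no residual as 24 % 8 == 0.  */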
2965 /* Count the number of insns necessary to handle this block move.
2967 Basic structure is the same as emit_block_move, except that we
2968 count insns rather than emit them. */
2970 static int
2971 compute_movmem_length (rtx_insn *insn)
2973 rtx pat = PATTERN (insn);
2974 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2975 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2976 unsigned int n_insns = 0;
2978 /* We can't move more than a word at a time because the PA
2979 has no integer move insns wider than a word. (Could use fp mem ops?) */
2980 if (align > (TARGET_64BIT ? 8 : 4))
2981 align = (TARGET_64BIT ? 8 : 4);
2983 /* The basic copying loop. */
2984 n_insns = 6;
2986 /* Residuals. */
2987 if (n_bytes % (2 * align) != 0)
2989 if ((n_bytes % (2 * align)) >= align)
2990 n_insns += 2;
2992 if ((n_bytes % align) != 0)
2993 n_insns += 2;
2996 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2997 return n_insns * 4;
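/* E.g. (illustrative): n_bytes == 27 with align == 4 needs the
   6-insn loop plus 2 insns for the 3 residual bytes, giving a
   length of 8 * 4 == 32 bytes.  */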
3000 /* Emit code to perform a block clear.
3002 OPERANDS[0] is the destination pointer as a REG, clobbered.
3003 OPERANDS[1] is a register for temporary storage.
3004 OPERANDS[2] is the size as a CONST_INT
3005 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3007 const char *
3008 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3010 int align = INTVAL (operands[3]);
3011 unsigned long n_bytes = INTVAL (operands[2]);
3013 /* We can't clear more than a word at a time because the PA
3014 has no integer move insns wider than a word. */
3015 if (align > (TARGET_64BIT ? 8 : 4))
3016 align = (TARGET_64BIT ? 8 : 4);
3018 /* Note that we know each loop below will execute at least twice
3019 (else we would have open-coded the clear). */
3020 switch (align)
3022 case 8:
3023 /* Pre-adjust the loop counter. */
3024 operands[2] = GEN_INT (n_bytes - 16);
3025 output_asm_insn ("ldi %2,%1", operands);
3027 /* Loop. */
3028 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3029 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3030 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3032 /* Handle the residual. There could be up to 15 bytes of
3033 residual to clear! */
3034 if (n_bytes % 16 != 0)
3036 operands[2] = GEN_INT (n_bytes % 8);
3037 if (n_bytes % 16 >= 8)
3038 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3039 if (n_bytes % 8 != 0)
3040 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3042 return "";
3044 case 4:
3045 /* Pre-adjust the loop counter. */
3046 operands[2] = GEN_INT (n_bytes - 8);
3047 output_asm_insn ("ldi %2,%1", operands);
3049 /* Loop. */
3050 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3051 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3052 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3054 /* Handle the residual. There could be up to 7 bytes of
3055 residual to clear! */
3056 if (n_bytes % 8 != 0)
3058 operands[2] = GEN_INT (n_bytes % 4);
3059 if (n_bytes % 8 >= 4)
3060 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3061 if (n_bytes % 4 != 0)
3062 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3064 return "";
3066 case 2:
3067 /* Pre-adjust the loop counter. */
3068 operands[2] = GEN_INT (n_bytes - 4);
3069 output_asm_insn ("ldi %2,%1", operands);
3071 /* Loop. */
3072 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3073 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3074 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3076 /* Handle the residual. */
3077 if (n_bytes % 4 != 0)
3079 if (n_bytes % 4 >= 2)
3080 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3081 if (n_bytes % 2 != 0)
3082 output_asm_insn ("stb %%r0,0(%0)", operands);
3084 return "";
3086 case 1:
3087 /* Pre-adjust the loop counter. */
3088 operands[2] = GEN_INT (n_bytes - 2);
3089 output_asm_insn ("ldi %2,%1", operands);
3091 /* Loop. */
3092 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3093 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3094 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3096 /* Handle the residual. */
3097 if (n_bytes % 2 != 0)
3098 output_asm_insn ("stb %%r0,0(%0)", operands);
3100 return "";
3102 default:
3103 gcc_unreachable ();
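/* For example (hypothetical operands): a 6-byte clear with 2-byte
   alignment pre-loads the counter with 2, runs the halfword store
   loop, and emits one residual "sth" but no "stb", since
   6 % 4 == 2 and 6 % 2 == 0.  */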
3107 /* Count the number of insns necessary to handle this block clear.
3109 Basic structure is the same as emit_block_move, except that we
3110 count insns rather than emit them. */
3112 static int
3113 compute_clrmem_length (rtx_insn *insn)
3115 rtx pat = PATTERN (insn);
3116 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3117 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3118 unsigned int n_insns = 0;
3120 /* We can't clear more than a word at a time because the PA
3121 has no integer move insns wider than a word. */
3122 if (align > (TARGET_64BIT ? 8 : 4))
3123 align = (TARGET_64BIT ? 8 : 4);
3125 /* The basic loop. */
3126 n_insns = 4;
3128 /* Residuals. */
3129 if (n_bytes % (2 * align) != 0)
3131 if ((n_bytes % (2 * align)) >= align)
3132 n_insns++;
3134 if ((n_bytes % align) != 0)
3135 n_insns++;
3138 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3139 return n_insns * 4;
3143 const char *
3144 pa_output_and (rtx *operands)
3146 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3148 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3149 int ls0, ls1, ms0, p, len;
3151 for (ls0 = 0; ls0 < 32; ls0++)
3152 if ((mask & (1 << ls0)) == 0)
3153 break;
3155 for (ls1 = ls0; ls1 < 32; ls1++)
3156 if ((mask & (1 << ls1)) != 0)
3157 break;
3159 for (ms0 = ls1; ms0 < 32; ms0++)
3160 if ((mask & (1 << ms0)) == 0)
3161 break;
3163 gcc_assert (ms0 == 32);
3165 if (ls1 == 32)
3167 len = ls0;
3169 gcc_assert (len);
3171 operands[2] = GEN_INT (len);
3172 return "{extru|extrw,u} %1,31,%2,%0";
3174 else
3176 /* We could use this `depi' for the case above as well, but `depi'
3177 requires one more register file access than an `extru'. */
3179 p = 31 - ls0;
3180 len = ls1 - ls0;
3182 operands[2] = GEN_INT (p);
3183 operands[3] = GEN_INT (len);
3184 return "{depi|depwi} 0,%2,%3,%0";
3187 else
3188 return "and %1,%2,%0";
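/* Two illustrative masks: 0x7f selects the low seven bits and uses
   the extract form, "{extru|extrw,u} %1,31,7,%0", while 0xffff00ff
   has a hole at bits 8..15 and becomes "{depi|depwi} 0,23,8,%0",
   depositing eight zeros at position 23.  */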
3191 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3192 storing the result in operands[0]. */
3193 const char *
3194 pa_output_64bit_and (rtx *operands)
3196 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3198 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3199 int ls0, ls1, ms0, p, len;
3201 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3202 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3203 break;
3205 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3206 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3207 break;
3209 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3210 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3211 break;
3213 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3215 if (ls1 == HOST_BITS_PER_WIDE_INT)
3217 len = ls0;
3219 gcc_assert (len);
3221 operands[2] = GEN_INT (len);
3222 return "extrd,u %1,63,%2,%0";
3224 else
3226 /* We could use this `depdi' for the case above as well, but `depdi'
3227 requires one more register file access than an `extrd,u'. */
3229 p = 63 - ls0;
3230 len = ls1 - ls0;
3232 operands[2] = GEN_INT (p);
3233 operands[3] = GEN_INT (len);
3234 return "depdi 0,%2,%3,%0";
3237 else
3238 return "and %1,%2,%0";
3241 const char *
3242 pa_output_ior (rtx *operands)
3244 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3245 int bs0, bs1, p, len;
3247 if (INTVAL (operands[2]) == 0)
3248 return "copy %1,%0";
3250 for (bs0 = 0; bs0 < 32; bs0++)
3251 if ((mask & (1 << bs0)) != 0)
3252 break;
3254 for (bs1 = bs0; bs1 < 32; bs1++)
3255 if ((mask & (1 << bs1)) == 0)
3256 break;
3258 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3260 p = 31 - bs0;
3261 len = bs1 - bs0;
3263 operands[2] = GEN_INT (p);
3264 operands[3] = GEN_INT (len);
3265 return "{depi|depwi} -1,%2,%3,%0";
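/* E.g. mask 0x0000ff00 (bits 8..15) yields
   "{depi|depwi} -1,23,8,%0", depositing eight ones at
   position 23.  */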
3268 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3269 storing the result in operands[0]. */
3270 const char *
3271 pa_output_64bit_ior (rtx *operands)
3273 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3274 int bs0, bs1, p, len;
3276 if (INTVAL (operands[2]) == 0)
3277 return "copy %1,%0";
3279 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3280 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3281 break;
3283 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3284 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3285 break;
3287 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3288 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3290 p = 63 - bs0;
3291 len = bs1 - bs0;
3293 operands[2] = GEN_INT (p);
3294 operands[3] = GEN_INT (len);
3295 return "depdi -1,%2,%3,%0";
3298 /* Target hook for assembling integer objects. This code handles
3299 aligned SI and DI integers specially since function references
3300 must be preceded by P%. */
3302 static bool
3303 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3305 if (size == UNITS_PER_WORD
3306 && aligned_p
3307 && function_label_operand (x, VOIDmode))
3309 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3311 /* We don't want an OPD when generating fast indirect calls. */
3312 if (!TARGET_FAST_INDIRECT_CALLS)
3313 fputs ("P%", asm_out_file);
3315 output_addr_const (asm_out_file, x);
3316 fputc ('\n', asm_out_file);
3317 return true;
3319 return default_assemble_integer (x, size, aligned_p);
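/* Sketch (assuming a 32-bit target): an aligned word holding the
   address of function foo is emitted as

     .word P%foo

   so that a procedure label (plabel) is created instead of using
   the raw code address.  */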
3322 /* Output an ascii string. */
3323 void
3324 pa_output_ascii (FILE *file, const char *p, int size)
3326 int i;
3327 int chars_output;
3328 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3330 /* The HP assembler can only take strings of 256 characters at one
3331 time. This is a limitation on input line length, *not* the
3332 length of the string. Sigh. Even worse, it seems that the
3333 restriction is in number of input characters (see \xnn &
3334 \whatever). So we have to do this very carefully. */
3336 fputs ("\t.STRING \"", file);
3338 chars_output = 0;
3339 for (i = 0; i < size; i += 4)
3341 int co = 0;
3342 int io = 0;
3343 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3345 register unsigned int c = (unsigned char) p[i + io];
3347 if (c == '\"' || c == '\\')
3348 partial_output[co++] = '\\';
3349 if (c >= ' ' && c < 0177)
3350 partial_output[co++] = c;
3351 else
3353 unsigned int hexd;
3354 partial_output[co++] = '\\';
3355 partial_output[co++] = 'x';
3356 hexd = c / 16 - 0 + '0';
3357 if (hexd > '9')
3358 hexd -= '9' - 'a' + 1;
3359 partial_output[co++] = hexd;
3360 hexd = c % 16 - 0 + '0';
3361 if (hexd > '9')
3362 hexd -= '9' - 'a' + 1;
3363 partial_output[co++] = hexd;
3366 if (chars_output + co > 243)
3368 fputs ("\"\n\t.STRING \"", file);
3369 chars_output = 0;
3371 fwrite (partial_output, 1, (size_t) co, file);
3372 chars_output += co;
3373 co = 0;
3375 fputs ("\"\n", file);
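/* Escaping sketch (hypothetical call): pa_output_ascii (file,
   "Hi\"\n", 4) emits

     .STRING "Hi\"\x0a"

   with the quote backslash-escaped and the newline rendered as a
   \xnn pair.  */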
3378 /* Try to rewrite floating point comparisons & branches to avoid
3379 useless add,tr insns.
3381 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3382 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3383 first attempt to remove useless add,tr insns. It is zero
3384 for the second pass as reorg sometimes leaves bogus REG_DEAD
3385 notes lying around.
3387 When CHECK_NOTES is zero we can only eliminate add,tr insns
3388 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3389 instructions. */
3390 static void
3391 remove_useless_addtr_insns (int check_notes)
3393 rtx_insn *insn;
3394 static int pass = 0;
3396 /* This is fairly cheap, so always run it when optimizing. */
3397 if (optimize > 0)
3399 int fcmp_count = 0;
3400 int fbranch_count = 0;
3402 /* Walk all the insns in this function looking for fcmp & fbranch
3403 instructions. Keep track of how many of each we find. */
3404 for (insn = get_insns (); insn; insn = next_insn (insn))
3406 rtx tmp;
3408 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3409 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3410 continue;
3412 tmp = PATTERN (insn);
3414 /* It must be a set. */
3415 if (GET_CODE (tmp) != SET)
3416 continue;
3418 /* If the destination is CCFP, then we've found an fcmp insn. */
3419 tmp = SET_DEST (tmp);
3420 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3422 fcmp_count++;
3423 continue;
3426 tmp = PATTERN (insn);
3427 /* If this is an fbranch instruction, bump the fbranch counter. */
3428 if (GET_CODE (tmp) == SET
3429 && SET_DEST (tmp) == pc_rtx
3430 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3431 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3432 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3433 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3435 fbranch_count++;
3436 continue;
3441 /* Find all floating point compare + branch insns. If possible,
3442 reverse the comparison & the branch to avoid add,tr insns. */
3443 for (insn = get_insns (); insn; insn = next_insn (insn))
3445 rtx tmp;
3446 rtx_insn *next;
3448 /* Ignore anything that isn't an INSN. */
3449 if (! NONJUMP_INSN_P (insn))
3450 continue;
3452 tmp = PATTERN (insn);
3454 /* It must be a set. */
3455 if (GET_CODE (tmp) != SET)
3456 continue;
3458 /* The destination must be CCFP, which is register zero. */
3459 tmp = SET_DEST (tmp);
3460 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3461 continue;
3463 /* INSN should be a set of CCFP.
3465 See if the result of this insn is used in a reversed FP
3466 conditional branch. If so, reverse our condition and
3467 the branch. Doing so avoids useless add,tr insns. */
3468 next = next_insn (insn);
3469 while (next)
3471 /* Jumps, calls and labels stop our search. */
3472 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3473 break;
3475 /* As does another fcmp insn. */
3476 if (NONJUMP_INSN_P (next)
3477 && GET_CODE (PATTERN (next)) == SET
3478 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3479 && REGNO (SET_DEST (PATTERN (next))) == 0)
3480 break;
3482 next = next_insn (next);
3485 /* Is NEXT_INSN a branch? */
3486 if (next && JUMP_P (next))
3488 rtx pattern = PATTERN (next);
3490 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3491 and CCFP dies, then reverse our conditional and the branch
3492 to avoid the add,tr. */
3493 if (GET_CODE (pattern) == SET
3494 && SET_DEST (pattern) == pc_rtx
3495 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3496 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3497 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3498 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3499 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3500 && (fcmp_count == fbranch_count
3501 || (check_notes
3502 && find_regno_note (next, REG_DEAD, 0))))
3504 /* Reverse the branch. */
3505 tmp = XEXP (SET_SRC (pattern), 1);
3506 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3507 XEXP (SET_SRC (pattern), 2) = tmp;
3508 INSN_CODE (next) = -1;
3510 /* Reverse our condition. */
3511 tmp = PATTERN (insn);
3512 PUT_CODE (XEXP (tmp, 1),
3513 (reverse_condition_maybe_unordered
3514 (GET_CODE (XEXP (tmp, 1)))));
3520 pass = !pass;
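/* Schematic example (not literal RTL from a dump): an fcmp such as

     (set (reg:CCFP 0) (lt ...))

   followed by a branch whose true arm is (pc) has its comparison
   replaced via reverse_condition_maybe_unordered (LT becomes UNGE)
   and its branch arms swapped, so the ftest/branch pair no longer
   needs the add,tr idiom.  */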
3524 /* You may have trouble believing this, but this is the 32 bit HP-PA
3525 stack layout. Wow.
3527 Offset Contents
3529 Variable arguments (optional; any number may be allocated)
3531 SP-(4*(N+9)) arg word N
3533 SP-56 arg word 5
3534 SP-52 arg word 4
3536 Fixed arguments (must be allocated; may remain unused)
3538 SP-48 arg word 3
3539 SP-44 arg word 2
3540 SP-40 arg word 1
3541 SP-36 arg word 0
3543 Frame Marker
3545 SP-32 External Data Pointer (DP)
3546 SP-28 External sr4
3547 SP-24 External/stub RP (RP')
3548 SP-20 Current RP
3549 SP-16 Static Link
3550 SP-12 Clean up
3551 SP-8 Calling Stub RP (RP'')
3552 SP-4 Previous SP
3554 Top of Frame
3556 SP-0 Stack Pointer (points to next available address)
3560 /* This function saves registers as follows. Registers marked with ' are
3561 this function's registers (as opposed to the previous function's).
3562 If a frame_pointer isn't needed, r4 is saved as a general register;
3563 the space for the frame pointer is still allocated, though, to keep
3564 things simple.
3567 Top of Frame
3569 SP (FP') Previous FP
3570 SP + 4 Alignment filler (sigh)
3571 SP + 8 Space for locals reserved here.
3575 SP + n All call saved registers used.
3579 SP + o All call saved fp registers used.
3583 SP + p (SP') points to next available address.
3587 /* Global variables set by output_function_prologue(). */
3588 /* Size of frame. Need to know this to emit return insns from
3589 leaf procedures. */
3590 static HOST_WIDE_INT actual_fsize, local_fsize;
3591 static int save_fregs;
3593 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3594 Handle case where DISP > 8k by using the add_high_const patterns.
3596 Note in DISP > 8k case, we will leave the high part of the address
3597 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3599 static void
3600 store_reg (int reg, HOST_WIDE_INT disp, int base)
3602 rtx dest, src, basereg;
3603 rtx_insn *insn;
3605 src = gen_rtx_REG (word_mode, reg);
3606 basereg = gen_rtx_REG (Pmode, base);
3607 if (VAL_14_BITS_P (disp))
3609 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3610 insn = emit_move_insn (dest, src);
3612 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3614 rtx delta = GEN_INT (disp);
3615 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3617 emit_move_insn (tmpreg, delta);
3618 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3619 if (DO_FRAME_NOTES)
3621 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3622 gen_rtx_SET (tmpreg,
3623 gen_rtx_PLUS (Pmode, basereg, delta)));
3624 RTX_FRAME_RELATED_P (insn) = 1;
3626 dest = gen_rtx_MEM (word_mode, tmpreg);
3627 insn = emit_move_insn (dest, src);
3629 else
3631 rtx delta = GEN_INT (disp);
3632 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3633 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3635 emit_move_insn (tmpreg, high);
3636 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3637 insn = emit_move_insn (dest, src);
3638 if (DO_FRAME_NOTES)
3639 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3640 gen_rtx_SET (gen_rtx_MEM (word_mode,
3641 gen_rtx_PLUS (word_mode,
3642 basereg,
3643 delta)),
3644 src));
3647 if (DO_FRAME_NOTES)
3648 RTX_FRAME_RELATED_P (insn) = 1;
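/* Illustration (made-up arguments): store_reg (2, -20, 30) takes
   the small-displacement path and emits a single "stw %r2,-20(%r30)"
   (std for TARGET_64BIT), the usual save of the return pointer into
   the frame marker slot shown in the layout above.  */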
3651 /* Emit RTL to store REG at the memory location specified by BASE and then
3652 add MOD to BASE. MOD must be <= 8k. */
3654 static void
3655 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3657 rtx basereg, srcreg, delta;
3658 rtx_insn *insn;
3660 gcc_assert (VAL_14_BITS_P (mod));
3662 basereg = gen_rtx_REG (Pmode, base);
3663 srcreg = gen_rtx_REG (word_mode, reg);
3664 delta = GEN_INT (mod);
3666 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3667 if (DO_FRAME_NOTES)
3669 RTX_FRAME_RELATED_P (insn) = 1;
3671 /* RTX_FRAME_RELATED_P must be set on each frame related set
3672 in a parallel with more than one element. */
3673 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3674 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3678 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3679 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3680 whether to add a frame note or not.
3682 In the DISP > 8k case, we leave the high part of the address in %r1.
3683 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3685 static void
3686 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3688 rtx_insn *insn;
3690 if (VAL_14_BITS_P (disp))
3692 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3693 plus_constant (Pmode,
3694 gen_rtx_REG (Pmode, base), disp));
3696 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3698 rtx basereg = gen_rtx_REG (Pmode, base);
3699 rtx delta = GEN_INT (disp);
3700 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3702 emit_move_insn (tmpreg, delta);
3703 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3704 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3705 if (DO_FRAME_NOTES)
3706 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3707 gen_rtx_SET (tmpreg,
3708 gen_rtx_PLUS (Pmode, basereg, delta)));
3710 else
3712 rtx basereg = gen_rtx_REG (Pmode, base);
3713 rtx delta = GEN_INT (disp);
3714 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3716 emit_move_insn (tmpreg,
3717 gen_rtx_PLUS (Pmode, basereg,
3718 gen_rtx_HIGH (Pmode, delta)));
3719 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3720 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3723 if (DO_FRAME_NOTES && note)
3724 RTX_FRAME_RELATED_P (insn) = 1;
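/* E.g. (illustrative): set_reg_plus_d (30, 30, 512, 1) emits
   "ldo 512(%r30),%r30" since 512 fits in 14 bits; a displacement
   such as 0x10000 instead goes through the HIGH/LO_SUM path with
   %r1 as the temporary.  */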
3727 HOST_WIDE_INT
3728 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3730 int freg_saved = 0;
3731 int i, j;
3733 /* The code in pa_expand_prologue and pa_expand_epilogue must
3734 be consistent with the rounding and size calculation done here.
3735 Change them at the same time. */
3737 /* We do our own stack alignment. First, round the size of the
3738 stack locals up to a word boundary. */
3739 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3741 /* Space for previous frame pointer + filler. If any frame is
3742 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3743 waste some space here for the sake of HP compatibility. The
3744 first slot is only used when the frame pointer is needed. */
3745 if (size || frame_pointer_needed)
3746 size += STARTING_FRAME_OFFSET;
3748 /* If the current function calls __builtin_eh_return, then we need
3749 to allocate stack space for registers that will hold data for
3750 the exception handler. */
3751 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3753 unsigned int i;
3755 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3756 continue;
3757 size += i * UNITS_PER_WORD;
3760 /* Account for space used by the callee general register saves. */
3761 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3762 if (df_regs_ever_live_p (i))
3763 size += UNITS_PER_WORD;
3765 /* Account for space used by the callee floating point register saves. */
3766 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3767 if (df_regs_ever_live_p (i)
3768 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3770 freg_saved = 1;
3772 /* We always save both halves of the FP register, so always
3773 increment the frame size by 8 bytes. */
3774 size += 8;
3777 /* If any of the floating registers are saved, account for the
3778 alignment needed for the floating point register save block. */
3779 if (freg_saved)
3781 size = (size + 7) & ~7;
3782 if (fregs_live)
3783 *fregs_live = 1;
3786 /* The various ABIs include space for the outgoing parameters in the
3787 size of the current function's stack frame. We don't need to align
3788 for the outgoing arguments as their alignment is set by the final
3789 rounding for the frame as a whole. */
3790 size += crtl->outgoing_args_size;
3792 /* Allocate space for the fixed frame marker. This space must be
3793 allocated for any function that makes calls or allocates
3794 stack space. */
3795 if (!crtl->is_leaf || size)
3796 size += TARGET_64BIT ? 48 : 32;
3798 /* Finally, round to the preferred stack boundary. */
3799 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3800 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
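/* Minimal sketch (illustrative only, not used by the port) of the
alignment idiom used twice above: round SIZE up to the next multiple
of ALIGN, where ALIGN is a power of two. For example, with
ALIGN == 8, a SIZE of 37 rounds up to 40. */
static HOST_WIDE_INT ATTRIBUTE_UNUSED
example_round_up (HOST_WIDE_INT size, HOST_WIDE_INT align)
{
  /* Adding ALIGN - 1 and clearing the low bits rounds upward. */
  return (size + align - 1) & ~(align - 1);
}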
3803 /* Generate the assembly code for function entry. FILE is a stdio
3804 stream to output the code to. SIZE is an int: how many units of
3805 temporary storage to allocate.
3807 Refer to the array `regs_ever_live' to determine which registers to
3808 save; `regs_ever_live[I]' is nonzero if register number I is ever
3809 used in the function. This function is responsible for knowing
3810 which registers should not be saved even if used. */
3812 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3813 of memory. If any fpu reg is used in the function, we allocate
3814 such a block here, at the bottom of the frame, just in case it's needed.
3816 If this function is a leaf procedure, then we may choose not
3817 to do a "save" insn. The decision about whether or not
3818 to do this is made in regclass.c. */
3820 static void
3821 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3823 /* The function's label and associated .PROC must never be
3824 separated and must be output *after* any profiling declarations
3825 to avoid changing spaces/subspaces within a procedure. */
3826 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3827 fputs ("\t.PROC\n", file);
3829 /* pa_expand_prologue does the dirty work now. We just need
3830 to output the assembler directives which denote the start
3831 of a function. */
3832 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3833 if (crtl->is_leaf)
3834 fputs (",NO_CALLS", file);
3835 else
3836 fputs (",CALLS", file);
3837 if (rp_saved)
3838 fputs (",SAVE_RP", file);
3840 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3841 at the beginning of the frame and that it is used as the frame
3842 pointer for the frame. We do this because our current frame
3843 layout doesn't conform to that specified in the HP runtime
3844 documentation and we need a way to indicate to programs such as
3845 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3846 isn't used by HP compilers but is supported by the assembler.
3847 However, SAVE_SP is supposed to indicate that the previous stack
3848 pointer has been saved in the frame marker. */
3849 if (frame_pointer_needed)
3850 fputs (",SAVE_SP", file);
3852 /* Pass on information about the number of callee register saves
3853 performed in the prologue.
3855 The compiler is supposed to pass the highest register number
3856 saved, the assembler then has to adjust that number before
3857 entering it into the unwind descriptor (to account for any
3858 caller saved registers with lower register numbers than the
3859 first callee saved register). */
3860 if (gr_saved)
3861 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3863 if (fr_saved)
3864 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3866 fputs ("\n\t.ENTRY\n", file);
3868 remove_useless_addtr_insns (0);
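/* For illustration only (hypothetical output): a non-leaf function
with a 128-byte frame that saves %rp and three callee GRs would emit

	.CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=5
	.ENTRY

where ENTRY_GR is gr_saved (3) plus the fixed bias of 2 applied
above. */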
3871 void
3872 pa_expand_prologue (void)
3874 int merge_sp_adjust_with_store = 0;
3875 HOST_WIDE_INT size = get_frame_size ();
3876 HOST_WIDE_INT offset;
3877 int i;
3878 rtx tmpreg;
3879 rtx_insn *insn;
3881 gr_saved = 0;
3882 fr_saved = 0;
3883 save_fregs = 0;
3885 /* Compute total size for frame pointer, filler, locals and rounding to
3886 the next word boundary. Similar code appears in pa_compute_frame_size
3887 and must be changed in tandem with this code. */
3888 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3889 if (local_fsize || frame_pointer_needed)
3890 local_fsize += STARTING_FRAME_OFFSET;
3892 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3893 if (flag_stack_usage_info)
3894 current_function_static_stack_size = actual_fsize;
3896 /* Compute a few things we will use often. */
3897 tmpreg = gen_rtx_REG (word_mode, 1);
3899 /* Save RP first. The calling conventions manual states RP will
3900 always be stored into the caller's frame at sp - 20 or sp - 16
3901 depending on which ABI is in use. */
3902 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3904 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3905 rp_saved = true;
3907 else
3908 rp_saved = false;
3910 /* Allocate the local frame and set up the frame pointer if needed. */
3911 if (actual_fsize != 0)
3913 if (frame_pointer_needed)
3915 /* Copy the old frame pointer temporarily into %r1. Set up the
3916 new stack pointer, then store away the saved old frame pointer
3917 into the stack at sp and at the same time update the stack
3918 pointer by actual_fsize bytes. Two versions, first
3919 handles small (<8k) frames. The second handles large (>=8k)
3920 frames. */
3921 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3922 if (DO_FRAME_NOTES)
3923 RTX_FRAME_RELATED_P (insn) = 1;
3925 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3926 if (DO_FRAME_NOTES)
3927 RTX_FRAME_RELATED_P (insn) = 1;
3929 if (VAL_14_BITS_P (actual_fsize))
3930 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3931 else
3933 /* It is incorrect to store the saved frame pointer at *sp,
3934 then increment sp (writes beyond the current stack boundary).
3936 So instead use stwm to store at *sp and post-increment the
3937 stack pointer as an atomic operation. Then increment sp to
3938 finish allocating the new frame. */
3939 HOST_WIDE_INT adjust1 = 8192 - 64;
3940 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3942 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3943 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3944 adjust2, 1);
3947 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3948 we need to store the previous stack pointer (frame pointer)
3949 into the frame marker on targets that use the HP unwind
3950 library. This allows the HP unwind library to be used to
3951 unwind GCC frames. However, we are not fully compatible
3952 with the HP library because our frame layout differs from
3953 that specified in the HP runtime specification.
3955 We don't want a frame note on this instruction as the frame
3956 marker moves during dynamic stack allocation.
3958 This instruction also serves as a blockage to prevent
3959 register spills from being scheduled before the stack
3960 pointer is raised. This is necessary as we store
3961 registers using the frame pointer as a base register,
3962 and the frame pointer is set before sp is raised. */
3963 if (TARGET_HPUX_UNWIND_LIBRARY)
3965 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3966 GEN_INT (TARGET_64BIT ? -8 : -4));
3968 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3969 hard_frame_pointer_rtx);
3971 else
3972 emit_insn (gen_blockage ());
3974 /* No frame pointer needed. */
3975 else
3977 /* In some cases we can perform the first callee register save
3978 and allocate the stack frame at the same time. If so, just
3979 make a note of it and defer allocating the frame until saving
3980 the callee registers. */
3981 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3982 merge_sp_adjust_with_store = 1;
3983 /* Cannot optimize. Adjust the stack frame by actual_fsize
3984 bytes. */
3985 else
3986 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3987 actual_fsize, 1);
3991 /* Normal register save.
3993 Do not save the frame pointer in the frame_pointer_needed case. It
3994 was done earlier. */
3995 if (frame_pointer_needed)
3997 offset = local_fsize;
3999 /* Saving the EH return data registers in the frame is the simplest
4000 way to get the frame unwind information emitted. We put them
4001 just before the general registers. */
4002 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4004 unsigned int i, regno;
4006 for (i = 0; ; ++i)
4008 regno = EH_RETURN_DATA_REGNO (i);
4009 if (regno == INVALID_REGNUM)
4010 break;
4012 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4013 offset += UNITS_PER_WORD;
4017 for (i = 18; i >= 4; i--)
4018 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4020 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4021 offset += UNITS_PER_WORD;
4022 gr_saved++;
4024 /* Account for %r3 which is saved in a special place. */
4025 gr_saved++;
4027 /* No frame pointer needed. */
4028 else
4030 offset = local_fsize - actual_fsize;
4032 /* Saving the EH return data registers in the frame is the simplest
4033 way to get the frame unwind information emitted. */
4034 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4036 unsigned int i, regno;
4038 for (i = 0; ; ++i)
4040 regno = EH_RETURN_DATA_REGNO (i);
4041 if (regno == INVALID_REGNUM)
4042 break;
4044 /* If merge_sp_adjust_with_store is nonzero, then we can
4045 optimize the first save. */
4046 if (merge_sp_adjust_with_store)
4048 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4049 merge_sp_adjust_with_store = 0;
4051 else
4052 store_reg (regno, offset, STACK_POINTER_REGNUM);
4053 offset += UNITS_PER_WORD;
4057 for (i = 18; i >= 3; i--)
4058 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4060 /* If merge_sp_adjust_with_store is nonzero, then we can
4061 optimize the first GR save. */
4062 if (merge_sp_adjust_with_store)
4064 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4065 merge_sp_adjust_with_store = 0;
4067 else
4068 store_reg (i, offset, STACK_POINTER_REGNUM);
4069 offset += UNITS_PER_WORD;
4070 gr_saved++;
4073 /* If we wanted to merge the SP adjustment with a GR save, but we never
4074 did any GR saves, then just emit the adjustment here. */
4075 if (merge_sp_adjust_with_store)
4076 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4077 actual_fsize, 1);
4080 /* The hppa calling conventions say that %r19, the pic offset
4081 register, is saved at sp - 32 (in this function's frame)
4082 when generating PIC code. FIXME: What is the correct thing
4083 to do for functions which make no calls and allocate no
4084 frame? Do we need to allocate a frame, or can we just omit
4085 the save? For now we'll just omit the save.
4087 We don't want a note on this insn as the frame marker can
4088 move if there is a dynamic stack allocation. */
4089 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4091 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4093 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4097 /* Align pointer properly (doubleword boundary). */
4098 offset = (offset + 7) & ~7;
4100 /* Floating point register store. */
4101 if (save_fregs)
4103 rtx base;
4105 /* First get the frame or stack pointer to the start of the FP register
4106 save area. */
4107 if (frame_pointer_needed)
4109 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4110 base = hard_frame_pointer_rtx;
4112 else
4114 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4115 base = stack_pointer_rtx;
4118 /* Now actually save the FP registers. */
4119 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4121 if (df_regs_ever_live_p (i)
4122 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4124 rtx addr, reg;
4125 rtx_insn *insn;
4126 addr = gen_rtx_MEM (DFmode,
4127 gen_rtx_POST_INC (word_mode, tmpreg));
4128 reg = gen_rtx_REG (DFmode, i);
4129 insn = emit_move_insn (addr, reg);
4130 if (DO_FRAME_NOTES)
4132 RTX_FRAME_RELATED_P (insn) = 1;
4133 if (TARGET_64BIT)
4135 rtx mem = gen_rtx_MEM (DFmode,
4136 plus_constant (Pmode, base,
4137 offset));
4138 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4139 gen_rtx_SET (mem, reg));
4141 else
4143 rtx meml = gen_rtx_MEM (SFmode,
4144 plus_constant (Pmode, base,
4145 offset));
4146 rtx memr = gen_rtx_MEM (SFmode,
4147 plus_constant (Pmode, base,
4148 offset + 4));
4149 rtx regl = gen_rtx_REG (SFmode, i);
4150 rtx regr = gen_rtx_REG (SFmode, i + 1);
4151 rtx setl = gen_rtx_SET (meml, regl);
4152 rtx setr = gen_rtx_SET (memr, regr);
4153 rtvec vec;
4155 RTX_FRAME_RELATED_P (setl) = 1;
4156 RTX_FRAME_RELATED_P (setr) = 1;
4157 vec = gen_rtvec (2, setl, setr);
4158 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4159 gen_rtx_SEQUENCE (VOIDmode, vec));
4162 offset += GET_MODE_SIZE (DFmode);
4163 fr_saved++;
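/* Illustrative sketch (not used by the port) of the large-frame
split performed above: the first adjustment must fit the 14-bit
displacement of stwm, so 64 bytes of headroom are kept below the 8k
limit and the remainder is allocated with a separate add. For
example, a 20000-byte frame splits into 8128 + 11872. */
static void ATTRIBUTE_UNUSED
example_split_frame_adjust (HOST_WIDE_INT frame, HOST_WIDE_INT *adjust1,
			    HOST_WIDE_INT *adjust2)
{
  *adjust1 = 8192 - 64;
  *adjust2 = frame - *adjust1;
}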
4169 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4170 Handle case where DISP > 8k by using the add_high_const patterns. */
4172 static void
4173 load_reg (int reg, HOST_WIDE_INT disp, int base)
4175 rtx dest = gen_rtx_REG (word_mode, reg);
4176 rtx basereg = gen_rtx_REG (Pmode, base);
4177 rtx src;
4179 if (VAL_14_BITS_P (disp))
4180 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4181 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4183 rtx delta = GEN_INT (disp);
4184 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4186 emit_move_insn (tmpreg, delta);
4187 if (TARGET_DISABLE_INDEXING)
4189 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4190 src = gen_rtx_MEM (word_mode, tmpreg);
4192 else
4193 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4195 else
4197 rtx delta = GEN_INT (disp);
4198 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4199 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4201 emit_move_insn (tmpreg, high);
4202 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4205 emit_move_insn (dest, src);
4208 /* Update the total code bytes output to the text section. */
4210 static void
4211 update_total_code_bytes (unsigned int nbytes)
4213 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4214 && !IN_NAMED_SECTION_P (cfun->decl))
4216 unsigned int old_total = total_code_bytes;
4218 total_code_bytes += nbytes;
4220 /* Be prepared to handle overflows. */
4221 if (old_total > total_code_bytes)
4222 total_code_bytes = UINT_MAX;
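/* Sketch (illustrative only) of the saturating accumulation above:
unsigned overflow wraps modulo 2**32, so a sum smaller than the old
total signals wraparound and is clamped to UINT_MAX. */
static unsigned int ATTRIBUTE_UNUSED
example_saturating_add (unsigned int total, unsigned int nbytes)
{
  unsigned int sum = total + nbytes;

  return sum < total ? UINT_MAX : sum;
}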
4226 /* This function generates the assembly code for function exit.
4227 Args are as for output_function_prologue ().
4229 The function epilogue should not depend on the current stack
4230 pointer! It should use the frame pointer only. This is mandatory
4231 because of alloca; we also take advantage of it to omit stack
4232 adjustments before returning. */
4234 static void
4235 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4237 rtx_insn *insn = get_last_insn ();
4238 bool extra_nop;
4240 /* pa_expand_epilogue does the dirty work now. We just need
4241 to output the assembler directives which denote the end
4242 of a function.
4244 To make debuggers happy, emit a nop if the epilogue was completely
4245 eliminated due to a volatile call as the last insn in the
4246 current function. That way the return address (in %r2) will
4247 always point to a valid instruction in the current function. */
4249 /* Get the last real insn. */
4250 if (NOTE_P (insn))
4251 insn = prev_real_insn (insn);
4253 /* If it is a sequence, then look inside. */
4254 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4255 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4257 /* If insn is a CALL_INSN, then it must be a call to a volatile
4258 function (otherwise there would be epilogue insns). */
4259 if (insn && CALL_P (insn))
4261 fputs ("\tnop\n", file);
4262 extra_nop = true;
4264 else
4265 extra_nop = false;
4267 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4269 if (TARGET_SOM && TARGET_GAS)
4271 /* We are done with this subspace except possibly for some additional
4272 debug information. Forget that we are in this subspace to ensure
4273 that the next function is output in its own subspace. */
4274 in_section = NULL;
4275 cfun->machine->in_nsubspa = 2;
4278 /* Thunks do their own insn accounting. */
4279 if (cfun->is_thunk)
4280 return;
4282 if (INSN_ADDRESSES_SET_P ())
4284 last_address = extra_nop ? 4 : 0;
4285 insn = get_last_nonnote_insn ();
4286 if (insn)
4288 last_address += INSN_ADDRESSES (INSN_UID (insn));
4289 if (INSN_P (insn))
4290 last_address += insn_default_length (insn);
4292 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4293 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4295 else
4296 last_address = UINT_MAX;
4298 /* Finally, update the total number of code bytes output so far. */
4299 update_total_code_bytes (last_address);
4302 void
4303 pa_expand_epilogue (void)
4305 rtx tmpreg;
4306 HOST_WIDE_INT offset;
4307 HOST_WIDE_INT ret_off = 0;
4308 int i;
4309 int merge_sp_adjust_with_load = 0;
4311 /* We will use this often. */
4312 tmpreg = gen_rtx_REG (word_mode, 1);
4314 /* Try to restore RP early to avoid load/use interlocks when
4315 RP gets used in the return (bv) instruction. This appears to still
4316 be necessary even when we schedule the prologue and epilogue. */
4317 if (rp_saved)
4319 ret_off = TARGET_64BIT ? -16 : -20;
4320 if (frame_pointer_needed)
4322 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4323 ret_off = 0;
4325 else
4327 /* No frame pointer, and stack is smaller than 8k. */
4328 if (VAL_14_BITS_P (ret_off - actual_fsize))
4330 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4331 ret_off = 0;
4336 /* General register restores. */
4337 if (frame_pointer_needed)
4339 offset = local_fsize;
4341 /* If the current function calls __builtin_eh_return, then we need
4342 to restore the saved EH data registers. */
4343 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4345 unsigned int i, regno;
4347 for (i = 0; ; ++i)
4349 regno = EH_RETURN_DATA_REGNO (i);
4350 if (regno == INVALID_REGNUM)
4351 break;
4353 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4354 offset += UNITS_PER_WORD;
4358 for (i = 18; i >= 4; i--)
4359 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4361 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4362 offset += UNITS_PER_WORD;
4365 else
4367 offset = local_fsize - actual_fsize;
4369 /* If the current function calls __builtin_eh_return, then we need
4370 to restore the saved EH data registers. */
4371 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4373 unsigned int i, regno;
4375 for (i = 0; ; ++i)
4377 regno = EH_RETURN_DATA_REGNO (i);
4378 if (regno == INVALID_REGNUM)
4379 break;
4381 /* Only for the first load.
4382 merge_sp_adjust_with_load holds the register load
4383 with which we will merge the sp adjustment. */
4384 if (merge_sp_adjust_with_load == 0
4385 && local_fsize == 0
4386 && VAL_14_BITS_P (-actual_fsize))
4387 merge_sp_adjust_with_load = regno;
4388 else
4389 load_reg (regno, offset, STACK_POINTER_REGNUM);
4390 offset += UNITS_PER_WORD;
4394 for (i = 18; i >= 3; i--)
4396 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4398 /* Only for the first load.
4399 merge_sp_adjust_with_load holds the register load
4400 with which we will merge the sp adjustment. */
4401 if (merge_sp_adjust_with_load == 0
4402 && local_fsize == 0
4403 && VAL_14_BITS_P (-actual_fsize))
4404 merge_sp_adjust_with_load = i;
4405 else
4406 load_reg (i, offset, STACK_POINTER_REGNUM);
4407 offset += UNITS_PER_WORD;
4412 /* Align pointer properly (doubleword boundary). */
4413 offset = (offset + 7) & ~7;
4415 /* FP register restores. */
4416 if (save_fregs)
4418 /* Adjust the register to index off of. */
4419 if (frame_pointer_needed)
4420 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4421 else
4422 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4424 /* Actually do the restores now. */
4425 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4426 if (df_regs_ever_live_p (i)
4427 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4429 rtx src = gen_rtx_MEM (DFmode,
4430 gen_rtx_POST_INC (word_mode, tmpreg));
4431 rtx dest = gen_rtx_REG (DFmode, i);
4432 emit_move_insn (dest, src);
4436 /* Emit a blockage insn here to keep these insns from being moved to
4437 an earlier spot in the epilogue, or into the main instruction stream.
4439 This is necessary as we must not cut the stack back before all the
4440 restores are finished. */
4441 emit_insn (gen_blockage ());
4443 /* Reset stack pointer (and possibly frame pointer). The stack
4444 pointer is initially set to fp + 64 to avoid a race condition. */
4445 if (frame_pointer_needed)
4447 rtx delta = GEN_INT (-64);
4449 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4450 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4451 stack_pointer_rtx, delta));
4453 /* If we were deferring a callee register restore, do it now. */
4454 else if (merge_sp_adjust_with_load)
4456 rtx delta = GEN_INT (-actual_fsize);
4457 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4459 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4461 else if (actual_fsize != 0)
4462 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4463 - actual_fsize, 0);
4465 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4466 frame greater than 8k), do so now. */
4467 if (ret_off != 0)
4468 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4470 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4472 rtx sa = EH_RETURN_STACKADJ_RTX;
4474 emit_insn (gen_blockage ());
4475 emit_insn (TARGET_64BIT
4476 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4477 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4481 bool
4482 pa_can_use_return_insn (void)
4484 if (!reload_completed)
4485 return false;
4487 if (frame_pointer_needed)
4488 return false;
4490 if (df_regs_ever_live_p (2))
4491 return false;
4493 if (crtl->profile)
4494 return false;
4496 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4500 hppa_pic_save_rtx (void)
4502 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4505 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4506 #define NO_DEFERRED_PROFILE_COUNTERS 0
4507 #endif
4510 /* Vector of funcdef numbers. */
4511 static vec<int> funcdef_nos;
4513 /* Output deferred profile counters. */
4514 static void
4515 output_deferred_profile_counters (void)
4517 unsigned int i;
4518 int align, n;
4520 if (funcdef_nos.is_empty ())
4521 return;
4523 switch_to_section (data_section);
4524 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4525 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4527 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4529 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4530 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4533 funcdef_nos.release ();
4536 void
4537 hppa_profile_hook (int label_no)
4539 /* We use SImode for the address of the function in both 32 and
4540 64-bit code to avoid having to provide DImode versions of the
4541 lcla2 and load_offset_label_address insn patterns. */
4542 rtx reg = gen_reg_rtx (SImode);
4543 rtx_code_label *label_rtx = gen_label_rtx ();
4544 rtx begin_label_rtx;
4545 rtx_insn *call_insn;
4546 char begin_label_name[16];
4548 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4549 label_no);
4550 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4552 if (TARGET_64BIT)
4553 emit_move_insn (arg_pointer_rtx,
4554 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4555 GEN_INT (64)));
4557 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4559 /* The address of the function is loaded into %r25 with an instruction-
4560 relative sequence that avoids the use of relocations. The sequence
4561 is split so that the load_offset_label_address instruction can
4562 occupy the delay slot of the call to _mcount. */
4563 if (TARGET_PA_20)
4564 emit_insn (gen_lcla2 (reg, label_rtx));
4565 else
4566 emit_insn (gen_lcla1 (reg, label_rtx));
4568 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4569 reg, begin_label_rtx, label_rtx));
4571 #if !NO_DEFERRED_PROFILE_COUNTERS
4573 rtx count_label_rtx, addr, r24;
4574 char count_label_name[16];
4576 funcdef_nos.safe_push (label_no);
4577 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4578 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4580 addr = force_reg (Pmode, count_label_rtx);
4581 r24 = gen_rtx_REG (Pmode, 24);
4582 emit_move_insn (r24, addr);
4584 call_insn =
4585 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4586 gen_rtx_SYMBOL_REF (Pmode,
4587 "_mcount")),
4588 GEN_INT (TARGET_64BIT ? 24 : 12)));
4590 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4592 #else
4594 call_insn =
4595 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4596 gen_rtx_SYMBOL_REF (Pmode,
4597 "_mcount")),
4598 GEN_INT (TARGET_64BIT ? 16 : 8)));
4600 #endif
4602 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4603 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4605 /* Indicate the _mcount call cannot throw, nor will it execute a
4606 non-local goto. */
4607 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
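/* To summarize the conventions established above (specific to this
hook, not a general ABI statement): %r26 receives the caller's
return pointer copied from %r2, %r25 the address of the current
function's begin label, and, when deferred counters are enabled,
%r24 the address of this function's counter word. */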
4610 /* Fetch the return address for the frame COUNT steps up from
4611 the current frame, after the prologue. FRAMEADDR is the
4612 frame pointer of the COUNT frame.
4614 We want to ignore any export stub remnants here. To handle this,
4615 we examine the code at the return address, and if it is an export
4616 stub, we return a memory rtx for the stub return address stored
4617 at frame-24.
4619 The value returned is used in two different ways:
4621 1. To find a function's caller.
4623 2. To change the return address for a function.
4625 This function handles most instances of case 1; however, it will
4626 fail if there are two levels of stubs to execute on the return
4627 path. The only way I believe that can happen is if the return value
4628 needs a parameter relocation, which never happens for C code.
4630 This function handles most instances of case 2; however, it will
4631 fail if we did not originally have stub code on the return path
4632 but will need stub code on the new return path. This can happen if
4633 the caller & callee are both in the main program, but the new
4634 return location is in a shared library. */
4637 pa_return_addr_rtx (int count, rtx frameaddr)
4639 rtx label;
4640 rtx rp;
4641 rtx saved_rp;
4642 rtx ins;
4644 /* The instruction stream at the return address of a PA1.X export stub is:
4646 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4647 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4648 0x00011820 | stub+16: mtsp r1,sr0
4649 0xe0400002 | stub+20: be,n 0(sr0,rp)
4651 0xe0400002 must be specified as -532676606 so that it won't be
4652 rejected as an invalid immediate operand on 64-bit hosts.
4654 The instruction stream at the return address of a PA2.0 export stub is:
4656 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4657 0xe840d002 | stub+12: bve,n (rp)
4660 HOST_WIDE_INT insns[4];
4661 int i, len;
4663 if (count != 0)
4664 return NULL_RTX;
4666 rp = get_hard_reg_initial_val (Pmode, 2);
4668 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4669 return rp;
4671 /* If there is no export stub then just use the value saved from
4672 the return pointer register. */
4674 saved_rp = gen_reg_rtx (Pmode);
4675 emit_move_insn (saved_rp, rp);
4677 /* Get pointer to the instruction stream. We have to mask out the
4678 privilege level from the two low order bits of the return address
4679 pointer here so that ins will point to the start of the first
4680 instruction that would have been executed if we returned. */
4681 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4682 label = gen_label_rtx ();
4684 if (TARGET_PA_20)
4686 insns[0] = 0x4bc23fd1;
4687 insns[1] = -398405630;
4688 len = 2;
4690 else
4692 insns[0] = 0x4bc23fd1;
4693 insns[1] = 0x004010a1;
4694 insns[2] = 0x00011820;
4695 insns[3] = -532676606;
4696 len = 4;
4699 /* Check the instruction stream at the normal return address for the
4700 export stub. If it is an export stub, then our return address is
4701 really in -24[frameaddr]. */
4703 for (i = 0; i < len; i++)
4705 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4706 rtx op1 = GEN_INT (insns[i]);
4707 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4710 /* Here we know that our return address points to an export
4711 stub. We don't want to return the address of the export stub,
4712 but rather the return address of the export stub. That return
4713 address is stored at -24[frameaddr]. */
4715 emit_move_insn (saved_rp,
4716 gen_rtx_MEM (Pmode,
4717 memory_address (Pmode,
4718 plus_constant (Pmode, frameaddr,
4719 -24))));
4721 emit_label (label);
4723 return saved_rp;
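/* Hypothetical C-level equivalent (not used by the port) of the RTL
comparison sequence emitted above: compare LEN instruction words at
the privilege-masked return address against the known export-stub
pattern. */
static int ATTRIBUTE_UNUSED
example_matches_export_stub (const unsigned int *pc,
			     const unsigned int *stub, int len)
{
  int i;

  for (i = 0; i < len; i++)
    if (pc[i] != stub[i])
      return 0;
  return 1;
}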
4726 void
4727 pa_emit_bcond_fp (rtx operands[])
4729 enum rtx_code code = GET_CODE (operands[0]);
4730 rtx operand0 = operands[1];
4731 rtx operand1 = operands[2];
4732 rtx label = operands[3];
4734 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4735 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4737 emit_jump_insn (gen_rtx_SET (pc_rtx,
4738 gen_rtx_IF_THEN_ELSE (VOIDmode,
4739 gen_rtx_fmt_ee (NE,
4740 VOIDmode,
4741 gen_rtx_REG (CCFPmode, 0),
4742 const0_rtx),
4743 gen_rtx_LABEL_REF (VOIDmode, label),
4744 pc_rtx)));
4748 /* Adjust the cost of a scheduling dependency. Return the new cost of
4749 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4751 static int
4752 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4754 enum attr_type attr_type;
4756 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4757 true dependencies as they are described with bypasses now. */
4758 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4759 return cost;
4761 if (! recog_memoized (insn))
4762 return 0;
4764 attr_type = get_attr_type (insn);
4766 switch (REG_NOTE_KIND (link))
4768 case REG_DEP_ANTI:
4769 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4770 cycles later. */
4772 if (attr_type == TYPE_FPLOAD)
4774 rtx pat = PATTERN (insn);
4775 rtx dep_pat = PATTERN (dep_insn);
4776 if (GET_CODE (pat) == PARALLEL)
4778 /* This happens for the fldXs,mb patterns. */
4779 pat = XVECEXP (pat, 0, 0);
4781 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4782 /* If this happens, we have to extend this to schedule
4783 optimally. Return 0 for now. */
4784 return 0;
4786 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4788 if (! recog_memoized (dep_insn))
4789 return 0;
4790 switch (get_attr_type (dep_insn))
4792 case TYPE_FPALU:
4793 case TYPE_FPMULSGL:
4794 case TYPE_FPMULDBL:
4795 case TYPE_FPDIVSGL:
4796 case TYPE_FPDIVDBL:
4797 case TYPE_FPSQRTSGL:
4798 case TYPE_FPSQRTDBL:
4799 /* A fpload can't be issued until one cycle before a
4800 preceding arithmetic operation has finished if
4801 the target of the fpload is any of the sources
4802 (or destination) of the arithmetic operation. */
4803 return insn_default_latency (dep_insn) - 1;
4805 default:
4806 return 0;
4810 else if (attr_type == TYPE_FPALU)
4812 rtx pat = PATTERN (insn);
4813 rtx dep_pat = PATTERN (dep_insn);
4814 if (GET_CODE (pat) == PARALLEL)
4816 /* This happens for the fldXs,mb patterns. */
4817 pat = XVECEXP (pat, 0, 0);
4819 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4820 /* If this happens, we have to extend this to schedule
4821 optimally. Return 0 for now. */
4822 return 0;
4824 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4826 if (! recog_memoized (dep_insn))
4827 return 0;
4828 switch (get_attr_type (dep_insn))
4830 case TYPE_FPDIVSGL:
4831 case TYPE_FPDIVDBL:
4832 case TYPE_FPSQRTSGL:
4833 case TYPE_FPSQRTDBL:
4834 /* An ALU flop can't be issued until two cycles before a
4835 preceding divide or sqrt operation has finished if
4836 the target of the ALU flop is any of the sources
4837 (or destination) of the divide or sqrt operation. */
4838 return insn_default_latency (dep_insn) - 2;
4840 default:
4841 return 0;
4846 /* For other anti dependencies, the cost is 0. */
4847 return 0;
4849 case REG_DEP_OUTPUT:
4850 /* Output dependency; DEP_INSN writes a register that INSN writes some
4851 cycles later. */
4852 if (attr_type == TYPE_FPLOAD)
4854 rtx pat = PATTERN (insn);
4855 rtx dep_pat = PATTERN (dep_insn);
4856 if (GET_CODE (pat) == PARALLEL)
4858 /* This happens for the fldXs,mb patterns. */
4859 pat = XVECEXP (pat, 0, 0);
4861 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4862 /* If this happens, we have to extend this to schedule
4863 optimally. Return 0 for now. */
4864 return 0;
4866 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4868 if (! recog_memoized (dep_insn))
4869 return 0;
4870 switch (get_attr_type (dep_insn))
4872 case TYPE_FPALU:
4873 case TYPE_FPMULSGL:
4874 case TYPE_FPMULDBL:
4875 case TYPE_FPDIVSGL:
4876 case TYPE_FPDIVDBL:
4877 case TYPE_FPSQRTSGL:
4878 case TYPE_FPSQRTDBL:
4879 /* A fpload can't be issued until one cycle before a
4880 preceding arithmetic operation has finished if
4881 the target of the fpload is the destination of the
4882 arithmetic operation.
4884 Exception: For PA7100LC, PA7200 and PA7300, the cost
4885 is 3 cycles, unless they bundle together. We also
4886 pay the penalty if the second insn is a fpload. */
4887 return insn_default_latency (dep_insn) - 1;
4889 default:
4890 return 0;
4894 else if (attr_type == TYPE_FPALU)
4896 rtx pat = PATTERN (insn);
4897 rtx dep_pat = PATTERN (dep_insn);
4898 if (GET_CODE (pat) == PARALLEL)
4900 /* This happens for the fldXs,mb patterns. */
4901 pat = XVECEXP (pat, 0, 0);
4903 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4904 /* If this happens, we have to extend this to schedule
4905 optimally. Return 0 for now. */
4906 return 0;
4908 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4910 if (! recog_memoized (dep_insn))
4911 return 0;
4912 switch (get_attr_type (dep_insn))
4914 case TYPE_FPDIVSGL:
4915 case TYPE_FPDIVDBL:
4916 case TYPE_FPSQRTSGL:
4917 case TYPE_FPSQRTDBL:
4918 /* An ALU flop can't be issued until two cycles before a
4919 preceding divide or sqrt operation has finished if
4920 the target of the ALU flop is also the target of
4921 the divide or sqrt operation. */
4922 return insn_default_latency (dep_insn) - 2;
4924 default:
4925 return 0;
4930 /* For other output dependencies, the cost is 0. */
4931 return 0;
4933 default:
4934 gcc_unreachable ();
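/* Worked example (illustrative): if an fpload has an anti dependency
on an fpalu insn whose default latency is 3 cycles, the code above
returns 3 - 1 = 2, keeping the load from issuing until one cycle
before the arithmetic result is due. */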
4938 /* Adjust scheduling priorities. We use this to try and keep addil
4939 and the next use of %r1 close together. */
4940 static int
4941 pa_adjust_priority (rtx_insn *insn, int priority)
4943 rtx set = single_set (insn);
4944 rtx src, dest;
4945 if (set)
4947 src = SET_SRC (set);
4948 dest = SET_DEST (set);
4949 if (GET_CODE (src) == LO_SUM
4950 && symbolic_operand (XEXP (src, 1), VOIDmode)
4951 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4952 priority >>= 3;
4954 else if (GET_CODE (src) == MEM
4955 && GET_CODE (XEXP (src, 0)) == LO_SUM
4956 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4957 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4958 priority >>= 1;
4960 else if (GET_CODE (dest) == MEM
4961 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4962 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4963 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4964 priority >>= 3;
4966 return priority;
4969 /* The 700 can only issue a single insn at a time.
4970 The 7XXX processors can issue two insns at a time.
4971 The 8000 can issue four insns at a time. */
4972 static int
4973 pa_issue_rate (void)
4975 switch (pa_cpu)
4977 case PROCESSOR_700: return 1;
4978 case PROCESSOR_7100: return 2;
4979 case PROCESSOR_7100LC: return 2;
4980 case PROCESSOR_7200: return 2;
4981 case PROCESSOR_7300: return 2;
4982 case PROCESSOR_8000: return 4;
4984 default:
4985 gcc_unreachable ();
4991 /* Return any length plus adjustment needed by INSN which already has
4992 its length computed as LENGTH. Return LENGTH if no adjustment is
4993 necessary.
4995 Also compute the length of an inline block move here as it is too
4996 complicated to express as a length attribute in pa.md. */
4998 pa_adjust_insn_length (rtx_insn *insn, int length)
5000 rtx pat = PATTERN (insn);
5002 /* If length is negative or undefined, provide initial length. */
5003 if ((unsigned int) length >= INT_MAX)
5005 if (GET_CODE (pat) == SEQUENCE)
5006 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5008 switch (get_attr_type (insn))
5010 case TYPE_MILLI:
5011 length = pa_attr_length_millicode_call (insn);
5012 break;
5013 case TYPE_CALL:
5014 length = pa_attr_length_call (insn, 0);
5015 break;
5016 case TYPE_SIBCALL:
5017 length = pa_attr_length_call (insn, 1);
5018 break;
5019 case TYPE_DYNCALL:
5020 length = pa_attr_length_indirect_call (insn);
5021 break;
5022 case TYPE_SH_FUNC_ADRS:
5023 length = pa_attr_length_millicode_call (insn) + 20;
5024 break;
5025 default:
5026 gcc_unreachable ();
5030 /* Block move pattern. */
5031 if (NONJUMP_INSN_P (insn)
5032 && GET_CODE (pat) == PARALLEL
5033 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5034 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5035 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5036 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5037 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5038 length += compute_movmem_length (insn) - 4;
5039 /* Block clear pattern. */
5040 else if (NONJUMP_INSN_P (insn)
5041 && GET_CODE (pat) == PARALLEL
5042 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5043 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5044 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5045 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5046 length += compute_clrmem_length (insn) - 4;
5047 /* Conditional branch with an unfilled delay slot. */
5048 else if (JUMP_P (insn) && ! simplejump_p (insn))
5050 /* Adjust a short backwards conditional with an unfilled delay slot. */
5051 if (GET_CODE (pat) == SET
5052 && length == 4
5053 && JUMP_LABEL (insn) != NULL_RTX
5054 && ! forward_branch_p (insn))
5055 length += 4;
5056 else if (GET_CODE (pat) == PARALLEL
5057 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5058 && length == 4)
5059 length += 4;
5060 /* Adjust dbra insn with short backwards conditional branch with
5061 unfilled delay slot -- only for case where counter is in a
5062 general register. */
5063 else if (GET_CODE (pat) == PARALLEL
5064 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5065 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5066 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5067 && length == 4
5068 && ! forward_branch_p (insn))
5069 length += 4;
5071 return length;
5074 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5076 static bool
5077 pa_print_operand_punct_valid_p (unsigned char code)
5079 if (code == '@'
5080 || code == '#'
5081 || code == '*'
5082 || code == '^')
5083 return true;
5085 return false;
5088 /* Print operand X (an rtx) in assembler syntax to file FILE.
5089 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5090 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5092 void
5093 pa_print_operand (FILE *file, rtx x, int code)
5095 switch (code)
5097 case '#':
5098 /* Output a 'nop' if there's nothing for the delay slot. */
5099 if (dbr_sequence_length () == 0)
5100 fputs ("\n\tnop", file);
5101 return;
5102 case '*':
5103 /* Output a nullification completer if there's nothing for the
5104 delay slot or nullification is requested. */
5105 if (dbr_sequence_length () == 0
5106 || (final_sequence
5107 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5108 fputs (",n", file);
5109 return;
5110 case 'R':
5111 /* Print out the second register name of a register pair.
5112 I.e., R (6) => 7. */
5113 fputs (reg_names[REGNO (x) + 1], file);
5114 return;
5115 case 'r':
5116 /* A register or zero. */
5117 if (x == const0_rtx
5118 || (x == CONST0_RTX (DFmode))
5119 || (x == CONST0_RTX (SFmode)))
5121 fputs ("%r0", file);
5122 return;
5124 else
5125 break;
5126 case 'f':
5127 /* A register or zero (floating point). */
5128 if (x == const0_rtx
5129 || (x == CONST0_RTX (DFmode))
5130 || (x == CONST0_RTX (SFmode)))
5132 fputs ("%fr0", file);
5133 return;
5135 else
5136 break;
5137 case 'A':
5139 rtx xoperands[2];
5141 xoperands[0] = XEXP (XEXP (x, 0), 0);
5142 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5143 pa_output_global_address (file, xoperands[1], 0);
5144 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5145 return;
5148 case 'C': /* Plain (C)ondition */
5149 case 'X':
5150 switch (GET_CODE (x))
5152 case EQ:
5153 fputs ("=", file); break;
5154 case NE:
5155 fputs ("<>", file); break;
5156 case GT:
5157 fputs (">", file); break;
5158 case GE:
5159 fputs (">=", file); break;
5160 case GEU:
5161 fputs (">>=", file); break;
5162 case GTU:
5163 fputs (">>", file); break;
5164 case LT:
5165 fputs ("<", file); break;
5166 case LE:
5167 fputs ("<=", file); break;
5168 case LEU:
5169 fputs ("<<=", file); break;
5170 case LTU:
5171 fputs ("<<", file); break;
5172 default:
5173 gcc_unreachable ();
5175 return;
5176 case 'N': /* Condition, (N)egated */
5177 switch (GET_CODE (x))
5179 case EQ:
5180 fputs ("<>", file); break;
5181 case NE:
5182 fputs ("=", file); break;
5183 case GT:
5184 fputs ("<=", file); break;
5185 case GE:
5186 fputs ("<", file); break;
5187 case GEU:
5188 fputs ("<<", file); break;
5189 case GTU:
5190 fputs ("<<=", file); break;
5191 case LT:
5192 fputs (">=", file); break;
5193 case LE:
5194 fputs (">", file); break;
5195 case LEU:
5196 fputs (">>", file); break;
5197 case LTU:
5198 fputs (">>=", file); break;
5199 default:
5200 gcc_unreachable ();
5202 return;
5203 /* For floating point comparisons. Note that the output
5204 predicates are the complement of the desired mode. The
5205 conditions for GT, GE, LT, LE and LTGT cause an invalid
5206 operation exception if the result is unordered and this
5207 exception is enabled in the floating-point status register. */
5208 case 'Y':
5209 switch (GET_CODE (x))
5211 case EQ:
5212 fputs ("!=", file); break;
5213 case NE:
5214 fputs ("=", file); break;
5215 case GT:
5216 fputs ("!>", file); break;
5217 case GE:
5218 fputs ("!>=", file); break;
5219 case LT:
5220 fputs ("!<", file); break;
5221 case LE:
5222 fputs ("!<=", file); break;
5223 case LTGT:
5224 fputs ("!<>", file); break;
5225 case UNLE:
5226 fputs ("!?<=", file); break;
5227 case UNLT:
5228 fputs ("!?<", file); break;
5229 case UNGE:
5230 fputs ("!?>=", file); break;
5231 case UNGT:
5232 fputs ("!?>", file); break;
5233 case UNEQ:
5234 fputs ("!?=", file); break;
5235 case UNORDERED:
5236 fputs ("!?", file); break;
5237 case ORDERED:
5238 fputs ("?", file); break;
5239 default:
5240 gcc_unreachable ();
5242 return;
5243 case 'S': /* Condition, operands are (S)wapped. */
5244 switch (GET_CODE (x))
5246 case EQ:
5247 fputs ("=", file); break;
5248 case NE:
5249 fputs ("<>", file); break;
5250 case GT:
5251 fputs ("<", file); break;
5252 case GE:
5253 fputs ("<=", file); break;
5254 case GEU:
5255 fputs ("<<=", file); break;
5256 case GTU:
5257 fputs ("<<", file); break;
5258 case LT:
5259 fputs (">", file); break;
5260 case LE:
5261 fputs (">=", file); break;
5262 case LEU:
5263 fputs (">>=", file); break;
5264 case LTU:
5265 fputs (">>", file); break;
5266 default:
5267 gcc_unreachable ();
5269 return;
5270 case 'B': /* Condition, (B)oth swapped and negate. */
5271 switch (GET_CODE (x))
5273 case EQ:
5274 fputs ("<>", file); break;
5275 case NE:
5276 fputs ("=", file); break;
5277 case GT:
5278 fputs (">=", file); break;
5279 case GE:
5280 fputs (">", file); break;
5281 case GEU:
5282 fputs (">>", file); break;
5283 case GTU:
5284 fputs (">>=", file); break;
5285 case LT:
5286 fputs ("<=", file); break;
5287 case LE:
5288 fputs ("<", file); break;
5289 case LEU:
5290 fputs ("<<", file); break;
5291 case LTU:
5292 fputs ("<<=", file); break;
5293 default:
5294 gcc_unreachable ();
5296 return;
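/* For example, given (lt (reg a) (reg b)), %S prints ">" (the same
relation with the operands swapped) while %B prints "<=" (swapped
and negated), per the tables above. */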
5297 case 'k':
5298 gcc_assert (GET_CODE (x) == CONST_INT);
5299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5300 return;
5301 case 'Q':
5302 gcc_assert (GET_CODE (x) == CONST_INT);
5303 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5304 return;
5305 case 'L':
5306 gcc_assert (GET_CODE (x) == CONST_INT);
5307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5308 return;
5309 case 'o':
5310 gcc_assert (GET_CODE (x) == CONST_INT
5311 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5312 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5313 return;
5314 case 'O':
5315 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5316 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5317 return;
5318 case 'p':
5319 gcc_assert (GET_CODE (x) == CONST_INT);
5320 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5321 return;
5322 case 'P':
5323 gcc_assert (GET_CODE (x) == CONST_INT);
5324 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5325 return;
5326 case 'I':
5327 if (GET_CODE (x) == CONST_INT)
5328 fputs ("i", file);
5329 return;
5330 case 'M':
5331 case 'F':
5332 switch (GET_CODE (XEXP (x, 0)))
5334 case PRE_DEC:
5335 case PRE_INC:
5336 if (ASSEMBLER_DIALECT == 0)
5337 fputs ("s,mb", file);
5338 else
5339 fputs (",mb", file);
5340 break;
5341 case POST_DEC:
5342 case POST_INC:
5343 if (ASSEMBLER_DIALECT == 0)
5344 fputs ("s,ma", file);
5345 else
5346 fputs (",ma", file);
5347 break;
5348 case PLUS:
5349 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5350 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5352 if (ASSEMBLER_DIALECT == 0)
5353 fputs ("x", file);
5355 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5356 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5358 if (ASSEMBLER_DIALECT == 0)
5359 fputs ("x,s", file);
5360 else
5361 fputs (",s", file);
5363 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5364 fputs ("s", file);
5365 break;
5366 default:
5367 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5368 fputs ("s", file);
5369 break;
5371 return;
5372 case 'G':
5373 pa_output_global_address (file, x, 0);
5374 return;
5375 case 'H':
5376 pa_output_global_address (file, x, 1);
5377 return;
5378 case 0: /* Don't do anything special */
5379 break;
5380 case 'Z':
5382 unsigned op[3];
5383 compute_zdepwi_operands (INTVAL (x), op);
5384 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5385 return;
5387 case 'z':
5389 unsigned op[3];
5390 compute_zdepdi_operands (INTVAL (x), op);
5391 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5392 return;
5394 case 'c':
5395 /* We can get here from a .vtable_inherit due to our
5396 CONSTANT_ADDRESS_P rejecting perfectly good constant
5397 addresses. */
5398 break;
5399 default:
5400 gcc_unreachable ();
5402 if (GET_CODE (x) == REG)
5404 fputs (reg_names [REGNO (x)], file);
5405 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5407 fputs ("R", file);
5408 return;
5410 if (FP_REG_P (x)
5411 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5412 && (REGNO (x) & 1) == 0)
5413 fputs ("L", file);
5415 else if (GET_CODE (x) == MEM)
5417 int size = GET_MODE_SIZE (GET_MODE (x));
5418 rtx base = NULL_RTX;
5419 switch (GET_CODE (XEXP (x, 0)))
5421 case PRE_DEC:
5422 case POST_DEC:
5423 base = XEXP (XEXP (x, 0), 0);
5424 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5425 break;
5426 case PRE_INC:
5427 case POST_INC:
5428 base = XEXP (XEXP (x, 0), 0);
5429 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5430 break;
5431 case PLUS:
5432 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5433 fprintf (file, "%s(%s)",
5434 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5435 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5436 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5437 fprintf (file, "%s(%s)",
5438 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5439 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5440 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5441 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5443 /* Because the REG_POINTER flag can get lost during reload,
5444 pa_legitimate_address_p canonicalizes the order of the
5445 index and base registers in the combined move patterns. */
5446 rtx base = XEXP (XEXP (x, 0), 1);
5447 rtx index = XEXP (XEXP (x, 0), 0);
5449 fprintf (file, "%s(%s)",
5450 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5452 else
5453 output_address (GET_MODE (x), XEXP (x, 0));
5454 break;
5455 default:
5456 output_address (GET_MODE (x), XEXP (x, 0));
5457 break;
5460 else
5461 output_addr_const (file, x);
5464 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5466 void
5467 pa_output_global_address (FILE *file, rtx x, int round_constant)
5470 /* Imagine (high (const (plus ...))). */
5471 if (GET_CODE (x) == HIGH)
5472 x = XEXP (x, 0);
5474 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5475 output_addr_const (file, x);
5476 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5478 output_addr_const (file, x);
5479 fputs ("-$global$", file);
5481 else if (GET_CODE (x) == CONST)
5483 const char *sep = "";
5484 int offset = 0; /* assembler wants -$global$ at end */
5485 rtx base = NULL_RTX;
5487 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5489 case LABEL_REF:
5490 case SYMBOL_REF:
5491 base = XEXP (XEXP (x, 0), 0);
5492 output_addr_const (file, base);
5493 break;
5494 case CONST_INT:
5495 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5496 break;
5497 default:
5498 gcc_unreachable ();
5501 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5503 case LABEL_REF:
5504 case SYMBOL_REF:
5505 base = XEXP (XEXP (x, 0), 1);
5506 output_addr_const (file, base);
5507 break;
5508 case CONST_INT:
5509 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5510 break;
5511 default:
5512 gcc_unreachable ();
5515 /* How bogus. The compiler is apparently responsible for
5516 rounding the constant if it uses an LR field selector.
5518 The linker and/or assembler seem a better place since
5519 they have to do this kind of thing already.
5521 If we fail to do this, HP's optimizing linker may eliminate
5522 an addil, but not update the ldw/stw/ldo instruction that
5523 uses the result of the addil. */
5524 if (round_constant)
5525 offset = ((offset + 0x1000) & ~0x1fff);
5527 switch (GET_CODE (XEXP (x, 0)))
5529 case PLUS:
5530 if (offset < 0)
5532 offset = -offset;
5533 sep = "-";
5535 else
5536 sep = "+";
5537 break;
5539 case MINUS:
5540 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5541 sep = "-";
5542 break;
5544 default:
5545 gcc_unreachable ();
5548 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5549 fputs ("-$global$", file);
5550 if (offset)
5551 fprintf (file, "%s%d", sep, offset);
5553 else
5554 output_addr_const (file, x);
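/* Sketch (illustrative only) of the LR-selector rounding applied
above: OFFSET is rounded to a multiple of 0x2000, with ties going
upward. For example, 0x0fff rounds to 0 and 0x1000 to 0x2000. */
static int ATTRIBUTE_UNUSED
example_round_lr_constant (int offset)
{
  return (offset + 0x1000) & ~0x1fff;
}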
5557 /* Output boilerplate text to appear at the beginning of the file.
5558 There are several possible versions. */
5559 #define aputs(x) fputs(x, asm_out_file)
5560 static inline void
5561 pa_file_start_level (void)
5563 if (TARGET_64BIT)
5564 aputs ("\t.LEVEL 2.0w\n");
5565 else if (TARGET_PA_20)
5566 aputs ("\t.LEVEL 2.0\n");
5567 else if (TARGET_PA_11)
5568 aputs ("\t.LEVEL 1.1\n");
5569 else
5570 aputs ("\t.LEVEL 1.0\n");
5573 static inline void
5574 pa_file_start_space (int sortspace)
5576 aputs ("\t.SPACE $PRIVATE$");
5577 if (sortspace)
5578 aputs (",SORT=16");
5579 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5580 if (flag_tm)
5581 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5582 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5583 "\n\t.SPACE $TEXT$");
5584 if (sortspace)
5585 aputs (",SORT=8");
5586 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5587 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5590 static inline void
5591 pa_file_start_file (int want_version)
5593 if (write_symbols != NO_DEBUG)
5595 output_file_directive (asm_out_file, main_input_filename);
5596 if (want_version)
5597 aputs ("\t.version\t\"01.01\"\n");
5601 static inline void
5602 pa_file_start_mcount (const char *aswhat)
5604 if (profile_flag)
5605 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5608 static void
5609 pa_elf_file_start (void)
5611 pa_file_start_level ();
5612 pa_file_start_mcount ("ENTRY");
5613 pa_file_start_file (0);
5616 static void
5617 pa_som_file_start (void)
5619 pa_file_start_level ();
5620 pa_file_start_space (0);
5621 aputs ("\t.IMPORT $global$,DATA\n"
5622 "\t.IMPORT $$dyncall,MILLICODE\n");
5623 pa_file_start_mcount ("CODE");
5624 pa_file_start_file (0);
5627 static void
5628 pa_linux_file_start (void)
5630 pa_file_start_file (1);
5631 pa_file_start_level ();
5632 pa_file_start_mcount ("CODE");
5635 static void
5636 pa_hpux64_gas_file_start (void)
5638 pa_file_start_level ();
5639 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5640 if (profile_flag)
5641 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5642 #endif
5643 pa_file_start_file (1);
5646 static void
5647 pa_hpux64_hpas_file_start (void)
5649 pa_file_start_level ();
5650 pa_file_start_space (1);
5651 pa_file_start_mcount ("CODE");
5652 pa_file_start_file (0);
5654 #undef aputs
5656 /* Search the deferred plabel list for SYMBOL and return its internal
5657 label. If an entry for SYMBOL is not found, a new entry is created. */
5660 pa_get_deferred_plabel (rtx symbol)
5662 const char *fname = XSTR (symbol, 0);
5663 size_t i;
5665 /* See if we have already put this function on the list of deferred
5666 plabels. This list is generally small, so a linear search is not
5667 too ugly. If it proves too slow, replace it with something faster. */
5668 for (i = 0; i < n_deferred_plabels; i++)
5669 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5670 break;
5672 /* If the deferred plabel list is empty, or this entry was not found
5673 on the list, create a new entry on the list. */
5674 if (deferred_plabels == NULL || i == n_deferred_plabels)
5676 tree id;
5678 if (deferred_plabels == 0)
5679 deferred_plabels = ggc_alloc<deferred_plabel> ();
5680 else
5681 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5682 deferred_plabels,
5683 n_deferred_plabels + 1);
5685 i = n_deferred_plabels++;
5686 deferred_plabels[i].internal_label = gen_label_rtx ();
5687 deferred_plabels[i].symbol = symbol;
5689 /* Gross. We have just implicitly taken the address of this
5690 function. Mark it in the same manner as assemble_name. */
5691 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5692 if (id)
5693 mark_referenced (id);
5696 return deferred_plabels[i].internal_label;
5699 static void
5700 output_deferred_plabels (void)
5702 size_t i;
5704 /* If we have some deferred plabels, then we need to switch into the
5705 data or readonly data section, and align it to a 4 byte boundary
5706 before outputting the deferred plabels. */
5707 if (n_deferred_plabels)
5709 switch_to_section (flag_pic ? data_section : readonly_data_section);
5710 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5713 /* Now output the deferred plabels. */
5714 for (i = 0; i < n_deferred_plabels; i++)
5716 targetm.asm_out.internal_label (asm_out_file, "L",
5717 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5718 assemble_integer (deferred_plabels[i].symbol,
5719 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5723 /* Initialize optabs to point to emulation routines. */
5725 static void
5726 pa_init_libfuncs (void)
5728 if (HPUX_LONG_DOUBLE_LIBRARY)
5730 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5731 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5732 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5733 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5734 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5735 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5736 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5737 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5738 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5740 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5741 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5742 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5743 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5744 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5745 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5746 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5748 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5749 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5750 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5751 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5753 set_conv_libfunc (sfix_optab, SImode, TFmode,
5754 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5755 : "_U_Qfcnvfxt_quad_to_sgl");
5756 set_conv_libfunc (sfix_optab, DImode, TFmode,
5757 "_U_Qfcnvfxt_quad_to_dbl");
5758 set_conv_libfunc (ufix_optab, SImode, TFmode,
5759 "_U_Qfcnvfxt_quad_to_usgl");
5760 set_conv_libfunc (ufix_optab, DImode, TFmode,
5761 "_U_Qfcnvfxt_quad_to_udbl");
5763 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5764 "_U_Qfcnvxf_sgl_to_quad");
5765 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5766 "_U_Qfcnvxf_dbl_to_quad");
5767 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5768 "_U_Qfcnvxf_usgl_to_quad");
5769 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5770 "_U_Qfcnvxf_udbl_to_quad");
5773 if (TARGET_SYNC_LIBCALL)
5774 init_sync_libfuncs (8);
5777 /* HP's millicode routines mean something special to the assembler.
5778 Keep track of which ones we have used. */
5780 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5781 static void import_milli (enum millicodes);
5782 static char imported[(int) end1000];
5783 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5784 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5785 #define MILLI_START 10
5787 static void
5788 import_milli (enum millicodes code)
5790 char str[sizeof (import_string)];
5792 if (!imported[(int) code])
5794 imported[(int) code] = 1;
5795 strcpy (str, import_string);
5796 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5797 output_asm_insn (str, 0);
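/* Worked example: the first call to import_milli (mulI) copies "mulI"
   over the four placeholder dots starting at MILLI_START, so the
   directive emitted is

	.IMPORT $$mulI,MILLICODE

   Subsequent calls for the same code are no-ops thanks to the
   `imported' flag. */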
5801 /* The register constraints have put the operands and return value in
5802 the proper registers. */
5804 const char *
5805 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5807 import_milli (mulI);
5808 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5811 /* Emit the rtl for doing a division by a constant. */
5813 /* Do magic division millicodes exist for this value? */
5814 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
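/* Example: pa_magic_milli[10] is 1, so a division by the constant 10
   can be emitted as a call to the magic millicode routine $$divI_10
   (or $$divU_10 when unsigned); see pa_emit_hpdiv_const and
   pa_output_div_insn below. */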
5816 /* We'll use an array to keep track of the magic millicodes and
5817 whether or not we've used them already. [n][0] is signed, [n][1] is
5818 unsigned. */
5820 static int div_milli[16][2];
5822 int
5823 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5825 if (GET_CODE (operands[2]) == CONST_INT
5826 && INTVAL (operands[2]) > 0
5827 && INTVAL (operands[2]) < 16
5828 && pa_magic_milli[INTVAL (operands[2])])
5830 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5832 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5833 emit
5834 (gen_rtx_PARALLEL
5835 (VOIDmode,
5836 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5837 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5838 SImode,
5839 gen_rtx_REG (SImode, 26),
5840 operands[2])),
5841 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5842 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5843 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5844 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5845 gen_rtx_CLOBBER (VOIDmode, ret))));
5846 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5847 return 1;
5849 return 0;
5852 const char *
5853 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5855 int divisor;
5857 /* If the divisor is a constant, try to use one of the special
5858 opcodes. */
5859 if (GET_CODE (operands[0]) == CONST_INT)
5861 static char buf[100];
5862 divisor = INTVAL (operands[0]);
5863 if (!div_milli[divisor][unsignedp])
5865 div_milli[divisor][unsignedp] = 1;
5866 if (unsignedp)
5867 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5868 else
5869 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5871 if (unsignedp)
5873 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5874 INTVAL (operands[0]));
5875 return pa_output_millicode_call (insn,
5876 gen_rtx_SYMBOL_REF (SImode, buf));
5878 else
5880 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5881 INTVAL (operands[0]));
5882 return pa_output_millicode_call (insn,
5883 gen_rtx_SYMBOL_REF (SImode, buf));
5886 /* Divisor isn't a special constant. */
5887 else
5889 if (unsignedp)
5891 import_milli (divU);
5892 return pa_output_millicode_call (insn,
5893 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5895 else
5897 import_milli (divI);
5898 return pa_output_millicode_call (insn,
5899 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5904 /* Output a $$rem millicode to do mod. */
5906 const char *
5907 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5909 if (unsignedp)
5911 import_milli (remU);
5912 return pa_output_millicode_call (insn,
5913 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5915 else
5917 import_milli (remI);
5918 return pa_output_millicode_call (insn,
5919 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5923 void
5924 pa_output_arg_descriptor (rtx_insn *call_insn)
5926 const char *arg_regs[4];
5927 machine_mode arg_mode;
5928 rtx link;
5929 int i, output_flag = 0;
5930 int regno;
5932 /* We neither need nor want argument location descriptors for the
5933 64-bit runtime environment or the ELF32 environment. */
5934 if (TARGET_64BIT || TARGET_ELF32)
5935 return;
5937 for (i = 0; i < 4; i++)
5938 arg_regs[i] = 0;
5940 /* Specify explicitly that no argument relocations should take place
5941 if using the portable runtime calling conventions. */
5942 if (TARGET_PORTABLE_RUNTIME)
5944 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5945 asm_out_file);
5946 return;
5949 gcc_assert (CALL_P (call_insn));
5950 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5951 link; link = XEXP (link, 1))
5953 rtx use = XEXP (link, 0);
5955 if (! (GET_CODE (use) == USE
5956 && GET_CODE (XEXP (use, 0)) == REG
5957 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5958 continue;
5960 arg_mode = GET_MODE (XEXP (use, 0));
5961 regno = REGNO (XEXP (use, 0));
5962 if (regno >= 23 && regno <= 26)
5964 arg_regs[26 - regno] = "GR";
5965 if (arg_mode == DImode)
5966 arg_regs[25 - regno] = "GR";
5968 else if (regno >= 32 && regno <= 39)
5970 if (arg_mode == SFmode)
5971 arg_regs[(regno - 32) / 2] = "FR";
5972 else
5974 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5975 arg_regs[(regno - 34) / 2] = "FR";
5976 arg_regs[(regno - 34) / 2 + 1] = "FU";
5977 #else
5978 arg_regs[(regno - 34) / 2] = "FU";
5979 arg_regs[(regno - 34) / 2 + 1] = "FR";
5980 #endif
5984 fputs ("\t.CALL ", asm_out_file);
5985 for (i = 0; i < 4; i++)
5987 if (arg_regs[i])
5989 if (output_flag++)
5990 fputc (',', asm_out_file);
5991 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5994 fputc ('\n', asm_out_file);
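/* Illustrative example (hypothetical call site): a DImode argument
   passed in %r25 marks both ARGW0 and ARGW1 as "GR" in the loop
   above, producing

	.CALL ARGW0=GR,ARGW1=GR

   A DFmode argument in FP register 34 would instead fill two
   consecutive words with FR/FU (or FU/FR when
   HP_FP_ARG_DESCRIPTOR_REVERSED is defined). */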
5997 /* Inform reload about cases where moving X with a mode MODE to or from
5998 a register in RCLASS requires an extra scratch or immediate register.
5999 Return the class needed for the immediate register. */
6001 static reg_class_t
6002 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6003 machine_mode mode, secondary_reload_info *sri)
6005 int regno;
6006 enum reg_class rclass = (enum reg_class) rclass_i;
6008 /* Handle the easy stuff first. */
6009 if (rclass == R1_REGS)
6010 return NO_REGS;
6012 if (REG_P (x))
6014 regno = REGNO (x);
6015 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6016 return NO_REGS;
6018 else
6019 regno = -1;
6021 /* If we have something like (mem (mem (...))), we can safely assume the
6022 inner MEM will end up in a general register after reloading, so there's
6023 no need for a secondary reload. */
6024 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6025 return NO_REGS;
6027 /* Trying to load a constant into a FP register during PIC code
6028 generation requires %r1 as a scratch register. For float modes,
6029 the only legitimate constant is CONST0_RTX. However, there are
6030 a few patterns that accept constant double operands. */
6031 if (flag_pic
6032 && FP_REG_CLASS_P (rclass)
6033 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6035 switch (mode)
6037 case SImode:
6038 sri->icode = CODE_FOR_reload_insi_r1;
6039 break;
6041 case DImode:
6042 sri->icode = CODE_FOR_reload_indi_r1;
6043 break;
6045 case SFmode:
6046 sri->icode = CODE_FOR_reload_insf_r1;
6047 break;
6049 case DFmode:
6050 sri->icode = CODE_FOR_reload_indf_r1;
6051 break;
6053 default:
6054 gcc_unreachable ();
6056 return NO_REGS;
6059 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6060 register when we're generating PIC code or when the operand isn't
6061 readonly. */
6062 if (pa_symbolic_expression_p (x))
6064 if (GET_CODE (x) == HIGH)
6065 x = XEXP (x, 0);
6067 if (flag_pic || !read_only_operand (x, VOIDmode))
6069 switch (mode)
6071 case SImode:
6072 sri->icode = CODE_FOR_reload_insi_r1;
6073 break;
6075 case DImode:
6076 sri->icode = CODE_FOR_reload_indi_r1;
6077 break;
6079 default:
6080 gcc_unreachable ();
6082 return NO_REGS;
6086 /* Profiling showed the PA port spends about 1.3% of its compilation
6087 time in true_regnum from calls inside pa_secondary_reload_class. */
6088 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6089 regno = true_regnum (x);
6091 /* Handle reloads for floating point loads and stores. */
6092 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6093 && FP_REG_CLASS_P (rclass))
6095 if (MEM_P (x))
6097 x = XEXP (x, 0);
6099 /* We don't need a secondary reload for indexed memory addresses.
6101 When INT14_OK_STRICT is true, it might appear that we could
6102 directly allow register indirect memory addresses. However,
6103 this doesn't work because we don't support SUBREGs in
6104 floating-point register copies and reload doesn't tell us
6105 when it's going to use a SUBREG. */
6106 if (IS_INDEX_ADDR_P (x))
6107 return NO_REGS;
6110 /* Request a secondary reload with a general scratch register
6111 for everything else. ??? Could symbolic operands be handled
6112 directly when generating non-pic PA 2.0 code? */
6113 sri->icode = (in_p
6114 ? direct_optab_handler (reload_in_optab, mode)
6115 : direct_optab_handler (reload_out_optab, mode));
6116 return NO_REGS;
6119 /* A SAR<->FP register copy requires an intermediate general register
6120 and secondary memory. We need a secondary reload with a general
6121 scratch register for spills. */
6122 if (rclass == SHIFT_REGS)
6124 /* Handle spill. */
6125 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6127 sri->icode = (in_p
6128 ? direct_optab_handler (reload_in_optab, mode)
6129 : direct_optab_handler (reload_out_optab, mode));
6130 return NO_REGS;
6133 /* Handle FP copy. */
6134 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6135 return GENERAL_REGS;
6138 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6139 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6140 && FP_REG_CLASS_P (rclass))
6141 return GENERAL_REGS;
6143 return NO_REGS;
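/* Example of the PIC constant case above (illustrative): reloading
   CONST0_RTX (SFmode) into a floating-point register while generating
   PIC code sets sri->icode to CODE_FOR_reload_insf_r1 so that %r1 can
   serve as the scratch register. */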
6146 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6147 is only marked as live on entry by df-scan when it is a fixed
6148 register. It isn't a fixed register in the 64-bit runtime,
6149 so we need to mark it here. */
6151 static void
6152 pa_extra_live_on_entry (bitmap regs)
6154 if (TARGET_64BIT)
6155 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6158 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6159 to prevent it from being deleted. */
6161 rtx
6162 pa_eh_return_handler_rtx (void)
6164 rtx tmp;
6166 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6167 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6168 tmp = gen_rtx_MEM (word_mode, tmp);
6169 tmp->volatil = 1;
6170 return tmp;
6173 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6174 by invisible reference. As a GCC extension, we also pass anything
6175 with a zero or variable size by reference.
6177 The 64-bit runtime does not describe passing any types by invisible
6178 reference. The internals of GCC can't currently handle passing
6179 empty structures, and zero or variable length arrays when they are
6180 not passed entirely on the stack or by reference. Thus, as a GCC
6181 extension, we pass these types by reference. The HP compiler doesn't
6182 support these types, so hopefully there shouldn't be any compatibility
6183 issues. This may have to be revisited when HP releases a C99 compiler
6184 or updates the ABI. */
6186 static bool
6187 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6188 machine_mode mode, const_tree type,
6189 bool named ATTRIBUTE_UNUSED)
6191 HOST_WIDE_INT size;
6193 if (type)
6194 size = int_size_in_bytes (type);
6195 else
6196 size = GET_MODE_SIZE (mode);
6198 if (TARGET_64BIT)
6199 return size <= 0;
6200 else
6201 return size <= 0 || size > 8;
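/* Example of the rule above: in the 32-bit runtime a 16-byte struct
   (size > 8) is passed by invisible reference while an 8-byte struct
   is passed by value; in the 64-bit runtime only zero- or
   variable-sized objects (size <= 0) go by reference. */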
6204 enum direction
6205 pa_function_arg_padding (machine_mode mode, const_tree type)
6207 if (mode == BLKmode
6208 || (TARGET_64BIT
6209 && type
6210 && (AGGREGATE_TYPE_P (type)
6211 || TREE_CODE (type) == COMPLEX_TYPE
6212 || TREE_CODE (type) == VECTOR_TYPE)))
6214 /* Return none if justification is not required. */
6215 if (type
6216 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6217 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6218 return none;
6220 /* The directions set here are ignored when a BLKmode argument larger
6221 than a word is placed in a register. Different code is used for
6222 the stack and registers. This makes it difficult to have a
6223 consistent data representation for both the stack and registers.
6224 For both runtimes, the justification and padding for arguments on
6225 the stack and in registers should be identical. */
6226 if (TARGET_64BIT)
6227 /* The 64-bit runtime specifies left justification for aggregates. */
6228 return upward;
6229 else
6230 /* The 32-bit runtime architecture specifies right justification.
6231 When the argument is passed on the stack, the argument is padded
6232 with garbage on the left. The HP compiler pads with zeros. */
6233 return downward;
6236 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6237 return downward;
6238 else
6239 return none;
6243 /* Do what is necessary for `va_start'. We look at the current function
6244 to determine if stdargs or varargs is used and fill in an initial
6245 va_list. A pointer to this constructor is returned. */
6247 static rtx
6248 hppa_builtin_saveregs (void)
6250 rtx offset, dest;
6251 tree fntype = TREE_TYPE (current_function_decl);
6252 int argadj = ((!stdarg_p (fntype))
6253 ? UNITS_PER_WORD : 0);
6255 if (argadj)
6256 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6257 else
6258 offset = crtl->args.arg_offset_rtx;
6260 if (TARGET_64BIT)
6262 int i, off;
6264 /* Adjust for varargs/stdarg differences. */
6265 if (argadj)
6266 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6267 else
6268 offset = crtl->args.arg_offset_rtx;
6270 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6271 from the incoming arg pointer and growing to larger addresses. */
6272 for (i = 26, off = -64; i >= 19; i--, off += 8)
6273 emit_move_insn (gen_rtx_MEM (word_mode,
6274 plus_constant (Pmode,
6275 arg_pointer_rtx, off)),
6276 gen_rtx_REG (word_mode, i));
6278 /* The incoming args pointer points just beyond the flushback area;
6279 normally this is not a serious concern. However, when we are doing
6280 varargs/stdargs we want to make the arg pointer point to the start
6281 of the incoming argument area. */
6282 emit_move_insn (virtual_incoming_args_rtx,
6283 plus_constant (Pmode, arg_pointer_rtx, -64));
6285 /* Now return a pointer to the first anonymous argument. */
6286 return copy_to_reg (expand_binop (Pmode, add_optab,
6287 virtual_incoming_args_rtx,
6288 offset, 0, 0, OPTAB_LIB_WIDEN));
6291 /* Store general registers on the stack. */
6292 dest = gen_rtx_MEM (BLKmode,
6293 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6294 -16));
6295 set_mem_alias_set (dest, get_varargs_alias_set ());
6296 set_mem_align (dest, BITS_PER_WORD);
6297 move_block_from_reg (23, dest, 4);
6299 /* move_block_from_reg will emit code to store the argument registers
6300 individually as scalar stores.
6302 However, other insns may later load from the same addresses for
6303 a structure load (passing a struct to a varargs routine).
6305 The alias code assumes that such aliasing can never happen, so we
6306 have to keep memory referencing insns from moving up beyond the
6307 last argument register store. So we emit a blockage insn here. */
6308 emit_insn (gen_blockage ());
6310 return copy_to_reg (expand_binop (Pmode, add_optab,
6311 crtl->args.internal_arg_pointer,
6312 offset, 0, 0, OPTAB_LIB_WIDEN));
6315 static void
6316 hppa_va_start (tree valist, rtx nextarg)
6318 nextarg = expand_builtin_saveregs ();
6319 std_expand_builtin_va_start (valist, nextarg);
6322 static tree
6323 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6324 gimple_seq *post_p)
6326 if (TARGET_64BIT)
6328 /* Args grow upward. We can use the generic routines. */
6329 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6331 else /* !TARGET_64BIT */
6333 tree ptr = build_pointer_type (type);
6334 tree valist_type;
6335 tree t, u;
6336 unsigned int size, ofs;
6337 bool indirect;
6339 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6340 if (indirect)
6342 type = ptr;
6343 ptr = build_pointer_type (type);
6345 size = int_size_in_bytes (type);
6346 valist_type = TREE_TYPE (valist);
6348 /* Args grow down. Not handled by generic routines. */
6350 u = fold_convert (sizetype, size_in_bytes (type));
6351 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6352 t = fold_build_pointer_plus (valist, u);
6354 /* Align to 4 or 8 byte boundary depending on argument size. */
6356 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6357 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6358 t = fold_convert (valist_type, t);
6360 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6362 ofs = (8 - size) % 4;
6363 if (ofs != 0)
6364 t = fold_build_pointer_plus_hwi (t, ofs);
6366 t = fold_convert (ptr, t);
6367 t = build_va_arg_indirect_ref (t);
6369 if (indirect)
6370 t = build_va_arg_indirect_ref (t);
6372 return t;
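/* Worked example for the 32-bit path above (illustrative): for a
   2-byte argument with valist at a 4-byte-aligned address A, the code
   computes A - 2, rounds down to the 4-byte boundary A - 4, then adds
   ofs = (8 - 2) % 4 = 2, yielding A - 2. The argument thus sits
   right-justified in its 4-byte slot, consistent with args growing
   downward. */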
6376 /* True if MODE is valid for the target. By "valid", we mean able to
6377 be manipulated in non-trivial ways. In particular, this means all
6378 the arithmetic is supported.
6380 Currently, TImode is not valid as the HP 64-bit runtime documentation
6381 doesn't document the alignment and calling conventions for this type.
6382 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6383 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6385 static bool
6386 pa_scalar_mode_supported_p (machine_mode mode)
6388 int precision = GET_MODE_PRECISION (mode);
6390 switch (GET_MODE_CLASS (mode))
6392 case MODE_PARTIAL_INT:
6393 case MODE_INT:
6394 if (precision == CHAR_TYPE_SIZE)
6395 return true;
6396 if (precision == SHORT_TYPE_SIZE)
6397 return true;
6398 if (precision == INT_TYPE_SIZE)
6399 return true;
6400 if (precision == LONG_TYPE_SIZE)
6401 return true;
6402 if (precision == LONG_LONG_TYPE_SIZE)
6403 return true;
6404 return false;
6406 case MODE_FLOAT:
6407 if (precision == FLOAT_TYPE_SIZE)
6408 return true;
6409 if (precision == DOUBLE_TYPE_SIZE)
6410 return true;
6411 if (precision == LONG_DOUBLE_TYPE_SIZE)
6412 return true;
6413 return false;
6415 case MODE_DECIMAL_FLOAT:
6416 return false;
6418 default:
6419 gcc_unreachable ();
6423 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6424 it branches into the delay slot. Otherwise, return FALSE. */
6426 static bool
6427 branch_to_delay_slot_p (rtx_insn *insn)
6429 rtx_insn *jump_insn;
6431 if (dbr_sequence_length ())
6432 return FALSE;
6434 jump_insn = next_active_insn (JUMP_LABEL (insn));
6435 while (insn)
6437 insn = next_active_insn (insn);
6438 if (jump_insn == insn)
6439 return TRUE;
6441 /* We can't rely on the length of asms. So, we return FALSE when
6442 the branch is followed by an asm. */
6443 if (!insn
6444 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6445 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6446 || get_attr_length (insn) > 0)
6447 break;
6450 return FALSE;
6453 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6455 This occurs when INSN has an unfilled delay slot and is followed
6456 by an asm. Disaster can occur if the asm is empty and the jump
6457 branches into the delay slot. So, we add a nop in the delay slot
6458 when this occurs. */
6460 static bool
6461 branch_needs_nop_p (rtx_insn *insn)
6463 rtx_insn *jump_insn;
6465 if (dbr_sequence_length ())
6466 return FALSE;
6468 jump_insn = next_active_insn (JUMP_LABEL (insn));
6469 while (insn)
6471 insn = next_active_insn (insn);
6472 if (!insn || jump_insn == insn)
6473 return TRUE;
6475 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6476 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6477 && get_attr_length (insn) > 0)
6478 break;
6481 return FALSE;
6484 /* Return TRUE if INSN, a forward jump insn, can use nullification
6485 to skip the following instruction. This avoids an extra cycle due
6486 to a mis-predicted branch when we fall through. */
6488 static bool
6489 use_skip_p (rtx_insn *insn)
6491 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6493 while (insn)
6495 insn = next_active_insn (insn);
6497 /* We can't rely on the length of asms, so we can't skip asms. */
6498 if (!insn
6499 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6500 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6501 break;
6502 if (get_attr_length (insn) == 4
6503 && jump_insn == next_active_insn (insn))
6504 return TRUE;
6505 if (get_attr_length (insn) > 0)
6506 break;
6509 return FALSE;
6512 /* This routine handles all the normal conditional branch sequences we
6513 might need to generate. It handles compare immediate vs compare
6514 register, nullification of delay slots, varying length branches,
6515 negated branches, and all combinations of the above. It returns the
6516 output appropriate to emit the branch corresponding to all given
6517 parameters. */
6519 const char *
6520 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6522 static char buf[100];
6523 bool useskip;
6524 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6525 int length = get_attr_length (insn);
6526 int xdelay;
6528 /* A conditional branch to the following instruction (e.g. the delay slot)
6529 is asking for a disaster. This can happen when not optimizing and
6530 when jump optimization fails.
6532 While it is usually safe to emit nothing, this can fail if the
6533 preceding instruction is a nullified branch with an empty delay
6534 slot and the same branch target as this branch. We could check
6535 for this but jump optimization should eliminate nop jumps. It
6536 is always safe to emit a nop. */
6537 if (branch_to_delay_slot_p (insn))
6538 return "nop";
6540 /* The doubleword form of the cmpib instruction doesn't have the LEU
6541 and GTU conditions while the cmpb instruction does. Since we accept
6542 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6543 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6544 operands[2] = gen_rtx_REG (DImode, 0);
6545 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6546 operands[1] = gen_rtx_REG (DImode, 0);
6548 /* If this is a long branch with its delay slot unfilled, set `nullify'
6549 as it can nullify the delay slot and save a nop. */
6550 if (length == 8 && dbr_sequence_length () == 0)
6551 nullify = 1;
6553 /* If this is a short forward conditional branch which did not get
6554 its delay slot filled, the delay slot can still be nullified. */
6555 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6556 nullify = forward_branch_p (insn);
6558 /* A forward branch over a single nullified insn can be done with a
6559 comclr instruction. This avoids a single cycle penalty due to
6560 mis-predicted branch if we fall through (branch not taken). */
6561 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6563 switch (length)
6565 /* All short conditional branches except backwards with an unfilled
6566 delay slot. */
6567 case 4:
6568 if (useskip)
6569 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6570 else
6571 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6572 if (GET_MODE (operands[1]) == DImode)
6573 strcat (buf, "*");
6574 if (negated)
6575 strcat (buf, "%B3");
6576 else
6577 strcat (buf, "%S3");
6578 if (useskip)
6579 strcat (buf, " %2,%r1,%%r0");
6580 else if (nullify)
6582 if (branch_needs_nop_p (insn))
6583 strcat (buf, ",n %2,%r1,%0%#");
6584 else
6585 strcat (buf, ",n %2,%r1,%0");
6587 else
6588 strcat (buf, " %2,%r1,%0");
6589 break;
6591 /* All long conditionals. Note a short backward branch with an
6592 unfilled delay slot is treated just like a long backward branch
6593 with an unfilled delay slot. */
6594 case 8:
6595 /* Handle weird backwards branch with a filled delay slot
6596 which is nullified. */
6597 if (dbr_sequence_length () != 0
6598 && ! forward_branch_p (insn)
6599 && nullify)
6601 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6602 if (GET_MODE (operands[1]) == DImode)
6603 strcat (buf, "*");
6604 if (negated)
6605 strcat (buf, "%S3");
6606 else
6607 strcat (buf, "%B3");
6608 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6610 /* Handle short backwards branch with an unfilled delay slot.
6611 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6612 taken and untaken branches. */
6613 else if (dbr_sequence_length () == 0
6614 && ! forward_branch_p (insn)
6615 && INSN_ADDRESSES_SET_P ()
6616 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6617 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6619 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6620 if (GET_MODE (operands[1]) == DImode)
6621 strcat (buf, "*");
6622 if (negated)
6623 strcat (buf, "%B3 %2,%r1,%0%#");
6624 else
6625 strcat (buf, "%S3 %2,%r1,%0%#");
6627 else
6629 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6630 if (GET_MODE (operands[1]) == DImode)
6631 strcat (buf, "*");
6632 if (negated)
6633 strcat (buf, "%S3");
6634 else
6635 strcat (buf, "%B3");
6636 if (nullify)
6637 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6638 else
6639 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6641 break;
6643 default:
6644 /* The reversed conditional branch must branch over one additional
6645 instruction if the delay slot is filled and needs to be extracted
6646 by pa_output_lbranch. If the delay slot is empty or this is a
6647 nullified forward branch, the instruction after the reversed
6648 condition branch must be nullified. */
6649 if (dbr_sequence_length () == 0
6650 || (nullify && forward_branch_p (insn)))
6652 nullify = 1;
6653 xdelay = 0;
6654 operands[4] = GEN_INT (length);
6656 else
6658 xdelay = 1;
6659 operands[4] = GEN_INT (length + 4);
6662 /* Create a reversed conditional branch which branches around
6663 the following insns. */
6664 if (GET_MODE (operands[1]) != DImode)
6666 if (nullify)
6668 if (negated)
6669 strcpy (buf,
6670 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6671 else
6672 strcpy (buf,
6673 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6675 else
6677 if (negated)
6678 strcpy (buf,
6679 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6680 else
6681 strcpy (buf,
6682 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6685 else
6687 if (nullify)
6689 if (negated)
6690 strcpy (buf,
6691 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6692 else
6693 strcpy (buf,
6694 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6696 else
6698 if (negated)
6699 strcpy (buf,
6700 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6701 else
6702 strcpy (buf,
6703 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6707 output_asm_insn (buf, operands);
6708 return pa_output_lbranch (operands[0], insn, xdelay);
6710 return buf;
6713 /* This routine handles output of long unconditional branches that
6714 exceed the maximum range of a simple branch instruction. Since
6715 we don't have a register available for the branch, we save register
6716 %r1 in the frame marker, load the branch destination DEST into %r1,
6717 execute the branch, and restore %r1 in the delay slot of the branch.
6719 Since long branches may have an insn in the delay slot and the
6720 delay slot is used to restore %r1, we in general need to extract
6721 this insn and execute it before the branch. However, to facilitate
6722 use of this function by conditional branches, we also provide an
6723 option to not extract the delay insn so that it will be emitted
6724 after the long branch. So, if there is an insn in the delay slot,
6725 it is extracted if XDELAY is nonzero.
6727 The lengths of the various long-branch sequences are 20, 16 and 24
6728 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6730 const char *
6731 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6733 rtx xoperands[2];
6735 xoperands[0] = dest;
6737 /* First, free up the delay slot. */
6738 if (xdelay && dbr_sequence_length () != 0)
6740 /* We can't handle a jump in the delay slot. */
6741 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6743 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6744 optimize, 0, NULL);
6746 /* Now delete the delay insn. */
6747 SET_INSN_DELETED (NEXT_INSN (insn));
6750 /* Output an insn to save %r1. The runtime documentation doesn't
6751 specify whether the "Clean Up" slot in the caller's frame can
6752 be clobbered by the callee. It isn't copied by HP's builtin
6753 alloca, so this suggests that it can be clobbered if necessary.
6754 The "Static Link" location is copied by HP builtin alloca, so
6755 we avoid using it. Using the cleanup slot might be a problem
6756 if we have to interoperate with languages that pass cleanup
6757 information. However, it should be possible to handle these
6758 situations with GCC's asm feature.
6760 The "Current RP" slot is reserved for the called procedure, so
6761 we try to use it when we don't have a frame of our own. It's
6762 rather unlikely that we won't have a frame when we need to emit
6763 a very long branch.
6765 Really the way to go long term is a register scavenger; goto
6766 the target of the jump and find a register which we can use
6767 as a scratch to hold the value in %r1. Then, we wouldn't have
6768 to free up the delay slot or clobber a slot that may be needed
6769 for other purposes. */
6770 if (TARGET_64BIT)
6772 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6773 /* Use the return pointer slot in the frame marker. */
6774 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6775 else
6776 /* Use the slot at -40 in the frame marker since HP builtin
6777 alloca doesn't copy it. */
6778 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6780 else
6782 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6783 /* Use the return pointer slot in the frame marker. */
6784 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6785 else
6786 /* Use the "Clean Up" slot in the frame marker. In GCC,
6787 the only other use of this location is for copying a
6788 floating point double argument from a floating-point
6789 register to two general registers. The copy is done
6790 as an "atomic" operation when outputting a call, so it
6791 won't interfere with our using the location here. */
6792 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6795 if (TARGET_PORTABLE_RUNTIME)
6797 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6798 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6799 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6801 else if (flag_pic)
6803 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6804 if (TARGET_SOM || !TARGET_GAS)
6806 xoperands[1] = gen_label_rtx ();
6807 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6808 targetm.asm_out.internal_label (asm_out_file, "L",
6809 CODE_LABEL_NUMBER (xoperands[1]));
6810 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6812 else
6814 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6815 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6817 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6819 else
6820 /* Now output a very long branch to the original target. */
6821 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6823 /* Now restore the value of %r1 in the delay slot. */
6824 if (TARGET_64BIT)
6826 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6827 return "ldd -16(%%r30),%%r1";
6828 else
6829 return "ldd -40(%%r30),%%r1";
6831 else
6833 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6834 return "ldw -20(%%r30),%%r1";
6835 else
6836 return "ldw -12(%%r30),%%r1";
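/* For instance, when the return pointer slot is usable, the non-PIC,
   non-portable-runtime output of this routine is the four-insn,
   16-byte sequence

	stw %r1,-20(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -20(%r30),%r1	; delay slot restores %r1

   matching the 16-byte non-PIC case noted above; `target' stands for
   the actual destination label. */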
6840 /* This routine handles all the branch-on-bit conditional branch sequences we
6841 might need to generate. It handles nullification of delay slots,
6842 varying length branches, negated branches and all combinations of the
6843 above. It returns the appropriate output template to emit the branch. */
6845 const char *
6846 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6848 static char buf[100];
6849 bool useskip;
6850 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6851 int length = get_attr_length (insn);
6852 int xdelay;
6854 /* A conditional branch to the following instruction (e.g. the delay slot) is
6855 asking for a disaster. I do not think this can happen as this pattern
6856 is only used when optimizing; jump optimization should eliminate the
6857 jump. But be prepared just in case. */
6859 if (branch_to_delay_slot_p (insn))
6860 return "nop";
6862 /* If this is a long branch with its delay slot unfilled, set `nullify'
6863 as it can nullify the delay slot and save a nop. */
6864 if (length == 8 && dbr_sequence_length () == 0)
6865 nullify = 1;
6867 /* If this is a short forward conditional branch which did not get
6868 its delay slot filled, the delay slot can still be nullified. */
6869 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6870 nullify = forward_branch_p (insn);
6872 /* A forward branch over a single nullified insn can be done with an
6873 extrs instruction. This avoids a single cycle penalty due to
6874 mis-predicted branch if we fall through (branch not taken). */
6875 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6877 switch (length)
6880 /* All short conditional branches except backwards with an unfilled
6881 delay slot. */
6882 case 4:
6883 if (useskip)
6884 strcpy (buf, "{extrs,|extrw,s,}");
6885 else
6886 strcpy (buf, "bb,");
6887 if (useskip && GET_MODE (operands[0]) == DImode)
6888 strcpy (buf, "extrd,s,*");
6889 else if (GET_MODE (operands[0]) == DImode)
6890 strcpy (buf, "bb,*");
6891 if ((which == 0 && negated)
6892 || (which == 1 && ! negated))
6893 strcat (buf, ">=");
6894 else
6895 strcat (buf, "<");
6896 if (useskip)
6897 strcat (buf, " %0,%1,1,%%r0");
6898 else if (nullify && negated)
6900 if (branch_needs_nop_p (insn))
6901 strcat (buf, ",n %0,%1,%3%#");
6902 else
6903 strcat (buf, ",n %0,%1,%3");
6905 else if (nullify && ! negated)
6907 if (branch_needs_nop_p (insn))
6908 strcat (buf, ",n %0,%1,%2%#");
6909 else
6910 strcat (buf, ",n %0,%1,%2");
6912 else if (! nullify && negated)
6913 strcat (buf, " %0,%1,%3");
6914 else if (! nullify && ! negated)
6915 strcat (buf, " %0,%1,%2");
6916 break;
6918 /* All long conditionals. Note a short backward branch with an
6919 unfilled delay slot is treated just like a long backward branch
6920 with an unfilled delay slot. */
6921 case 8:
6922 /* Handle weird backwards branch with a filled delay slot
6923 which is nullified. */
6924 if (dbr_sequence_length () != 0
6925 && ! forward_branch_p (insn)
6926 && nullify)
6928 strcpy (buf, "bb,");
6929 if (GET_MODE (operands[0]) == DImode)
6930 strcat (buf, "*");
6931 if ((which == 0 && negated)
6932 || (which == 1 && ! negated))
6933 strcat (buf, "<");
6934 else
6935 strcat (buf, ">=");
6936 if (negated)
6937 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6938 else
6939 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6941 /* Handle short backwards branch with an unfilled delay slot.
6942 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6943 taken and untaken branches. */
6944 else if (dbr_sequence_length () == 0
6945 && ! forward_branch_p (insn)
6946 && INSN_ADDRESSES_SET_P ()
6947 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6948 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6950 strcpy (buf, "bb,");
6951 if (GET_MODE (operands[0]) == DImode)
6952 strcat (buf, "*");
6953 if ((which == 0 && negated)
6954 || (which == 1 && ! negated))
6955 strcat (buf, ">=");
6956 else
6957 strcat (buf, "<");
6958 if (negated)
6959 strcat (buf, " %0,%1,%3%#");
6960 else
6961 strcat (buf, " %0,%1,%2%#");
6963 else
6965 if (GET_MODE (operands[0]) == DImode)
6966 strcpy (buf, "extrd,s,*");
6967 else
6968 strcpy (buf, "{extrs,|extrw,s,}");
6969 if ((which == 0 && negated)
6970 || (which == 1 && ! negated))
6971 strcat (buf, "<");
6972 else
6973 strcat (buf, ">=");
6974 if (nullify && negated)
6975 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6976 else if (nullify && ! negated)
6977 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6978 else if (negated)
6979 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6980 else
6981 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6983 break;
6985 default:
6986 /* The reversed conditional branch must branch over one additional
6987 instruction if the delay slot is filled and needs to be extracted
6988 by pa_output_lbranch. If the delay slot is empty or this is a
6989 nullified forward branch, the instruction after the reversed
6990 condition branch must be nullified. */
6991 if (dbr_sequence_length () == 0
6992 || (nullify && forward_branch_p (insn)))
6994 nullify = 1;
6995 xdelay = 0;
6996 operands[4] = GEN_INT (length);
6998 else
7000 xdelay = 1;
7001 operands[4] = GEN_INT (length + 4);
7004 if (GET_MODE (operands[0]) == DImode)
7005 strcpy (buf, "bb,*");
7006 else
7007 strcpy (buf, "bb,");
7008 if ((which == 0 && negated)
7009 || (which == 1 && !negated))
7010 strcat (buf, "<");
7011 else
7012 strcat (buf, ">=");
7013 if (nullify)
7014 strcat (buf, ",n %0,%1,.+%4");
7015 else
7016 strcat (buf, " %0,%1,.+%4");
7017 output_asm_insn (buf, operands);
7018 return pa_output_lbranch (negated ? operands[3] : operands[2],
7019 insn, xdelay);
7021 return buf;
7024 /* This routine handles all the branch-on-variable-bit conditional branch
7025 sequences we might need to generate. It handles nullification of delay
7026 slots, varying length branches, negated branches and all combinations
7027 of the above. it returns the appropriate output template to emit the
7028 branch. */
7030 const char *
7031 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7032 int which)
7034 static char buf[100];
7035 bool useskip;
7036 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7037 int length = get_attr_length (insn);
7038 int xdelay;
7040 /* A conditional branch to the following instruction (e.g. the delay slot) is
7041 asking for a disaster. I do not think this can happen as this pattern
7042 is only used when optimizing; jump optimization should eliminate the
7043 jump. But be prepared just in case. */
7045 if (branch_to_delay_slot_p (insn))
7046 return "nop";
7048 /* If this is a long branch with its delay slot unfilled, set `nullify'
7049 as it can nullify the delay slot and save a nop. */
7050 if (length == 8 && dbr_sequence_length () == 0)
7051 nullify = 1;
7053 /* If this is a short forward conditional branch which did not get
7054 its delay slot filled, the delay slot can still be nullified. */
7055 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7056 nullify = forward_branch_p (insn);
7058 /* A forward branch over a single nullified insn can be done with an
7059 extrs instruction. This avoids a single cycle penalty due to
7060 mis-predicted branch if we fall through (branch not taken). */
7061 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7063 switch (length)
7066 /* All short conditional branches except backwards with an unfilled
7067 delay slot. */
7068 case 4:
7069 if (useskip)
7070 strcpy (buf, "{vextrs,|extrw,s,}");
7071 else
7072 strcpy (buf, "{bvb,|bb,}");
7073 if (useskip && GET_MODE (operands[0]) == DImode)
7074 strcpy (buf, "extrd,s,*");
7075 else if (GET_MODE (operands[0]) == DImode)
7076 strcpy (buf, "bb,*");
7077 if ((which == 0 && negated)
7078 || (which == 1 && ! negated))
7079 strcat (buf, ">=");
7080 else
7081 strcat (buf, "<");
7082 if (useskip)
7083 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7084 else if (nullify && negated)
7086 if (branch_needs_nop_p (insn))
7087 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7088 else
7089 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7091 else if (nullify && ! negated)
7093 if (branch_needs_nop_p (insn))
7094 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7095 else
7096 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7098 else if (! nullify && negated)
7099 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7100 else if (! nullify && ! negated)
7101 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7102 break;
7104 /* All long conditionals. Note a short backward branch with an
7105 unfilled delay slot is treated just like a long backward branch
7106 with an unfilled delay slot. */
7107 case 8:
7108 /* Handle weird backwards branch with a filled delay slot
7109 which is nullified. */
7110 if (dbr_sequence_length () != 0
7111 && ! forward_branch_p (insn)
7112 && nullify)
7114 strcpy (buf, "{bvb,|bb,}");
7115 if (GET_MODE (operands[0]) == DImode)
7116 strcat (buf, "*");
7117 if ((which == 0 && negated)
7118 || (which == 1 && ! negated))
7119 strcat (buf, "<");
7120 else
7121 strcat (buf, ">=");
7122 if (negated)
7123 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7124 else
7125 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7127 /* Handle short backwards branch with an unfilled delay slot.
7128 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7129 taken and untaken branches. */
7130 else if (dbr_sequence_length () == 0
7131 && ! forward_branch_p (insn)
7132 && INSN_ADDRESSES_SET_P ()
7133 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7134 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7136 strcpy (buf, "{bvb,|bb,}");
7137 if (GET_MODE (operands[0]) == DImode)
7138 strcat (buf, "*");
7139 if ((which == 0 && negated)
7140 || (which == 1 && ! negated))
7141 strcat (buf, ">=");
7142 else
7143 strcat (buf, "<");
7144 if (negated)
7145 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7146 else
7147 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7149 else
7151 strcpy (buf, "{vextrs,|extrw,s,}");
7152 if (GET_MODE (operands[0]) == DImode)
7153 strcpy (buf, "extrd,s,*");
7154 if ((which == 0 && negated)
7155 || (which == 1 && ! negated))
7156 strcat (buf, "<");
7157 else
7158 strcat (buf, ">=");
7159 if (nullify && negated)
7160 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7161 else if (nullify && ! negated)
7162 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7163 else if (negated)
7164 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7165 else
7166 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7168 break;
7170 default:
7171 /* The reversed conditional branch must branch over one additional
7172 instruction if the delay slot is filled and needs to be extracted
7173 by pa_output_lbranch. If the delay slot is empty or this is a
7174 nullified forward branch, the instruction after the reversed
7175 condition branch must be nullified. */
7176 if (dbr_sequence_length () == 0
7177 || (nullify && forward_branch_p (insn)))
7179 nullify = 1;
7180 xdelay = 0;
7181 operands[4] = GEN_INT (length);
7183 else
7185 xdelay = 1;
7186 operands[4] = GEN_INT (length + 4);
7189 if (GET_MODE (operands[0]) == DImode)
7190 strcpy (buf, "bb,*");
7191 else
7192 strcpy (buf, "{bvb,|bb,}");
7193 if ((which == 0 && negated)
7194 || (which == 1 && !negated))
7195 strcat (buf, "<");
7196 else
7197 strcat (buf, ">=");
7198 if (nullify)
7199 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7200 else
7201 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7202 output_asm_insn (buf, operands);
7203 return pa_output_lbranch (negated ? operands[3] : operands[2],
7204 insn, xdelay);
7206 return buf;
7209 /* Return the output template for emitting a dbra type insn.
7211 Note it may perform some output operations on its own before
7212 returning the final output string. */
7213 const char *
7214 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7216 int length = get_attr_length (insn);
7218 /* A conditional branch to the following instruction (e.g. the delay slot) is
7219 asking for a disaster. Be prepared! */
7221 if (branch_to_delay_slot_p (insn))
7223 if (which_alternative == 0)
7224 return "ldo %1(%0),%0";
7225 else if (which_alternative == 1)
7227 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7228 output_asm_insn ("ldw -16(%%r30),%4", operands);
7229 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7230 return "{fldws|fldw} -16(%%r30),%0";
7232 else
7234 output_asm_insn ("ldw %0,%4", operands);
7235 return "ldo %1(%4),%4\n\tstw %4,%0";
7239 if (which_alternative == 0)
7241 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7242 int xdelay;
7244 /* If this is a long branch with its delay slot unfilled, set `nullify'
7245 as it can nullify the delay slot and save a nop. */
7246 if (length == 8 && dbr_sequence_length () == 0)
7247 nullify = 1;
7249 /* If this is a short forward conditional branch which did not get
7250 its delay slot filled, the delay slot can still be nullified. */
7251 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7252 nullify = forward_branch_p (insn);
7254 switch (length)
7256 case 4:
7257 if (nullify)
7259 if (branch_needs_nop_p (insn))
7260 return "addib,%C2,n %1,%0,%3%#";
7261 else
7262 return "addib,%C2,n %1,%0,%3";
7264 else
7265 return "addib,%C2 %1,%0,%3";
7267 case 8:
7268 /* Handle weird backwards branch with a filled delay slot
7269 which is nullified. */
7270 if (dbr_sequence_length () != 0
7271 && ! forward_branch_p (insn)
7272 && nullify)
7273 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7274 /* Handle short backwards branch with an unfilled delay slot.
7275 Using an addb;nop rather than addi;bl saves 1 cycle for both
7276 taken and untaken branches. */
7277 else if (dbr_sequence_length () == 0
7278 && ! forward_branch_p (insn)
7279 && INSN_ADDRESSES_SET_P ()
7280 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7281 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7282 return "addib,%C2 %1,%0,%3%#";
7284 /* Handle normal cases. */
7285 if (nullify)
7286 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7287 else
7288 return "addi,%N2 %1,%0,%0\n\tb %3";
7290 default:
7291 /* The reversed conditional branch must branch over one additional
7292 instruction if the delay slot is filled and needs to be extracted
7293 by pa_output_lbranch. If the delay slot is empty or this is a
7294 nullified forward branch, the instruction after the reversed
7295 condition branch must be nullified. */
7296 if (dbr_sequence_length () == 0
7297 || (nullify && forward_branch_p (insn)))
7299 nullify = 1;
7300 xdelay = 0;
7301 operands[4] = GEN_INT (length);
7303 else
7305 xdelay = 1;
7306 operands[4] = GEN_INT (length + 4);
7309 if (nullify)
7310 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7311 else
7312 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7314 return pa_output_lbranch (operands[3], insn, xdelay);
7318 /* Deal with gross reload from FP register case. */
7319 else if (which_alternative == 1)
7321 /* Move loop counter from FP register to MEM then into a GR,
7322 increment the GR, store the GR into MEM, and finally reload
7323 the FP register from MEM from within the branch's delay slot. */
7324 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7325 operands);
7326 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7327 if (length == 24)
7328 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7329 else if (length == 28)
7330 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7331 else
7333 operands[5] = GEN_INT (length - 16);
7334 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7335 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7336 return pa_output_lbranch (operands[3], insn, 0);
7339 /* Deal with gross reload from memory case. */
7340 else
7342 /* Reload loop counter from memory, the store back to memory
7343 happens in the branch's delay slot. */
7344 output_asm_insn ("ldw %0,%4", operands);
7345 if (length == 12)
7346 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7347 else if (length == 16)
7348 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7349 else
7351 operands[5] = GEN_INT (length - 4);
7352 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7353 return pa_output_lbranch (operands[3], insn, 0);
7358 /* Return the output template for emitting a movb type insn.
7360 Note it may perform some output operations on its own before
7361 returning the final output string. */
7362 const char *
7363 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7364 int reverse_comparison)
7366 int length = get_attr_length (insn);
7368 /* A conditional branch to the following instruction (e.g. the delay slot) is
7369 asking for a disaster. Be prepared! */
7371 if (branch_to_delay_slot_p (insn))
7373 if (which_alternative == 0)
7374 return "copy %1,%0";
7375 else if (which_alternative == 1)
7377 output_asm_insn ("stw %1,-16(%%r30)", operands);
7378 return "{fldws|fldw} -16(%%r30),%0";
7380 else if (which_alternative == 2)
7381 return "stw %1,%0";
7382 else
7383 return "mtsar %r1";
7386 /* Support the second variant. */
7387 if (reverse_comparison)
7388 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7390 if (which_alternative == 0)
7392 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7393 int xdelay;
7395 /* If this is a long branch with its delay slot unfilled, set `nullify'
7396 as it can nullify the delay slot and save a nop. */
7397 if (length == 8 && dbr_sequence_length () == 0)
7398 nullify = 1;
7400 /* If this is a short forward conditional branch which did not get
7401 its delay slot filled, the delay slot can still be nullified. */
7402 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7403 nullify = forward_branch_p (insn);
7405 switch (length)
7407 case 4:
7408 if (nullify)
7410 if (branch_needs_nop_p (insn))
7411 return "movb,%C2,n %1,%0,%3%#";
7412 else
7413 return "movb,%C2,n %1,%0,%3";
7415 else
7416 return "movb,%C2 %1,%0,%3";
7418 case 8:
7419 /* Handle weird backwards branch with a filled delay slot
7420 which is nullified. */
7421 if (dbr_sequence_length () != 0
7422 && ! forward_branch_p (insn)
7423 && nullify)
7424 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7426 /* Handle short backwards branch with an unfilled delay slot.
7427 Using a movb;nop rather than or;bl saves 1 cycle for both
7428 taken and untaken branches. */
7429 else if (dbr_sequence_length () == 0
7430 && ! forward_branch_p (insn)
7431 && INSN_ADDRESSES_SET_P ()
7432 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7433 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7434 return "movb,%C2 %1,%0,%3%#";
7435 /* Handle normal cases. */
7436 if (nullify)
7437 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7438 else
7439 return "or,%N2 %1,%%r0,%0\n\tb %3";
7441 default:
7442 /* The reversed conditional branch must branch over one additional
7443 instruction if the delay slot is filled and needs to be extracted
7444 by pa_output_lbranch. If the delay slot is empty or this is a
7445 nullified forward branch, the instruction after the reversed
7446 condition branch must be nullified. */
7447 if (dbr_sequence_length () == 0
7448 || (nullify && forward_branch_p (insn)))
7450 nullify = 1;
7451 xdelay = 0;
7452 operands[4] = GEN_INT (length);
7454 else
7456 xdelay = 1;
7457 operands[4] = GEN_INT (length + 4);
7460 if (nullify)
7461 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7462 else
7463 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7465 return pa_output_lbranch (operands[3], insn, xdelay);
7468 /* Deal with gross reload for FP destination register case. */
7469 else if (which_alternative == 1)
7471 /* Move source register to MEM, perform the branch test, then
7472 finally load the FP register from MEM from within the branch's
7473 delay slot. */
7474 output_asm_insn ("stw %1,-16(%%r30)", operands);
7475 if (length == 12)
7476 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7477 else if (length == 16)
7478 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7479 else
7481 operands[4] = GEN_INT (length - 4);
7482 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7483 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7484 return pa_output_lbranch (operands[3], insn, 0);
7487 /* Deal with gross reload from memory case. */
7488 else if (which_alternative == 2)
7490 /* Reload loop counter from memory, the store back to memory
7491 happens in the branch's delay slot. */
7492 if (length == 8)
7493 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7494 else if (length == 12)
7495 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7496 else
7498 operands[4] = GEN_INT (length);
7499 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7500 operands);
7501 return pa_output_lbranch (operands[3], insn, 0);
7504 /* Handle SAR as a destination. */
7505 else
7507 if (length == 8)
7508 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7509 else if (length == 12)
7510 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7511 else
7513 operands[4] = GEN_INT (length);
7514 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7515 operands);
7516 return pa_output_lbranch (operands[3], insn, 0);
7521 /* Copy any FP arguments in INSN into integer registers. */
7522 static void
7523 copy_fp_args (rtx_insn *insn)
7525 rtx link;
7526 rtx xoperands[2];
7528 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7530 int arg_mode, regno;
7531 rtx use = XEXP (link, 0);
7533 if (! (GET_CODE (use) == USE
7534 && GET_CODE (XEXP (use, 0)) == REG
7535 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7536 continue;
7538 arg_mode = GET_MODE (XEXP (use, 0));
7539 regno = REGNO (XEXP (use, 0));
7541 /* Is it a floating point register? */
7542 if (regno >= 32 && regno <= 39)
7544 /* Copy the FP register into an integer register via memory. */
7545 if (arg_mode == SFmode)
7547 xoperands[0] = XEXP (use, 0);
7548 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7549 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7550 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7552 else
7554 xoperands[0] = XEXP (use, 0);
7555 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7556 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7557 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7558 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7564 /* Compute length of the FP argument copy sequence for INSN. */
7565 static int
7566 length_fp_args (rtx_insn *insn)
7568 int length = 0;
7569 rtx link;
7571 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7573 int arg_mode, regno;
7574 rtx use = XEXP (link, 0);
7576 if (! (GET_CODE (use) == USE
7577 && GET_CODE (XEXP (use, 0)) == REG
7578 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7579 continue;
7581 arg_mode = GET_MODE (XEXP (use, 0));
7582 regno = REGNO (XEXP (use, 0));
7584 /* Is it a floating point register? */
7585 if (regno >= 32 && regno <= 39)
7587 if (arg_mode == SFmode)
7588 length += 8;
7589 else
7590 length += 12;
7594 return length;
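/* The 8- and 12-byte counts above mirror copy_fp_args: an SFmode
   argument takes two insns (fstw + ldw) and a DFmode argument takes
   three (fstd + ldw + ldw). */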
7597 /* Return the attribute length for the millicode call instruction INSN.
7598 The length must match the code generated by pa_output_millicode_call.
7599 We include the delay slot in the returned length as it is better to
7600 overestimate the length than to underestimate it. */
7602 int
7603 pa_attr_length_millicode_call (rtx_insn *insn)
7605 unsigned long distance = -1;
7606 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7608 if (INSN_ADDRESSES_SET_P ())
7610 distance = (total + insn_current_reference_address (insn));
7611 if (distance < total)
7612 distance = -1;
7615 if (TARGET_64BIT)
7617 if (!TARGET_LONG_CALLS && distance < 7600000)
7618 return 8;
7620 return 20;
7622 else if (TARGET_PORTABLE_RUNTIME)
7623 return 24;
7624 else
7626 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7627 return 8;
7629 if (!flag_pic)
7630 return 12;
7632 return 24;
7636 /* INSN is a function call.
7638 CALL_DEST is the routine we are calling. */
7640 const char *
7641 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7643 int attr_length = get_attr_length (insn);
7644 int seq_length = dbr_sequence_length ();
7645 rtx xoperands[3];
7647 xoperands[0] = call_dest;
7648 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7650 /* Handle the common case where we are sure that the branch will
7651 reach the beginning of the $CODE$ subspace. The within-reach
7652 form of the $$sh_func_adrs call has a length of 28. Because it
7653 has an attribute type of sh_func_adrs, it never has a nonzero
7654 sequence length (i.e., the delay slot is never filled). */
7655 if (!TARGET_LONG_CALLS
7656 && (attr_length == 8
7657 || (attr_length == 28
7658 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7660 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7662 else
7664 if (TARGET_64BIT)
7666 /* It might seem that one insn could be saved by accessing
7667 the millicode function using the linkage table. However,
7668 this doesn't work in shared libraries and other dynamically
7669 loaded objects. Using a pc-relative sequence also avoids
7670 problems related to the implicit use of the gp register. */
7671 output_asm_insn ("b,l .+8,%%r1", xoperands);
7673 if (TARGET_GAS)
7675 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7676 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7678 else
7680 xoperands[1] = gen_label_rtx ();
7681 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7682 targetm.asm_out.internal_label (asm_out_file, "L",
7683 CODE_LABEL_NUMBER (xoperands[1]));
7684 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7687 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7689 else if (TARGET_PORTABLE_RUNTIME)
7691 /* Pure portable runtime doesn't allow be/ble; we also don't
7692 have PIC support in the assembler/linker, so this sequence
7693 is needed. */
7695 /* Get the address of our target into %r1. */
7696 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7697 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7699 /* Get our return address into %r31. */
7700 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7701 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7703 /* Jump to our target address in %r1. */
7704 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7706 else if (!flag_pic)
7708 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7709 if (TARGET_PA_20)
7710 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7711 else
7712 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7714 else
7716 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7717 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7719 if (TARGET_SOM || !TARGET_GAS)
7721 /* The HP assembler can generate relocations for the
7722 difference of two symbols. GAS can do this for a
7723 millicode symbol but not an arbitrary external
7724 symbol when generating SOM output. */
7725 xoperands[1] = gen_label_rtx ();
7726 targetm.asm_out.internal_label (asm_out_file, "L",
7727 CODE_LABEL_NUMBER (xoperands[1]));
7728 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7729 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7731 else
7733 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7734 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7735 xoperands);
7738 /* Jump to our target address in %r1. */
7739 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7743 if (seq_length == 0)
7744 output_asm_insn ("nop", xoperands);
7746 return "";
7749 /* Return the attribute length of the call instruction INSN. The SIBCALL
7750 flag indicates whether INSN is a regular call or a sibling call. The
7751 length returned must be longer than the code actually generated by
7752 pa_output_call. Since branch shortening is done before delay branch
7753 sequencing, there is no way to determine whether or not the delay
7754 slot will be filled during branch shortening. Even when the delay
7755 slot is filled, we may have to add a nop if the delay slot contains
7756 a branch that can't reach its target. Thus, we always have to include
7757 the delay slot in the length estimate. This used to be done in
7758 pa_adjust_insn_length but we do it here now as some sequences always
7759 fill the delay slot and we can save four bytes in the estimate for
7760 these sequences. */
7763 pa_attr_length_call (rtx_insn *insn, int sibcall)
7765 int local_call;
7766 rtx call, call_dest;
7767 tree call_decl;
7768 int length = 0;
7769 rtx pat = PATTERN (insn);
7770 unsigned long distance = -1;
7772 gcc_assert (CALL_P (insn));
7774 if (INSN_ADDRESSES_SET_P ())
7776 unsigned long total;
7778 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7779 distance = (total + insn_current_reference_address (insn));
7780 if (distance < total)
7781 distance = -1;
7784 gcc_assert (GET_CODE (pat) == PARALLEL);
7786 /* Get the call rtx. */
7787 call = XVECEXP (pat, 0, 0);
7788 if (GET_CODE (call) == SET)
7789 call = SET_SRC (call);
7791 gcc_assert (GET_CODE (call) == CALL);
7793 /* Determine if this is a local call. */
7794 call_dest = XEXP (XEXP (call, 0), 0);
7795 call_decl = SYMBOL_REF_DECL (call_dest);
7796 local_call = call_decl && targetm.binds_local_p (call_decl);
7798 /* pc-relative branch. */
7799 if (!TARGET_LONG_CALLS
7800 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7801 || distance < MAX_PCREL17F_OFFSET))
7802 length += 8;
7804 /* 64-bit plabel sequence. */
7805 else if (TARGET_64BIT && !local_call)
7806 length += sibcall ? 28 : 24;
7808 /* non-pic long absolute branch sequence. */
7809 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7810 length += 12;
7812 /* long pc-relative branch sequence. */
7813 else if (TARGET_LONG_PIC_SDIFF_CALL
7814 || (TARGET_GAS && !TARGET_SOM
7815 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7817 length += 20;
7819 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7820 length += 8;
7823 /* 32-bit plabel sequence. */
7824 else
7826 length += 32;
7828 if (TARGET_SOM)
7829 length += length_fp_args (insn);
7831 if (flag_pic)
7832 length += 4;
7834 if (!TARGET_PA_20)
7836 if (!sibcall)
7837 length += 8;
7839 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7840 length += 8;
7844 return length;
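/* Worked example: a 32-bit, !TARGET_PA_20, non-local PIC call that
   falls through to the plabel case above is estimated at 32 + 4 (PIC)
   + 8 (non-sibcall) + 8 (space registers) = 52 bytes, plus the FP
   argument copies when TARGET_SOM.  */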
7847 /* INSN is a function call.
7849 CALL_DEST is the routine we are calling. */
7851 const char *
7852 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7854 int seq_length = dbr_sequence_length ();
7855 tree call_decl = SYMBOL_REF_DECL (call_dest);
7856 int local_call = call_decl && targetm.binds_local_p (call_decl);
7857 rtx xoperands[2];
7859 xoperands[0] = call_dest;
7861 /* Handle the common case where we're sure that the branch will reach
7862 the beginning of the "$CODE$" subspace. This is the beginning of
7863 the current function if we are in a named section. */
7864 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7866 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7867 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7869 else
7871 if (TARGET_64BIT && !local_call)
7873 /* ??? As far as I can tell, the HP linker doesn't support the
7874 long pc-relative sequence described in the 64-bit runtime
7875 architecture. So, we use a slightly longer indirect call. */
7876 xoperands[0] = pa_get_deferred_plabel (call_dest);
7877 xoperands[1] = gen_label_rtx ();
7879 /* If this isn't a sibcall, we put the load of %r27 into the
7880 delay slot. We can't do this in a sibcall as we don't
7881 have a second call-clobbered scratch register available.
7882 We don't need to do anything when generating fast indirect
7883 calls. */
7884 if (seq_length != 0 && !sibcall)
7886 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7887 optimize, 0, NULL);
7889 /* Now delete the delay insn. */
7890 SET_INSN_DELETED (NEXT_INSN (insn));
7891 seq_length = 0;
7894 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7895 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7896 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7898 if (sibcall)
7900 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7901 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7902 output_asm_insn ("bve (%%r1)", xoperands);
7904 else
7906 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7907 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7908 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7909 seq_length = 1;
7912 else
7914 int indirect_call = 0;
7916 /* Emit a long call. There are several different sequences
7917 of increasing length and complexity. In most cases,
7918 they don't allow an instruction in the delay slot. */
7919 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7920 && !TARGET_LONG_PIC_SDIFF_CALL
7921 && !(TARGET_GAS && !TARGET_SOM
7922 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7923 && !TARGET_64BIT)
7924 indirect_call = 1;
7926 if (seq_length != 0
7927 && !sibcall
7928 && (!TARGET_PA_20
7929 || indirect_call
7930 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7932 /* A non-jump insn in the delay slot. By definition we can
7933 emit this insn before the call (and in fact before argument
7934 relocation). */
7935 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7936 NULL);
7938 /* Now delete the delay insn. */
7939 SET_INSN_DELETED (NEXT_INSN (insn));
7940 seq_length = 0;
7943 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7945 /* This is the best sequence for making long calls in
7946 non-pic code. Unfortunately, GNU ld doesn't provide
7947 the stub needed for external calls, and GAS's support
7948 for this with the SOM linker is buggy. It is safe
7949 to use this for local calls. */
7950 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7951 if (sibcall)
7952 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7953 else
7955 if (TARGET_PA_20)
7956 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7957 xoperands);
7958 else
7959 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7961 output_asm_insn ("copy %%r31,%%r2", xoperands);
7962 seq_length = 1;
7965 else
7967 if (TARGET_LONG_PIC_SDIFF_CALL)
7969 /* The HP assembler and linker can handle relocations
7970 for the difference of two symbols. The HP assembler
7971 recognizes the sequence as a pc-relative call and
7972 the linker provides stubs when needed. */
7973 xoperands[1] = gen_label_rtx ();
7974 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7975 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7976 targetm.asm_out.internal_label (asm_out_file, "L",
7977 CODE_LABEL_NUMBER (xoperands[1]));
7978 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7980 else if (TARGET_GAS && !TARGET_SOM
7981 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7983 /* GAS currently can't generate the relocations that
7984 are needed for the SOM linker under HP-UX using this
7985 sequence. The GNU linker doesn't generate the stubs
7986 that are needed for external calls on TARGET_ELF32
7987 with this sequence. For now, we have to use a
7988 longer plabel sequence when using GAS. */
7989 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7990 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7991 xoperands);
7992 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7993 xoperands);
7995 else
7997 /* Emit a long plabel-based call sequence. This is
7998 essentially an inline implementation of $$dyncall.
7999 We don't actually try to call $$dyncall as this is
8000 as difficult as calling the function itself. */
8001 xoperands[0] = pa_get_deferred_plabel (call_dest);
8002 xoperands[1] = gen_label_rtx ();
8004 /* Since the call is indirect, FP arguments in registers
8005 need to be copied to the general registers. Then, the
8006 argument relocation stub will copy them back. */
8007 if (TARGET_SOM)
8008 copy_fp_args (insn);
8010 if (flag_pic)
8012 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8013 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8014 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8016 else
8018 output_asm_insn ("addil LR'%0-$global$,%%r27",
8019 xoperands);
8020 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8021 xoperands);
8024 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8025 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8026 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8027 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8029 if (!sibcall && !TARGET_PA_20)
8031 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8032 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8033 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8034 else
8035 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8039 if (TARGET_PA_20)
8041 if (sibcall)
8042 output_asm_insn ("bve (%%r1)", xoperands);
8043 else
8045 if (indirect_call)
8047 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8048 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8049 seq_length = 1;
8051 else
8052 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8055 else
8057 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8058 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8059 xoperands);
8061 if (sibcall)
8063 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8064 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8065 else
8066 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8068 else
8070 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8071 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8072 else
8073 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8075 if (indirect_call)
8076 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8077 else
8078 output_asm_insn ("copy %%r31,%%r2", xoperands);
8079 seq_length = 1;
8086 if (seq_length == 0)
8087 output_asm_insn ("nop", xoperands);
8089 return "";
8092 /* Return the attribute length of the indirect call instruction INSN.
8093 The length must match the code generated by pa_output_indirect_call.
8094 The returned length includes the delay slot. Currently, the delay
8095 slot of an indirect call sequence is not exposed and it is used by
8096 the sequence itself. */
8099 pa_attr_length_indirect_call (rtx_insn *insn)
8101 unsigned long distance = -1;
8102 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8104 if (INSN_ADDRESSES_SET_P ())
8106 distance = (total + insn_current_reference_address (insn));
8107 if (distance < total)
8108 distance = -1;
8111 if (TARGET_64BIT)
8112 return 12;
8114 if (TARGET_FAST_INDIRECT_CALLS
8115 || (!TARGET_LONG_CALLS
8116 && !TARGET_PORTABLE_RUNTIME
8117 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8118 || distance < MAX_PCREL17F_OFFSET)))
8119 return 8;
8121 if (flag_pic)
8122 return 20;
8124 if (TARGET_PORTABLE_RUNTIME)
8125 return 16;
8127 /* Out of reach, can use ble. */
8128 return 12;
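/* These values select the output template in pa_output_indirect_call
   below: 8 bytes for the direct {bl|b,l} to $$dyncall, 12 for the
   ldil/ble/copy sequence, 16 for the portable runtime sequence and 20
   for the long PIC sequence.  */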
8131 const char *
8132 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8134 rtx xoperands[1];
8136 if (TARGET_64BIT)
8138 xoperands[0] = call_dest;
8139 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8140 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8141 return "";
8144 /* First the special case for kernels, level 0 systems, etc. */
8145 if (TARGET_FAST_INDIRECT_CALLS)
8146 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8148 /* Now the normal case -- we can reach $$dyncall directly or
8149 we're sure that we can get there via a long-branch stub.
8151 No need to check target flags as the length uniquely identifies
8152 the remaining cases. */
8153 if (pa_attr_length_indirect_call (insn) == 8)
8155 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8156 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8157 variant of the B,L instruction can't be used on the SOM target. */
8158 if (TARGET_PA_20 && !TARGET_SOM)
8159 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8160 else
8161 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8164 /* Long millicode call, but we are not generating PIC or portable runtime
8165 code. */
8166 if (pa_attr_length_indirect_call (insn) == 12)
8167 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8169 /* Long millicode call for portable runtime. */
8170 if (pa_attr_length_indirect_call (insn) == 16)
8171 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8173 /* We need a long PIC call to $$dyncall. */
8174 xoperands[0] = NULL_RTX;
8175 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8176 if (TARGET_SOM || !TARGET_GAS)
8178 xoperands[0] = gen_label_rtx ();
8179 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8180 targetm.asm_out.internal_label (asm_out_file, "L",
8181 CODE_LABEL_NUMBER (xoperands[0]));
8182 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8184 else
8186 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8187 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8188 xoperands);
8190 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8191 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8192 return "";
8195 /* In HPUX 8.0's shared library scheme, special relocations are needed
8196 for function labels if they might be passed to a function
8197 in a shared library (because shared libraries don't live in code
8198 space), and special magic is needed to construct their address. */
8200 void
8201 pa_encode_label (rtx sym)
8203 const char *str = XSTR (sym, 0);
8204 int len = strlen (str) + 1;
8205 char *newstr, *p;
8207 p = newstr = XALLOCAVEC (char, len + 1);
8208 *p++ = '@';
8209 strcpy (p, str);
8211 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8214 static void
8215 pa_encode_section_info (tree decl, rtx rtl, int first)
8217 int old_referenced = 0;
8219 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8220 old_referenced
8221 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8223 default_encode_section_info (decl, rtl, first);
8225 if (first && TEXT_SPACE_P (decl))
8227 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8228 if (TREE_CODE (decl) == FUNCTION_DECL)
8229 pa_encode_label (XEXP (rtl, 0));
8231 else if (old_referenced)
8232 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8235 /* This is sort of inverse to pa_encode_section_info. */
8237 static const char *
8238 pa_strip_name_encoding (const char *str)
8240 str += (*str == '@');
8241 str += (*str == '*');
8242 return str;
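/* For example, the "@" added by pa_encode_label and the "*" used to
   mark literal assembler names are both removed, so "@foo" and "*foo"
   each yield "foo".  */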
8245 /* Returns 1 if OP is a function label involved in a simple addition
8246 with a constant. Used to keep certain patterns from matching
8247 during instruction combination. */
8249 pa_is_function_label_plus_const (rtx op)
8251 /* Strip off any CONST. */
8252 if (GET_CODE (op) == CONST)
8253 op = XEXP (op, 0);
8255 return (GET_CODE (op) == PLUS
8256 && function_label_operand (XEXP (op, 0), VOIDmode)
8257 && GET_CODE (XEXP (op, 1)) == CONST_INT);
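/* For example, this accepts an address of the form
   (const (plus (symbol_ref "@f") (const_int 4))), where the "@" marks
   a function label encoded by pa_encode_label.  */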
8260 /* Output assembly code for a thunk to FUNCTION. */
8262 static void
8263 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8264 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8265 tree function)
8267 static unsigned int current_thunk_number;
8268 int val_14 = VAL_14_BITS_P (delta);
8269 unsigned int old_last_address = last_address, nbytes = 0;
8270 char label[16];
8271 rtx xoperands[4];
8273 xoperands[0] = XEXP (DECL_RTL (function), 0);
8274 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8275 xoperands[2] = GEN_INT (delta);
8277 final_start_function (emit_barrier (), file, 1);
8279 /* Output the thunk. We know that the function is in the same
8280 translation unit (i.e., the same space) as the thunk, and that
8281 thunks are output after their method. Thus, we don't need an
8282 external branch to reach the function. With SOM and GAS,
8283 functions and thunks are effectively in different sections.
8284 Thus, we can always use an IA-relative branch and the linker
8285 will add a long branch stub if necessary.
8287 However, we have to be careful when generating PIC code on the
8288 SOM port to ensure that the sequence does not transfer to an
8289 import stub for the target function as this could clobber the
8290 return value saved at SP-24. This would also apply to the
8291 32-bit linux port if the multi-space model is implemented. */
8292 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8293 && !(flag_pic && TREE_PUBLIC (function))
8294 && (TARGET_GAS || last_address < 262132))
8295 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8296 && ((targetm_common.have_named_sections
8297 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8298 /* The GNU 64-bit linker has rather poor stub management.
8299 So, we use a long branch from thunks that aren't in
8300 the same section as the target function. */
8301 && ((!TARGET_64BIT
8302 && (DECL_SECTION_NAME (thunk_fndecl)
8303 != DECL_SECTION_NAME (function)))
8304 || ((DECL_SECTION_NAME (thunk_fndecl)
8305 == DECL_SECTION_NAME (function))
8306 && last_address < 262132)))
8307 /* In this case, we need to be able to reach the start of
8308 the stub table even though the function is likely closer
8309 and can be jumped to directly. */
8310 || (targetm_common.have_named_sections
8311 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8312 && DECL_SECTION_NAME (function) == NULL
8313 && total_code_bytes < MAX_PCREL17F_OFFSET)
8314 /* Likewise. */
8315 || (!targetm_common.have_named_sections
8316 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8318 if (!val_14)
8319 output_asm_insn ("addil L'%2,%%r26", xoperands);
8321 output_asm_insn ("b %0", xoperands);
8323 if (val_14)
8325 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8326 nbytes += 8;
8328 else
8330 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8331 nbytes += 12;
8334 else if (TARGET_64BIT)
8336 /* We only have one call-clobbered scratch register, so we can't
8337 make use of the delay slot if delta doesn't fit in 14 bits. */
8338 if (!val_14)
8340 output_asm_insn ("addil L'%2,%%r26", xoperands);
8341 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8344 output_asm_insn ("b,l .+8,%%r1", xoperands);
8346 if (TARGET_GAS)
8348 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8349 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8351 else
8353 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8354 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8357 if (val_14)
8359 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8360 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8361 nbytes += 20;
8363 else
8365 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8366 nbytes += 24;
8369 else if (TARGET_PORTABLE_RUNTIME)
8371 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8372 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8374 if (!val_14)
8375 output_asm_insn ("addil L'%2,%%r26", xoperands);
8377 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8379 if (val_14)
8381 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8382 nbytes += 16;
8384 else
8386 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8387 nbytes += 20;
8390 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8392 /* The function is accessible from outside this module. The only
8393 way to avoid an import stub between the thunk and function is to
8394 call the function directly with an indirect sequence similar to
8395 that used by $$dyncall. This is possible because $$dyncall acts
8396 as the import stub in an indirect call. */
8397 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8398 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8399 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8400 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8401 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8402 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8403 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8404 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8405 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8407 if (!val_14)
8409 output_asm_insn ("addil L'%2,%%r26", xoperands);
8410 nbytes += 4;
8413 if (TARGET_PA_20)
8415 output_asm_insn ("bve (%%r22)", xoperands);
8416 nbytes += 36;
8418 else if (TARGET_NO_SPACE_REGS)
8420 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8421 nbytes += 36;
8423 else
8425 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8426 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8427 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8428 nbytes += 44;
8431 if (val_14)
8432 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8433 else
8434 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8436 else if (flag_pic)
8438 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8440 if (TARGET_SOM || !TARGET_GAS)
8442 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8443 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8445 else
8447 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8448 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8451 if (!val_14)
8452 output_asm_insn ("addil L'%2,%%r26", xoperands);
8454 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8456 if (val_14)
8458 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8459 nbytes += 20;
8461 else
8463 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8464 nbytes += 24;
8467 else
8469 if (!val_14)
8470 output_asm_insn ("addil L'%2,%%r26", xoperands);
8472 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8473 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8475 if (val_14)
8477 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8478 nbytes += 12;
8480 else
8482 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8483 nbytes += 16;
8487 final_end_function ();
8489 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8491 switch_to_section (data_section);
8492 output_asm_insn (".align 4", xoperands);
8493 ASM_OUTPUT_LABEL (file, label);
8494 output_asm_insn (".word P'%0", xoperands);
8497 current_thunk_number++;
8498 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8499 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8500 last_address += nbytes;
8501 if (old_last_address > last_address)
8502 last_address = UINT_MAX;
8503 update_total_code_bytes (nbytes);
8506 /* Only direct calls to static functions are allowed to be sibling (tail)
8507 call optimized.
8509 This restriction is necessary because some linker generated stubs will
8510 store return pointers into rp' in some cases which might clobber a
8511 live value already in rp'.
8513 In a sibcall the current function and the target function share stack
8514 space. Thus if the path to the current function and the path to the
8515 target function save a value in rp', they save the value into the
8516 same stack slot, which has undesirable consequences.
8518 Because of the deferred binding nature of shared libraries any function
8519 with external scope could be in a different load module and thus require
8520 rp' to be saved when calling that function. So sibcall optimizations
8521 can only be safe for static functions.
8523 Note that GCC never needs return value relocations, so we don't have to
8524 worry about static calls with return value relocations (which require
8525 saving rp').
8527 It is safe to perform a sibcall optimization when the target function
8528 will never return. */
8529 static bool
8530 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8532 if (TARGET_PORTABLE_RUNTIME)
8533 return false;
8535 /* Sibcalls are not ok because the arg pointer register is not a fixed
8536 register. This prevents the sibcall optimization from occurring. In
8537 addition, there are problems with stub placement using GNU ld. This
8538 is because a normal sibcall branch uses a 17-bit relocation while
8539 a regular call branch uses a 22-bit relocation. As a result, more
8540 care needs to be taken in the placement of long-branch stubs. */
8541 if (TARGET_64BIT)
8542 return false;
8544 /* Sibcalls are only ok within a translation unit. */
8545 return (decl && !TREE_PUBLIC (decl));
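/* Thus, for example, a tail call to a file-scope static helper may be
   optimized, while a tail call to any externally visible function is
   rejected since it could resolve to a different load module.  */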
8548 /* ??? Addition is not commutative on the PA due to the weird implicit
8549 space register selection rules for memory addresses. Therefore, we
8550 don't consider a + b == b + a, as this might be inside a MEM. */
8551 static bool
8552 pa_commutative_p (const_rtx x, int outer_code)
8554 return (COMMUTATIVE_P (x)
8555 && (TARGET_NO_SPACE_REGS
8556 || (outer_code != UNKNOWN && outer_code != MEM)
8557 || GET_CODE (x) != PLUS));
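/* Roughly, in (mem (plus (reg A) (reg B))) the implicit space register
   is selected from the base operand, so swapping A and B could change
   which space is referenced when space registers are in use.  */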
8560 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8561 use in fmpyadd instructions. */
8563 pa_fmpyaddoperands (rtx *operands)
8565 machine_mode mode = GET_MODE (operands[0]);
8567 /* Must be a floating point mode. */
8568 if (mode != SFmode && mode != DFmode)
8569 return 0;
8571 /* All modes must be the same. */
8572 if (! (mode == GET_MODE (operands[1])
8573 && mode == GET_MODE (operands[2])
8574 && mode == GET_MODE (operands[3])
8575 && mode == GET_MODE (operands[4])
8576 && mode == GET_MODE (operands[5])))
8577 return 0;
8579 /* All operands must be registers. */
8580 if (! (GET_CODE (operands[1]) == REG
8581 && GET_CODE (operands[2]) == REG
8582 && GET_CODE (operands[3]) == REG
8583 && GET_CODE (operands[4]) == REG
8584 && GET_CODE (operands[5]) == REG))
8585 return 0;
8587 /* Only 2 real operands to the addition. One of the input operands must
8588 be the same as the output operand. */
8589 if (! rtx_equal_p (operands[3], operands[4])
8590 && ! rtx_equal_p (operands[3], operands[5]))
8591 return 0;
8593 /* Inout operand of add cannot conflict with any operands from multiply. */
8594 if (rtx_equal_p (operands[3], operands[0])
8595 || rtx_equal_p (operands[3], operands[1])
8596 || rtx_equal_p (operands[3], operands[2]))
8597 return 0;
8599 /* multiply cannot feed into addition operands. */
8600 if (rtx_equal_p (operands[4], operands[0])
8601 || rtx_equal_p (operands[5], operands[0]))
8602 return 0;
8604 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8605 if (mode == SFmode
8606 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8607 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8608 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8609 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8610 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8611 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8612 return 0;
8614 /* Passed. Operands are suitable for fmpyadd. */
8615 return 1;
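/* Example of the conflict rules above: if the addition is z = z + w
   (operands[3] == operands[4] == z), then z must not be the multiply's
   destination or either multiply input, and neither z nor w may be
   written by the multiply; otherwise the two halves of the fmpyadd
   could not execute in parallel.  */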
8618 #if !defined(USE_COLLECT2)
8619 static void
8620 pa_asm_out_constructor (rtx symbol, int priority)
8622 if (!function_label_operand (symbol, VOIDmode))
8623 pa_encode_label (symbol);
8625 #ifdef CTORS_SECTION_ASM_OP
8626 default_ctor_section_asm_out_constructor (symbol, priority);
8627 #else
8628 # ifdef TARGET_ASM_NAMED_SECTION
8629 default_named_section_asm_out_constructor (symbol, priority);
8630 # else
8631 default_stabs_asm_out_constructor (symbol, priority);
8632 # endif
8633 #endif
8636 static void
8637 pa_asm_out_destructor (rtx symbol, int priority)
8639 if (!function_label_operand (symbol, VOIDmode))
8640 pa_encode_label (symbol);
8642 #ifdef DTORS_SECTION_ASM_OP
8643 default_dtor_section_asm_out_destructor (symbol, priority);
8644 #else
8645 # ifdef TARGET_ASM_NAMED_SECTION
8646 default_named_section_asm_out_destructor (symbol, priority);
8647 # else
8648 default_stabs_asm_out_destructor (symbol, priority);
8649 # endif
8650 #endif
8652 #endif
8654 /* This function places uninitialized global data in the bss section.
8655 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8656 function on the SOM port to prevent uninitialized global data from
8657 being placed in the data section. */
8659 void
8660 pa_asm_output_aligned_bss (FILE *stream,
8661 const char *name,
8662 unsigned HOST_WIDE_INT size,
8663 unsigned int align)
8665 switch_to_section (bss_section);
8666 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8668 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8669 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8670 #endif
8672 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8673 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8674 #endif
8676 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8677 ASM_OUTPUT_LABEL (stream, name);
8678 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8681 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8682 that doesn't allow the alignment of global common storage to be directly
8683 specified. The SOM linker aligns common storage based on the rounded
8684 value of the NUM_BYTES parameter in the .comm directive. It's not
8685 possible to use the .align directive as it doesn't affect the alignment
8686 of the label associated with a .comm directive. */
8688 void
8689 pa_asm_output_aligned_common (FILE *stream,
8690 const char *name,
8691 unsigned HOST_WIDE_INT size,
8692 unsigned int align)
8694 unsigned int max_common_align;
8696 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8697 if (align > max_common_align)
8699 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8700 "for global common data. Using %u",
8701 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8702 align = max_common_align;
8705 switch_to_section (bss_section);
8707 assemble_name (stream, name);
8708 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8709 MAX (size, align / BITS_PER_UNIT));
8712 /* We can't use .comm for local common storage as the SOM linker effectively
8713 treats the symbol as universal and uses the same storage for local symbols
8714 with the same name in different object files. The .block directive
8715 reserves an uninitialized block of storage. However, it's not common
8716 storage. Fortunately, GCC never requests common storage with the same
8717 name in any given translation unit. */
8719 void
8720 pa_asm_output_aligned_local (FILE *stream,
8721 const char *name,
8722 unsigned HOST_WIDE_INT size,
8723 unsigned int align)
8725 switch_to_section (bss_section);
8726 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8728 #ifdef LOCAL_ASM_OP
8729 fprintf (stream, "%s", LOCAL_ASM_OP);
8730 assemble_name (stream, name);
8731 fprintf (stream, "\n");
8732 #endif
8734 ASM_OUTPUT_LABEL (stream, name);
8735 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8738 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8739 use in fmpysub instructions. */
8741 pa_fmpysuboperands (rtx *operands)
8743 machine_mode mode = GET_MODE (operands[0]);
8745 /* Must be a floating point mode. */
8746 if (mode != SFmode && mode != DFmode)
8747 return 0;
8749 /* All modes must be the same. */
8750 if (! (mode == GET_MODE (operands[1])
8751 && mode == GET_MODE (operands[2])
8752 && mode == GET_MODE (operands[3])
8753 && mode == GET_MODE (operands[4])
8754 && mode == GET_MODE (operands[5])))
8755 return 0;
8757 /* All operands must be registers. */
8758 if (! (GET_CODE (operands[1]) == REG
8759 && GET_CODE (operands[2]) == REG
8760 && GET_CODE (operands[3]) == REG
8761 && GET_CODE (operands[4]) == REG
8762 && GET_CODE (operands[5]) == REG))
8763 return 0;
8765 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8766 operation, so operands[4] must be the same as operands[3]. */
8767 if (! rtx_equal_p (operands[3], operands[4]))
8768 return 0;
8770 /* multiply cannot feed into subtraction. */
8771 if (rtx_equal_p (operands[5], operands[0]))
8772 return 0;
8774 /* Inout operand of sub cannot conflict with any operands from multiply. */
8775 if (rtx_equal_p (operands[3], operands[0])
8776 || rtx_equal_p (operands[3], operands[1])
8777 || rtx_equal_p (operands[3], operands[2]))
8778 return 0;
8780 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8781 if (mode == SFmode
8782 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8783 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8784 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8785 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8786 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8787 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8788 return 0;
8790 /* Passed. Operands are suitable for fmpysub. */
8791 return 1;
8794 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8795 constants for a MULT embedded inside a memory address. */
8797 pa_mem_shadd_constant_p (int val)
8799 if (val == 2 || val == 4 || val == 8)
8800 return 1;
8801 else
8802 return 0;
8805 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8806 constants for shadd instructions. */
8808 pa_shadd_constant_p (int val)
8810 if (val == 1 || val == 2 || val == 3)
8811 return 1;
8812 else
8813 return 0;
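/* The two predicates above are related: a shadd shift count of 1, 2
   or 3 corresponds to a scale factor of 2, 4 or 8 (i.e., 1 << count)
   when the multiply appears inside a memory address.  */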
8816 /* Return TRUE if INSN branches forward. */
8818 static bool
8819 forward_branch_p (rtx_insn *insn)
8821 rtx lab = JUMP_LABEL (insn);
8823 /* The INSN must have a jump label. */
8824 gcc_assert (lab != NULL_RTX);
8826 if (INSN_ADDRESSES_SET_P ())
8827 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8829 while (insn)
8831 if (insn == lab)
8832 return true;
8833 else
8834 insn = NEXT_INSN (insn);
8837 return false;
8840 /* Output an unconditional move and branch insn. */
8842 const char *
8843 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8845 int length = get_attr_length (insn);
8847 /* These are the cases in which we win. */
8848 if (length == 4)
8849 return "mov%I1b,tr %1,%0,%2";
8851 /* None of the following cases win, but they don't lose either. */
8852 if (length == 8)
8854 if (dbr_sequence_length () == 0)
8856 /* Nothing in the delay slot, fake it by putting the combined
8857 insn (the copy or add) in the delay slot of a bl. */
8858 if (GET_CODE (operands[1]) == CONST_INT)
8859 return "b %2\n\tldi %1,%0";
8860 else
8861 return "b %2\n\tcopy %1,%0";
8863 else
8865 /* Something in the delay slot, but we've got a long branch. */
8866 if (GET_CODE (operands[1]) == CONST_INT)
8867 return "ldi %1,%0\n\tb %2";
8868 else
8869 return "copy %1,%0\n\tb %2";
8873 if (GET_CODE (operands[1]) == CONST_INT)
8874 output_asm_insn ("ldi %1,%0", operands);
8875 else
8876 output_asm_insn ("copy %1,%0", operands);
8877 return pa_output_lbranch (operands[2], insn, 1);
8880 /* Output an unconditional add and branch insn. */
8882 const char *
8883 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8885 int length = get_attr_length (insn);
8887 /* To make life easy we want operand0 to be the shared input/output
8888 operand and operand1 to be the readonly operand. */
8889 if (operands[0] == operands[1])
8890 operands[1] = operands[2];
8892 /* These are the cases in which we win. */
8893 if (length == 4)
8894 return "add%I1b,tr %1,%0,%3";
8896 /* None of the following cases win, but they don't lose either. */
8897 if (length == 8)
8899 if (dbr_sequence_length () == 0)
8900 /* Nothing in the delay slot, fake it by putting the combined
8901 insn (the copy or add) in the delay slot of a bl. */
8902 return "b %3\n\tadd%I1 %1,%0,%0";
8903 else
8904 /* Something in the delay slot, but we've got a long branch. */
8905 return "add%I1 %1,%0,%0\n\tb %3";
8908 output_asm_insn ("add%I1 %1,%0,%0", operands);
8909 return pa_output_lbranch (operands[3], insn, 1);
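/* For instance, in the length 4 case an "add %r5,%r4,%r4" paired with
   an unconditional branch to L becomes the single insn
   "addb,tr %r5,%r4,L", the always-true condition making the branch
   unconditional.  */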
8912 /* We use this hook to perform a PA specific optimization which is difficult
8913 to do in earlier passes. */
8915 static void
8916 pa_reorg (void)
8918 remove_useless_addtr_insns (1);
8920 if (pa_cpu < PROCESSOR_8000)
8921 pa_combine_instructions ();
8924 /* The PA has a number of odd instructions which can perform multiple
8925 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8926 it may be profitable to combine two instructions into one instruction
8927 with two outputs. It's not profitable on PA2.0 machines because the
8928 two outputs would take two slots in the reorder buffers.
8930 This routine finds instructions which can be combined and combines
8931 them. We only support some of the potential combinations, and we
8932 only try common ways to find suitable instructions.
8934 * addb can add two registers or a register and a small integer
8935 and jump to a nearby (+-8k) location. Normally the jump to the
8936 nearby location is conditional on the result of the add, but by
8937 using the "true" condition we can make the jump unconditional.
8938 Thus addb can perform two independent operations in one insn.
8940 * movb is similar to addb in that it can perform a reg->reg
8941 or small immediate->reg copy and jump to a nearby (+-8k) location.
8943 * fmpyadd and fmpysub can perform a FP multiply and either an
8944 FP add or FP sub if the operands of the multiply and add/sub are
8945 independent (there are other minor restrictions). Note both
8946 the fmpy and fadd/fsub can in theory move to better spots according
8947 to data dependencies, but for now we require the fmpy stay at a
8948 fixed location.
8950 * Many of the memory operations can perform pre & post updates
8951 of index registers. GCC's pre/post increment/decrement addressing
8952 is far too simple to take advantage of all the possibilities. This
8953 pass may not be suitable since those insns may not be independent.
8955 * comclr can compare two ints or an int and a register, nullify
8956 the following instruction and zero some other register. This
8957 is more difficult to use as it's harder to find an insn which
8958 will generate a comclr than finding something like an unconditional
8959 branch. (conditional moves & long branches create comclr insns).
8961 * Most arithmetic operations can conditionally skip the next
8962 instruction. They can be viewed as "perform this operation
8963 and conditionally jump to this nearby location" (where nearby
8964 is an insn away). These are difficult to use due to the
8965 branch length restrictions. */
8967 static void
8968 pa_combine_instructions (void)
8970 rtx_insn *anchor;
8972 /* This can get expensive since the basic algorithm is on the
8973 order of O(n^2) (or worse). Only do it for -O2 or higher
8974 levels of optimization. */
8975 if (optimize < 2)
8976 return;
8978 /* Walk down the list of insns looking for "anchor" insns which
8979 may be combined with "floating" insns. As the name implies,
8980 "anchor" instructions don't move, while "floating" insns may
8981 move around. */
8982 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8983 rtx_insn *new_rtx = make_insn_raw (par);
8985 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8987 enum attr_pa_combine_type anchor_attr;
8988 enum attr_pa_combine_type floater_attr;
8990 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8991 Also ignore any special USE insns. */
8992 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8993 || GET_CODE (PATTERN (anchor)) == USE
8994 || GET_CODE (PATTERN (anchor)) == CLOBBER)
8995 continue;
8997 anchor_attr = get_attr_pa_combine_type (anchor);
8998 /* See if anchor is an insn suitable for combination. */
8999 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9000 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9001 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9002 && ! forward_branch_p (anchor)))
9004 rtx_insn *floater;
9006 for (floater = PREV_INSN (anchor);
9007 floater;
9008 floater = PREV_INSN (floater))
9010 if (NOTE_P (floater)
9011 || (NONJUMP_INSN_P (floater)
9012 && (GET_CODE (PATTERN (floater)) == USE
9013 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9014 continue;
9016 /* Anything except a regular INSN will stop our search. */
9017 if (! NONJUMP_INSN_P (floater))
9019 floater = NULL;
9020 break;
9023 /* See if FLOATER is suitable for combination with the
9024 anchor. */
9025 floater_attr = get_attr_pa_combine_type (floater);
9026 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9027 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9028 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9029 && floater_attr == PA_COMBINE_TYPE_FMPY))
9031 /* If ANCHOR and FLOATER can be combined, then we're
9032 done with this pass. */
9033 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9034 SET_DEST (PATTERN (floater)),
9035 XEXP (SET_SRC (PATTERN (floater)), 0),
9036 XEXP (SET_SRC (PATTERN (floater)), 1)))
9037 break;
9040 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9041 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9043 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9045 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9046 SET_DEST (PATTERN (floater)),
9047 XEXP (SET_SRC (PATTERN (floater)), 0),
9048 XEXP (SET_SRC (PATTERN (floater)), 1)))
9049 break;
9051 else
9053 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9054 SET_DEST (PATTERN (floater)),
9055 SET_SRC (PATTERN (floater)),
9056 SET_SRC (PATTERN (floater))))
9057 break;
9062 /* If we didn't find anything on the backwards scan try forwards. */
9063 if (!floater
9064 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9065 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9067 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9069 if (NOTE_P (floater)
9070 || (NONJUMP_INSN_P (floater)
9071 && (GET_CODE (PATTERN (floater)) == USE
9072 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9074 continue;
9076 /* Anything except a regular INSN will stop our search. */
9077 if (! NONJUMP_INSN_P (floater))
9079 floater = NULL;
9080 break;
9083 /* See if FLOATER is suitable for combination with the
9084 anchor. */
9085 floater_attr = get_attr_pa_combine_type (floater);
9086 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9087 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9088 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9089 && floater_attr == PA_COMBINE_TYPE_FMPY))
9091 /* If ANCHOR and FLOATER can be combined, then we're
9092 done with this pass. */
9093 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9094 SET_DEST (PATTERN (floater)),
9095 XEXP (SET_SRC (PATTERN (floater)), 0),
9097 XEXP (SET_SRC (PATTERN (floater)), 1)))
9099 break;
9104 /* FLOATER will be nonzero if we found a suitable floating
9105 insn for combination with ANCHOR. */
9106 if (floater
9107 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9108 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9110 /* Emit the new instruction and delete the old anchor. */
9111 emit_insn_before (gen_rtx_PARALLEL
9112 (VOIDmode,
9113 gen_rtvec (2, PATTERN (anchor),
9114 PATTERN (floater))),
9115 anchor);
9117 SET_INSN_DELETED (anchor);
9119 /* Emit a special USE insn for FLOATER, then delete
9120 the floating insn. */
9121 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9122 delete_insn (floater);
9124 continue;
9126 else if (floater
9127 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9129 rtx temp;
9130 /* Emit the new_jump instruction and delete the old anchor. */
9131 temp
9132 = emit_jump_insn_before (gen_rtx_PARALLEL
9133 (VOIDmode,
9134 gen_rtvec (2, PATTERN (anchor),
9135 PATTERN (floater))),
9136 anchor);
9138 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9139 SET_INSN_DELETED (anchor);
9141 /* Emit a special USE insn for FLOATER, then delete
9142 the floating insn. */
9143 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9144 delete_insn (floater);
9145 continue;
9151 static int
9152 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9153 int reversed, rtx dest,
9154 rtx src1, rtx src2)
9156 int insn_code_number;
9157 rtx_insn *start, *end;
9159 /* Create a PARALLEL with the patterns of ANCHOR and
9160 FLOATER, try to recognize it, then test constraints
9161 for the resulting pattern.
9163 If the pattern doesn't match or the constraints
9164 aren't met keep searching for a suitable floater
9165 insn. */
9166 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9167 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9168 INSN_CODE (new_rtx) = -1;
9169 insn_code_number = recog_memoized (new_rtx);
9170 basic_block bb = BLOCK_FOR_INSN (anchor);
9171 if (insn_code_number < 0
9172 || (extract_insn (new_rtx),
9173 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9174 return 0;
9176 if (reversed)
9178 start = anchor;
9179 end = floater;
9181 else
9183 start = floater;
9184 end = anchor;
9187 /* There are up to three operands to consider. One
9188 output and two inputs.
9190 The output must not be used between FLOATER & ANCHOR
9191 exclusive. The inputs must not be set between
9192 FLOATER and ANCHOR exclusive. */
9194 if (reg_used_between_p (dest, start, end))
9195 return 0;
9197 if (reg_set_between_p (src1, start, end))
9198 return 0;
9200 if (reg_set_between_p (src2, start, end))
9201 return 0;
9203 /* If we get here, then everything is good. */
9204 return 1;
9207 /* Return nonzero if references for INSN are delayed.
9209 Millicode insns are actually function calls with some special
9210 constraints on arguments and register usage.
9212 Millicode calls always expect their arguments in the integer argument
9213 registers, and always return their result in %r29 (ret1). They
9214 are expected to clobber their arguments, %r1, %r29, and the return
9215 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9217 This function tells reorg that the references to arguments and
9218 millicode calls do not appear to happen until after the millicode call.
9219 This allows reorg to put insns which set the argument registers into the
9220 delay slot of the millicode call -- thus they act more like traditional
9221 CALL_INSNs.
9223 Note we cannot consider side effects of the insn to be delayed because
9224 the branch and link insn will clobber the return pointer. If we happened
9225 to use the return pointer in the delay slot of the call, then we lose.
9227 get_attr_type will try to recognize the given insn, so make sure to
9228 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9229 in particular. */
9231 pa_insn_refs_are_delayed (rtx_insn *insn)
9233 return ((NONJUMP_INSN_P (insn)
9234 && GET_CODE (PATTERN (insn)) != SEQUENCE
9235 && GET_CODE (PATTERN (insn)) != USE
9236 && GET_CODE (PATTERN (insn)) != CLOBBER
9237 && get_attr_type (insn) == TYPE_MILLI));
9240 /* Promote the return value, but not the arguments. */
9242 static machine_mode
9243 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9244 machine_mode mode,
9245 int *punsignedp ATTRIBUTE_UNUSED,
9246 const_tree fntype ATTRIBUTE_UNUSED,
9247 int for_return)
9249 if (for_return == 0)
9250 return mode;
9251 return promote_mode (type, mode, punsignedp);
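/* So, for example, a function declared to return "short" has its
   return value promoted to word_mode, while a "short" argument is
   passed unpromoted.  */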
9254 /* On the HP-PA the value is found in register(s) 28(-29), unless
9255 the mode is SF or DF. Then the value is returned in fr4 (32).
9257 This must perform the same promotions as PROMOTE_MODE, else promoting
9258 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9260 Small structures must be returned in a PARALLEL on PA64 in order
9261 to match the HP Compiler ABI. */
9263 static rtx
9264 pa_function_value (const_tree valtype,
9265 const_tree func ATTRIBUTE_UNUSED,
9266 bool outgoing ATTRIBUTE_UNUSED)
9268 machine_mode valmode;
9270 if (AGGREGATE_TYPE_P (valtype)
9271 || TREE_CODE (valtype) == COMPLEX_TYPE
9272 || TREE_CODE (valtype) == VECTOR_TYPE)
9274 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9276 /* Handle aggregates that fit exactly in a word or double word. */
9277 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9278 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9280 if (TARGET_64BIT)
9282 /* Aggregates with a size less than or equal to 128 bits are
9283 returned in GR 28(-29). They are left justified. The pad
9284 bits are undefined. Larger aggregates are returned in
9285 memory. */
9286 rtx loc[2];
9287 int i, offset = 0;
9288 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9290 for (i = 0; i < ub; i++)
9292 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9293 gen_rtx_REG (DImode, 28 + i),
9294 GEN_INT (offset));
9295 offset += 8;
9298 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9300 else if (valsize > UNITS_PER_WORD)
9302 /* Aggregates 5 to 8 bytes in size are returned in general
9303 registers r28-r29 in the same manner as other non
9304 floating-point objects. The data is right-justified and
9305 zero-extended to 64 bits. This is opposite to the normal
9306 justification used on big endian targets and requires
9307 special treatment. */
9308 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9309 gen_rtx_REG (DImode, 28), const0_rtx);
9310 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9314 if ((INTEGRAL_TYPE_P (valtype)
9315 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9316 || POINTER_TYPE_P (valtype))
9317 valmode = word_mode;
9318 else
9319 valmode = TYPE_MODE (valtype);
9321 if (TREE_CODE (valtype) == REAL_TYPE
9322 && !AGGREGATE_TYPE_P (valtype)
9323 && TYPE_MODE (valtype) != TFmode
9324 && !TARGET_SOFT_FLOAT)
9325 return gen_rtx_REG (valmode, 32);
9327 return gen_rtx_REG (valmode, 28);
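/* Examples for the 32-bit ABI: an "int" is returned in %r28; a
   "double" is returned in %fr4 (register 32) unless -msoft-float; and
   a 6 byte struct is returned right-justified in %r28/%r29 via the
   single entry PARALLEL built above.  */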
9330 /* Implement the TARGET_LIBCALL_VALUE hook. */
9332 static rtx
9333 pa_libcall_value (machine_mode mode,
9334 const_rtx fun ATTRIBUTE_UNUSED)
9336 if (! TARGET_SOFT_FLOAT
9337 && (mode == SFmode || mode == DFmode))
9338 return gen_rtx_REG (mode, 32);
9339 else
9340 return gen_rtx_REG (mode, 28);
9343 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9345 static bool
9346 pa_function_value_regno_p (const unsigned int regno)
9348 if (regno == 28
9349 || (! TARGET_SOFT_FLOAT && regno == 32))
9350 return true;
9352 return false;
9355 /* Update the data in CUM to advance over an argument
9356 of mode MODE and data type TYPE.
9357 (TYPE is null for libcalls where that information may not be available.) */
9359 static void
9360 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9361 const_tree type, bool named ATTRIBUTE_UNUSED)
9363 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9364 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9366 cum->nargs_prototype--;
9367 cum->words += (arg_size
9368 + ((cum->words & 01)
9369 && type != NULL_TREE
9370 && arg_size > 1));
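/* Example of the padding term: after one "int" (cum->words == 1), a
   "double" advances cum->words by 3 -- one pad word to reach an even
   slot plus two data words.  */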
9373 /* Return the location of a parameter that is passed in a register or NULL
9374 if the parameter has any component that is passed in memory.
9376 This is new code and will be pushed into the net sources after
9377 further testing.
9379 ??? We might want to restructure this so that it looks more like other
9380 ports. */
9381 static rtx
9382 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9383 const_tree type, bool named ATTRIBUTE_UNUSED)
9385 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9386 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9387 int alignment = 0;
9388 int arg_size;
9389 int fpr_reg_base;
9390 int gpr_reg_base;
9391 rtx retval;
9393 if (mode == VOIDmode)
9394 return NULL_RTX;
9396 arg_size = FUNCTION_ARG_SIZE (mode, type);
9398 /* If this arg would be passed partially or totally on the stack, then
9399 this routine should return zero. pa_arg_partial_bytes will
9400 handle arguments which are split between regs and stack slots if
9401 the ABI mandates split arguments. */
9402 if (!TARGET_64BIT)
9404 /* The 32-bit ABI does not split arguments. */
9405 if (cum->words + arg_size > max_arg_words)
9406 return NULL_RTX;
9408 else
9410 if (arg_size > 1)
9411 alignment = cum->words & 1;
9412 if (cum->words + alignment >= max_arg_words)
9413 return NULL_RTX;
9416 /* The 32bit ABIs and the 64bit ABIs are rather different,
9417 particularly in their handling of FP registers. We might
9418 be able to cleverly share code between them, but I'm not
9419 going to bother in the hope that splitting them up results
9420 in code that is more easily understood. */
9422 if (TARGET_64BIT)
9424 /* Advance the base registers to their current locations.
9426 Remember, gprs grow towards smaller register numbers while
9427 fprs grow to higher register numbers. Also remember that
9428 although FP regs are 32-bit addressable, we pretend that
9429 the registers are 64 bits wide.
9430 gpr_reg_base = 26 - cum->words;
9431 fpr_reg_base = 32 + cum->words;
9433 /* Arguments wider than one word and small aggregates need special
9434 treatment. */
9435 if (arg_size > 1
9436 || mode == BLKmode
9437 || (type && (AGGREGATE_TYPE_P (type)
9438 || TREE_CODE (type) == COMPLEX_TYPE
9439 || TREE_CODE (type) == VECTOR_TYPE)))
9441 /* Double-extended precision (80-bit), quad-precision (128-bit)
9442 and aggregates including complex numbers are aligned on
9443 128-bit boundaries. The first eight 64-bit argument slots
9444 are associated one-to-one, with general registers r26
9445 through r19, and also with floating-point registers fr4
9446 through fr11. Arguments larger than one word are always
9447 passed in general registers.
9449 Using a PARALLEL with a word mode register results in left
9450 justified data on a big-endian target. */
9452 rtx loc[8];
9453 int i, offset = 0, ub = arg_size;
9455 /* Align the base register. */
9456 gpr_reg_base -= alignment;
9458 ub = MIN (ub, max_arg_words - cum->words - alignment);
9459 for (i = 0; i < ub; i++)
9461 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9462 gen_rtx_REG (DImode, gpr_reg_base),
9463 GEN_INT (offset));
9464 gpr_reg_base -= 1;
9465 offset += 8;
9468 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9471 else
9473 /* If the argument is larger than a word, then we know precisely
9474 which registers we must use. */
9475 if (arg_size > 1)
9477 if (cum->words)
9479 gpr_reg_base = 23;
9480 fpr_reg_base = 38;
9482 else
9484 gpr_reg_base = 25;
9485 fpr_reg_base = 34;
9488 /* Structures 5 to 8 bytes in size are passed in the general
9489 registers in the same manner as other non-floating-point
9490 objects. The data is right-justified and zero-extended
9491 to 64 bits. This is opposite to the normal justification
9492 used on big-endian targets and requires special treatment.
9493 We now define BLOCK_REG_PADDING to pad these objects.
9494 Aggregates, complex and vector types are passed in the same
9495 manner as structures. */
9496 if (mode == BLKmode
9497 || (type && (AGGREGATE_TYPE_P (type)
9498 || TREE_CODE (type) == COMPLEX_TYPE
9499 || TREE_CODE (type) == VECTOR_TYPE)))
9501 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9502 gen_rtx_REG (DImode, gpr_reg_base),
9503 const0_rtx);
9504 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9507 else
9509 /* We have a single word (32 bits). A simple computation
9510 will get us the register #s we need. */
9511 gpr_reg_base = 26 - cum->words;
9512 fpr_reg_base = 32 + 2 * cum->words;
9516 /* Determine if the argument needs to be passed in both general and
9517 floating point registers. */
9518 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9519 /* If we are doing soft-float with portable runtime, then there
9520 is no need to worry about FP regs. */
9521 && !TARGET_SOFT_FLOAT
9522 /* The parameter must be some kind of scalar float, else we just
9523 pass it in integer registers. */
9524 && GET_MODE_CLASS (mode) == MODE_FLOAT
9525 /* The target function must not have a prototype. */
9526 && cum->nargs_prototype <= 0
9527 /* libcalls do not need to pass items in both FP and general
9528 registers. */
9529 && type != NULL_TREE
9530 /* All this hair applies to "outgoing" args only. This includes
9531 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9532 && !cum->incoming)
9533 /* Also pass outgoing floating arguments in both registers in indirect
9534 calls with the 32-bit ABI and the HP assembler since there is no
9535 way to specify argument locations in static functions. */
9536 || (!TARGET_64BIT
9537 && !TARGET_GAS
9538 && !cum->incoming
9539 && cum->indirect
9540 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9542 retval
9543 = gen_rtx_PARALLEL
9544 (mode,
9545 gen_rtvec (2,
9546 gen_rtx_EXPR_LIST (VOIDmode,
9547 gen_rtx_REG (mode, fpr_reg_base),
9548 const0_rtx),
9549 gen_rtx_EXPR_LIST (VOIDmode,
9550 gen_rtx_REG (mode, gpr_reg_base),
9551 const0_rtx)));
9553 else
9555 /* See if we should pass this parameter in a general register. */
9556 if (TARGET_SOFT_FLOAT
9557 /* Indirect calls in the normal 32-bit ABI require all arguments
9558 to be passed in general registers. */
9559 || (!TARGET_PORTABLE_RUNTIME
9560 && !TARGET_64BIT
9561 && !TARGET_ELF32
9562 && cum->indirect)
9563 /* If the parameter is not a scalar floating-point parameter,
9564 then it belongs in GPRs. */
9565 || GET_MODE_CLASS (mode) != MODE_FLOAT
9566 /* Structure with single SFmode field belongs in GPR. */
9567 || (type && AGGREGATE_TYPE_P (type)))
9568 retval = gen_rtx_REG (mode, gpr_reg_base);
9569 else
9570 retval = gen_rtx_REG (mode, fpr_reg_base);
9572 return retval;
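/* Illustration of the 64-bit multiword case above (hypothetical
   argument): a 24-byte aggregate starting at cum->words == 0 yields

	(parallel [(expr_list (reg:DI 26) (const_int 0))
		   (expr_list (reg:DI 25) (const_int 8))
		   (expr_list (reg:DI 24) (const_int 16))])

   so each 64-bit slot lands left justified in successive GPRs.  */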
9575 /* Arguments larger than one word are double word aligned. */
9577 static unsigned int
9578 pa_function_arg_boundary (machine_mode mode, const_tree type)
9580 bool singleword = (type
9581 ? (integer_zerop (TYPE_SIZE (type))
9582 || !TREE_CONSTANT (TYPE_SIZE (type))
9583 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9584 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9586 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
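/* E.g., with the 32-bit ABI an SImode scalar gets PARM_BOUNDARY,
   while a DFmode scalar (8 bytes, wider than UNITS_PER_WORD) gets
   MAX_PARM_BOUNDARY.  */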
9589 /* If this arg would be passed totally in registers or totally on the stack,
9590 then this routine should return zero. */
9592 static int
9593 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9594 tree type, bool named ATTRIBUTE_UNUSED)
9596 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9597 unsigned int max_arg_words = 8;
9598 unsigned int offset = 0;
9600 if (!TARGET_64BIT)
9601 return 0;
9603 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9604 offset = 1;
9606 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9607 /* Arg fits fully into registers. */
9608 return 0;
9609 else if (cum->words + offset >= max_arg_words)
9610 /* Arg fully on the stack. */
9611 return 0;
9612 else
9613 /* Arg is split. */
9614 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
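/* Worked example (hypothetical, 64-bit ABI): with cum->words == 6
   and a four-word argument, offset is 0; 6 + 0 + 4 > 8 while
   6 + 0 < 8, so the argument is split and
   (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes go in registers, with
   the remainder on the stack.  */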
9618 /* A get_unnamed_section callback for switching to the text section.
9620 This function is only used with SOM. Because we don't support
9621 named subspaces, we can only create a new subspace or switch back
9622 to the default text subspace. */
9624 static void
9625 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9627 gcc_assert (TARGET_SOM);
9628 if (TARGET_GAS)
9630 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9632 /* We only want to emit a .nsubspa directive once at the
9633 start of the function. */
9634 cfun->machine->in_nsubspa = 1;
9636 /* Create a new subspace for the text. This provides
9637 better stub placement and one-only functions. */
9638 if (cfun->decl
9639 && DECL_ONE_ONLY (cfun->decl)
9640 && !DECL_WEAK (cfun->decl))
9642 output_section_asm_op ("\t.SPACE $TEXT$\n"
9643 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9644 "ACCESS=44,SORT=24,COMDAT");
9645 return;
9648 else
9650 /* There isn't a current function or the body of the current
9651 function has been completed. So, we are changing to the
9652 text section to output debugging information. Thus, we
9653 need to forget that we are in the text section so that
9654 varasm.c will call us when text_section is selected again. */
9655 gcc_assert (!cfun || !cfun->machine
9656 || cfun->machine->in_nsubspa == 2);
9657 in_section = NULL;
9659 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9660 return;
9662 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9665 /* A get_unnamed_section callback for switching to comdat data
9666 sections. This function is only used with SOM. */
9668 static void
9669 som_output_comdat_data_section_asm_op (const void *data)
9671 in_section = NULL;
9672 output_section_asm_op (data);
9675 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
9677 static void
9678 pa_som_asm_init_sections (void)
9680 text_section
9681 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9683 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9684 is not being generated. */
9685 som_readonly_data_section
9686 = get_unnamed_section (0, output_section_asm_op,
9687 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9689 /* When secondary definitions are not supported, SOM makes readonly
9690 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9691 the comdat flag. */
9692 som_one_only_readonly_data_section
9693 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9694 "\t.SPACE $TEXT$\n"
9695 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9696 "ACCESS=0x2c,SORT=16,COMDAT");
9699 /* When secondary definitions are not supported, SOM makes data one-only
9700 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9701 som_one_only_data_section
9702 = get_unnamed_section (SECTION_WRITE,
9703 som_output_comdat_data_section_asm_op,
9704 "\t.SPACE $PRIVATE$\n"
9705 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9706 "ACCESS=31,SORT=24,COMDAT");
9708 if (flag_tm)
9709 som_tm_clone_table_section
9710 = get_unnamed_section (0, output_section_asm_op,
9711 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9713 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9714 which reference data within the $TEXT$ space (for example constant
9715 strings in the $LIT$ subspace).
9717 The assemblers (GAS and HP as) both have problems handling
9718 the difference of two symbols, which is the other correct way to
9719 reference constant data during PIC code generation.
9721 So, there's no way to reference constant data which is in the
9722 $TEXT$ space during PIC generation. Instead place all constant
9723 data into the $PRIVATE$ subspace (this reduces sharing, but it
9724 works correctly). */
9725 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9727 /* We must not have a reference to an external symbol defined in a
9728 shared library in a readonly section, else the SOM linker will
9729 complain.
9731 So, we force exception information into the data section. */
9732 exception_section = data_section;
9735 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9737 static section *
9738 pa_som_tm_clone_table_section (void)
9740 return som_tm_clone_table_section;
9743 /* On hpux10, the linker will give an error if we have a reference
9744 in the read-only data section to a symbol defined in a shared
9745 library. Therefore, expressions that might require a reloc
9746 cannot be placed in the read-only data section. */
9748 static section *
9749 pa_select_section (tree exp, int reloc,
9750 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9752 if (TREE_CODE (exp) == VAR_DECL
9753 && TREE_READONLY (exp)
9754 && !TREE_THIS_VOLATILE (exp)
9755 && DECL_INITIAL (exp)
9756 && (DECL_INITIAL (exp) == error_mark_node
9757 || TREE_CONSTANT (DECL_INITIAL (exp)))
9758 && !reloc)
9760 if (TARGET_SOM
9761 && DECL_ONE_ONLY (exp)
9762 && !DECL_WEAK (exp))
9763 return som_one_only_readonly_data_section;
9764 else
9765 return readonly_data_section;
9767 else if (CONSTANT_CLASS_P (exp) && !reloc)
9768 return readonly_data_section;
9769 else if (TARGET_SOM
9770 && TREE_CODE (exp) == VAR_DECL
9771 && DECL_ONE_ONLY (exp)
9772 && !DECL_WEAK (exp))
9773 return som_one_only_data_section;
9774 else
9775 return data_section;
9778 /* Implement pa_reloc_rw_mask. */
9780 static int
9781 pa_reloc_rw_mask (void)
9783 /* We force (const (plus (symbol) (const_int))) to memory when the
9784 const_int doesn't fit in a 14-bit integer. The SOM linker can't
9785 handle this construct in read-only memory and we want to avoid
9786 this for ELF. So, we always force an RTX needing relocation to
9787 the data section. */
9788 return 3;
9791 static void
9792 pa_globalize_label (FILE *stream, const char *name)
9794 /* We only handle DATA objects here; functions are globalized in
9795 ASM_DECLARE_FUNCTION_NAME. */
9796 if (! FUNCTION_NAME_P (name))
9798 fputs ("\t.EXPORT ", stream);
9799 assemble_name (stream, name);
9800 fputs (",DATA\n", stream);
9804 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9806 static rtx
9807 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9808 int incoming ATTRIBUTE_UNUSED)
9810 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9813 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9815 bool
9816 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9818 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9819 PA64 ABI says that objects larger than 128 bits are returned in memory.
9820 Note, int_size_in_bytes can return -1 if the size of the object is
9821 variable or larger than the maximum value that can be expressed as
9822 a HOST_WIDE_INT. It can also return zero for an empty type. The
9823 simplest way to handle variable and empty types is to pass them in
9824 memory. This avoids problems in defining the boundaries of argument
9825 slots, allocating registers, etc. */
9826 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9827 || int_size_in_bytes (type) <= 0);
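/* E.g., a 12-byte structure is returned in memory with the SOM ABI
   (12 > 8) but in registers with the PA64 ABI (12 <= 16).  */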
9830 /* Structure to hold declaration and name of external symbols that are
9831 emitted by GCC. We generate a vector of these symbols and output them
9832 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9833 This avoids putting out names that are never really used. */
9835 typedef struct GTY(()) extern_symbol
9837 tree decl;
9838 const char *name;
9839 } extern_symbol;
9841 /* GC'd vector of extern_symbol entries. */
9844 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9846 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9847 /* Mark DECL (name NAME) as an external reference (assembler output
9848 file FILE). This saves the names to output at the end of the file
9849 if actually referenced. */
9851 void
9852 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9854 gcc_assert (file == asm_out_file);
9855 extern_symbol p = {decl, name};
9856 vec_safe_push (extern_symbols, p);
9859 /* Output text required at the end of an assembler file.
9860 This includes deferred plabels and .import directives for
9861 all external symbols that were actually referenced. */
9863 static void
9864 pa_hpux_file_end (void)
9866 unsigned int i;
9867 extern_symbol *p;
9869 if (!NO_DEFERRED_PROFILE_COUNTERS)
9870 output_deferred_profile_counters ();
9872 output_deferred_plabels ();
9874 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9876 tree decl = p->decl;
9878 if (!TREE_ASM_WRITTEN (decl)
9879 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9880 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9883 vec_free (extern_symbols);
9885 #endif
9887 /* Return true if a change from mode FROM to mode TO for a register
9888 in register class RCLASS is invalid. */
9890 bool
9891 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9892 enum reg_class rclass)
9894 if (from == to)
9895 return false;
9897 /* Reject changes to/from complex and vector modes. */
9898 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9899 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9900 return true;
9902 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9903 return false;
9905 /* There is no way to load QImode or HImode values directly from
9906 memory. SImode loads to the FP registers are not zero extended.
9907 On the 64-bit target, this conflicts with the definition of
9908 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9909 with different sizes in the floating-point registers. */
9910 if (MAYBE_FP_REG_CLASS_P (rclass))
9911 return true;
9913 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9914 in specific sets of registers. Thus, we cannot allow changing
9915 to a larger mode when it's larger than a word. */
9916 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9917 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9918 return true;
9920 return false;
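/* E.g., an SFmode to DFmode change (different sizes) is rejected for
   any class that may contain FP registers, while an SImode to DImode
   change in the general registers is rejected only when DImode is
   wider than a word, as on the 32-bit target.  */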
9923 /* Returns TRUE if it is a good idea to tie two pseudo registers
9924 when one has mode MODE1 and one has mode MODE2.
9925 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9926 for any hard reg, then this must be FALSE for correct output.
9928 We should return FALSE for QImode and HImode because these modes
9929 are not ok in the floating-point registers. However, this prevents
9930 tying these modes to SImode and DImode in the general registers.
9931 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9932 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9933 in the floating-point registers. */
9935 bool
9936 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9938 /* Don't tie modes in different classes. */
9939 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9940 return false;
9942 return true;
9946 /* Length in units of the trampoline instruction code. */
9948 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9951 /* Output assembler code for a block containing the constant parts
9952 of a trampoline, leaving space for the variable parts.
9954 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9955 and then branches to the specified routine.
9957 This code template is copied from the text segment to a stack
9958 location, patched by pa_trampoline_init to contain valid values,
9959 and then entered as a subroutine.
9961 It is best to keep this as small as possible to avoid having to
9962 flush multiple lines in the cache. */
9964 static void
9965 pa_asm_trampoline_template (FILE *f)
9967 if (!TARGET_64BIT)
9969 fputs ("\tldw 36(%r22),%r21\n", f);
9970 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9971 if (ASSEMBLER_DIALECT == 0)
9972 fputs ("\tdepi 0,31,2,%r21\n", f);
9973 else
9974 fputs ("\tdepwi 0,31,2,%r21\n", f);
9975 fputs ("\tldw 4(%r21),%r19\n", f);
9976 fputs ("\tldw 0(%r21),%r21\n", f);
9977 if (TARGET_PA_20)
9979 fputs ("\tbve (%r21)\n", f);
9980 fputs ("\tldw 40(%r22),%r29\n", f);
9981 fputs ("\t.word 0\n", f);
9982 fputs ("\t.word 0\n", f);
9984 else
9986 fputs ("\tldsid (%r21),%r1\n", f);
9987 fputs ("\tmtsp %r1,%sr0\n", f);
9988 fputs ("\tbe 0(%sr0,%r21)\n", f);
9989 fputs ("\tldw 40(%r22),%r29\n", f);
9991 fputs ("\t.word 0\n", f);
9992 fputs ("\t.word 0\n", f);
9993 fputs ("\t.word 0\n", f);
9994 fputs ("\t.word 0\n", f);
9996 else
9998 fputs ("\t.dword 0\n", f);
9999 fputs ("\t.dword 0\n", f);
10000 fputs ("\t.dword 0\n", f);
10001 fputs ("\t.dword 0\n", f);
10002 fputs ("\tmfia %r31\n", f);
10003 fputs ("\tldd 24(%r31),%r1\n", f);
10004 fputs ("\tldd 24(%r1),%r27\n", f);
10005 fputs ("\tldd 16(%r1),%r1\n", f);
10006 fputs ("\tbve (%r1)\n", f);
10007 fputs ("\tldd 32(%r31),%r31\n", f);
10008 fputs ("\t.dword 0 ; fptr\n", f);
10009 fputs ("\t.dword 0 ; static link\n", f);
10013 /* Emit RTL insns to initialize the variable parts of a trampoline.
10014 FNADDR is an RTX for the address of the function's pure code.
10015 CXT is an RTX for the static chain value for the function.
10017 Move the function address to the trampoline template at offset 36.
10018 Move the static chain value to the trampoline template at offset 40.
10019 Move the trampoline address to the trampoline template at offset 44.
10020 Move r19 to trampoline template at offset 48. The latter two
10021 words create a plabel for the indirect call to the trampoline.
10023 A similar sequence is used for the 64-bit port but the plabel is
10024 at the beginning of the trampoline.
10026 Finally, the cache entries for the trampoline code are flushed.
10027 This is necessary to ensure that the trampoline instruction sequence
10028 is written to memory prior to any attempts at prefetching the code
10029 sequence. */
10031 static void
10032 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10034 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10035 rtx start_addr = gen_reg_rtx (Pmode);
10036 rtx end_addr = gen_reg_rtx (Pmode);
10037 rtx line_length = gen_reg_rtx (Pmode);
10038 rtx r_tramp, tmp;
10040 emit_block_move (m_tramp, assemble_trampoline_template (),
10041 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10042 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10044 if (!TARGET_64BIT)
10046 tmp = adjust_address (m_tramp, Pmode, 36);
10047 emit_move_insn (tmp, fnaddr);
10048 tmp = adjust_address (m_tramp, Pmode, 40);
10049 emit_move_insn (tmp, chain_value);
10051 /* Create a fat pointer for the trampoline. */
10052 tmp = adjust_address (m_tramp, Pmode, 44);
10053 emit_move_insn (tmp, r_tramp);
10054 tmp = adjust_address (m_tramp, Pmode, 48);
10055 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10057 /* fdc and fic only use registers for the address to flush,
10058 they do not accept integer displacements. We align the
10059 start and end addresses to the beginning of their respective
10060 cache lines to minimize the number of lines flushed. */
10061 emit_insn (gen_andsi3 (start_addr, r_tramp,
10062 GEN_INT (-MIN_CACHELINE_SIZE)));
10063 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10064 TRAMPOLINE_CODE_SIZE-1));
10065 emit_insn (gen_andsi3 (end_addr, tmp,
10066 GEN_INT (-MIN_CACHELINE_SIZE)));
10067 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10068 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10069 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10070 gen_reg_rtx (Pmode),
10071 gen_reg_rtx (Pmode)));
10073 else
10075 tmp = adjust_address (m_tramp, Pmode, 56);
10076 emit_move_insn (tmp, fnaddr);
10077 tmp = adjust_address (m_tramp, Pmode, 64);
10078 emit_move_insn (tmp, chain_value);
10080 /* Create a fat pointer for the trampoline. */
10081 tmp = adjust_address (m_tramp, Pmode, 16);
10082 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10083 r_tramp, 32)));
10084 tmp = adjust_address (m_tramp, Pmode, 24);
10085 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10087 /* fdc and fic only use registers for the address to flush,
10088 they do not accept integer displacements. We align the
10089 start and end addresses to the beginning of their respective
10090 cache lines to minimize the number of lines flushed. */
10091 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10092 emit_insn (gen_anddi3 (start_addr, tmp,
10093 GEN_INT (-MIN_CACHELINE_SIZE)));
10094 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10095 TRAMPOLINE_CODE_SIZE - 1));
10096 emit_insn (gen_anddi3 (end_addr, tmp,
10097 GEN_INT (-MIN_CACHELINE_SIZE)));
10098 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10099 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10100 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10101 gen_reg_rtx (Pmode),
10102 gen_reg_rtx (Pmode)));
10105 #ifdef HAVE_ENABLE_EXECUTE_STACK
10106 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10107 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10108 #endif
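/* Illustration of the flush range above, assuming MIN_CACHELINE_SIZE
   is 32 and a 32-bit PA 2.0 trampoline (TRAMPOLINE_CODE_SIZE == 32)
   at address 0x7f0014: start_addr == 0x7f0014 & -32 == 0x7f0000 and
   end_addr == (0x7f0014 + 31) & -32 == 0x7f0020, so exactly the two
   cache lines holding the code are flushed.  */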
10111 /* Perform any machine-specific adjustment in the address of the trampoline.
10112 ADDR contains the address that was passed to pa_trampoline_init.
10113 Adjust the trampoline address to point to the plabel at offset 44. */
10115 static rtx
10116 pa_trampoline_adjust_address (rtx addr)
10118 if (!TARGET_64BIT)
10119 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10120 return addr;
10123 static rtx
10124 pa_delegitimize_address (rtx orig_x)
10126 rtx x = delegitimize_mem_from_attrs (orig_x);
10128 if (GET_CODE (x) == LO_SUM
10129 && GET_CODE (XEXP (x, 1)) == UNSPEC
10130 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10131 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10132 return x;
10135 static rtx
10136 pa_internal_arg_pointer (void)
10138 /* The argument pointer and the hard frame pointer are the same in
10139 the 32-bit runtime, so we don't need a copy. */
10140 if (TARGET_64BIT)
10141 return copy_to_reg (virtual_incoming_args_rtx);
10142 else
10143 return virtual_incoming_args_rtx;
10146 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10147 Frame pointer elimination is automatically handled. */
10149 static bool
10150 pa_can_eliminate (const int from, const int to)
10152 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10153 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10154 return false;
10156 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10157 ? ! frame_pointer_needed
10158 : true);
10161 /* Define the offset between two registers, FROM to be eliminated and its
10162 replacement TO, at the start of a routine. */
10163 HOST_WIDE_INT
10164 pa_initial_elimination_offset (int from, int to)
10166 HOST_WIDE_INT offset;
10168 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10169 && to == STACK_POINTER_REGNUM)
10170 offset = -pa_compute_frame_size (get_frame_size (), 0);
10171 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10172 offset = 0;
10173 else
10174 gcc_unreachable ();
10176 return offset;
10179 static void
10180 pa_conditional_register_usage (void)
10182 int i;
10184 if (!TARGET_64BIT && !TARGET_PA_11)
10186 for (i = 56; i <= FP_REG_LAST; i++)
10187 fixed_regs[i] = call_used_regs[i] = 1;
10188 for (i = 33; i < 56; i += 2)
10189 fixed_regs[i] = call_used_regs[i] = 1;
10191 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10193 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10194 fixed_regs[i] = call_used_regs[i] = 1;
10196 if (flag_pic)
10197 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10200 /* Target hook for c_mode_for_suffix. */
10202 static machine_mode
10203 pa_c_mode_for_suffix (char suffix)
10205 if (HPUX_LONG_DOUBLE_LIBRARY)
10207 if (suffix == 'q')
10208 return TFmode;
10211 return VOIDmode;
10214 /* Target hook for function_section. */
10216 static section *
10217 pa_function_section (tree decl, enum node_frequency freq,
10218 bool startup, bool exit)
10220 /* Put functions in text section if target doesn't have named sections. */
10221 if (!targetm_common.have_named_sections)
10222 return text_section;
10224 /* Force nested functions into the same section as the containing
10225 function. */
10226 if (decl
10227 && DECL_SECTION_NAME (decl) == NULL
10228 && DECL_CONTEXT (decl) != NULL_TREE
10229 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10230 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10231 return function_section (DECL_CONTEXT (decl));
10233 /* Otherwise, use the default function section. */
10234 return default_function_section (decl, freq, startup, exit);
10237 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10239 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10240 that need more than three instructions to load prior to reload. This
10241 limit is somewhat arbitrary. It takes three instructions to load a
10242 CONST_INT from memory but two are memory accesses. It may be better
10243 to increase the allowed range for CONST_INTS. We may also be able
10244 to handle CONST_DOUBLES. */
10246 static bool
10247 pa_legitimate_constant_p (machine_mode mode, rtx x)
10249 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10250 return false;
10252 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10253 return false;
10255 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10256 legitimate constants. The other variants can't be handled by
10257 the move patterns after reload starts. */
10258 if (tls_referenced_p (x))
10259 return false;
10261 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10262 return false;
10264 if (TARGET_64BIT
10265 && HOST_BITS_PER_WIDE_INT > 32
10266 && GET_CODE (x) == CONST_INT
10267 && !reload_in_progress
10268 && !reload_completed
10269 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10270 && !pa_cint_ok_for_move (UINTVAL (x)))
10271 return false;
10273 if (function_label_operand (x, mode))
10274 return false;
10276 return true;
10279 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10281 static unsigned int
10282 pa_section_type_flags (tree decl, const char *name, int reloc)
10284 unsigned int flags;
10286 flags = default_section_type_flags (decl, name, reloc);
10288 /* Function labels are placed in the constant pool. This can
10289 cause a section conflict if decls are put in ".data.rel.ro"
10290 or ".data.rel.ro.local" using the __attribute__ construct. */
10291 if (strcmp (name, ".data.rel.ro") == 0
10292 || strcmp (name, ".data.rel.ro.local") == 0)
10293 flags |= SECTION_WRITE | SECTION_RELRO;
10295 return flags;
10298 /* pa_legitimate_address_p recognizes an RTL expression that is a
10299 valid memory address for an instruction. The MODE argument is the
10300 machine mode for the MEM expression that wants to use this address.
10302 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10303 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10304 available with floating point loads and stores, and integer loads.
10305 We get better code by allowing indexed addresses in the initial
10306 RTL generation.
10308 The acceptance of indexed addresses as legitimate implies that we
10309 must provide patterns for doing indexed integer stores, or the move
10310 expanders must force the address of an indexed store to a register.
10311 We have adopted the latter approach.
10313 Another function of pa_legitimate_address_p is to ensure that
10314 the base register is a valid pointer for indexed instructions.
10315 On targets that have non-equivalent space registers, we have to
10316 know at the time of assembler output which register in a REG+REG
10317 pair is the base register. The REG_POINTER flag is sometimes lost
10318 in reload and the following passes, so it can't be relied on during
10319 code generation. Thus, we either have to canonicalize the order
10320 of the registers in REG+REG indexed addresses, or treat REG+REG
10321 addresses separately and provide patterns for both permutations.
10323 The latter approach requires several hundred additional lines of
10324 code in pa.md. The downside to canonicalizing is that a PLUS
10325 in the wrong order can't combine to form a scaled indexed
10326 memory operand. As we won't need to canonicalize the operands if
10327 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10329 We initially break out scaled indexed addresses in canonical order
10330 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10331 scaled indexed addresses during RTL generation. However, fold_rtx
10332 has its own opinion on how the operands of a PLUS should be ordered.
10333 If one of the operands is equivalent to a constant, it will make
10334 that operand the second operand. As the base register is likely to
10335 be equivalent to a SYMBOL_REF, we have made it the second operand.
10337 pa_legitimate_address_p accepts REG+REG as legitimate when the
10338 operands are in the order INDEX+BASE on targets with non-equivalent
10339 space registers, and in any order on targets with equivalent space
10340 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10342 We treat a SYMBOL_REF as legitimate if it is part of the current
10343 function's constant-pool, because such addresses can actually be
10344 output as REG+SMALLINT. */
10346 static bool
10347 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10349 if ((REG_P (x)
10350 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10351 : REG_OK_FOR_BASE_P (x)))
10352 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10353 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10354 && REG_P (XEXP (x, 0))
10355 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10356 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10357 return true;
10359 if (GET_CODE (x) == PLUS)
10361 rtx base, index;
10363 /* For REG+REG, the base register should be in XEXP (x, 1),
10364 so check it first. */
10365 if (REG_P (XEXP (x, 1))
10366 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10367 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10368 base = XEXP (x, 1), index = XEXP (x, 0);
10369 else if (REG_P (XEXP (x, 0))
10370 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10371 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10372 base = XEXP (x, 0), index = XEXP (x, 1);
10373 else
10374 return false;
10376 if (GET_CODE (index) == CONST_INT)
10378 if (INT_5_BITS (index))
10379 return true;
10381 /* When INT14_OK_STRICT is false, a secondary reload is needed
10382 to adjust the displacement of SImode and DImode floating point
10383 instructions but this may fail when the register also needs
10384 reloading. So, we return false when STRICT is true. We
10385 also reject long displacements for float mode addresses since
10386 the majority of accesses will use floating point instructions
10387 that don't support 14-bit offsets. */
10388 if (!INT14_OK_STRICT
10389 && (strict || !(reload_in_progress || reload_completed))
10390 && mode != QImode
10391 && mode != HImode)
10392 return false;
10394 return base14_operand (index, mode);
10397 if (!TARGET_DISABLE_INDEXING
10398 /* Only accept the "canonical" INDEX+BASE operand order
10399 on targets with non-equivalent space registers. */
10400 && (TARGET_NO_SPACE_REGS
10401 ? REG_P (index)
10402 : (base == XEXP (x, 1) && REG_P (index)
10403 && (reload_completed
10404 || (reload_in_progress && HARD_REGISTER_P (base))
10405 || REG_POINTER (base))
10406 && (reload_completed
10407 || (reload_in_progress && HARD_REGISTER_P (index))
10408 || !REG_POINTER (index))))
10409 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10410 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10411 : REG_OK_FOR_INDEX_P (index))
10412 && borx_reg_operand (base, Pmode)
10413 && borx_reg_operand (index, Pmode))
10414 return true;
10416 if (!TARGET_DISABLE_INDEXING
10417 && GET_CODE (index) == MULT
10418 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10419 && REG_P (XEXP (index, 0))
10420 && GET_MODE (XEXP (index, 0)) == Pmode
10421 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10422 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10423 && GET_CODE (XEXP (index, 1)) == CONST_INT
10424 && INTVAL (XEXP (index, 1))
10425 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10426 && borx_reg_operand (base, Pmode))
10427 return true;
10429 return false;
10432 if (GET_CODE (x) == LO_SUM)
10434 rtx y = XEXP (x, 0);
10436 if (GET_CODE (y) == SUBREG)
10437 y = SUBREG_REG (y);
10439 if (REG_P (y)
10440 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10441 : REG_OK_FOR_BASE_P (y)))
10443 /* Needed for -fPIC */
10444 if (mode == Pmode
10445 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10446 return true;
10448 if (!INT14_OK_STRICT
10449 && (strict || !(reload_in_progress || reload_completed))
10450 && mode != QImode
10451 && mode != HImode)
10452 return false;
10454 if (CONSTANT_P (XEXP (x, 1)))
10455 return true;
10457 return false;
10460 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10461 return true;
10463 return false;
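/* Illustrative address forms accepted by the function above:

	(reg)					plain base register
	(plus (reg) (const_int 12))		REG+SMALLINT
	(plus (mult (reg) (const_int 4)) (reg))	scaled index, SImode
	(lo_sum (reg) (symbol_ref))		constant second operand

   A REG+REG form is accepted only in INDEX+BASE order on targets
   with non-equivalent space registers, as described above.  */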
10466 /* Look for machine dependent ways to make the invalid address AD a
10467 valid address.
10469 For the PA, transform:
10471 memory(X + <large int>)
10473 into:
10475 if ((<large int> & mask) >= (mask + 1) / 2)
10476 Y = (<large int> & ~mask) + mask + 1 Round up.
10477 else
10478 Y = (<large int> & ~mask) Round down.
10479 Z = X + Y
10480 memory (Z + (<large int> - Y));
10482 This makes reload inheritance and reload_cse work better since Z
10483 can be reused.
10485 There may be more opportunities to improve code with this hook. */
10487 rtx
10488 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10489 int opnum, int type,
10490 int ind_levels ATTRIBUTE_UNUSED)
10492 long offset, newoffset, mask;
10493 rtx new_rtx, temp = NULL_RTX;
10495 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10496 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10498 if (optimize && GET_CODE (ad) == PLUS)
10499 temp = simplify_binary_operation (PLUS, Pmode,
10500 XEXP (ad, 0), XEXP (ad, 1));
10502 new_rtx = temp ? temp : ad;
10504 if (optimize
10505 && GET_CODE (new_rtx) == PLUS
10506 && GET_CODE (XEXP (new_rtx, 0)) == REG
10507 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10509 offset = INTVAL (XEXP ((new_rtx), 1));
10511 /* Choose rounding direction. Round up if we are >= halfway. */
10512 if ((offset & mask) >= ((mask + 1) / 2))
10513 newoffset = (offset & ~mask) + mask + 1;
10514 else
10515 newoffset = offset & ~mask;
10517 /* Ensure that long displacements are aligned. */
10518 if (mask == 0x3fff
10519 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10520 || (TARGET_64BIT && (mode) == DImode)))
10521 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10523 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10525 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10526 GEN_INT (newoffset));
10527 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10528 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10529 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10530 opnum, (enum reload_type) type);
10531 return ad;
10535 return NULL_RTX;
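/* Worked example, assuming a floating-point mode and the 5-bit mask
   (0x1f): for memory (X + 0x1234), 0x1234 & 0x1f == 0x14 >= 0x10,
   so we round up to newoffset == (0x1234 & ~0x1f) + 0x20 == 0x1240.
   VAL_14_BITS_P (0x1240) holds, so Z == X + 0x1240 is reloaded and
   the residual displacement 0x1234 - 0x1240 == -0xc fits the short
   FP load/store format, giving memory (Z + -0xc).  */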
10538 /* Output address vector. */
10540 void
10541 pa_output_addr_vec (rtx lab, rtx body)
10543 int idx, vlen = XVECLEN (body, 0);
10545 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10546 if (TARGET_GAS)
10547 fputs ("\t.begin_brtab\n", asm_out_file);
10548 for (idx = 0; idx < vlen; idx++)
10550 ASM_OUTPUT_ADDR_VEC_ELT
10551 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10553 if (TARGET_GAS)
10554 fputs ("\t.end_brtab\n", asm_out_file);
10557 /* Output address difference vector. */
10559 void
10560 pa_output_addr_diff_vec (rtx lab, rtx body)
10562 rtx base = XEXP (XEXP (body, 0), 0);
10563 int idx, vlen = XVECLEN (body, 1);
10565 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10566 if (TARGET_GAS)
10567 fputs ("\t.begin_brtab\n", asm_out_file);
10568 for (idx = 0; idx < vlen; idx++)
10570 ASM_OUTPUT_ADDR_DIFF_ELT
10571 (asm_out_file,
10572 body,
10573 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10574 CODE_LABEL_NUMBER (base));
10576 if (TARGET_GAS)
10577 fputs ("\t.end_brtab\n", asm_out_file);
10580 /* This is a helper function for the other atomic operations. This function
10581 emits a loop that contains SEQ that iterates until a compare-and-swap
10582 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10583 a set of instructions that takes a value from OLD_REG as an input and
10584 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10585 set to the current contents of MEM. After SEQ, a compare-and-swap will
10586 attempt to update MEM with NEW_REG. The function returns true when the
10587 loop was generated successfully. */
10589 static bool
10590 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10592 machine_mode mode = GET_MODE (mem);
10593 rtx_code_label *label;
10594 rtx cmp_reg, success, oldval;
10596 /* The loop we want to generate looks like
10598 cmp_reg = mem;
10599 label:
10600 old_reg = cmp_reg;
10601 seq;
10602 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10603 if (success)
10604 goto label;
10606 Note that we only do the plain load from memory once. Subsequent
10607 iterations use the value loaded by the compare-and-swap pattern. */
10609 label = gen_label_rtx ();
10610 cmp_reg = gen_reg_rtx (mode);
10612 emit_move_insn (cmp_reg, mem);
10613 emit_label (label);
10614 emit_move_insn (old_reg, cmp_reg);
10615 if (seq)
10616 emit_insn (seq);
10618 success = NULL_RTX;
10619 oldval = cmp_reg;
10620 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10621 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10622 MEMMODEL_RELAXED))
10623 return false;
10625 if (oldval != cmp_reg)
10626 emit_move_insn (cmp_reg, oldval);
10628 /* Mark this jump predicted not taken. */
10629 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10630 GET_MODE (success), 1, label, 0);
10631 return true;
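/* A hypothetical in-file caller could use the helper above to expand
   an atomic OR when no direct pattern exists; a minimal sketch:

	rtx old_reg = gen_reg_rtx (mode);
	rtx new_reg = gen_reg_rtx (mode);
	start_sequence ();
	expand_simple_binop (mode, IOR, old_reg, val, new_reg,
			     1, OPTAB_DIRECT);
	rtx seq = get_insns ();
	end_sequence ();
	pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   OLD_REG receives the current memory contents on each iteration and
   SEQ recomputes NEW_REG before the compare-and-swap retries.  */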
10634 /* This function tries to implement an atomic exchange operation using a
10635 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10636 *MEM are returned, using TARGET if possible. No memory model is required
10637 since a compare_and_swap loop is seq-cst. */
10639 rtx
10640 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10642 machine_mode mode = GET_MODE (mem);
10644 if (can_compare_and_swap_p (mode, true))
10646 if (!target || !register_operand (target, mode))
10647 target = gen_reg_rtx (mode);
10648 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10649 return target;
10652 return NULL_RTX;
10655 #include "gt-pa.h"